pkg: fix wrong package name, closes FS#148, reported from Andrew Pogrebennyk
[sip-router] / re.c
1 /* 
2  * $Id$
3  *
4  * regexp and regexp substitutions implementations
5  * 
6  * Copyright (C) 2001-2003 FhG Fokus
7  *
8  * This file is part of ser, a free SIP server.
9  *
10  * ser is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version
14  *
15  * For a license to use the ser software under conditions
16  * other than those described here, or to purchase support for this
17  * software, please contact iptel.org by e-mail at the following addresses:
18  *    info@iptel.org
19  *
20  * ser is distributed in the hope that it will be useful,
21  * but WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  * GNU General Public License for more details.
24  *
25  * You should have received a copy of the GNU General Public License 
26  * along with this program; if not, write to the Free Software 
27  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28  *
29  *
30  * History:
31  * --------
32  *   2003-08-04  created by andrei
33  *   2004-11-12  minor api extension, added *count (andrei)
34  */
35
36 /*!
37  * \file
38  * \brief SIP-router core ::  regexp and regexp substitutions implementations
39  * \ingroup core
40  * Module: \ref core
41  */
42
43
44 #include "dprint.h"
45 #include "mem/mem.h"
46 #include "re.h"
47
48 #include <string.h>
49
/* capacity of the replace_with token array used while parsing a replacement */
#define MAX_REPLACE_WITH	100
/* size (in bytes) of the scratch buffer used to assemble a replacement */
#define REPLACE_BUFFER_SIZE	1024
52
53 void subst_expr_free(struct subst_expr* se)
54 {
55         if (se->replacement.s) pkg_free(se->replacement.s);
56         if (se->re) { regfree(se->re); pkg_free(se->re); };
57         pkg_free(se);
58 }
59
60
61
62 /* frees the entire list, head (l) too */
63 void replace_lst_free(struct replace_lst* l)
64 {
65         struct replace_lst* t;
66         
67         while (l){
68                 t=l;
69                 l=l->next;
70                 if (t->rpl.s) pkg_free(t->rpl.s);
71                 pkg_free(t);
72         }
73 }
74
75 int parse_repl(struct replace_with * rw, char ** begin, 
76                                 char * end, int *max_token_nb, int with_sep)
77 {
78
79         char* p0;
80         char * repl;
81         str s;
82         int token_nb;
83         int escape;
84         int max_pmatch;
85         char *p, c;
86
87         /* parse replacement */
88         p = *begin;
89         c = *p;
90         if(with_sep)
91                 p++;
92         repl= p;
93         token_nb=0;
94         max_pmatch=0;
95         escape=0;
96         for(;p<end; p++){
97                 if (escape){
98                         escape=0;
99                         switch (*p){
100                                 /* special char escapes */
101                                 case '\\':
102                                         rw[token_nb].size=2;
103                                         rw[token_nb].offset=(p-1)-repl;
104                                         rw[token_nb].type=REPLACE_CHAR;
105                                         rw[token_nb].u.c='\\';
106                                         break;
107                                 case 'n':
108                                         rw[token_nb].size=2;
109                                         rw[token_nb].offset=(p-1)-repl;
110                                         rw[token_nb].type=REPLACE_CHAR;
111                                         rw[token_nb].u.c='\n';
112                                         break;
113                                 case 'r':
114                                         rw[token_nb].size=2;
115                                         rw[token_nb].offset=(p-1)-repl;
116                                         rw[token_nb].type=REPLACE_CHAR;
117                                         rw[token_nb].u.c='\r';
118                                         break;
119                                 case 't':
120                                         rw[token_nb].size=2;
121                                         rw[token_nb].offset=(p-1)-repl;
122                                         rw[token_nb].type=REPLACE_CHAR;
123                                         rw[token_nb].u.c='\t';
124                                         break;
125                                 case PV_MARKER:
126                                         rw[token_nb].size=2;
127                                         rw[token_nb].offset=(p-1)-repl;
128                                         rw[token_nb].type=REPLACE_CHAR;
129                                         rw[token_nb].u.c=PV_MARKER;
130                                         break;
131                                 /* special sip msg parts escapes */
132                                 case 'u':
133                                         rw[token_nb].size=2;
134                                         rw[token_nb].offset=(p-1)-repl;
135                                         rw[token_nb].type=REPLACE_URI;
136                                         break;
137                                 /* re matches */
138                                 case '0': /* allow 0, too, reference to the whole match */
139                                 case '1':
140                                 case '2':
141                                 case '3':
142                                 case '4':
143                                 case '5':
144                                 case '6':
145                                 case '7':
146                                 case '8':
147                                 case '9':
148                                         rw[token_nb].size=2;
149                                         rw[token_nb].offset=(p-1)-repl;
150                                         rw[token_nb].type=REPLACE_NMATCH;
151                                         rw[token_nb].u.nmatch=(*p)-'0';
152                                                                 /* 0 is the whole matched str*/
153                                         if (max_pmatch<rw[token_nb].u.nmatch) 
154                                                 max_pmatch=rw[token_nb].u.nmatch;
155                                         break;
156                                 default: /* just print current char */
157                                         if (*p!=c){
158                                                 WARN("subst_parser:\\%c unknown escape in %s\n", *p, *begin);
159                                         }
160                                         rw[token_nb].size=2;
161                                         rw[token_nb].offset=(p-1)-repl;
162                                         rw[token_nb].type=REPLACE_CHAR;
163                                         rw[token_nb].u.c=*p;
164                                         break;
165                         }
166
167                         token_nb++;
168
169                         if (token_nb>=MAX_REPLACE_WITH){
170                                 ERR("subst_parser: too many escapes in the replace part %s\n", *begin);
171                                 goto error;
172                         }
173                 }else if (*p=='\\') {
174                         escape=1;
175                 }else if (*p==PV_MARKER) {
176                         s.s = p;
177                         s.len = end - s.s;
178                         p0 = pv_parse_spec(&s, &rw[token_nb].u.spec);
179                         if(p0==NULL)
180                         {
181                                 ERR("subst_parser: bad specifier in replace part %s\n", *begin);
182                                 goto error;
183                         }
184                         rw[token_nb].size=p0-p;
185                         rw[token_nb].offset=p-repl;
186                         rw[token_nb].type=REPLACE_SPEC;
187                         token_nb++;
188                         p=p0-1;
189                 }else  if (*p==c && with_sep){
190                                 goto found_repl;
191                 }
192         }
193         if(with_sep){
194                 ERR("subst_parser: missing separator: %s\n", *begin);
195                 goto error;
196         }
197
198 found_repl:
199
200         *max_token_nb = max_pmatch;
201         *begin = p;
202         return token_nb;
203
204 error:
205         return -1;
206 }
207
208
209 /* parse a /regular expression/replacement/flags into a subst_expr structure */
210 struct subst_expr* subst_parser(str* subst)
211 {
212         char c;
213         char* end;
214         char* p;
215         char* re;
216         char* re_end;
217         char* repl;
218         char* repl_end;
219         struct replace_with rw[MAX_REPLACE_WITH];
220         int rw_no;
221         int cflags; /* regcomp flags */
222         int replace_all;
223         struct subst_expr* se;
224         regex_t* regex;
225         int max_pmatch;
226         int r;
227         
228         /* init */
229         se=0;
230         regex=0;
231         cflags=REG_EXTENDED  | REG_NEWLINE; /* don't match newline */
232         replace_all=0;
233         if (subst->len<3){
234                 LOG(L_ERR, "ERROR: subst_parser: expression is too short: %.*s\n",
235                                 subst->len, subst->s);
236                 goto error;
237         }
238         
239         p=subst->s;
240         c=*p;
241         if (c=='\\'){
242                 LOG(L_ERR, "ERROR: subst_parser: invalid separator char <%c>"
243                                 " in %.*s\n", c, subst->len, subst->s);
244                 goto error;
245         }
246         p++;
247         end=subst->s+subst->len;
248         /* find re */
249         re=p;
250         for (;p<end;p++){
251                 /* if unescaped sep. char */
252                 if ((*p==c) && (*(p-1)!='\\')) goto found_re;
253         }
254         LOG(L_ERR, "ERROR: subst_parser: no separator found: %.*s\n", subst->len, 
255                         subst->s);
256         goto error;
257 found_re:
258         re_end=p;
259         if (end < (p + 2)) {
260                 ERR("subst_parser: String too short\n");
261                 goto error;
262         }
263         repl=p+1;
264         if ((rw_no = parse_repl(rw, &p, end, &max_pmatch, WITH_SEP)) < 0)
265                 goto error;
266         repl_end = p;
267         p++;
268         
269         /* parse flags */
270         for(;p<end; p++){
271                 switch(*p){
272                         case 'i':
273                                 cflags|=REG_ICASE;
274                                 break;
275                         case 's':
276                                 cflags&=(~REG_NEWLINE);
277                                 break;
278                         case 'g':
279                                 replace_all=1;
280                                 break;
281                         default:
282                                 LOG(L_ERR, "ERROR: subst_parser: unknown flag %c in %.*s\n",
283                                                 *p, subst->len, subst->s);
284                                 goto error;
285                 }
286         }
287
288         /* compile the re */
289         if ((regex=pkg_malloc(sizeof(regex_t)))==0){
290                 LOG(L_ERR, "ERROR: subst_parser: out of memory (re)\n");
291                 goto error;
292         }
293         c=*re_end; /* regcomp expects null terminated strings -- save */
294         *re_end=0;
295         if (regcomp(regex, re, cflags)!=0){
296                 pkg_free(regex);
297                 regex=0;
298                 *re_end=c; /* restore */
299                 LOG(L_ERR, "ERROR: subst_parser: bad regular expression %.*s in "
300                                 "%.*s\n", (int)(re_end-re), re, subst->len, subst->s);
301                 goto error;
302         }
303         *re_end=c; /* restore */
304         /* construct the subst_expr structure */
305         se=pkg_malloc(sizeof(struct subst_expr)+
306                                         ((rw_no)?(rw_no-1)*sizeof(struct replace_with):0));
307                 /* 1 replace_with structure is  already included in subst_expr */
308         if (se==0){
309                 LOG(L_ERR, "ERROR: subst_parser: out of memory (subst_expr)\n");
310                 goto error;
311         }
312         memset((void*)se, 0, sizeof(struct subst_expr));
313         se->replacement.len=repl_end-repl;
314         if (se->replacement.len > 0) {
315                 if ((se->replacement.s=pkg_malloc(se->replacement.len))==0){
316                         LOG(L_ERR, "ERROR: subst_parser: out of memory (replacement)\n");
317                         goto error;
318                 }
319                 /* start copying */
320                 memcpy(se->replacement.s, repl, se->replacement.len);
321         } else {
322                 se->replacement.s = NULL;
323         }
324         se->re=regex;
325         se->replace_all=replace_all;
326         se->n_escapes=rw_no;
327         se->max_pmatch=max_pmatch;
328         for (r=0; r<rw_no; r++) se->replace[r]=rw[r];
329         DBG("subst_parser: ok, se is %p\n", se);
330         return se;
331         
332 error:
333         if (se) { subst_expr_free(se); regex=0; }
334         if (regex) { regfree (regex); pkg_free(regex); }
335         return 0;
336 }
337
338 /* rpl.s will be alloc'ed with the proper size & rpl.len set
339  * returns 0 on success, <0 on error*/
340 static int replace_build(const char* match, int nmatch, regmatch_t* pmatch,
341                                         struct subst_expr* se, struct sip_msg* msg, str* rpl)
342 {
343         int r;
344         str* uri;
345         pv_value_t sv;
346         char* p;
347         char* dest;
348         char* end;
349         int size;
350         static char rbuf[REPLACE_BUFFER_SIZE];
351
352 #define RBUF_APPEND(dst, src, size) \
353         if ((dst) - rbuf + (size) >= REPLACE_BUFFER_SIZE - 1) { \
354                 ERR("replace_build: Buffer too small\n");                       \
355                 goto error;                                                                                     \
356         }                                                                                                               \
357         memcpy((dst), (src), (size));                                                   \
358         (dst) += (size);
359
360         p=se->replacement.s;
361         end=p+se->replacement.len;
362         dest=rbuf;
363         
364         for (r=0; r<se->n_escapes; r++){
365                 /* copy the unescaped parts */
366                 size=se->replacement.s+se->replace[r].offset-p;
367                 RBUF_APPEND(dest, p, size);
368                 p+=size+se->replace[r].size;
369                 switch(se->replace[r].type){
370                         case REPLACE_NMATCH:
371                                 if ((se->replace[r].u.nmatch<nmatch)&&(
372                                                 pmatch[se->replace[r].u.nmatch].rm_so!=-1)){
373                                                 /* do the replace */
374                                                 size=pmatch[se->replace[r].u.nmatch].rm_eo-
375                                                                 pmatch[se->replace[r].u.nmatch].rm_so;
376                                                 RBUF_APPEND(dest, 
377                                                                         match+pmatch[se->replace[r].u.nmatch].rm_so,
378                                                                         size);
379                                 };
380                                 break;
381                         case REPLACE_CHAR:
382                                 RBUF_APPEND(dest, &se->replace[r].u.c, 1);
383                                 break;
384                         case REPLACE_URI:
385                                 if (msg->first_line.type!=SIP_REQUEST){
386                                         LOG(L_CRIT, "BUG: replace_build: uri substitution on"
387                                                                 " a reply\n");
388                                         break; /* ignore, we can continue */
389                                 }
390                                 uri= (msg->new_uri.s)?(&msg->new_uri):
391                                         (&msg->first_line.u.request.uri);
392                                 RBUF_APPEND(dest, uri->s, uri->len);
393                                 break;
394                         case REPLACE_SPEC:
395                                 if(pv_get_spec_value(msg, &se->replace[r].u.spec, &sv)!=0) {
396                                         ERR("replace_build: item substitution returned error\n");
397                                         break; /* ignore, we can continue */
398                                 }
399                                 RBUF_APPEND(dest, sv.rs.s, sv.rs.len);
400                                 break;
401                         default:
402                                 LOG(L_CRIT, "BUG: replace_build: unknown type %d\n", 
403                                                 se->replace[r].type);
404                                 /* ignore it */
405                 }
406         }
407         RBUF_APPEND(dest, p, end-p);
408         rpl->len = dest - rbuf;
409         if ((rpl->s = pkg_malloc(rpl->len)) == NULL) {
410                 ERR("replace_build: Out of pkg memory\n");
411                 goto error;
412         }
413         memcpy(rpl->s, rbuf, rpl->len);
414         return 0;
415 error:
416         return -1;
417 }
418
419
420
421 /* WARNING: input must be 0 terminated! */
422 /* returns: 0 if no match or error, or subst result; if count!=0
423  *           it will be set to 0 (no match), the number of matches
424  *           or -1 (error).
425  */
426 struct replace_lst* subst_run(struct subst_expr* se, const char* input,
427                                                                 struct sip_msg* msg, int* count)
428 {
429         struct replace_lst *head;
430         struct replace_lst **crt;
431         const char *p;
432         int r;
433         regmatch_t* pmatch;
434         int nmatch;
435         int eflags;
436         int cnt;
437         
438         
439         /* init */
440         head=0;
441         cnt=0;
442         crt=&head;
443         p=input;
444         nmatch=se->max_pmatch+1;
445         /* no of () referenced + 1 for the whole string: pmatch[0] */
446         pmatch=pkg_malloc(nmatch*sizeof(regmatch_t));
447         if (pmatch==0){
448                 LOG(L_ERR, "ERROR: subst_run_ out of mem. (pmatch)\n");
449                 goto error;
450         }
451         eflags=0;
452         do{
453                 r=regexec(se->re, p, nmatch, pmatch, eflags);
454                 DBG("subst_run: running. r=%d\n", r);
455                 /* subst */
456                 if (r==0){ /* != REG_NOMATCH */
457                         if (pmatch[0].rm_so==-1) {
458                                 ERR("subst_run: Unknown offset?\n");
459                                 goto error;
460                         }
461                         if (pmatch[0].rm_so==pmatch[0].rm_eo) {
462                                 ERR("subst_run: Matched string is empty, invalid regexp?\n");
463                                 goto error;
464                         }
465                         *crt=pkg_malloc(sizeof(struct replace_lst));
466                         if (*crt==0){
467                                 LOG(L_ERR, "ERROR: subst_run: out of mem (crt)\n");
468                                 goto error;
469                         }
470                         memset(*crt, 0, sizeof(struct replace_lst));
471                         (*crt)->offset=pmatch[0].rm_so+(int)(p-input);
472                         (*crt)->size=pmatch[0].rm_eo-pmatch[0].rm_so;
473                         DBG("subst_run: matched (%d, %d): [%.*s]\n",
474                                         (*crt)->offset, (*crt)->size, 
475                                         (*crt)->size, input+(*crt)->offset);
476                         /* create subst. string */
477                         /* construct the string from replace[] */
478                         if (replace_build(p, nmatch, pmatch, se, msg, &((*crt)->rpl))<0){
479                                 goto error;
480                         }
481                         crt=&((*crt)->next);
482                         p+=pmatch[0].rm_eo;
483                         if (*(p-1) == '\n' || *(p-1) == '\r') eflags&=~REG_NOTBOL;
484                         else eflags|=REG_NOTBOL;
485                         cnt++;
486                 }
487         }while((r==0) && se->replace_all);
488         pkg_free(pmatch);
489         if (count)*count=cnt;
490         return head;
491 error:
492         if (head) replace_lst_free(head);
493         if (pmatch) pkg_free(pmatch);
494         if (count) *count=-1;
495         return 0;
496 }
497
498
499
500 /* returns the substitution result in a str, input must be 0 term
501  *  0 on no match or malloc error
502  *  if count is non zero it will be set to the number of matches, or -1
503  *   if error 
504  */ 
505 str* subst_str(const char *input, struct sip_msg* msg, struct subst_expr* se,
506                                 int* count)
507 {
508         str* res;
509         struct replace_lst *lst;
510         struct replace_lst* l;
511         int len;
512         int size;
513         const char* p;
514         char* dest;
515         const char* end;
516         
517         
518         /* compute the len */
519         len=strlen(input);
520         end=input+len;
521         lst=subst_run(se, input, msg, count);
522         if (lst==0){
523                 DBG("subst_str: no match\n");
524                 return 0;
525         }
526         for (l=lst; l; l=l->next)
527                 len+=(int)(l->rpl.len)-l->size;
528         res=pkg_malloc(sizeof(str));
529         if (res==0){
530                 LOG(L_ERR, "ERROR: subst_str: mem. allocation error\n");
531                 goto error;
532         }
533         res->s=pkg_malloc(len+1); /* space for null termination */
534         if (res->s==0){
535                 LOG(L_ERR, "ERROR: subst_str: mem. allocation error (res->s)\n");
536                 goto error;
537         }
538         res->s[len]=0;
539         res->len=len;
540         
541         /* replace */
542         dest=res->s;
543         p=input;
544         for(l=lst; l; l=l->next){
545                 size=l->offset+input-p;
546                 memcpy(dest, p, size); /* copy till offset */
547                 p+=size + l->size; /* skip l->size bytes */
548                 dest+=size;
549                 if (l->rpl.len){
550                         memcpy(dest, l->rpl.s, l->rpl.len);
551                         dest+=l->rpl.len;
552                 }
553         }
554         memcpy(dest, p, end-p);
555         if(lst) replace_lst_free(lst);
556         return res;
557 error:
558         if (lst) replace_lst_free(lst);
559         if (res){
560                 if (res->s) pkg_free(res->s);
561                 pkg_free(res);
562         }
563         if (count) *count=-1;
564         return 0;
565 }