- Spelling checked
[sip-router] / re.c
1 /* 
2  * $Id$
3  *
4  * regexp and regexp substitutions implementations
5  * 
6  * Copyright (C) 2001-2003 FhG Fokus
7  *
8  * This file is part of ser, a free SIP server.
9  *
10  * ser is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version
14  *
15  * For a license to use the ser software under conditions
16  * other than those described here, or to purchase support for this
17  * software, please contact iptel.org by e-mail at the following addresses:
18  *    info@iptel.org
19  *
20  * ser is distributed in the hope that it will be useful,
21  * but WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  * GNU General Public License for more details.
24  *
25  * You should have received a copy of the GNU General Public License 
26  * along with this program; if not, write to the Free Software 
27  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28  *
29  *
30  * History:
31  * --------
32  *   2003-08-04  created by andrei
33  */
34
35
36 #include "dprint.h"
37 #include "mem/mem.h"
38 #include "re.h"
39
40 #include <string.h>
41
42
43
44 void subst_expr_free(struct subst_expr* se)
45 {
46         if (se->replacement.s) pkg_free(se->replacement.s);
47         if (se->re) { regfree(se->re); pkg_free(se->re); };
48         pkg_free(se);
49 }
50
51
52
53 /* frees the entire list, head (l) too */
54 void replace_lst_free(struct replace_lst* l)
55 {
56         struct replace_lst* t;
57         
58         while (l){
59                 t=l;
60                 l=l->next;
61                 if (t->rpl.s) pkg_free(t->rpl.s);
62                 pkg_free(t);
63         }
64 }
65
66
67
68 /* parse a /regular expression/replacement/flags into a subst_expr structure */
69 struct subst_expr* subst_parser(str* subst)
70 {
71 #define MAX_REPLACE_WITH 100
72         char c;
73         char* end;
74         char* p;
75         char* re;
76         char* re_end;
77         char* repl;
78         char* repl_end;
79         struct replace_with rw[MAX_REPLACE_WITH];
80         int rw_no;
81         int escape;
82         int cflags; /* regcomp flags */
83         int replace_all;
84         struct subst_expr* se;
85         regex_t* regex;
86         int max_pmatch;
87         int r;
88         
89         /* init */
90         se=0;
91         regex=0;
92         cflags=REG_EXTENDED  | REG_NEWLINE; /* don't match newline */
93         replace_all=0;
94         if (subst->len<3){
95                 LOG(L_ERR, "ERROR: subst_parser: expression is too short: %.*s\n",
96                                 subst->len, subst->s);
97                 goto error;
98         }
99         
100         p=subst->s;
101         c=*p;
102         if (c=='\\'){
103                 LOG(L_ERR, "ERROR: subst_parser: invalid separator char <%c>"
104                                 " in %.*s\n", c, subst->len, subst->s);
105                 goto error;
106         }
107         p++;
108         end=subst->s+subst->len;
109         /* find re */
110         re=p;
111         for (;p<end;p++){
112                 /* if unescaped sep. char */
113                 if ((*p==c) && (*(p-1)!='\\')) goto found_re;
114         }
115         LOG(L_ERR, "ERROR: subst_parser: no separator found: %.*s\n", subst->len, 
116                         subst->s);
117         goto error;
118 found_re:
119         re_end=p;
120         p++;
121         /* parse replacement */
122         repl=p;
123         rw_no=0;
124         max_pmatch=0;
125         escape=0;
126         for(;p<end; p++){
127                 if (escape){
128                         escape=0;
129                         switch (*p){
130                                 /* special char escapes */
131                                 case '\\':
132                                         rw[rw_no].size=2;
133                                         rw[rw_no].offset=(p-1)-repl;
134                                         rw[rw_no].type=REPLACE_CHAR;
135                                         rw[rw_no].u.c='\\';
136                                         break;
137                                 case 'n':
138                                         rw[rw_no].size=2;
139                                         rw[rw_no].offset=(p-1)-repl;
140                                         rw[rw_no].type=REPLACE_CHAR;
141                                         rw[rw_no].u.c='\n';
142                                         break;
143                                 case 'r':
144                                         rw[rw_no].size=2;
145                                         rw[rw_no].offset=(p-1)-repl;
146                                         rw[rw_no].type=REPLACE_CHAR;
147                                         rw[rw_no].u.c='\r';
148                                         break;
149                                 case 't':
150                                         rw[rw_no].size=2;
151                                         rw[rw_no].offset=(p-1)-repl;
152                                         rw[rw_no].type=REPLACE_CHAR;
153                                         rw[rw_no].u.c='\t';
154                                         break;
155                                 /* special sip msg parts escapes */
156                                 case 'u':
157                                         rw[rw_no].size=2;
158                                         rw[rw_no].offset=(p-1)-repl;
159                                         rw[rw_no].type=REPLACE_URI;
160                                         break;
161                                 /* re matches */
162                                 case '0': /* allow 0, too, reference to the whole match */
163                                 case '1':
164                                 case '2':
165                                 case '3':
166                                 case '4':
167                                 case '5':
168                                 case '6':
169                                 case '7':
170                                 case '8':
171                                 case '9':
172                                         rw[rw_no].size=2;
173                                         rw[rw_no].offset=(p-1)-repl;
174                                         rw[rw_no].type=REPLACE_NMATCH;
175                                         rw[rw_no].u.nmatch=(*p)-'0';/* 0 is the whole matched str*/
176                                         if (max_pmatch<rw[rw_no].u.nmatch) 
177                                                 max_pmatch=rw[rw_no].u.nmatch;
178                                         break;
179                                 default: /* just print current char */
180                                         if (*p!=c){
181                                                 LOG(L_WARN, "subst_parser: WARNING: \\%c unknown"
182                                                                 " escape in %.*s\n", *p, subst->len, subst->s);
183                                         }
184                                         rw[rw_no].size=2;
185                                         rw[rw_no].offset=(p-1)-repl;
186                                         rw[rw_no].type=REPLACE_CHAR;
187                                         rw[rw_no].u.c=*p;
188                                         break;
189                         }
190                         rw_no++;
191                         if (rw_no>=MAX_REPLACE_WITH){
192                                 LOG(L_ERR, "ERROR: subst_parser: too many escapes in the"
193                                                         " replace part %.*s\n", subst->len, subst->s);
194                                 goto error;
195                         }
196                 }else if (*p=='\\') escape=1;
197                 else  if (*p==c) goto found_repl;
198         }
199         LOG(L_ERR, "ERROR: subst_parser: missing separator: %.*s\n", subst->len, 
200                         subst->s);
201         goto error;
202 found_repl:
203         repl_end=p;
204         p++;
205         /* parse flags */
206         for(;p<end; p++){
207                 switch(*p){
208                         case 'i':
209                                 cflags|=REG_ICASE;
210                                 break;
211                         case 's':
212                                 cflags&=(~REG_NEWLINE);
213                                 break;
214                         case 'g':
215                                 replace_all=1;
216                                 break;
217                         default:
218                                 LOG(L_ERR, "ERROR: subst_parser: unknown flag %c in %.*s\n",
219                                                 *p, subst->len, subst->s);
220                                 goto error;
221                 }
222         }
223
224         /* compile the re */
225         if ((regex=pkg_malloc(sizeof(regex_t)))==0){
226                 LOG(L_ERR, "ERROR: subst_parser: out of memory (re)\n");
227                 goto error;
228         }
229         c=*re_end; /* regcomp expects null terminated strings -- save */
230         *re_end=0;
231         if (regcomp(regex, re, cflags)!=0){
232                 pkg_free(regex);
233                 *re_end=c; /* restore */
234                 LOG(L_ERR, "ERROR: subst_parser: bad regular expression %.*s in "
235                                 "%.*s\n", (int)(re_end-re), re, subst->len, subst->s);
236                 goto error;
237         }
238         *re_end=c; /* restore */
239         /* construct the subst_expr structure */
240         se=pkg_malloc(sizeof(struct subst_expr)+
241                                         ((rw_no)?(rw_no-1)*sizeof(struct replace_with):0));
242                 /* 1 replace_with structure is  already included in subst_expr */
243         if (se==0){
244                 LOG(L_ERR, "ERROR: subst_parser: out of memory (subst_expr)\n");
245                 goto error;
246         }
247         memset((void*)se, 0, sizeof(struct subst_expr));
248         se->replacement.len=repl_end-repl;
249         if ((se->replacement.s=pkg_malloc(se->replacement.len))==0){
250                 LOG(L_ERR, "ERROR: subst_parser: out of memory (replacement)\n");
251                 goto error;
252         }
253         /* start copying */
254         memcpy(se->replacement.s, repl, se->replacement.len);
255         se->re=regex;
256         se->replace_all=replace_all;
257         se->n_escapes=rw_no;
258         se->max_pmatch=max_pmatch;
259         for (r=0; r<rw_no; r++) se->replace[r]=rw[r];
260         DBG("subst_parser: ok, se is %p\n", se);
261         return se;
262         
263 error:
264         if (se) { subst_expr_free(se); regex=0; }
265         if (regex) { regfree (regex); pkg_free(regex); }
266         return 0;
267 }
268
269
270
271 static int replace_len(char* match, int nmatch, regmatch_t* pmatch,
272                                         struct subst_expr* se, struct sip_msg* msg)
273 {
274         int r;
275         int len;
276         str* uri;
277         
278         len=se->replacement.len;
279         for (r=0; r<se->n_escapes; r++){
280                 switch(se->replace[r].type){
281                         case REPLACE_NMATCH:
282                                 len-=se->replace[r].size;
283                                 if ((se->replace[r].u.nmatch<nmatch)&&(
284                                                 pmatch[se->replace[r].u.nmatch].rm_so!=-1)){
285                                                 /* do the replace */
286                                                 len+=pmatch[se->replace[r].u.nmatch].rm_eo-
287                                                                 pmatch[se->replace[r].u.nmatch].rm_so;
288                                 };
289                                 break;
290                         case REPLACE_CHAR:
291                                 len-=(se->replace[r].size-1);
292                                 break;
293                         case REPLACE_URI:
294                                 len-=se->replace[r].size;
295                                 if (msg->first_line.type!=SIP_REQUEST){
296                                         LOG(L_CRIT, "BUG: replace_len: uri substitution on"
297                                                                 " a reply\n");
298                                         break; /* ignore, we can continue */
299                                 }
300                                 uri= (msg->new_uri.s)?(&msg->new_uri):
301                                         (&msg->first_line.u.request.uri);
302                                 len+=uri->len;
303                                 break;
304                         default:
305                                 LOG(L_CRIT, "BUG: replace_len: unknown type %d\n", 
306                                                 se->replace[r].type);
307                                 /* ignore it */
308                 }
309         }
310         return len;
311 }
312
313
314
315 /* rpl.s will be alloc'ed with the proper size & rpl.len set
316  * returns 0 on success, <0 on error*/
317 static int replace_build(char* match, int nmatch, regmatch_t* pmatch,
318                                         struct subst_expr* se, struct sip_msg* msg, str* rpl)
319 {
320         int r;
321         str* uri;
322         char* p;
323         char* dest;
324         char* end;
325         int size;
326         
327         rpl->len=replace_len(match, nmatch, pmatch, se, msg);
328         if (rpl->len==0){
329                 rpl->s=0; /* empty string */
330                 return 0;
331         }
332         rpl->s=pkg_malloc(rpl->len);
333         if (rpl->s==0){
334                 LOG(L_ERR, "ERROR: replace_build: out of mem (rpl)\n");
335                 goto error;
336         }
337         p=se->replacement.s;
338         end=p+se->replacement.len;
339         dest=rpl->s;
340         for (r=0; r<se->n_escapes; r++){
341                 /* copy the unescaped parts */
342                 size=se->replacement.s+se->replace[r].offset-p;
343                 memcpy(dest, p, size);
344                 p+=size+se->replace[r].size;
345                 dest+=size;
346                 switch(se->replace[r].type){
347                         case REPLACE_NMATCH:
348                                 if ((se->replace[r].u.nmatch<nmatch)&&(
349                                                 pmatch[se->replace[r].u.nmatch].rm_so!=-1)){
350                                                 /* do the replace */
351                                                 size=pmatch[se->replace[r].u.nmatch].rm_eo-
352                                                                 pmatch[se->replace[r].u.nmatch].rm_so;
353                                                 memcpy(dest, 
354                                                                 match+pmatch[se->replace[r].u.nmatch].rm_so,
355                                                                 size);
356                                                 dest+=size;
357                                 };
358                                 break;
359                         case REPLACE_CHAR:
360                                 *dest=se->replace[r].u.c;
361                                 dest++;
362                                 break;
363                         case REPLACE_URI:
364                                 if (msg->first_line.type!=SIP_REQUEST){
365                                         LOG(L_CRIT, "BUG: replace_build: uri substitution on"
366                                                                 " a reply\n");
367                                         break; /* ignore, we can continue */
368                                 }
369                                 uri= (msg->new_uri.s)?(&msg->new_uri):
370                                         (&msg->first_line.u.request.uri);
371                                 memcpy(dest, uri->s, uri->len);
372                                 dest+=uri->len;
373                                 break;
374                         default:
375                                 LOG(L_CRIT, "BUG: replace_build: unknown type %d\n", 
376                                                 se->replace[r].type);
377                                 /* ignore it */
378                 }
379         }
380         memcpy(dest, p, end-p);
381         return 0;
382 error:
383         return -1;
384 }
385
386
387
388 /* WARNING: input must be 0 terminated! */
389 struct replace_lst* subst_run(struct subst_expr* se, char* input,
390                                                                 struct sip_msg* msg)
391 {
392         struct replace_lst *head;
393         struct replace_lst **crt;
394         char *p;
395         int r;
396         regmatch_t* pmatch;
397         int nmatch;
398         int eflags;
399         
400         
401         /* init */
402         head=0;
403         crt=&head;
404         p=input;
405         nmatch=se->max_pmatch+1;
406         /* no of () referenced + 1 for the whole string: pmatch[0] */
407         pmatch=pkg_malloc(nmatch*sizeof(regmatch_t));
408         if (pmatch==0){
409                 LOG(L_ERR, "ERROR: subst_run_ out of mem. (pmatch)\n");
410                 goto error;
411         }
412         eflags=0;
413         do{
414                 r=regexec(se->re, p, nmatch, pmatch, eflags);
415                 DBG("subst_run: running. r=%d\n", r);
416                 /* subst */
417                 if (r==0){ /* != REG_NOMATCH */
418                         /* change eflags, not to match any more at string start */
419                         eflags|=REG_NOTBOL;
420                         *crt=pkg_malloc(sizeof(struct replace_lst));
421                         if (*crt==0){
422                                 LOG(L_ERR, "ERROR: subst_run: out of mem (crt)\n");
423                                 goto error;
424                         }
425                         memset(*crt, 0, sizeof(struct replace_lst));
426                         if (pmatch[0].rm_so==-1){
427                                 LOG(L_ERR, "ERROR: subst_run: unknown offset?\n");
428                                 goto error;
429                         }
430                         (*crt)->offset=pmatch[0].rm_so+(int)(p-input);
431                         (*crt)->size=pmatch[0].rm_eo-pmatch[0].rm_so;
432                         DBG("subst_run: matched (%d, %d): [%.*s]\n",
433                                         (*crt)->offset, (*crt)->size, 
434                                         (*crt)->size, input+(*crt)->offset);
435                         /* create subst. string */
436                         /* construct the string from replace[] */
437                         if (replace_build(p, nmatch, pmatch, se, msg, &((*crt)->rpl))<0){
438                                 goto error;
439                         }
440                         crt=&((*crt)->next);
441                         p+=pmatch[0].rm_eo;
442                 }
443         }while((r==0) && se->replace_all);
444         pkg_free(pmatch);
445         return head;
446 error:
447         if (head) replace_lst_free(head);
448         if (pmatch) pkg_free(pmatch);
449         return 0;
450 }
451
452
453
454 /* returns the substitution result in a str, input must be 0 term
455  *  0 on no match or malloc error */ 
456 str* subst_str(char *input, struct sip_msg* msg, struct subst_expr* se)
457 {
458         str* res;
459         struct replace_lst *lst;
460         struct replace_lst* l;
461         int len;
462         int size;
463         char* p;
464         char* dest;
465         char* end;
466         
467         
468         /* compute the len */
469         len=strlen(input);
470         end=input+len;
471         lst=subst_run(se, input, msg);
472         if (lst==0){
473                 DBG("subst_str: no match\n");
474                 return 0;
475         }
476         for (l=lst; l; l=l->next)
477                 len+=(int)(l->rpl.len)-l->size;
478         res=pkg_malloc(sizeof(str));
479         if (res==0){
480                 LOG(L_ERR, "ERROR: subst_str: mem. allocation error\n");
481                 goto error;
482         }
483         res->s=pkg_malloc(len+1); /* space for null termination */
484         if (res->s==0){
485                 LOG(L_ERR, "ERROR: subst_str: mem. allocation error (res->s)\n");
486                 goto error;
487         }
488         res->s[len]=0;
489         res->len=len;
490         
491         /* replace */
492         dest=res->s;
493         p=input;
494         for(l=lst; l; l=l->next){
495                 size=l->offset+input-p;
496                 memcpy(dest, p, size); /* copy till offset */
497                 p+=size + l->size; /* skip l->size bytes */
498                 dest+=size;
499                 if (l->rpl.len){
500                         memcpy(dest, l->rpl.s, l->rpl.len);
501                         dest+=l->rpl.len;
502                 }
503         }
504         memcpy(dest, p, end-p);
505         if(lst) replace_lst_free(lst);
506         return res;
507 error:
508         if (lst) replace_lst_free(lst);
509         if (res){
510                 if (res->s) pkg_free(res->s);
511                 pkg_free(res);
512         }
513         return 0;
514 }