[sip-router] / modules / tm / timer.c
/*
 * $Id$
 *
 *
 * Copyright (C) 2001-2003 FhG Fokus
 *
 * This file is part of ser, a free SIP server.
 *
 * ser is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version
 *
 * For a license to use the ser software under conditions
 * other than those described here, or to purchase support for this
 * software, please contact iptel.org by e-mail at the following addresses:
 *    info@iptel.org
 *
 * ser is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */


/*
  timer.c is where we implement the TM timers. The code has been designed
  for high performance using some techniques of which timer users
  need to be aware.

        One technique is "fixed-timer-length". We maintain separate
        timer lists, each of which contains only elements with the same
        time to fire. That allows *appending* new events to a list, as
        opposed to inserting them sorted by time, which is costly because
        of the search performed while holding a mutex. The performance
        benefit is noticeable. The limitation is that you need a new timer
        list for each new timer length; a rough sketch of the idea is
        shown below.

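        For illustration only, a minimal sketch of such a fixed-interval list
        (hypothetical "tlist"/"tentry" types, not the actual tm structures):
        the tail pointer makes appending O(1), and because every entry uses
        the same interval the list stays sorted by construction, so no search
        is needed while the lock is held:

                struct tentry{ struct tentry* next; ticks_t expire; };
                struct tlist{
                        struct tentry* first;
                        struct tentry** last;
                        ticks_t interval;
                        gen_lock_t lock;
                };

                static void tlist_append(struct tlist* l, struct tentry* e)
                {
                        e->expire=get_ticks_raw()+l->interval;
                        e->next=0;
                        lock_get(&l->lock);
                        *l->last=e;
                        l->last=&e->next;
                        lock_release(&l->lock);
                }
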
        Another technique: the timer process slices expired elements off
        the list while holding a mutex, but executes the timer handlers
        only after the mutex has been released. That saves a lot of time,
        because a process that wants to add or remove a timer does not
        have to wait until the current list is processed. However, be
        aware that timers may fire with a delay: you have no guarantee
        that after resetting a timer it will not fire anymore -- it might
        already have been removed from the list by the timer process and
        be waiting to be executed (a sketch of this slice-off loop can be
        found right after this comment). The following example shows the
        race:

                        PROCESS1                                TIMER PROCESS

        0.                                                              timer hits, it is removed from the queue
                                                                        and is about to be executed
        1.      process1 decides to
                reset the timer
        2.                                                              timer is executed now
        3.      if process1 naively
                thinks the timer could not
                have been executed after
                resetting the timer, it is
                WRONG -- it was (step 2.)

        So be careful when writing the timer handlers. The currently defined
        timers don't hurt if they fire delayed -- I hope, at least. A delayed
        retransmission timer may result in a useless retransmission -- not too
        bad. The FR timer is not too bad either, as timer processing uses the
        REPLY mutex, making it safe with respect to other processing that
        affects the transaction state. The wait timer is not bad either --
        processes putting a transaction on wait don't do anything with it
        anymore.

                Example when it does not hurt:

                        P1                                              TIMER
        0.                                                              RETR timer removed from the list and
                                                                        scheduled for execution
        1. 200/BYE received->
           reset RETR, put_on_wait
        2.                                                              RETR timer executed -- too late, but it
                                                                        does not hurt
        3.                                                              WAIT handler executed

        The rule of thumb is: don't touch data you have put under a timer.
        Create the data, put them under a timer, and let them live untouched
        until they are safely destroyed from the wait/delete timer. The only
        safe place to manipulate the data is the timer process itself, in
        which delayed timers cannot hit (all timers are processed
        sequentially).

        A "bad example": rewriting the content of a retransmission buffer
        in an unprotected way is bad, because a delayed retransmission timer
        might hit. That's why our reply retransmission procedure is enclosed
        in a REPLY_LOCK.

*/
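
/* A minimal sketch of the "slice off under the lock, execute outside the
 * lock" technique described above (hypothetical list/entry types and helper
 * names, not the actual core timer code). The handlers run after the lock
 * has been released, which is exactly why a timer that was just reset may
 * still fire "late":
 *
 *	struct tentry *expired, *e, *nxt;
 *
 *	lock_get(&l->lock);
 *	expired=detach_expired_entries(l, get_ticks_raw());
 *	lock_release(&l->lock);
 *	for (e=expired; e; e=nxt){
 *		nxt=e->next;
 *		e->handler(e);
 *	}
 */
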
/*
 * History:
 * --------
 *  2003-06-27  timers are not unlinked if timerlist is 0 (andrei)
 *  2004-02-13  t->is_invite, t->local, t->noisy_ctimer replaced;
 *              timer_link.payload removed (bogdan)
 *  2005-10-03  almost completely rewritten to use the new timers (andrei)
 *  2005-12-12  on final response marked the rb as removed to avoid deleting
 *              it from the timer handle; timer_allow_del()  (andrei)
 *  2006-08-11  final_response_handler dns failover support for timeout-ed
 *              invites (andrei)
 *  2006-09-28  removed the 480 on fr_inv_timeout reply: on timeout always
 *               return a 408
 *              set the corresponding "faked" failure route sip_msg->msg_flags
 *               on timeout or if the branch received a reply (andrei)
 *  2007-03-15  TMCB_ONSEND callbacks support (andrei)
 */

#include "defs.h"



#include "config.h"
#include "h_table.h"
#include "timer.h"
#include "../../dprint.h"
#include "lock.h"
#include "t_stats.h"

#include "../../hash_func.h"
#include "../../dprint.h"
#include "../../config.h"
#include "../../parser/parser_f.h"
#include "../../ut.h"
#include "../../timer_ticks.h"
#include "t_funcs.h"
#include "t_reply.h"
#include "t_cancel.h"
#include "t_hooks.h"
#ifdef USE_DNS_FAILOVER
#include "t_fwd.h" /* t_send_branch */
#endif
#ifdef USE_DST_BLACKLIST
#include "../../dst_blacklist.h"
#endif



int noisy_ctimer=0;

struct msgid_var user_fr_timeout;
struct msgid_var user_fr_inv_timeout;

/* default timeout values for all the timer lists */

ticks_t fr_timeout              =       FR_TIME_OUT;
ticks_t fr_inv_timeout          =       INV_FR_TIME_OUT;
ticks_t wait_timeout            =       WT_TIME_OUT;
ticks_t delete_timeout          =       DEL_TIME_OUT;
ticks_t rt_t1_timeout           =       RETR_T1;
ticks_t rt_t2_timeout           =       RETR_T2;

/* fix timer values to ticks */
int tm_init_timers()
{
        fr_timeout=MS_TO_TICKS(fr_timeout);
        fr_inv_timeout=MS_TO_TICKS(fr_inv_timeout);
        wait_timeout=MS_TO_TICKS(wait_timeout);
        delete_timeout=MS_TO_TICKS(delete_timeout);
        rt_t1_timeout=MS_TO_TICKS(rt_t1_timeout);
        rt_t2_timeout=MS_TO_TICKS(rt_t2_timeout);
        /* fix 0 values to 1 tick (minimum possible wait time) */
        if (fr_timeout==0) fr_timeout=1;
        if (fr_inv_timeout==0) fr_inv_timeout=1;
        if (wait_timeout==0) wait_timeout=1;
        if (delete_timeout==0) delete_timeout=1;
        if (rt_t2_timeout==0) rt_t2_timeout=1;
        if (rt_t1_timeout==0) rt_t1_timeout=1;

        memset(&user_fr_timeout, 0, sizeof(user_fr_timeout));
        memset(&user_fr_inv_timeout, 0, sizeof(user_fr_inv_timeout));

        DBG("tm: tm_init_timers: fr=%d fr_inv=%d wait=%d delete=%d t1=%d t2=%d\n",
                        fr_timeout, fr_inv_timeout, wait_timeout, delete_timeout,
                        rt_t1_timeout, rt_t2_timeout);
        return 0;
}
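
/* The timeout globals above are configured in milliseconds and converted to
 * ticks exactly once, at startup. A minimal usage sketch (hypothetical
 * values; in the real module the values are normally filled in from the
 * config/module parameters before tm_init_timers() is called):
 *
 *	fr_timeout=30000;       // ms: final response timeout
 *	fr_inv_timeout=120000;  // ms: final response timeout for INVITEs
 *	if (tm_init_timers()!=0)
 *		goto error;
 *	// from here on fr_timeout & friends hold tick values
 */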

/******************** handlers ***************************/



inline static void cleanup_localcancel_timers( struct cell *t )
{
        int i;
        for (i=0; i<t->nr_of_outgoings; i++ )
                stop_rb_timers(&t->uac[i].local_cancel);
}



inline static void unlink_timers( struct cell *t )
{
        int i;

        stop_rb_timers(&t->uas.response);
        for (i=0; i<t->nr_of_outgoings; i++)
                stop_rb_timers(&t->uac[i].request);
        cleanup_localcancel_timers(t);
}



/* returns the number of ticks before retrying the delete, or 0 if the
 * delete was successful */
inline static ticks_t delete_cell( struct cell *p_cell, int unlock )
{
        /* there may still be FR/RETR timers, which have been reset
           (i.e., time_out==TIMER_DELETED) but are still linked to
           timer lists and must be removed from there before the
           structures are released
        */
        unlink_timers( p_cell );
        /* still in use ... don't delete */
        if ( IS_REFFED_UNSAFE(p_cell) ) {
                if (unlock) UNLOCK_HASH(p_cell->hash_index);
                DBG("DEBUG: delete_cell %p: can't delete -- still reffed (%d)\n",
                                p_cell, p_cell->ref_count);
                /* delay the delete */
                /* TODO: change refcnts and delete on refcnt==0 */
                return delete_timeout;
        } else {
                if (unlock) UNLOCK_HASH(p_cell->hash_index);
#ifdef EXTRA_DEBUG
                DBG("DEBUG: delete transaction %p\n", p_cell );
#endif
                free_cell( p_cell );
                return 0;
        }
}
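
/* Note on the return value: in the timer framework used here a handler that
 * returns 0 stops its timer, while a non-zero return re-arms the timer for
 * that many ticks (see retr_buf_handler() and wait_handler() below). That is
 * how the delayed delete works -- a sketch of the calling pattern, assuming
 * a handler similar to the wait/delete handling in this file:
 *
 *	ticks_t delete_handler_sketch(ticks_t t, struct timer_ln* tl, void* data)
 *	{
 *		// 0 => freed, stop the timer;
 *		// delete_timeout => still referenced, retry later
 *		return delete_cell((struct cell*)data, 0);
 *	}
 */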




/* generate a fake reply
 * it assumes the REPLY_LOCK is already held and returns unlocked */
static void fake_reply(struct cell *t, int branch, int code )
{
        branch_bm_t cancel_bitmap;
        short do_cancel_branch;
        enum rps reply_status;

        do_cancel_branch = is_invite(t) && should_cancel_branch(t, branch);
        if ( is_local(t) ) {
                reply_status=local_reply( t, FAKED_REPLY, branch,
                                          code, &cancel_bitmap );
                if (reply_status == RPS_COMPLETED) {
                        put_on_wait(t);
                }
        } else {
                reply_status=relay_reply( t, FAKED_REPLY, branch, code,
                                          &cancel_bitmap );

#if 0
                if (reply_status==RPS_COMPLETED) {
                             /* don't need to cleanup uac_timers -- they were cleaned
                                branch by branch and this last branch's timers are
                                reset now too
                             */
                             /* don't need to issue cancels -- local cancels have been
                                issued branch by branch and this last branch was
                                canceled now too
                             */
                             /* then the only thing to do now is to put the transaction
                                on FR/wait state
                             */
                             /*
                               set_final_timer(  t );
                             */
                }
#endif

        }
        /* now that we are out of the reply lock, do the cancel I/O */
        if (do_cancel_branch) cancel_branch(t, branch, 0);
        /* it's cleaned up on error; if no error occurred and the transaction
           completed regularly, I have to clean up myself
        */
}
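
/* Typical fake_reply() calling pattern, as used by final_response_handler()
 * below: the caller takes the REPLY_LOCK and the lock is released inside
 * local_reply()/relay_reply():
 *
 *	LOCK_REPLIES(t);
 *	...
 *	fake_reply(t, branch, 408);
 *	... the REPLY_LOCK is no longer held here ...
 */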



/* return (ticks_t)-1 on error/disable and 0 on success */
inline static ticks_t retransmission_handler( struct retr_buf *r_buf )
{
#ifdef EXTRA_DEBUG
        if (r_buf->my_T->flags & T_IN_AGONY) {
                LOG( L_ERR, "ERROR: transaction %p scheduled for deletion and"
                        " called from RETR timer (flags %x)\n",
                        r_buf->my_T, r_buf->my_T->flags );
                abort();
        }
#endif
        if ( r_buf->activ_type==TYPE_LOCAL_CANCEL
                || r_buf->activ_type==TYPE_REQUEST ) {
#ifdef EXTRA_DEBUG
                        DBG("DEBUG: retransmission_handler : "
                                "request resending (t=%p, %.9s ... )\n",
                                r_buf->my_T, r_buf->buffer);
#endif
                        if (SEND_BUFFER( r_buf )==-1) {
                                /* disable retr. timers => return -1 */
                                fake_reply(r_buf->my_T, r_buf->branch, 503 );
                                return (ticks_t)-1;
                        }
#ifdef TMCB_ONSEND
                        run_onsend_callbacks(TMCB_REQUEST_SENT, r_buf, TMCB_RETR_F);
#endif
        } else {
#ifdef EXTRA_DEBUG
                        DBG("DEBUG: retransmission_handler : "
                                "reply resending (t=%p, %.9s ... )\n",
                                r_buf->my_T, r_buf->buffer);
#endif
                        t_retransmit_reply(r_buf->my_T);
        }

        return 0;
}



inline static void final_response_handler(      struct retr_buf* r_buf,
                                                struct cell* t)
{
        int silent;
#ifdef USE_DNS_FAILOVER
        /*int i;
        int added_branches;
        */
        int branch_ret;
        int prev_branch;
#endif

#       ifdef EXTRA_DEBUG
        if (t->flags & T_IN_AGONY)
        {
                LOG( L_ERR, "ERROR: transaction %p scheduled for deletion and"
                        " called from FR timer (flags %x)\n", t, t->flags);
                abort();
        }
#       endif
        /* FR for local cancels.... */
        if (r_buf->activ_type==TYPE_LOCAL_CANCEL)
        {
#ifdef TIMER_DEBUG
                DBG("DEBUG: final_response_handler: stop retr for Local Cancel\n");
#endif
                return;
        }
        /* FR for replies (negative INVITE replies) */
        if (r_buf->activ_type>0) {
#               ifdef EXTRA_DEBUG
                if (t->uas.request->REQ_METHOD!=METHOD_INVITE
                        || t->uas.status < 200 ) {
                        LOG(L_CRIT, "BUG: final_response_handler: unknown type reply"
                                        " buffer\n");
                        abort();
                }
#               endif
                put_on_wait( t );
                return;
        }

        /* lock reply processing in order to determine how to proceed reliably */
        LOCK_REPLIES( t );
        /* now it can only be a request retransmission buffer;
           check whether we can simply discard the local transaction
           state without explicitly terminating it */
        silent=
                /* not for UACs */
                !is_local(t)
                /* invites only */
                && is_invite(t)
                /* parallel forking does not allow silent state discarding */
                && t->nr_of_outgoings==1
                /* on_negative reply handler not installed -- serial forking
                 * could occur otherwise */
                && t->on_negative==0
                /* the same for FAILURE callbacks */
                && !has_tran_tmcbs( t, TMCB_ON_FAILURE_RO|TMCB_ON_FAILURE)
                /* something received -- we will not be silent on error */
                && t->uac[r_buf->branch].last_received>0
                /* don't go silent if disallowed globally ... */
                && noisy_ctimer==0
                /* ... or for this particular transaction */
                && has_noisy_ctimer(t) == 0;
        if (silent) {
                UNLOCK_REPLIES(t);
#ifdef EXTRA_DEBUG
                DBG("DEBUG: final_response_handler: transaction silently dropped (%p)\n",t);
#endif
                put_on_wait( t );
                return;
        }
#ifdef EXTRA_DEBUG
        DBG("DEBUG: final_response_handler: stop retr. and send CANCEL (%p)\n", t);
#endif
        if ((r_buf->branch < MAX_BRANCHES) && /* r_buf->branch is always >=0 */
                        (t->uac[r_buf->branch].last_received==0)){
                /* no reply received */
#ifdef USE_DST_BLACKLIST
                if (use_dst_blacklist)
                        dst_blacklist_add( BLST_ERR_TIMEOUT, &r_buf->dst);
#endif
#ifdef USE_DNS_FAILOVER
                /* if this is an invite, the destination resolves to more IPs and
                 * less than fr_inv_timeout has passed since we started, add
                 * another branch/uac */
                if (is_invite(t) && use_dns_failover &&
                                ((get_ticks_raw()-(r_buf->fr_expire-t->fr_timeout)) <
                                        t->fr_inv_timeout)){
                        branch_ret=add_uac_dns_fallback(t, t->uas.request,
                                                                &t->uac[r_buf->branch], 0);
                        prev_branch=-1;
                        while((branch_ret>=0) &&(branch_ret!=prev_branch)){
                                prev_branch=branch_ret;
                                branch_ret=t_send_branch(t, branch_ret, t->uas.request , 0, 0);
                        }
                }
#endif
        }
        fake_reply(t, r_buf->branch, 408);
}



/* handles retransmissions and fr timers */
/* the following assumptions are made (to avoid deleting/re-adding the timer):
 *  retr_buf->retr_interval < (1<<(sizeof(ticks_t)*8-1))
 *  if retr_buf->retr_interval==0 => timer disabled
 *                            ==(ticks_t) -1 => retr. disabled (fr working)
 *     retr_buf->retr_interval & (1<<(sizeof(ticks_t)*8-1)) => retr. & fr reset
 *     (we never reset only retr; we either reset both of them, or disable
 *      retr. and reset fr). In this case fr_origin will contain the
 *      "time" of the reset and the next retr. should occur at
 *      fr_origin+retr_interval (we also assume that we will never reset retr.
 *      to a lower value than the current one)
 */
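
/* Note on the expire checks: every "has this timer expired" test below is
 * written as
 *	(s_ticks_t)(expire-ticks)<=0
 * i.e. the unsigned difference is re-interpreted as signed, so the test
 * keeps working when the tick counter wraps around, as long as the interval
 * fits in less than half of the ticks_t range. For example, with an 8 bit
 * ticks_t and expire=2, ticks=250 (the timer was armed shortly before the
 * wrap), expire-ticks==8 (mod 256), which as a signed value is +8>0, so the
 * timer is correctly seen as still pending. */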
ticks_t retr_buf_handler(ticks_t ticks, struct timer_ln* tl, void *p)
{
        struct retr_buf* rbuf;
        ticks_t fr_remainder;
        ticks_t retr_remainder;
        ticks_t retr_interval;
        struct cell *t;

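        /* recover the enclosing retr_buf from its embedded timer_ln member
         * ("container_of"-style pointer arithmetic: subtract the offset of
         * the timer member, computed via a null pointer, from tl) */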
        rbuf=(struct  retr_buf*)
                        ((void*)tl-(void*)(&((struct retr_buf*)0)->timer));
        t=rbuf->my_T;

#ifdef TIMER_DEBUG
        DBG("tm: timer retr_buf_handler @%d (%p -> %p -> %p)\n",
                        ticks, tl, rbuf, t);
#endif
        /* overflow safe check (should work ok for fr_intervals < max ticks_t/2) */
        if ((s_ticks_t)(rbuf->fr_expire-ticks)<=0){
                /* final response */
                rbuf->t_active=0; /* mark the timer as removed
                                     (both timers disabled)
                                     a little race risk, but
                                     nothing bad would happen */
                rbuf->flags|=F_RB_TIMEOUT;
                timer_allow_del(); /* [optional] allow timer_dels, since we're done
                                      and there is no race risk */
                final_response_handler(rbuf, t);
                return 0;
        }else{
                /* 4 possible states: running (t1), t2, paused, disabled */
                        if ((s_ticks_t)(rbuf->retr_expire-ticks)<=0){
                                if (rbuf->flags & F_RB_RETR_DISABLED)
                                        goto disabled;
                                /* retr_interval= min (2*ri, rt_t2) */
                                /* no branch version:
                                        #ifdef CC_SIGNED_RIGHT_SHIFT
                                                ri=  rt_t2+((2*ri-rt_t2) &
                                                        ((signed)(2*ri-rt_t2)>>(sizeof(ticks_t)*8-1)));
                                        #else
                                                ri=rt_t2+((2*ri-rt_t2)& -(2*ri<rt_t2));
                                        #endif
                                */

                                /* get the current interval from the timer param. */
                                if ((rbuf->flags & F_RB_T2) ||
                                                (((ticks_t)(unsigned long)p<<1)>rt_t2_timeout))
                                        retr_interval=rt_t2_timeout;
                                else
                                        retr_interval=(ticks_t)(unsigned long)p<<1;
#ifdef TIMER_DEBUG
                                DBG("tm: timer: retr: new interval %d (max %d)\n",
                                                retr_interval, rt_t2_timeout);
#endif
                                /* we could race with the reply_received code, but the
                                 * worst thing that can happen is to delay a reset_to_t2
                                 * for crt_interval and send an extra retr.*/
                                rbuf->retr_expire=ticks+retr_interval;
                                /* set the new interval to -1 on error, or to retr_int. on success */
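                                /* (ticks_t)-1 is all ones, so OR-ing it with retr_interval
                                 * keeps -1 on send error (retransmissions get disabled and
                                 * only the FR timeout still applies), while on success
                                 * 0|retr_interval yields the next retransmission interval */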
                                retr_remainder=retransmission_handler(rbuf) | retr_interval;
                                /* store the crt. retr. interval inside the timer struct,
                                 * in the data member */
                                tl->data=(void*)(unsigned long)retr_interval;
                        }else{
                                retr_remainder= rbuf->retr_expire-ticks;
                                DBG("tm: timer: retr: nothing to do, expire in %d\n",
                                                retr_remainder);
                        }
        }
/* skip: */
        /* return the minimum of the next retransmission time and the
         * final response timeout (side benefit: it properly cancels the timer
         * if ret==0 and sleeps for fr_remainder if retr. is canceled
         * [==(ticks_t)-1]) */
        fr_remainder=rbuf->fr_expire-ticks; /* to be more precise use
                                               get_ticks_raw() instead of ticks
                                               (but make sure that
                                               crt. ticks < fr_expire) */
#ifdef TIMER_DEBUG
        DBG("tm: timer retr_buf_handler @%d (%p ->%p->%p) exiting min (%d, %d)\n",
                        ticks, tl, rbuf, t, retr_remainder, fr_remainder);
#endif
        if (retr_remainder<fr_remainder)
                return retr_remainder;
        else
                return fr_remainder;
disabled:
        return rbuf->fr_expire-ticks;
}



ticks_t wait_handler(ticks_t ti, struct timer_ln *wait_tl, void* data)
{
        struct cell *p_cell;
        ticks_t ret;

        p_cell=(struct cell*)data;
#ifdef TIMER_DEBUG
        DBG("DEBUG: WAIT timer hit @%d for %p (timer_ln %p)\n",
                        ti, p_cell, wait_tl);
#endif

        if (p_cell->flags & T_IN_AGONY){
                /* delayed delete */
                /* we call delete now without any locking on hash/ref_count;
                   we can do that because this delayed-delete path is only
                   entered after a previous wait_handler run removed the
                   transaction from the hash table and did not destroy it
                   because some processes were still using it; that means that
                   the processes currently using the transaction can unref and
                   no new processes can ref -- we can safely wait until
                   ref_count is zero without locking
                */
                ret=delete_cell( p_cell, 0 /* don't unlock on return */ );
        }else{
                /* stop the cancel timers, if any are running */
                if ( is_invite(p_cell) ) cleanup_localcancel_timers( p_cell );
                /* remove the cell from the hash table */
                LOCK_HASH( p_cell->hash_index );
                remove_from_hash_table_unsafe(  p_cell );
                p_cell->flags |= T_IN_AGONY;
                /* delete (returns with the hash UNLOCK-ed) */
                ret=delete_cell( p_cell, 1 /* unlock on return */ );
        }
        return ret;
}