Modifying the return value of cfg_set* functions, in order to make
[sip-router] / tcp_main.c
1 /*
2  * $Id$
3  *
4  * Copyright (C) 2001-2003 FhG Fokus
5  *
6  * This file is part of ser, a free SIP server.
7  *
8  * ser is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version
12  *
13  * For a license to use the ser software under conditions
14  * other than those described here, or to purchase support for this
15  * software, please contact iptel.org by e-mail at the following addresses:
16  *    info@iptel.org
17  *
18  * ser is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26  */
27 /*
28  * History:
29  * --------
30  *  2002-11-29  created by andrei
31  *  2002-12-11  added tcp_send (andrei)
32  *  2003-01-20  locking fixes, hashtables (andrei)
33  *  2003-02-20  s/lock_t/gen_lock_t/ to avoid a conflict on solaris (andrei)
34  *  2003-02-25  Nagle is disabled if -DDISABLE_NAGLE (andrei)
35  *  2003-03-29  SO_REUSEADDR before calling bind to allow
36  *              server restart, Nagle set on the (hopefully) 
37  *              correct socket (jiri)
38  *  2003-03-31  always try to find the corresponding tcp listen socket for
39  *               a temp. socket and store it in *->bind_address: added
40  *               find_tcp_si, modified tcpconn_connect (andrei)
41  *  2003-04-14  set sockopts to TOS low delay (andrei)
42  *  2003-06-30  moved tcp new connect checking & handling to
43  *               handle_new_connect (andrei)
44  *  2003-07-09  tls_close called before closing the tcp connection (andrei)
45  *  2003-10-24  converted to the new socket_info lists (andrei)
46  *  2003-10-27  tcp port aliases support added (andrei)
47  *  2003-11-04  always lock before manipulating refcnt; sendchild
48  *              does not inc refcnt by itself anymore (andrei)
49  *  2003-11-07  different unix sockets are used for fd passing
50  *              to/from readers/writers (andrei)
51  *  2003-11-17  handle_new_connect & tcp_connect will close the 
52  *              new socket if tcpconn_new return 0 (e.g. out of mem) (andrei)
53  *  2003-11-28  tcp_blocking_write & tcp_blocking_connect added (andrei)
54  *  2004-11-08  dropped find_tcp_si and replaced with find_si (andrei)
55  *  2005-06-07  new tcp optimized code, supports epoll (LT), sigio + real time
56  *               signals, poll & select (andrei)
57  *  2005-06-26  *bsd kqueue support (andrei)
58  *  2005-07-04  solaris /dev/poll support (andrei)
59  *  2005-07-08  tcp_max_connections, tcp_connection_lifetime, don't accept
60  *               more connections if tcp_max_connections is exceeded (andrei)
61  *  2005-10-21  cleanup all the open connections on exit
62  *              decrement the no. of open connections on timeout too    (andrei)
 *  2006-01-30  queue send_fd request and execute them at the end of the
63  *              poll loop  (#ifdef) (andrei)
64  *              process all children requests, before attempting to send
65  *              them new stuff (fixes some deadlocks) (andrei)
66  *  2006-02-03  timers are run only once per s (andrei)
67  *              tcp children fds can be non-blocking; send fds are queued on
68  *              EAGAIN; lots of bug fixes (andrei)
69  *  2006-02-06  better tcp_max_connections checks, tcp_connections_no moved to
70  *              shm (andrei)
71  *  2006-04-12  tcp_send() changed to use struct dest_info (andrei)
72  *  2006-11-02  switched to atomic ops for refcnt, locking improvements 
73  *               (andrei)
74  *  2006-11-04  switched to raw ticks (to fix conversion errors which could
75  *               result in inf. lifetime) (andrei)
76  *  2007-07-25  tcpconn_connect can now bind the socket on a specified
77  *                source addr/port (andrei)
78  *  2007-07-26   tcp_send() and tcpconn_get() can now use a specified source
79  *                addr./port (andrei)
80  *  2007-08-23   getsockname() for INADDR_ANY(SI_IS_ANY) sockets (andrei)
81  *  2007-08-27   split init_sock_opt into a lightweight init_sock_opt_accept() 
82  *               used when accepting connections and init_sock_opt used for 
83  *               connect/ new sockets (andrei)
84  *  2007-11-22  always add the connection & clear the corresponding flags before
85  *               io_watch_add-ing its fd - it's safer this way (andrei)
86  *  2007-11-26  improved tcp timers: switched to local_timer (andrei)
87  *  2007-11-27  added send fd cache and reader fd reuse (andrei)
88  *  2007-11-28  added support for TCP_DEFER_ACCEPT, KEEPALIVE, KEEPINTVL,
89  *               KEEPCNT, QUICKACK, SYNCNT, LINGER2 (andrei)
90  *  2007-12-04  support for queueing write requests (andrei)
91  */
92
93
94 #ifdef USE_TCP
95
96
97 #ifndef SHM_MEM
98 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
99 #endif
100
101 #include <sys/time.h>
102 #include <sys/types.h>
103 #include <sys/select.h>
104 #include <sys/socket.h>
105 #include <netinet/in.h>
106 #include <netinet/in_systm.h>
107 #include <netinet/ip.h>
108 #include <netinet/tcp.h>
109 #include <sys/uio.h>  /* writev*/
110 #include <netdb.h>
111 #include <stdlib.h> /*exit() */
112
113 #include <unistd.h>
114
115 #include <errno.h>
116 #include <string.h>
117
118 #ifdef HAVE_SELECT
119 #include <sys/select.h>
120 #endif
121 #include <sys/poll.h>
122
123
124 #include "ip_addr.h"
125 #include "pass_fd.h"
126 #include "tcp_conn.h"
127 #include "globals.h"
128 #include "pt.h"
129 #include "locking.h"
130 #include "mem/mem.h"
131 #include "mem/shm_mem.h"
132 #include "timer.h"
133 #include "sr_module.h"
134 #include "tcp_server.h"
135 #include "tcp_init.h"
136 #include "tsend.h"
137 #include "timer_ticks.h"
138 #include "local_timer.h"
139 #ifdef CORE_TLS
140 #include "tls/tls_server.h"
141 #define tls_loaded() 1
142 #else
143 #include "tls_hooks_init.h"
144 #include "tls_hooks.h"
145 #endif
146
147 #include "tcp_info.h"
148 #include "tcp_options.h"
149 #include "ut.h"
150 #include "cfg/cfg_struct.h"
151
152 #define local_malloc pkg_malloc
153 #define local_free   pkg_free
154
155 #define HANDLE_IO_INLINE
156 #include "io_wait.h"
157 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
158
159
/* Compile-time switches and limits used by the tcp main process. */

/* don't pass a new connection immediately to a child, wait for some data
 * on it first */
#define TCP_PASS_NEW_CONNECTION_ON_DATA

#define TCP_LISTEN_BACKLOG 1024

/* queue send fd requests on EAGAIN, instead of sending them immediately */
#define SEND_FD_QUEUE

#define TCP_CHILD_NON_BLOCKING

#ifdef SEND_FD_QUEUE
/* SEND_FD_QUEUE forces TCP_CHILD_NON_BLOCKING on, even if it was not
 * defined above */
#ifndef TCP_CHILD_NON_BLOCKING
#define TCP_CHILD_NON_BLOCKING
#endif
#define MAX_SEND_FD_QUEUE_SIZE	tcp_main_max_fd_no
/* initial size */
#define SEND_FD_QUEUE_SIZE		128
/* FIXME: not used for now */
#define MAX_SEND_FD_RETRIES		96
/* 2 s */
#define SEND_FD_QUEUE_TIMEOUT	MS_TO_TICKS(2000)
#endif

/* maximum accepted lifetime (maximum possible is  ~ MAXINT/2) */
#define MAX_TCP_CON_LIFETIME	((1U<<(sizeof(ticks_t)*8-1))-1)
/* minimum interval local_timer_run() is allowed to run, in ticks
 * (1 => once per tick) */
#define TCPCONN_TIMEOUT_MIN_RUN 1
/* 1 tick */
#define TCPCONN_WAIT_TIMEOUT 1

#ifdef TCP_BUF_WRITE
/* FIXME: after debugging switch to 16-32k */
#define TCP_WBUF_SIZE	1024
/* counter adjusted by the wbufq_* functions with the number of bytes they
 * queue/unqueue */
static unsigned int* tcp_total_wq=0;
#endif
187
188
/* kinds of file descriptors handled by this file */
enum fd_types {
	F_NONE,
	F_SOCKINFO,	/* a tcp_listen fd */
	F_TCPCONN,
	F_TCPCHILD,
	F_PROC
};
191
192
#ifdef TCP_FD_CACHE

/* number of slots in the fd cache */
#define TCP_FD_CACHE_SIZE 8

/* one fd cache slot: a connection pointer together with an id and an fd
 * (NOTE(review): presumably the connection's id and a local fd for it --
 *  the code filling the cache is not part of this section) */
struct fd_cache_entry {
	struct tcp_connection* con;
	int id;
	int fd;
};


static struct fd_cache_entry fd_cache[TCP_FD_CACHE_SIZE];

#endif /* TCP_FD_CACHE */
206
207 static int is_tcp_main=0;
208
209 int tcp_accept_aliases=0; /* by default don't accept aliases */
210 /* flags used for adding new aliases */
211 int tcp_alias_flags=TCP_ALIAS_FORCE_ADD;
212 /* flags used for adding the default aliases of a new tcp connection */
213 int tcp_new_conn_alias_flags=TCP_ALIAS_REPLACE;
214 int tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT;
215 int tcp_send_timeout=DEFAULT_TCP_SEND_TIMEOUT;
216 int tcp_con_lifetime=DEFAULT_TCP_CONNECTION_LIFETIME;
217 enum poll_types tcp_poll_method=0; /* by default choose the best method */
218 int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
219 int tcp_main_max_fd_no=0;
220
221 static union sockaddr_union tcp_source_ipv4_addr; /* saved bind/srv v4 addr. */
222 static union sockaddr_union* tcp_source_ipv4=0;
223 #ifdef USE_IPV6
224 static union sockaddr_union tcp_source_ipv6_addr; /* saved bind/src v6 addr. */
225 static union sockaddr_union* tcp_source_ipv6=0;
226 #endif
227
228 static int* tcp_connections_no=0; /* current open connections */
229
230 /* connection hash table (after ip&port) , includes also aliases */
231 struct tcp_conn_alias** tcpconn_aliases_hash=0;
232 /* connection hash table (after connection id) */
233 struct tcp_connection** tcpconn_id_hash=0;
234 gen_lock_t* tcpconn_lock=0;
235
236 struct tcp_child* tcp_children;
237 static int* connection_id=0; /*  unique for each connection, used for 
238                                                                 quickly finding the corresponding connection
239                                                                 for a reply */
240 int unix_tcp_sock;
241
242 static int tcp_proto_no=-1; /* tcp protocol number as returned by
243                                                            getprotobyname */
244
245 static io_wait_h io_h;
246
247 static struct local_timer tcp_main_ltimer;
248
249
250 static ticks_t tcpconn_main_timeout(ticks_t , struct timer_ln* , void* );
251
252 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
253                                                                                 struct ip_addr* l_ip, int l_port,
254                                                                                 int flags);
255
256
257
258 /* sets source address used when opening new sockets and no source is specified
259  *  (by default the address is choosen by the kernel)
260  * Should be used only on init.
261  * returns -1 on error */
262 int tcp_set_src_addr(struct ip_addr* ip)
263 {
264         switch (ip->af){
265                 case AF_INET:
266                         ip_addr2su(&tcp_source_ipv4_addr, ip, 0);
267                         tcp_source_ipv4=&tcp_source_ipv4_addr;
268                         break;
269                 #ifdef USE_IPV6
270                 case AF_INET6:
271                         ip_addr2su(&tcp_source_ipv6_addr, ip, 0);
272                         tcp_source_ipv6=&tcp_source_ipv6_addr;
273                         break;
274                 #endif
275                 default:
276                         return -1;
277         }
278         return 0;
279 }
280
281
282
/* Applies the keepalive related settings from tcp_options to socket s.
 * Each option is set only if it is supported at compile time (HAVE_*) and
 *  its tcp_options value is non-zero.
 * returns -1 only if enabling SO_KEEPALIVE fails (the remaining options
 *  are not attempted in that case); failures of KEEPINTVL, KEEPIDLE and
 *  KEEPCNT are only logged; returns 0 otherwise */
static inline int init_sock_keepalive(int s)
{
	int val;
	
#ifdef HAVE_SO_KEEPALIVE
	if (tcp_options.keepalive){
		val=1;
		if (setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val))<0){
			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to enable"
						" SO_KEEPALIVE: %s\n", strerror(errno));
			return -1;
		}
	}
#endif
#ifdef HAVE_TCP_KEEPINTVL
	if (tcp_options.keepintvl){
		val=tcp_options.keepintvl;
		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val))<0){
			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
						" keepalive probes interval: %s\n", strerror(errno));
		}
	}
#endif
#ifdef HAVE_TCP_KEEPIDLE
	if (tcp_options.keepidle){
		val=tcp_options.keepidle;
		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val))<0){
			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
						" keepalive idle interval: %s\n", strerror(errno));
		}
	}
#endif
#ifdef HAVE_TCP_KEEPCNT
	if (tcp_options.keepcnt){
		val=tcp_options.keepcnt;
		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val))<0){
			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
						" maximum keepalive count: %s\n", strerror(errno));
		}
	}
#endif
	return 0;
}
330
331
332
333 /* set all socket/fd options for new sockets (e.g. before connect): 
334  *  disable nagle, tos lowdelay, reuseaddr, non-blocking
335  *
336  * return -1 on error */
337 static int init_sock_opt(int s)
338 {
339         int flags;
340         int optval;
341         
342 #ifdef DISABLE_NAGLE
343         flags=1;
344         if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
345                                         &flags, sizeof(flags))<0) ){
346                 LOG(L_WARN, "WARNING: init_sock_opt: could not disable Nagle: %s\n",
347                                 strerror(errno));
348         }
349 #endif
350         /* tos*/
351         optval = tos;
352         if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,sizeof(optval)) ==-1){
353                 LOG(L_WARN, "WARNING: init_sock_opt: setsockopt tos: %s\n",
354                                 strerror(errno));
355                 /* continue since this is not critical */
356         }
357 #if  !defined(TCP_DONT_REUSEADDR) 
358         optval=1;
359         if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
360                                                 (void*)&optval, sizeof(optval))==-1){
361                 LOG(L_ERR, "ERROR: setsockopt SO_REUSEADDR %s\n",
362                                 strerror(errno));
363                 /* continue, not critical */
364         }
365 #endif /* !TCP_DONT_REUSEADDR */
366 #ifdef HAVE_TCP_SYNCNT
367         if (tcp_options.syncnt){
368                 optval=tcp_options.syncnt;
369                 if (setsockopt(s, IPPROTO_TCP, TCP_SYNCNT, &optval,
370                                                 sizeof(optval))<0){
371                         LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
372                                                 " maximum SYN retr. count: %s\n", strerror(errno));
373                 }
374         }
375 #endif
376 #ifdef HAVE_TCP_LINGER2
377         if (tcp_options.linger2){
378                 optval=tcp_options.linger2;
379                 if (setsockopt(s, IPPROTO_TCP, TCP_LINGER2, &optval,
380                                                 sizeof(optval))<0){
381                         LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
382                                                 " maximum LINGER2 timeout: %s\n", strerror(errno));
383                 }
384         }
385 #endif
386 #ifdef HAVE_TCP_QUICKACK
387         if (tcp_options.delayed_ack){
388                 optval=0; /* reset quick ack => delayed ack */
389                 if (setsockopt(s, IPPROTO_TCP, TCP_QUICKACK, &optval,
390                                                 sizeof(optval))<0){
391                         LOG(L_WARN, "WARNING: init_sock_opt: failed to reset"
392                                                 " TCP_QUICKACK: %s\n", strerror(errno));
393                 }
394         }
395 #endif /* HAVE_TCP_QUICKACK */
396         init_sock_keepalive(s);
397         
398         /* non-blocking */
399         flags=fcntl(s, F_GETFL);
400         if (flags==-1){
401                 LOG(L_ERR, "ERROR: init_sock_opt: fnctl failed: (%d) %s\n",
402                                 errno, strerror(errno));
403                 goto error;
404         }
405         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
406                 LOG(L_ERR, "ERROR: init_sock_opt: fcntl: set non-blocking failed:"
407                                 " (%d) %s\n", errno, strerror(errno));
408                 goto error;
409         }
410         return 0;
411 error:
412         return -1;
413 }
414
415
416
417 /* set all socket/fd options for "accepted" sockets 
418  *  only nonblocking is set since the rest is inherited from the
419  *  "parent" (listening) socket
420  *  Note: setting O_NONBLOCK is required on linux but it's not needed on
421  *        BSD and possibly solaris (where the flag is inherited from the 
422  *        parent socket). However since there is no standard document 
423  *        requiring a specific behaviour in this case it's safer to always set
424  *        it (at least for now)  --andrei
425  *  TODO: check on which OSes  O_NONBLOCK is inherited and make this 
426  *        function a nop.
427  *
428  * return -1 on error */
429 static int init_sock_opt_accept(int s)
430 {
431         int flags;
432         
433         /* non-blocking */
434         flags=fcntl(s, F_GETFL);
435         if (flags==-1){
436                 LOG(L_ERR, "ERROR: init_sock_opt_accept: fnctl failed: (%d) %s\n",
437                                 errno, strerror(errno));
438                 goto error;
439         }
440         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
441                 LOG(L_ERR, "ERROR: init_sock_opt_accept: "
442                                         "fcntl: set non-blocking failed: (%d) %s\n",
443                                         errno, strerror(errno));
444                 goto error;
445         }
446         return 0;
447 error:
448         return -1;
449 }
450
451
452
453 /* blocking connect on a non-blocking fd; it will timeout after
454  * tcp_connect_timeout 
455  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
456  * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
457  */
458 static int tcp_blocking_connect(int fd, const struct sockaddr *servaddr,
459                                                                 socklen_t addrlen)
460 {
461         int n;
462 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
463         fd_set sel_set;
464         fd_set orig_set;
465         struct timeval timeout;
466 #else
467         struct pollfd pf;
468 #endif
469         int elapsed;
470         int to;
471         int ticks;
472         int err;
473         unsigned int err_len;
474         int poll_err;
475         
476         poll_err=0;
477         to=tcp_connect_timeout;
478         ticks=get_ticks();
479 again:
480         n=connect(fd, servaddr, addrlen);
481         if (n==-1){
482                 if (errno==EINTR){
483                         elapsed=(get_ticks()-ticks)*TIMER_TICK;
484                         if (elapsed<to)         goto again;
485                         else goto error_timeout;
486                 }
487                 if (errno!=EINPROGRESS && errno!=EALREADY){
488                         LOG(L_ERR, "ERROR: tcp_blocking_connect: (%d) %s\n",
489                                         errno, strerror(errno));
490                         goto error;
491                 }
492         }else goto end;
493         
494         /* poll/select loop */
495 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
496                 FD_ZERO(&orig_set);
497                 FD_SET(fd, &orig_set);
498 #else
499                 pf.fd=fd;
500                 pf.events=POLLOUT;
501 #endif
502         while(1){
503                 elapsed=(get_ticks()-ticks)*TIMER_TICK;
504                 if (elapsed<to)
505                         to-=elapsed;
506                 else 
507                         goto error_timeout;
508 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
509                 sel_set=orig_set;
510                 timeout.tv_sec=to;
511                 timeout.tv_usec=0;
512                 n=select(fd+1, 0, &sel_set, 0, &timeout);
513 #else
514                 n=poll(&pf, 1, to*1000);
515 #endif
516                 if (n<0){
517                         if (errno==EINTR) continue;
518                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll/select failed:"
519                                         " (%d) %s\n", errno, strerror(errno));
520                         goto error;
521                 }else if (n==0) /* timeout */ continue;
522 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
523                 if (FD_ISSET(fd, &sel_set))
524 #else
525                 if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){ 
526                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll error: flags %x\n",
527                                         pf.revents);
528                         poll_err=1;
529                 }
530 #endif
531                 {
532                         err_len=sizeof(err);
533                         getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
534                         if ((err==0) && (poll_err==0)) goto end;
535                         if (err!=EINPROGRESS && err!=EALREADY){
536                                 LOG(L_ERR, "ERROR: tcp_blocking_connect: SO_ERROR (%d) %s\n",
537                                                 err, strerror(err));
538                                 goto error;
539                         }
540                 }
541         }
542 error_timeout:
543         /* timeout */
544         LOG(L_ERR, "ERROR: tcp_blocking_connect: timeout %d s elapsed from %d s\n",
545                         elapsed, tcp_connect_timeout);
546 error:
547         return -1;
548 end:
549         return 0;
550 }
551
552
553
/* forward declaration (the function is defined later in this file);
 * it is needed here by wbufq_run() */
inline static int _tcpconn_write_nb(int fd,
									struct tcp_connection* c,
									char* buf,
									int len);
556
557
558 #ifdef TCP_BUF_WRITE
559
560
561 inline static int wbufq_add(struct  tcp_connection* c, char* data, 
562                                                         unsigned int size)
563 {
564         struct tcp_wbuffer_queue* q;
565         struct tcp_wbuffer* wb;
566         unsigned int last_free;
567         unsigned int wb_size;
568         unsigned int crt_size;
569         ticks_t t;
570         
571         q=&c->wbuf_q;
572         t=get_ticks_raw();
573         if (unlikely(   ((q->queued+size)>tcp_options.tcpconn_wq_max) ||
574                                         ((*tcp_total_wq+size)>tcp_options.tcp_wq_max) ||
575                                         (q->first &&
576                                         TICKS_GT(t, c->last_write+tcp_options.tcp_wq_timeout)) )){
577                 LOG(L_ERR, "ERROR: wbufq_add(%d bytes): write queue full or timeout "
578                                         " (%d, total %d, last write %d s ago)\n",
579                                         size, q->queued, *tcp_total_wq,
580                                         TICKS_TO_S(t-c->last_write));
581                 goto error;
582         }
583         
584         if (unlikely(q->last==0)){
585                 wb_size=MAX_unsigned(TCP_WBUF_SIZE, size);
586                 wb=shm_malloc(sizeof(*wb)+wb_size-1);
587                 if (unlikely(wb==0))
588                         goto error;
589                 wb->b_size=wb_size;
590                 wb->next=0;
591                 q->last=wb;
592                 q->first=wb;
593                 q->last_used=0;
594                 q->offset=0;
595                 c->last_write=get_ticks_raw(); /* start with the crt. time */
596         }else{
597                 wb=q->last;
598         }
599         
600         while(size){
601                 last_free=wb->b_size-q->last_used;
602                 if (last_free==0){
603                         wb_size=MAX_unsigned(TCP_WBUF_SIZE, size);
604                         wb=shm_malloc(sizeof(*wb)+wb_size-1);
605                         if (unlikely(wb==0))
606                                 goto error;
607                         wb->b_size=wb_size;
608                         wb->next=0;
609                         q->last->next=wb;
610                         q->last=wb;
611                         q->last_used=0;
612                         last_free=wb->b_size;
613                 }
614                 crt_size=MIN_unsigned(last_free, size);
615                 memcpy(wb->buf, data, crt_size);
616                 q->last_used+=crt_size;
617                 size-=crt_size;
618                 data+=crt_size;
619                 q->queued+=crt_size;
620                 atomic_add_int((int*)tcp_total_wq, crt_size);
621         }
622         return 0;
623 error:
624         return -1;
625 }
626
627
628
629 inline static void wbufq_destroy( struct  tcp_wbuffer_queue* q)
630 {
631         struct tcp_wbuffer* wb;
632         struct tcp_wbuffer* next_wb;
633         int unqueued;
634         
635         unqueued=0;
636         if (likely(q->first)){
637                 wb=q->first;
638                 do{
639                         next_wb=wb->next;
640                         unqueued+=(wb==q->last)?q->last_used:wb->b_size;
641                         if (wb==q->first)
642                                 unqueued-=q->offset;
643                         shm_free(wb);
644                         wb=next_wb;
645                 }while(wb);
646         }
647         memset(q, 0, sizeof(*q));
648         atomic_add_int((int*)tcp_total_wq, -unqueued);
649 }
650
651
652
653 /* tries to empty the queue
654  * returns -1 on error, bytes written on success (>=0) 
655  * if the whole queue is emptied => sets *empty*/
656 inline static int wbufq_run(int fd, struct tcp_connection* c, int* empty)
657 {
658         struct tcp_wbuffer_queue* q;
659         struct tcp_wbuffer* wb;
660         int n;
661         int ret;
662         int block_size;
663         ticks_t t;
664         char* buf;
665         
666         *empty=0;
667         ret=0;
668         t=get_ticks_raw();
669         lock_get(&c->write_lock);
670         q=&c->wbuf_q;
671         while(q->first){
672                 block_size=((q->first==q->last)?q->last_used:q->first->b_size)-
673                                                 q->offset;
674                 buf=q->first->buf+q->offset;
675                 n=_tcpconn_write_nb(fd, c, buf, block_size);
676                 if (likely(n>0)){
677                         ret+=n;
678                         if (likely(n==block_size)){
679                                 wb=q->first;
680                                 q->first=q->first->next; 
681                                 shm_free(wb);
682                                 q->offset=0;
683                                 q->queued-=block_size;
684                                 atomic_add_int((int*)tcp_total_wq, -block_size);
685                         }else{
686                                 q->offset+=n;
687                                 q->queued-=n;
688                                 atomic_add_int((int*)tcp_total_wq, -n);
689                                 break;
690                         }
691                         c->last_write=t;
692                         c->state=S_CONN_OK;
693                 }else{
694                         if (n<0){
695                                 /* EINTR is handled inside _tcpconn_write_nb */
696                                 if (!(errno==EAGAIN || errno==EWOULDBLOCK)){
697                                         ret=-1;
698                                         LOG(L_ERR, "ERROR: wbuf_runq: %s [%d]\n",
699                                                 strerror(errno), errno);
700                                 }
701                         }
702                         break;
703                 }
704         }
705         if (likely(q->first==0)){
706                 q->last=0;
707                 q->last_used=0;
708                 q->offset=0;
709                 *empty=1;
710         }
711         if (unlikely(c->state==S_CONN_CONNECT && (ret>0)))
712                         c->state=S_CONN_OK;
713         lock_release(&c->write_lock);
714         return ret;
715 }
716
717 #endif /* TCP_BUF_WRITE */
718
719
720
721 #if 0
722 /* blocking write even on non-blocking sockets 
723  * if TCP_TIMEOUT will return with error */
724 static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
725                                                                 unsigned int len)
726 {
727         int n;
728         fd_set sel_set;
729         struct timeval timeout;
730         int ticks;
731         int initial_len;
732         
733         initial_len=len;
734 again:
735         
736         n=send(fd, buf, len,
737 #ifdef HAVE_MSG_NOSIGNAL
738                         MSG_NOSIGNAL
739 #else
740                         0
741 #endif
742                 );
743         if (n<0){
744                 if (errno==EINTR)       goto again;
745                 else if (errno!=EAGAIN && errno!=EWOULDBLOCK){
746                         LOG(L_ERR, "tcp_blocking_write: failed to send: (%d) %s\n",
747                                         errno, strerror(errno));
748                         goto error;
749                 }
750         }else if (n<len){
751                 /* partial write */
752                 buf+=n;
753                 len-=n;
754         }else{
755                 /* success: full write */
756                 goto end;
757         }
758         while(1){
759                 FD_ZERO(&sel_set);
760                 FD_SET(fd, &sel_set);
761                 timeout.tv_sec=tcp_send_timeout;
762                 timeout.tv_usec=0;
763                 ticks=get_ticks();
764                 n=select(fd+1, 0, &sel_set, 0, &timeout);
765                 if (n<0){
766                         if (errno==EINTR) continue; /* signal, ignore */
767                         LOG(L_ERR, "ERROR: tcp_blocking_write: select failed: "
768                                         " (%d) %s\n", errno, strerror(errno));
769                         goto error;
770                 }else if (n==0){
771                         /* timeout */
772                         if (get_ticks()-ticks>=tcp_send_timeout){
773                                 LOG(L_ERR, "ERROR: tcp_blocking_write: send timeout (%d)\n",
774                                                 tcp_send_timeout);
775                                 goto error;
776                         }
777                         continue;
778                 }
779                 if (FD_ISSET(fd, &sel_set)){
780                         /* we can write again */
781                         goto again;
782                 }
783         }
784 error:
785                 return -1;
786 end:
787                 return initial_len;
788 }
789 #endif
790
791
792
793 struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
794                                                                         union sockaddr_union* local_addr,
795                                                                         struct socket_info* ba, int type, 
796                                                                         int state)
797 {
798         struct tcp_connection *c;
799         
800         c=(struct tcp_connection*)shm_malloc(sizeof(struct tcp_connection));
801         if (c==0){
802                 LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
803                 goto error;
804         }
805         memset(c, 0, sizeof(struct tcp_connection)); /* zero init */
806         c->s=sock;
807         c->fd=-1; /* not initialized */
808         if (lock_init(&c->write_lock)==0){
809                 LOG(L_ERR, "ERROR: tcpconn_new: init lock failed\n");
810                 goto error;
811         }
812         
813         c->rcv.src_su=*su;
814         
815         atomic_set(&c->refcnt, 0);
816         timer_init(&c->timer, tcpconn_main_timeout, c, 0);
817         su2ip_addr(&c->rcv.src_ip, su);
818         c->rcv.src_port=su_getport(su);
819         c->rcv.bind_address=ba;
820         if (likely(local_addr)){
821                 su2ip_addr(&c->rcv.dst_ip, local_addr);
822                 c->rcv.dst_port=su_getport(local_addr);
823         }else if (ba){
824                 c->rcv.dst_ip=ba->address;
825                 c->rcv.dst_port=ba->port_no;
826         }
827         print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
828         DBG(     "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
829         init_tcp_req(&c->req);
830         c->id=(*connection_id)++;
831         c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
832         c->rcv.proto_reserved2=0;
833         c->state=state;
834         c->extra_data=0;
835 #ifdef USE_TLS
836         if (type==PROTO_TLS){
837                 if (tls_tcpconn_init(c, sock)==-1) goto error;
838         }else
839 #endif /* USE_TLS*/
840         {
841                 c->type=PROTO_TCP;
842                 c->rcv.proto=PROTO_TCP;
843                 c->timeout=get_ticks_raw()+tcp_con_lifetime;
844         }
845         c->flags|=F_CONN_REMOVED;
846         
847         return c;
848         
849 error:
850         if (c) shm_free(c);
851         return 0;
852 }
853
854
855
856 struct tcp_connection* tcpconn_connect( union sockaddr_union* server, 
857                                                                                 union sockaddr_union* from,
858                                                                                 int type)
859 {
860         int s;
861         struct socket_info* si;
862         union sockaddr_union my_name;
863         socklen_t my_name_len;
864         struct tcp_connection* con;
865         struct ip_addr ip;
866         enum tcp_conn_states state;
867 #ifdef TCP_BUF_WRITE
868         int n;
869 #endif /* TCP_BUF_WRITE */
870
871         s=-1;
872         
873         if (*tcp_connections_no >= tcp_max_connections){
874                 LOG(L_ERR, "ERROR: tcpconn_connect: maximum number of connections"
875                                         " exceeded (%d/%d)\n",
876                                         *tcp_connections_no, tcp_max_connections);
877                 goto error;
878         }
879         s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
880         if (s==-1){
881                 LOG(L_ERR, "ERROR: tcpconn_connect: socket: (%d) %s\n",
882                                 errno, strerror(errno));
883                 goto error;
884         }
885         if (init_sock_opt(s)<0){
886                 LOG(L_ERR, "ERROR: tcpconn_connect: init_sock_opt failed\n");
887                 goto error;
888         }
889         
890         if (from && bind(s, &from->s, sockaddru_len(*from)) != 0)
891                 LOG(L_WARN, "WARNING: tcpconn_connect: binding to source address"
892                                         " failed: %s [%d]\n", strerror(errno), errno);
893 #ifdef TCP_BUF_WRITE
894         if (likely(tcp_options.tcp_buf_write)){
895 again:
896                 n=connect(s, &server->s, sockaddru_len(*server));
897                 if (unlikely(n==-1)){
898                         if (errno==EINTR) goto again;
899                         if (errno!=EINPROGRESS && errno!=EALREADY){
900                                 LOG(L_ERR, "ERROR: tcpconn_connect: connect: (%d) %s\n",
901                                                 errno, strerror(errno));
902                                 goto error;
903                         }
904                         state=S_CONN_CONNECT;
905                 }
906         }else{
907 #endif /* TCP_BUF_WRITE */
908                 if (tcp_blocking_connect(s, &server->s, sockaddru_len(*server))<0){
909                         LOG(L_ERR, "ERROR: tcpconn_connect: tcp_blocking_connect"
910                                                 " failed\n");
911                         goto error;
912                 }
913                 state=S_CONN_OK;
914 #ifdef TCP_BUF_WRITE
915         }
916 #endif /* TCP_BUF_WRITE */
917         if (from){
918                 su2ip_addr(&ip, from);
919                 if (!ip_addr_any(&ip))
920                         /* we already know the source ip, skip the sys. call */
921                         goto find_socket;
922         }
923         my_name_len=sizeof(my_name);
924         if (getsockname(s, &my_name.s, &my_name_len)!=0){
925                 LOG(L_ERR, "ERROR: tcp_connect: getsockname failed: %s(%d)\n",
926                                 strerror(errno), errno);
927                 si=0; /* try to go on */
928                 goto skip;
929         }
930         from=&my_name; /* update from with the real "from" address */
931         su2ip_addr(&ip, &my_name);
932 find_socket:
933 #ifdef USE_TLS
934         if (type==PROTO_TLS)
935                 si=find_si(&ip, 0, PROTO_TLS);
936         else
937 #endif
938                 si=find_si(&ip, 0, PROTO_TCP);
939 skip:
940         if (si==0){
941                 LOG(L_WARN, "WARNING: tcp_connect: could not find corresponding"
942                                 " listening socket, using default...\n");
943                 if (server->s.sa_family==AF_INET) si=sendipv4_tcp;
944 #ifdef USE_IPV6
945                 else si=sendipv6_tcp;
946 #endif
947         }
948         con=tcpconn_new(s, server, from, si,  type, state);
949         if (con==0){
950                 LOG(L_ERR, "ERROR: tcp_connect: tcpconn_new failed, closing the "
951                                  " socket\n");
952                 goto error;
953         }
954         return con;
955         /*FIXME: set sock idx! */
956 error:
957         if (s!=-1) close(s); /* close the opened socket */
958         return 0;
959 }
960
961
962
963 /* adds a tcp connection to the tcpconn hashes
964  * Note: it's called _only_ from the tcp_main process */
965 inline static struct tcp_connection*  tcpconn_add(struct tcp_connection *c)
966 {
967         struct ip_addr zero_ip;
968
969         if (likely(c)){
970                 ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
971                 c->id_hash=tcp_id_hash(c->id);
972                 c->aliases=0;
973                 TCPCONN_LOCK;
974                 /* add it at the begining of the list*/
975                 tcpconn_listadd(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
976                 /* set the aliases */
977                 /* first alias is for (peer_ip, peer_port, 0 ,0) -- for finding
978                  *  any connection to peer_ip, peer_port
979                  * the second alias is for (peer_ip, peer_port, local_addr, 0) -- for
980                  *  finding any conenction to peer_ip, peer_port from local_addr 
981                  * the third alias is for (peer_ip, peer_port, local_addr, local_port) 
982                  *   -- for finding if a fully specified connection exists */
983                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &zero_ip, 0,
984                                                                                                         tcp_new_conn_alias_flags);
985                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
986                                                                                                         tcp_new_conn_alias_flags);
987                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
988                                                                         c->rcv.dst_port, tcp_new_conn_alias_flags);
989                 /* ignore add_alias errors, there are some valid cases when one
990                  *  of the add_alias would fail (e.g. first add_alias for 2 connections
991                  *   with the same destination but different src. ip*/
992                 TCPCONN_UNLOCK;
993                 DBG("tcpconn_add: hashes: %d:%d:%d, %d\n",
994                                                                                                 c->con_aliases[0].hash,
995                                                                                                 c->con_aliases[1].hash,
996                                                                                                 c->con_aliases[2].hash,
997                                                                                                 c->id_hash);
998                 return c;
999         }else{
1000                 LOG(L_CRIT, "tcpconn_add: BUG: null connection pointer\n");
1001                 return 0;
1002         }
1003 }
1004
1005
1006 static inline void _tcpconn_detach(struct tcp_connection *c)
1007 {
1008         int r;
1009         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1010         /* remove all the aliases */
1011         for (r=0; r<c->aliases; r++)
1012                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
1013                                                 &c->con_aliases[r], next, prev);
1014 }
1015
1016
1017
1018 static inline void _tcpconn_free(struct tcp_connection* c)
1019 {
1020 #ifdef TCP_BUF_WRITE
1021         if (unlikely(c->wbuf_q.first))
1022                 wbufq_destroy(&c->wbuf_q);
1023 #endif
1024         lock_destroy(&c->write_lock);
1025 #ifdef USE_TLS
1026         if (unlikely(c->type==PROTO_TLS)) tls_tcpconn_clean(c);
1027 #endif
1028         shm_free(c);
1029 }
1030
1031
1032
/* lock-free variant of tcpconn_rm: the connection is first unlinked from
 * the hashes and then destroyed; no locking is done here */
void _tcpconn_rm(struct tcp_connection* c)
{
	_tcpconn_detach(c); /* out of the id & alias hashes */
	_tcpconn_free(c);   /* release its resources and memory */
}
1039
1040
1041
1042 void tcpconn_rm(struct tcp_connection* c)
1043 {
1044         int r;
1045         TCPCONN_LOCK;
1046         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1047         /* remove all the aliases */
1048         for (r=0; r<c->aliases; r++)
1049                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
1050                                                 &c->con_aliases[r], next, prev);
1051         TCPCONN_UNLOCK;
1052         lock_destroy(&c->write_lock);
1053 #ifdef USE_TLS
1054         if ((c->type==PROTO_TLS)&&(c->extra_data)) tls_tcpconn_clean(c);
1055 #endif
1056         shm_free(c);
1057 }
1058
1059
1060 /* finds a connection, if id=0 uses the ip addr, port, local_ip and local port
1061  *  (host byte order) and tries to find the connection that matches all of
1062  *   them. Wild cards can be used for local_ip and local_port (a 0 filled
1063  *   ip address and/or a 0 local port).
1064  * WARNING: unprotected (locks) use tcpconn_get unless you really
1065  * know what you are doing */
1066 struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port,
1067                                                                                 struct ip_addr* l_ip, int l_port)
1068 {
1069
1070         struct tcp_connection *c;
1071         struct tcp_conn_alias* a;
1072         unsigned hash;
1073         int is_local_ip_any;
1074         
1075 #ifdef EXTRA_DEBUG
1076         DBG("tcpconn_find: %d  port %d\n",id, port);
1077         if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
1078 #endif
1079         if (likely(id)){
1080                 hash=tcp_id_hash(id);
1081                 for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
1082 #ifdef EXTRA_DEBUG
1083                         DBG("c=%p, c->id=%d, port=%d\n",c, c->id, c->rcv.src_port);
1084                         print_ip("ip=", &c->rcv.src_ip, "\n");
1085 #endif
1086                         if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
1087                 }
1088         }else if (likely(ip)){
1089                 hash=tcp_addr_hash(ip, port, l_ip, l_port);
1090                 is_local_ip_any=ip_addr_any(l_ip);
1091                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
1092 #ifdef EXTRA_DEBUG
1093                         DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
1094                                         a->parent->id, a->port, a->parent->rcv.src_port);
1095                         print_ip("ip=",&a->parent->rcv.src_ip,"\n");
1096 #endif
1097                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
1098                                         ((l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
1099                                         (ip_addr_cmp(ip, &a->parent->rcv.src_ip)) &&
1100                                         (is_local_ip_any ||
1101                                                 ip_addr_cmp(l_ip, &a->parent->rcv.dst_ip))
1102                                 )
1103                                 return a->parent;
1104                 }
1105         }
1106         return 0;
1107 }
1108
1109
1110
1111 /* _tcpconn_find with locks and timeout
1112  * local_addr contains the desired local ip:port. If null any local address 
1113  * will be used.  IN*ADDR_ANY or 0 port are wild cards.
1114  */
1115 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
1116                                                                         union sockaddr_union* local_addr,
1117                                                                         ticks_t timeout)
1118 {
1119         struct tcp_connection* c;
1120         struct ip_addr local_ip;
1121         int local_port;
1122         
1123         local_port=0;
1124         if (likely(ip)){
1125                 if (unlikely(local_addr)){
1126                         su2ip_addr(&local_ip, local_addr);
1127                         local_port=su_getport(local_addr);
1128                 }else{
1129                         ip_addr_mk_any(ip->af, &local_ip);
1130                         local_port=0;
1131                 }
1132         }
1133         TCPCONN_LOCK;
1134         c=_tcpconn_find(id, ip, port, &local_ip, local_port);
1135         if (likely(c)){ 
1136                         atomic_inc(&c->refcnt);
1137                         /* update the timeout only if the connection is not handled
1138                          * by a tcp reader (the tcp reader process uses c->timeout for 
1139                          * its own internal timeout and c->timeout will be overwritten
1140                          * anyway on return to tcp_main) */
1141                         if (likely(c->reader_pid==0))
1142                                 c->timeout=get_ticks_raw()+timeout;
1143         }
1144         TCPCONN_UNLOCK;
1145         return c;
1146 }
1147
1148
1149
1150 /* add c->dst:port, local_addr as an alias for the "id" connection, 
1151  * flags: TCP_ALIAS_FORCE_ADD  - add an alias even if a previous one exists
1152  *        TCP_ALIAS_REPLACE    - if a prev. alias exists, replace it with the
1153  *                                new one
1154  * returns 0 on success, <0 on failure ( -1  - null c, -2 too many aliases,
1155  *  -3 alias already present and pointing to another connection)
1156  * WARNING: must be called with TCPCONN_LOCK held */
1157 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
1158                                                                                 struct ip_addr* l_ip, int l_port,
1159                                                                                 int flags)
1160 {
1161         unsigned hash;
1162         struct tcp_conn_alias* a;
1163         struct tcp_conn_alias* nxt;
1164         int is_local_ip_any;
1165         
1166         a=0;
1167         is_local_ip_any=ip_addr_any(l_ip);
1168         if (likely(c)){
1169                 hash=tcp_addr_hash(&c->rcv.src_ip, port, l_ip, l_port);
1170                 /* search the aliases for an already existing one */
1171                 for (a=tcpconn_aliases_hash[hash], nxt=0; a; a=nxt){
1172                         nxt=a->next;
1173                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
1174                                         ( (l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
1175                                         (ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) &&
1176                                         ( is_local_ip_any || 
1177                                           ip_addr_cmp(&a->parent->rcv.dst_ip, l_ip))
1178                                         ){
1179                                 /* found */
1180                                 if (unlikely(a->parent!=c)){
1181                                         if (flags & TCP_ALIAS_FORCE_ADD)
1182                                                 /* still have to walk the whole list to check if
1183                                                  * the alias was not already added */
1184                                                 continue;
1185                                         else if (flags & TCP_ALIAS_REPLACE){
1186                                                 /* remove the current one */
1187                                                 tcpconn_listrm(tcpconn_aliases_hash[hash],
1188                                                                                                                 a, next, prev);
1189                                                 a->next=0;
1190                                                 a->prev=0;
1191                                         }else
1192                                                 goto error_sec;
1193                                 }else goto ok;
1194                         }
1195                 }
1196                 if (unlikely(c->aliases>=TCP_CON_MAX_ALIASES)) goto error_aliases;
1197                 c->con_aliases[c->aliases].parent=c;
1198                 c->con_aliases[c->aliases].port=port;
1199                 c->con_aliases[c->aliases].hash=hash;
1200                 tcpconn_listadd(tcpconn_aliases_hash[hash], 
1201                                                                 &c->con_aliases[c->aliases], next, prev);
1202                 c->aliases++;
1203         }else goto error_not_found;
1204 ok:
1205 #ifdef EXTRA_DEBUG
1206         if (a) DBG("_tcpconn_add_alias_unsafe: alias already present\n");
1207         else   DBG("_tcpconn_add_alias_unsafe: alias port %d for hash %d, id %d\n",
1208                         port, hash, c->id);
1209 #endif
1210         return 0;
1211 error_aliases:
1212         /* too many aliases */
1213         return -2;
1214 error_not_found:
1215         /* null connection */
1216         return -1;
1217 error_sec:
1218         /* alias already present and pointing to a different connection
1219          * (hijack attempt?) */
1220         return -3;
1221 }
1222
1223
1224
1225 /* add port as an alias for the "id" connection, 
1226  * returns 0 on success,-1 on failure */
1227 int tcpconn_add_alias(int id, int port, int proto)
1228 {
1229         struct tcp_connection* c;
1230         int ret;
1231         struct ip_addr zero_ip;
1232         
1233         /* fix the port */
1234         port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
1235         TCPCONN_LOCK;
1236         /* check if alias already exists */
1237         c=_tcpconn_find(id, 0, 0, 0, 0);
1238         if (likely(c)){
1239                 ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
1240                 
1241                 /* alias src_ip:port, 0, 0 */
1242                 ret=_tcpconn_add_alias_unsafe(c, port,  &zero_ip, 0, 
1243                                                                                 tcp_alias_flags);
1244                 if (ret<0 && ret!=-3) goto error;
1245                 /* alias src_ip:port, local_ip, 0 */
1246                 ret=_tcpconn_add_alias_unsafe(c, port,  &c->rcv.dst_ip, 0, 
1247                                                                                 tcp_alias_flags);
1248                 if (ret<0 && ret!=-3) goto error;
1249                 /* alias src_ip:port, local_ip, local_port */
1250                 ret=_tcpconn_add_alias_unsafe(c, port, &c->rcv.dst_ip, c->rcv.dst_port,
1251                                                                                 tcp_alias_flags);
1252                 if (unlikely(ret<0)) goto error;
1253         }else goto error_not_found;
1254         TCPCONN_UNLOCK;
1255         return 0;
1256 error_not_found:
1257         TCPCONN_UNLOCK;
1258         LOG(L_ERR, "ERROR: tcpconn_add_alias: no connection found for id %d\n",id);
1259         return -1;
1260 error:
1261         TCPCONN_UNLOCK;
1262         switch(ret){
1263                 case -2:
1264                         LOG(L_ERR, "ERROR: tcpconn_add_alias: too many aliases"
1265                                         " for connection %p (%d)\n", c, c->id);
1266                         break;
1267                 case -3:
1268                         LOG(L_ERR, "ERROR: tcpconn_add_alias: possible port"
1269                                         " hijack attempt\n");
1270                         LOG(L_ERR, "ERROR: tcpconn_add_alias: alias for %d port %d already"
1271                                                 " present and points to another connection \n",
1272                                                 c->id, port);
1273                         break;
1274                 default:
1275                         LOG(L_ERR, "ERROR: tcpconn_add_alias: unkown error %d\n", ret);
1276         }
1277         return -1;
1278 }
1279
1280
1281
1282 #ifdef TCP_FD_CACHE
1283
1284 static void tcp_fd_cache_init()
1285 {
1286         int r;
1287         for (r=0; r<TCP_FD_CACHE_SIZE; r++)
1288                 fd_cache[r].fd=-1;
1289 }
1290
1291
1292 inline static struct fd_cache_entry* tcp_fd_cache_get(struct tcp_connection *c)
1293 {
1294         int h;
1295         
1296         h=c->id%TCP_FD_CACHE_SIZE;
1297         if ((fd_cache[h].fd>0) && (fd_cache[h].id==c->id) && (fd_cache[h].con==c))
1298                 return &fd_cache[h];
1299         return 0;
1300 }
1301
1302
1303 inline static void tcp_fd_cache_rm(struct fd_cache_entry* e)
1304 {
1305         e->fd=-1;
1306 }
1307
1308
1309 inline static void tcp_fd_cache_add(struct tcp_connection *c, int fd)
1310 {
1311         int h;
1312         
1313         h=c->id%TCP_FD_CACHE_SIZE;
1314         if (likely(fd_cache[h].fd>0))
1315                 close(fd_cache[h].fd);
1316         fd_cache[h].fd=fd;
1317         fd_cache[h].id=c->id;
1318         fd_cache[h].con=c;
1319 }
1320
1321 #endif /* TCP_FD_CACHE */
1322
1323
1324 /* finds a tcpconn & sends on it
1325  * uses the dst members to, proto (TCP|TLS) and id and tries to send
1326  *  from the "from" address (if non null and id==0)
1327  * returns: number of bytes written (>=0) on success
1328  *          <0 on error */
1329 int tcp_send(struct dest_info* dst, union sockaddr_union* from,
1330                                         char* buf, unsigned len)
1331 {
1332         struct tcp_connection *c;
1333         struct tcp_connection *tmp;
1334         struct ip_addr ip;
1335         int port;
1336         int fd;
1337         long response[2];
1338         int n;
1339         int do_close_fd;
1340 #ifdef TCP_BUF_WRITE
1341         int enable_write_watch;
1342 #endif /* TCP_BUF_WRITE */
1343 #ifdef TCP_FD_CACHE
1344         struct fd_cache_entry* fd_cache_e;
1345         
1346         fd_cache_e=0;
1347 #endif /* TCP_FD_CACHE */
1348         do_close_fd=1; /* close the fd on exit */
1349         port=su_getport(&dst->to);
1350         if (likely(port)){
1351                 su2ip_addr(&ip, &dst->to);
1352                 c=tcpconn_get(dst->id, &ip, port, from, tcp_con_lifetime); 
1353         }else if (likely(dst->id)){
1354                 c=tcpconn_get(dst->id, 0, 0, 0, tcp_con_lifetime);
1355         }else{
1356                 LOG(L_CRIT, "BUG: tcp_send called with null id & to\n");
1357                 return -1;
1358         }
1359         
1360         if (likely(dst->id)){
1361                 if (unlikely(c==0)) {
1362                         if (likely(port)){
1363                                 /* try again w/o id */
1364                                 c=tcpconn_get(0, &ip, port, from, tcp_con_lifetime);
1365                                 goto no_id;
1366                         }else{
1367                                 LOG(L_ERR, "ERROR: tcp_send: id %d not found, dropping\n",
1368                                                 dst->id);
1369                                 return -1;
1370                         }
1371                 }else goto get_fd;
1372         }
1373 no_id:
1374                 if (unlikely(c==0)){
1375                         DBG("tcp_send: no open tcp connection found, opening new one\n");
1376                         /* create tcp connection */
1377                         if (likely(from==0)){
1378                                 /* check to see if we have to use a specific source addr. */
1379                                 switch (dst->to.s.sa_family) {
1380                                         case AF_INET:
1381                                                         from = tcp_source_ipv4;
1382                                                 break;
1383 #ifdef USE_IPV6
1384                                         case AF_INET6:
1385                                                         from = tcp_source_ipv6;
1386                                                 break;
1387 #endif
1388                                         default:
1389                                                 /* error, bad af, ignore ... */
1390                                                 break;
1391                                 }
1392                         }
1393                         if (unlikely((c=tcpconn_connect(&dst->to, from, dst->proto))==0)){
1394                                 LOG(L_ERR, "ERROR: tcp_send: connect failed\n");
1395                                 return -1;
1396                         }
1397                         atomic_set(&c->refcnt, 1); /* ref. only from here for now */
1398                         fd=c->s;
1399                         
1400                         /* send the new tcpconn to "tcp main" */
1401                         response[0]=(long)c;
1402                         response[1]=CONN_NEW;
1403                         n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
1404                         if (unlikely(n<=0)){
1405                                 LOG(L_ERR, "BUG: tcp_send: failed send_fd: %s (%d)\n",
1406                                                 strerror(errno), errno);
1407                                 n=-1;
1408                                 goto end;
1409                         }       
1410                         goto send_it;
1411                 }
1412 get_fd:
1413 #ifdef TCP_BUF_WRITE
1414                 /* if data is already queued, we don't need the fd any more */
1415                 if (unlikely(tcp_options.tcp_buf_write && c->wbuf_q.first)){
1416                         lock_get(&c->write_lock);
1417                                 if (likely(c->wbuf_q.first)){
1418                                         do_close_fd=0;
1419                                         if (unlikely(wbufq_add(c, buf, len)<0)){
1420                                                 lock_release(&c->write_lock);
1421                                                 n=-1;
1422                                                 goto error;
1423                                         }
1424                                         n=len;
1425                                         lock_release(&c->write_lock);
1426                                         goto release_c;
1427                                 }
1428                         lock_release(&c->write_lock);
1429                 }
1430 #endif /* TCP_BUF_WRITE */
1431                 /* check if this is not the same reader process holding
1432                  *  c  and if so send directly on c->fd */
1433                 if (c->reader_pid==my_pid()){
1434                         DBG("tcp_send: send from reader (%d (%d)), reusing fd\n",
1435                                         my_pid(), process_no);
1436                         fd=c->fd;
1437                         do_close_fd=0; /* don't close the fd on exit, it's in use */
1438 #ifdef TCP_FD_CACHE
1439                 }else if (likely(tcp_options.fd_cache && 
1440                                                         ((fd_cache_e=tcp_fd_cache_get(c))!=0))){
1441                         fd=fd_cache_e->fd;
1442                         do_close_fd=0;
1443                         DBG("tcp_send: found fd in cache ( %d, %p, %d)\n",
1444                                         fd, c, fd_cache_e->id);
1445 #endif /* TCP_FD_CACHE */
1446                 }else{
1447                         DBG("tcp_send: tcp connection found (%p), acquiring fd\n", c);
1448                         /* get the fd */
1449                         response[0]=(long)c;
1450                         response[1]=CONN_GET_FD;
1451                         n=send_all(unix_tcp_sock, response, sizeof(response));
1452                         if (unlikely(n<=0)){
1453                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(write):%s (%d)\n",
1454                                                 strerror(errno), errno);
1455                                 n=-1;
1456                                 goto release_c;
1457                         }
1458                         DBG("tcp_send, c= %p, n=%d\n", c, n);
1459                         n=receive_fd(unix_tcp_sock, &tmp, sizeof(tmp), &fd, MSG_WAITALL);
1460                         if (unlikely(n<=0)){
1461                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(receive_fd):"
1462                                                         " %s (%d)\n", strerror(errno), errno);
1463                                 n=-1;
1464                                 do_close_fd=0;
1465                                 goto release_c;
1466                         }
1467                         if (unlikely(c!=tmp)){
1468                                 LOG(L_CRIT, "BUG: tcp_send: get_fd: got different connection:"
1469                                                 "  %p (id= %d, refcnt=%d state=%d) != "
1470                                                 "  %p (n=%d)\n",
1471                                                   c,   c->id,   atomic_get(&c->refcnt),   c->state,
1472                                                   tmp, n
1473                                    );
1474                                 n=-1; /* fail */
1475                                 goto end;
1476                         }
1477                         DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
1478                 }
1479         
1480         
1481 send_it:
1482         DBG("tcp_send: sending...\n");
1483         lock_get(&c->write_lock);
1484 #ifdef TCP_BUF_WRITE
1485         if (likely(tcp_options.tcp_buf_write)){
1486                 if (c->wbuf_q.first){
1487                         if (unlikely(wbufq_add(c, buf, len)<0)){
1488                                 lock_release(&c->write_lock);
1489                                 n=-1;
1490                                 goto error;
1491                         }
1492                         lock_release(&c->write_lock);
1493                         n=len;
1494                         goto end;
1495                 }
1496                 n=_tcpconn_write_nb(fd, c, buf, len);
1497         }else{
1498 #endif /* TCP_BUF_WRITE */
1499 #ifdef USE_TLS
1500         if (c->type==PROTO_TLS)
1501                 n=tls_blocking_write(c, fd, buf, len);
1502         else
1503 #endif
1504                 /* n=tcp_blocking_write(c, fd, buf, len); */
1505                 n=tsend_stream(fd, buf, len, tcp_send_timeout*1000); 
1506 #ifdef TCP_BUF_WRITE
1507         }
1508 #endif /* TCP_BUF_WRITE */
1509         lock_release(&c->write_lock);
1510         DBG("tcp_send: after write: c= %p n=%d fd=%d\n",c, n, fd);
1511         DBG("tcp_send: buf=\n%.*s\n", (int)len, buf);
1512         if (unlikely(n<0)){
1513 #ifdef TCP_BUF_WRITE
1514                 if (tcp_options.tcp_buf_write && 
1515                                 (errno==EAGAIN || errno==EWOULDBLOCK)){
1516                         lock_get(&c->write_lock);
1517                         enable_write_watch=(c->wbuf_q.first==0);
1518                         if (unlikely(wbufq_add(c, buf, len)<0)){
1519                                 lock_release(&c->write_lock);
1520                                 n=-1;
1521                                 goto error;
1522                         }
1523                         lock_release(&c->write_lock);
1524                         n=len;
1525                         if (enable_write_watch){
1526                                 response[0]=(long)c;
1527                                 response[1]=CONN_QUEUED_WRITE;
1528                                 if (send_all(unix_tcp_sock, response, sizeof(response))<=0){
1529                                         LOG(L_ERR, "BUG: tcp_send: error return failed "
1530                                                         "(write):%s (%d)\n", strerror(errno), errno);
1531                                         n=-1;
1532                                         goto error;
1533                                 }
1534                         }
1535                         goto end;
1536                 }
1537 error:
1538 #endif /* TCP_BUF_WRITE */
1539                 LOG(L_ERR, "ERROR: tcp_send: failed to send\n");
1540                 /* error on the connection , mark it as bad and set 0 timeout */
1541                 c->state=S_CONN_BAD;
1542                 c->timeout=get_ticks_raw();
1543                 /* tell "main" it should drop this (optional it will t/o anyway?)*/
1544                 response[0]=(long)c;
1545                 response[1]=CONN_ERROR;
1546                 if (send_all(unix_tcp_sock, response, sizeof(response))<=0){
1547                         LOG(L_ERR, "BUG: tcp_send: error return failed (write):%s (%d)\n",
1548                                         strerror(errno), errno);
1549                         tcpconn_put(c); /* deref. it manually */
1550                         n=-1;
1551                 }
1552                 /* CONN_ERROR will auto-dec refcnt => we must not call tcpconn_put 
1553                  * if it succeeds */
1554 #ifdef TCP_FD_CACHE
1555                 if (unlikely(fd_cache_e)){
1556                         LOG(L_ERR, "ERROR: tcp_send: error on cached fd, removing from the"
1557                                         "cache (%d, %p, %d)\n", 
1558                                         fd, fd_cache_e->con, fd_cache_e->id);
1559                         tcp_fd_cache_rm(fd_cache_e);
1560                         close(fd);
1561                 }else
1562 #endif /* TCP_FD_CACHE */
1563                 if (do_close_fd) close(fd);
1564                 return n; /* error return, no tcpconn_put */
1565         }
1566 #ifdef TCP_BUF_WRITE
1567         if (likely(tcp_options.tcp_buf_write)){
1568                 if (unlikely(c->state==S_CONN_CONNECT))
1569                         c->state=S_CONN_OK;
1570                 c->last_write=get_ticks_raw();
1571         }
1572 #endif /* TCP_BUF_WRITE */
1573 end:
1574 #ifdef TCP_FD_CACHE
1575         if (unlikely((fd_cache_e==0) && tcp_options.fd_cache)){
1576                 tcp_fd_cache_add(c, fd);
1577         }else
1578 #endif /* TCP_FD_CACHE */
1579         if (do_close_fd) close(fd);
1580 release_c:
1581         tcpconn_put(c); /* release c (lock; dec refcnt; unlock) */
1582         return n;
1583 }
1584
1585
1586
     /* Creates and prepares the TCP listen socket described by sock_info.
      *
      * Steps, in order: fills sock_info->su from sock_info->address/port_no,
      * creates a SOCK_STREAM socket, applies the socket options selected at
      * compile time / through tcp_options, calls init_sock_keepalive(), then
      * bind() and listen() (backlog TCP_LISTEN_BACKLOG).
      * Only init_su(), socket(), SO_REUSEADDR, bind() and listen() failures are
      * fatal; all the other setsockopt() failures are just logged.
      *
      * params:  sock_info - listen socket description; ->su and ->socket are
      *                      written by this function
      * returns: 0 on success, -1 on error (if ->socket is != -1 at that point
      *          it is closed and reset to -1)
      * NOTE(review): if init_su() fails, ->socket has not been assigned here
      *  yet, so the error path relies on the caller having initialized it
      *  to -1 -- confirm.
      */
1587 int tcp_init(struct socket_info* sock_info)
1588 {
1589         union sockaddr_union* addr;
1590         int optval;
1591 #ifdef HAVE_TCP_ACCEPT_FILTER
1592         struct accept_filter_arg afa;
1593 #endif /* HAVE_TCP_ACCEPT_FILTER */
1594 #ifdef DISABLE_NAGLE
1595         int flag;
1596         struct protoent* pe;
1597
1598         if (tcp_proto_no==-1){ /* if not already set */
1599                 pe=getprotobyname("tcp");
1600                 if (pe==0){
1601                         LOG(L_ERR, "ERROR: tcp_init: could not get TCP protocol number\n");
1602                         tcp_proto_no=-1;
1603                 }else{
1604                         tcp_proto_no=pe->p_proto;
1605                 }
1606         }
1607 #endif
1608         
1609         addr=&sock_info->su;
1610         /* sock_info->proto=PROTO_TCP; */
1611         if (init_su(addr, &sock_info->address, sock_info->port_no)<0){
1612                 LOG(L_ERR, "ERROR: tcp_init: could no init sockaddr_union\n");
1613                 goto error;
1614         }
1615         sock_info->socket=socket(AF2PF(addr->s.sa_family), SOCK_STREAM, 0);
1616         if (sock_info->socket==-1){
1617                 LOG(L_ERR, "ERROR: tcp_init: socket: %s\n", strerror(errno));
1618                 goto error;
1619         }
1620 #ifdef DISABLE_NAGLE
             /* not critical: skipped if the protocol number is unknown, and a
              * failure is only logged */
1621         flag=1;
1622         if ( (tcp_proto_no!=-1) &&
1623                  (setsockopt(sock_info->socket, tcp_proto_no , TCP_NODELAY,
1624                                          &flag, sizeof(flag))<0) ){
1625                 LOG(L_ERR, "ERROR: tcp_init: could not disable Nagle: %s\n",
1626                                 strerror(errno));
1627         }
1628 #endif
1629
1630
1631 #if  !defined(TCP_DONT_REUSEADDR) 
1632         /* Stevens, "Network Programming", Section 7.5, "Generic Socket
1633      * Options": "...server started,..a child continues..on existing
1634          * connection..listening server is restarted...call to bind fails
1635          * ... ALL TCP servers should specify the SO_REUSEADDRE option 
1636          * to allow the server to be restarted in this situation
1637          *
1638          * Indeed, without this option, the server can't restart.
1639          *   -jiri
1640          */
1641         optval=1;
1642         if (setsockopt(sock_info->socket, SOL_SOCKET, SO_REUSEADDR,
1643                                 (void*)&optval, sizeof(optval))==-1) {
1644                 LOG(L_ERR, "ERROR: tcp_init: setsockopt %s\n",
1645                         strerror(errno));
1646                 goto error;
1647         }
1648 #endif
1649         /* tos */
1650         optval = tos;
1651         if (setsockopt(sock_info->socket, IPPROTO_IP, IP_TOS, (void*)&optval, 
1652                                 sizeof(optval)) ==-1){
1653                 LOG(L_WARN, "WARNING: tcp_init: setsockopt tos: %s\n", strerror(errno));
1654                 /* continue since this is not critical */
1655         }
1656 #ifdef HAVE_TCP_DEFER_ACCEPT
1657         /* linux only */
1658         if (tcp_options.defer_accept){
1659                 optval=tcp_options.defer_accept;
1660                 if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_DEFER_ACCEPT,
1661                                         (void*)&optval, sizeof(optval)) ==-1){
1662                         LOG(L_WARN, "WARNING: tcp_init: setsockopt TCP_DEFER_ACCEPT %s\n",
1663                                                 strerror(errno));
1664                 /* continue since this is not critical */
1665                 }
1666         }
1667 #endif /* HAVE_TCP_DEFER_ACCEPT */
1668 #ifdef HAVE_TCP_SYNCNT
1669         if (tcp_options.syncnt){
1670                 optval=tcp_options.syncnt;
1671                 if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_SYNCNT, &optval,
1672                                                 sizeof(optval))<0){
1673                         LOG(L_WARN, "WARNING: tcp_init: failed to set"
1674                                                 " maximum SYN retr. count: %s\n", strerror(errno));
1675                 }
1676         }
1677 #endif
1678 #ifdef HAVE_TCP_ACCEPT_FILTER
1679         /* freebsd */
1680         if (tcp_options.defer_accept){
1681                 memset(&afa, 0, sizeof(afa));
1682                 strcpy(afa.af_name, "dataready");
                     /* the option value must be the local accept_filter_arg (afa) */
1683                 if (setsockopt(sock_info->socket, SOL_SOCKET, SO_ACCEPTFILTER,
1684                                         (void*)&afa, sizeof(afa)) ==-1){
1685                         LOG(L_WARN, "WARNING: tcp_init: setsockopt SO_ACCEPTFILTER %s\n",
1686                                                 strerror(errno));
1687                 /* continue since this is not critical */
1688                 }
1689         }
1690 #endif /* HAVE_TCP_ACCEPT_FILTER */
1691 #ifdef HAVE_TCP_LINGER2
1692         if (tcp_options.linger2){
1693                 optval=tcp_options.linger2;
1694                 if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_LINGER2, &optval,
1695                                                 sizeof(optval))<0){
1696                         LOG(L_WARN, "WARNING: tcp_init: failed to set"
1697                                                 " maximum LINGER2 timeout: %s\n", strerror(errno));
1698                 }
1699         }
1700 #endif
1701         init_sock_keepalive(sock_info->socket);
1702         if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
1703                 LOG(L_ERR, "ERROR: tcp_init: bind(%x, %p, %d) on %s:%d : %s\n",
1704                                 sock_info->socket,  &addr->s, 
1705                                 (unsigned)sockaddru_len(*addr),
1706                                 sock_info->address_str.s,
1707                                 sock_info->port_no,
1708                                 strerror(errno));
1709                 goto error;
1710         }
1711         if (listen(sock_info->socket, TCP_LISTEN_BACKLOG)==-1){
1712                 LOG(L_ERR, "ERROR: tcp_init: listen(%x, %p, %d) on %s: %s\n",
1713                                 sock_info->socket, &addr->s, 
1714                                 (unsigned)sockaddru_len(*addr),
1715                                 sock_info->address_str.s,
1716                                 strerror(errno));
1717                 goto error;
1718         }
1719         
1720         return 0;
1721 error:
             /* common failure exit: release the socket (if any) and mark it unused */
1722         if (sock_info->socket!=-1){
1723                 close(sock_info->socket);
1724                 sock_info->socket=-1;
1725         }
1726         return -1;
1727 }
1728
1729
1730
1731 /* used internally by tcp_main_loop()
1732  * tries to destroy a tcp connection (if it cannot it will force a timeout)
1733  * Note: it's called _only_ from the tcp_main process
      *
      * The function drops one reference from tcpconn->refcnt (under
      * TCPCONN_LOCK):
      *  - if the counter reaches 0 the connection is detached, freed, its
      *    socket (tcpconn->s) is closed and *tcp_connections_no is decremented;
      *  - otherwise the connection is only marked S_CONN_BAD, its timeout is
      *    set to now + TCPCONN_WAIT_TIMEOUT and, unless F_CONN_READER is set,
      *    the tcp_main local timer is re-armed for it.
      * params: tcpconn - connection to destroy; it must not be used by the
      *                   caller after the call, since it might have been freed
      */
1734 static void tcpconn_destroy(struct tcp_connection* tcpconn)
1735 {
1736         int fd;
1737         ticks_t t;
1738
1739         /* always try to remove the timer to protect against tcpconn_destroy
1740          *  being called several times for the same connection 
1741          *  (if the timer is already removed, nothing happens) */
1742         if (likely(!(tcpconn->flags & F_CONN_READER)))
1743                 local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
1744 #ifdef TCP_BUF_WRITE
             /* sanity check only: at this point the write watch is expected to be
              * off and F_CONN_REMOVED to be set; anything else is just reported */
1745         if (unlikely((tcpconn->flags & F_CONN_WRITE_W) ||
1746                                 !(tcpconn->flags & F_CONN_REMOVED))){
1747                 LOG(L_CRIT, "tcpconn_destroy: possible BUG: flags = %0x\n",
1748                                         tcpconn->flags);
1749         }
             /* drop any still queued write data; the unlocked test is only a
              * shortcut, the real check is redone under write_lock */
1750         if (unlikely(tcpconn->wbuf_q.first)){
1751                 lock_get(&tcpconn->write_lock);
1752                         /* check again, while holding the lock */
1753                         if (likely(tcpconn->wbuf_q.first))
1754                                 wbufq_destroy(&tcpconn->wbuf_q);
1755                 lock_release(&tcpconn->write_lock);
1756         }
1757 #endif /* TCP_BUF_WRITE */
1758         TCPCONN_LOCK; /*avoid races w/ tcp_send*/
1759         if (likely(atomic_dec_and_test(&tcpconn->refcnt))){ 
                     /* last reference: detach while still holding the lock, do the
                      * rest of the cleanup after releasing it */
1760                 _tcpconn_detach(tcpconn);
1761                 TCPCONN_UNLOCK;
1762                 DBG("tcpconn_destroy: destroying connection %p (%d, %d) flags %04x\n",
1763                                 tcpconn, tcpconn->id, tcpconn->s, tcpconn->flags);
                     /* save the fd, it is needed after tcpconn is freed */
1764                 fd=tcpconn->s;
1765 #ifdef USE_TLS
1766                 /*FIXME: lock ->writelock ? */
1767                 if (tcpconn->type==PROTO_TLS)
1768                         tls_close(tcpconn, fd);
1769 #endif
1770                 _tcpconn_free(tcpconn); /* destroys also the wbuf_q if still present*/
1771 #ifdef TCP_FD_CACHE
                     /* NOTE(review): presumably needed because other processes may
                      * still hold cached copies of this fd, so close() alone would
                      * not terminate the connection -- confirm */
1772                 if (likely(tcp_options.fd_cache)) shutdown(fd, SHUT_RDWR);
1773 #endif /* TCP_FD_CACHE */
1774                 if (unlikely(close(fd)<0)){
1775                         LOG(L_ERR, "ERROR: tcpconn_destroy; close() failed: %s (%d)\n",
1776                                         strerror(errno), errno);
1777                 }
1778                 (*tcp_connections_no)--;
1779         }else{
                     /* still referenced: cannot free now, let it expire */
1780                 TCPCONN_UNLOCK;
1781                 /* force timeout */
1782                 t=get_ticks_raw();
1783                 tcpconn->timeout=t+TCPCONN_WAIT_TIMEOUT;
1784                 tcpconn->state=S_CONN_BAD;
1785                 if (!(tcpconn->flags & F_CONN_READER)){
1786                         /* re-activate the timer only if the connection is handled
1787                          * by tcp_main (and not by a tcp reader)*/
1788                         tcpconn->timer.f=tcpconn_main_timeout;
1789                         timer_reinit(&tcpconn->timer);
1790                         local_timer_add(&tcp_main_ltimer, &tcpconn->timer, 
1791                                                                         TCPCONN_WAIT_TIMEOUT, t);
1792                 }
1793                 DBG("tcpconn_destroy: delaying (%p, flags %04x) ...\n",
1794                                 tcpconn, tcpconn->flags);
1795         }
1796 }
1797
1798
1799
1800 #ifdef SEND_FD_QUEUE
1801 struct send_fd_info{ /* one queued (not yet successful) send_fd() request */
1802         struct tcp_connection* tcp_conn; /* connection whose socket (->s) is sent */
1803         ticks_t expire; /* tick value after which the request is given up */
1804         int unix_sock; /* unix socket on which the fd is sent */
1805         unsigned int retries; /* debugging */
1806 };
1807
     /* array based queue of pending send_fd requests; the used entries are
      * data[0] ... crt-1 */
1808 struct tcp_send_fd_q{
1809         struct send_fd_info* data; /* buffer */
1810         struct send_fd_info* crt;  /* pointer inside the buffer (next free entry) */
1811         struct send_fd_info* end;  /* points after the last valid position */
1812 };
1813
1814
1815 static struct tcp_send_fd_q send2child_q; /* queue set up by init_send_fd_queues() */
1816
1817
1818
     /* Allocates (pkg memory) the buffer of a send fd queue and marks the queue
      * as empty.
      * params:  q    - queue to initialize
      *          size - initial capacity, in entries
      * returns: 0 on success, -1 if the allocation fails
      */
1819 static int send_fd_queue_init(struct tcp_send_fd_q *q, unsigned int size)
1820 {
1821         q->data=pkg_malloc(size*sizeof(struct send_fd_info));
1822         if (q->data==0){
1823                 LOG(L_ERR, "ERROR: send_fd_queue_init: out of memory\n");
1824                 return -1;
1825         }
             /* empty queue: crt at the start, end one past the last entry */
1826         q->crt=&q->data[0];
1827         q->end=&q->data[size];
1828         return 0;
1829 }
1830
     /* Frees the queue buffer (if any) and resets all the queue pointers to 0;
      * calling it again on the same queue does nothing. */
1831 static void send_fd_queue_destroy(struct tcp_send_fd_q *q)
1832 {
1833         if (q->data){
1834                 pkg_free(q->data);
1835                 q->data=0;
1836                 q->crt=q->end=0;
1837         }
1838 }
1839
1840
1841
     /* Initializes send2child_q with SEND_FD_QUEUE_SIZE entries.
      * returns: 0 on success, -1 on error (also logged) */
1842 static int init_send_fd_queues()
1843 {
1844         if (send_fd_queue_init(&send2child_q, SEND_FD_QUEUE_SIZE)!=0)
1845                 goto error;
1846         return 0;
1847 error:
1848         LOG(L_ERR, "ERROR: init_send_fd_queues: init failed\n");
1849         return -1;
1850 }
1851
1852
1853
     /* Releases the memory used by send2child_q. */
1854 static void destroy_send_fd_queues()
1855 {
1856         send_fd_queue_destroy(&send2child_q);
1857 }
1858
1859
1860
1861
     /* Appends a new send_fd request at the end of the queue.
      * If the buffer is full it is first extended: the capacity is doubled
      * while it is below MAX_SEND_FD_QUEUE_SIZE/2, else it is set to
      * MAX_SEND_FD_QUEUE_SIZE; if that does not make room for a new entry the
      * add fails.
      * params:  q         - queue
      *          unix_sock - unix socket on which the fd will be sent
      *          t         - tcp connection whose fd will be sent
      * returns: 0 on success, -1 on error (queue full or out of memory); on
      *          error the queue is left unchanged
      */
1862 inline static int send_fd_queue_add(    struct tcp_send_fd_q* q, 
1863                                                                                 int unix_sock,
1864                                                                                 struct tcp_connection *t)
1865 {
1866         struct send_fd_info* tmp;
1867         unsigned long new_size;
1868         
1869         if (q->crt>=q->end){
                     /* no free entry left => try to grow the buffer */
1870                 new_size=q->end-&q->data[0];
1871                 if (new_size< MAX_SEND_FD_QUEUE_SIZE/2){
1872                         new_size*=2;
1873                 }else new_size=MAX_SEND_FD_QUEUE_SIZE;
                     /* still no room with the new size => maximum size reached */
1874                 if (unlikely(q->crt>=&q->data[new_size])){
1875                         LOG(L_ERR, "ERROR: send_fd_queue_add: queue full: %ld/%ld\n",
1876                                         (long)(q->crt-&q->data[0]-1), new_size);
1877                         goto error;
1878                 }
1879                 LOG(L_CRIT, "INFO: send_fd_queue: queue full: %ld, extending to %ld\n",
1880                                 (long)(q->end-&q->data[0]), new_size);
                     /* realloc into a temporary, q->data stays valid on failure */
1881                 tmp=pkg_realloc(q->data, new_size*sizeof(struct send_fd_info));
1882                 if (unlikely(tmp==0)){
1883                         LOG(L_ERR, "ERROR: send_fd_queue_add: out of memory\n");
1884                         goto error;
1885                 }
                     /* the buffer might have moved: rebase crt (same index in the new
                      * buffer) before replacing data */
1886                 q->crt=(q->crt-&q->data[0])+tmp;
1887                 q->data=tmp;
1888                 q->end=&q->data[new_size];
1889         }
             /* fill the new entry and advance to the next free one */
1890         q->crt->tcp_conn=t;
1891         q->crt->unix_sock=unix_sock;
1892         q->crt->expire=get_ticks_raw()+SEND_FD_QUEUE_TIMEOUT;
1893         q->crt->retries=0;
1894         q->crt++;
1895         return 0;
1896 error:
1897         return -1;
1898 }
1899
1900
1901
     /* Retries send_fd() for every queued request and compacts the queue.
      * For each entry:
      *  - send_fd() succeeds (>0): the entry is dropped from the queue;
      *  - send_fd() fails with EAGAIN/EWOULDBLOCK and the entry has not expired
      *    yet: the entry is kept (retries is incremented) for a future run;
      *  - any other failure or expired entry: the error is logged and the
      *    connection is destroyed (tcpconn_destroy()).
      * params: q - queue to run
      */
1902 inline static void send_fd_queue_run(struct tcp_send_fd_q* q)
1903 {
1904         struct send_fd_info* p;
1905         struct send_fd_info* t;
1906         
             /* p walks all the queued entries, t is the position where the next
              * kept entry is stored (t<=p always) */
1907         for (p=t=&q->data[0]; p<q->crt; p++){
1908                 if (unlikely(send_fd(p->unix_sock, &(p->tcp_conn),
1909                                         sizeof(struct tcp_connection*), p->tcp_conn->s)<=0)){
1910                         if ( ((errno==EAGAIN)||(errno==EWOULDBLOCK)) && 
1911                                                         ((s_ticks_t)(p->expire-get_ticks_raw())>0)){
1912                                 /* leave in queue for a future try */
1913                                 *t=*p;
1914                                 t->retries++;
1915                                 t++;
1916                         }else{
1917                                 LOG(L_ERR, "ERROR: run_send_fd_queue: send_fd failed"
1918                                                    " on socket %d , queue entry %ld, retries %d,"
1919                                                    " connection %p, tcp socket %d, errno=%d (%s) \n",
1920                                                    p->unix_sock, (long)(p-&q->data[0]), p->retries,
1921                                                    p->tcp_conn, p->tcp_conn->s, errno,
1922                                                    strerror(errno));
1923 #ifdef TCP_BUF_WRITE
                                     /* stop watching the fd for write before destroying */
1924                                 if (p->tcp_conn->flags & F_CONN_WRITE_W){
1925                                         io_watch_del(&io_h, p->tcp_conn->s, -1, IO_FD_CLOSING);
1926                                         p->tcp_conn->flags &=~F_CONN_WRITE_W;
1927                                 }
1928 #endif
                                     /* the fd never got to a reader: clear F_CONN_READER
                                      * before calling tcpconn_destroy(), which checks it */
1929                                 p->tcp_conn->flags &= ~F_CONN_READER;
1930                                 tcpconn_destroy(p->tcp_conn);
1931                         }
1932                 }
1933         }
             /* everything from t on was either sent or dropped */
1934         q->crt=t;
1935 }
1936 #else
1937 #define send_fd_queue_run(q)
1938 #endif
1939
1940
1941 /* non blocking write() on a tcpconnection, unsafe version (should be called
1942  * while holding  c->write_lock). The fd should be non-blocking.
      *  The write is retried if it is interrupted by a signal (EINTR).
      *  For PROTO_TLS connections (USE_TLS) tls_blocking_write() is used,
      *  otherwise send() (with MSG_NOSIGNAL if available).
      *  params:  fd  - file descriptor used for writing
      *           c   - tcp connection
      *           buf - data to be written
      *           len - length of the data
1943  *  returns number of bytes written on success, -1 on error (and sets errno)
1944  */
1945 inline static int _tcpconn_write_nb(int fd, struct tcp_connection* c,
1946                                                                         char* buf, int len)
1947 {
1948         int n;
1949         
1950 again:
1951 #ifdef USE_TLS
1952         if (unlikely(c->type==PROTO_TLS))
1953                 /* FIXME: tls_nonblocking_write !! */
1954                 n=tls_blocking_write(c, fd, buf, len);
1955         else
1956 #endif /* USE_TLS */
1957                 n=send(fd, buf, len,
1958 #ifdef HAVE_MSG_NOSIGNAL
1959                                         MSG_NOSIGNAL
1960 #else
1961                                         0
1962 #endif /* HAVE_MSG_NOSIGNAL */
1963                           );
1964         if (unlikely(n<0)){
                     /* interrupted by a signal => try again */
1965                 if (errno==EINTR) goto again;
1966         }
1967         return n;
1968 }
1969
1970
1971
1972 /* handles io from a tcp child process
1973  * params: tcp_c - pointer in the tcp_children array, to the entry for
1974  *                 which an io event was detected 
1975  *         fd_i  - fd index in the fd_array (usefull for optimizing
1976  *                 io_watch_deletes)
1977  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
1978  *           io events queued), >0 on success. success/error refer only to
1979  *           the reads from the fd.
1980  */
1981 inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
1982 {
1983         struct tcp_connection* tcpconn;
1984         long response[2];
1985         int cmd;
1986         int bytes;
1987         int n;
1988         ticks_t t;
1989         
1990         if (unlikely(tcp_c->unix_sock<=0)){
1991                 /* (we can't have a fd==0, 0 is never closed )*/
1992                 LOG(L_CRIT, "BUG: handle_tcp_child: fd %d for %d "
1993                                 "(pid %d, ser no %d)\n", tcp_c->unix_sock,
1994                                 (int)(tcp_c-&tcp_children[0]), tcp_c->pid, tcp_c->proc_no);
1995                 goto error;
1996         }
1997         /* read until sizeof(response)
1998          * (this is a SOCK_STREAM so read is not atomic) */
1999         bytes=recv_all(tcp_c->unix_sock, response, sizeof(response), MSG_DONTWAIT);
2000         if (unlikely(bytes<(int)sizeof(response))){
2001                 if (bytes==0){
2002                         /* EOF -> bad, child has died */
2003                         DBG("DBG: handle_tcp_child: dead tcp child %d (pid %d, no %d)"
2004                                         " (shutting down?)\n", (int)(tcp_c-&tcp_children[0]), 
2005                                         tcp_c->pid, tcp_c->proc_no );
2006                         /* don't listen on it any more */
2007                         io_watch_del(&io_h, tcp_c->unix_sock, fd_i, 0); 
2008                         goto error; /* eof. so no more io here, it's ok to return error */
2009                 }else if (bytes<0){
2010                         /* EAGAIN is ok if we try to empty the buffer
2011                          * e.g.: SIGIO_RT overflow mode or EPOLL ET */
2012                         if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
2013                                 LOG(L_CRIT, "ERROR: handle_tcp_child: read from tcp child %ld "
2014                                                 " (pid %d, no %d) %s [%d]\n",
2015                                                 (long)(tcp_c-&tcp_children[0]), tcp_c->pid,
2016                                                 tcp_c->proc_no, strerror(errno), errno );
2017                         }else{
2018                                 bytes=0;
2019                         }
2020                         /* try to ignore ? */
2021                         goto end;
2022                 }else{
2023                         /* should never happen */
2024                         LOG(L_CRIT, "BUG: handle_tcp_child: too few bytes received (%d)\n",
2025                                         bytes );
2026                         bytes=0; /* something was read so there is no error; otoh if
2027                                           receive_fd returned less then requested => the receive
2028                                           buffer is empty => no more io queued on this fd */
2029                         goto end;
2030                 }
2031         }
2032         
2033         DBG("handle_tcp_child: reader response= %lx, %ld from %d \n",
2034                                         response[0], response[1], (int)(tcp_c-&tcp_children[0]));
2035         cmd=response[1];
2036         tcpconn=(struct tcp_connection*)response[0];
2037         if (unlikely(tcpconn==0)){
2038                 /* should never happen */
2039                 LOG(L_CRIT, "BUG: handle_tcp_child: null tcpconn pointer received"
2040                                  " from tcp child %d (pid %d): %lx, %lx\n",
2041                                         (int)(tcp_c-&tcp_children[0]), tcp_c->pid,
2042                                         response[0], response[1]) ;
2043                 goto end;
2044         }
2045         switch(cmd){
2046                 case CONN_RELEASE:
2047                         tcp_c->busy--;
2048                         if (unlikely(tcpconn->state==S_CONN_BAD)){ 
2049 #ifdef TCP_BUF_WRITE
2050                                 if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
2051                                         io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
2052                                         tcpconn->flags &= ~F_CONN_WRITE_W;
2053                                 }
2054 #endif /* TCP_BUF_WRITE */
2055                                 tcpconn_destroy(tcpconn);
2056                                 break;
2057                         }
2058                         /* update the timeout*/
2059                         t=get_ticks_raw();
2060                         tcpconn->timeout=t+tcp_con_lifetime;
2061                         tcpconn_put(tcpconn);
2062                         /* re-activate the timer */
2063                         tcpconn->timer.f=tcpconn_main_timeout;
2064                         timer_reinit(&tcpconn->timer);
2065                         local_timer_add(&tcp_main_ltimer, &tcpconn->timer, 
2066                                                                 tcp_con_lifetime, t);
2067                         /* must be after the de-ref*/
2068                         tcpconn->flags&=~(F_CONN_REMOVED|F_CONN_READER);
2069 #ifdef TCP_BUF_WRITE
2070                         if (unlikely(tcpconn->flags & F_CONN_WRITE_W))
2071                                 n=io_watch_chg(&io_h, tcpconn->s, POLLIN| POLLOUT, -1);
2072                         else
2073 #endif /* TCP_BUF_WRITE */
2074                                 n=io_watch_add(&io_h, tcpconn->s, POLLIN, F_TCPCONN, tcpconn);
2075                         if (unlikely(n<0)){
2076                                 LOG(L_CRIT, "ERROR: tcp_main: handle_tcp_child: failed to add"
2077                                                 " new socket to the fd list\n");
2078                                 tcpconn->flags|=F_CONN_REMOVED;
2079 #ifdef TCP_BUF_WRITE
2080                                 if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
2081                                         io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
2082                                         tcpconn->flags&=~F_CONN_WRITE_W;
2083                                 }
2084 #endif /* TCP_BUF_WRITE */
2085                                 tcpconn_destroy(tcpconn); /* closes also the fd */
2086                         }
2087                         DBG("handle_tcp_child: CONN_RELEASE  %p refcnt= %d\n", 
2088                                                         tcpconn, atomic_get(&tcpconn->refcnt));
2089                         break;
2090                 case CONN_ERROR:
2091                 case CONN_DESTROY:
2092                 case CONN_EOF:
2093                         /* WARNING: this will auto-dec. refcnt! */
2094                                 tcp_c->busy--;
2095                                 /* main doesn't listen on it => we don't have to delete it
2096                                  if (tcpconn->s!=-1)
2097                                         io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
2098                                 */
2099 #ifdef TCP_BUF_WRITE
2100                                 if ((tcpconn->flags & F_CONN_WRITE_W) && (tcpconn->s!=-1)){
2101                                         io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
2102                                         tcpconn->flags&=~F_CONN_WRITE_W;
2103                                 }
2104 #endif /* TCP_BUF_WRITE */
2105                                 tcpconn_destroy(tcpconn); /* closes also the fd */
2106                                 break;
2107                 default:
2108                                 LOG(L_CRIT, "BUG: handle_tcp_child:  unknown cmd %d"
2109                                                                         " from tcp reader %d\n",
2110                                                                         cmd, (int)(tcp_c-&tcp_children[0]));
2111         }
2112 end:
2113         return bytes;
2114 error:
2115         return -1;
2116 }
2117
2118
2119
2120 /* handles io from a "generic" ser process (get fd or new_fd from a tcp_send)
2121  * 
2122  * params: p     - pointer in the ser processes array (pt[]), to the entry for
2123  *                 which an io event was detected
2124  *         fd_i  - fd index in the fd_array (usefull for optimizing
2125  *                 io_watch_deletes)
2126  * returns:  handle_* return convention:
2127  *          -1 on error reading from the fd,
2128  *           0 on EAGAIN  or when no  more io events are queued 
2129  *             (receive buffer empty),
2130  *           >0 on successfull reads from the fd (the receive buffer might
2131  *             be non-empty).
2132  */
2133 inline static int handle_ser_child(struct process_table* p, int fd_i)
2134 {
2135         struct tcp_connection* tcpconn;
2136         long response[2];
2137         int cmd;
2138         int bytes;
2139         int ret;
2140         int fd;
2141         int flags;
2142         ticks_t t;
2143         
2144         ret=-1;
2145         if (unlikely(p->unix_sock<=0)){
2146                 /* (we can't have a fd==0, 0 is never closed )*/
2147                 LOG(L_CRIT, "BUG: handle_ser_child: fd %d for %d "
2148                                 "(pid %d)\n", p->unix_sock, (int)(p-&pt[0]), p->pid);
2149                 goto error;
2150         }
2151                         
2152         /* get all bytes and the fd (if transmitted)
2153          * (this is a SOCK_STREAM so read is not atomic) */
2154         bytes=receive_fd(p->unix_sock, response, sizeof(response), &fd,
2155                                                 MSG_DONTWAIT);
2156         if (unlikely(bytes<(int)sizeof(response))){
2157                 /* too few bytes read */
2158                 if (bytes==0){
2159                         /* EOF -> bad, child has died */
2160                         DBG("DBG: handle_ser_child: dead child %d, pid %d"
2161                                         " (shutting down?)\n", (int)(p-&pt[0]), p->pid);
2162                         /* don't listen on it any more */
2163                         io_watch_del(&io_h, p->unix_sock, fd_i, 0);
2164                         goto error; /* child dead => no further io events from it */
2165                 }else if (bytes<0){
2166                         /* EAGAIN is ok if we try to empty the buffer
2167                          * e.g: SIGIO_RT overflow mode or EPOLL ET */
2168                         if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
2169                                 LOG(L_CRIT, "ERROR: handle_ser_child: read from child %d  "
2170                                                 "(pid %d):  %s [%d]\n", (int)(p-&pt[0]), p->pid,
2171                                                 strerror(errno), errno);
2172                                 ret=-1;
2173                         }else{
2174                                 ret=0;
2175                         }
2176                         /* try to ignore ? */
2177                         goto end;
2178                 }else{
2179                         /* should never happen */
2180                         LOG(L_CRIT, "BUG: handle_ser_child: too few bytes received (%d)\n",
2181                                         bytes );
2182                         ret=0; /* something was read so there is no error; otoh if
2183                                           receive_fd returned less then requested => the receive
2184                                           buffer is empty => no more io queued on this fd */
2185                         goto end;
2186                 }
2187         }
2188         ret=1; /* something was received, there might be more queued */
2189         DBG("handle_ser_child: read response= %lx, %ld, fd %d from %d (%d)\n",
2190                                         response[0], response[1], fd, (int)(p-&pt[0]), p->pid);
2191         cmd=response[1];
2192         tcpconn=(struct tcp_connection*)response[0];
2193         if (unlikely(tcpconn==0)){
2194                 LOG(L_CRIT, "BUG: handle_ser_child: null tcpconn pointer received"
2195                                  " from child %d (pid %d): %lx, %lx\n",
2196                                         (int)(p-&pt[0]), p->pid, response[0], response[1]) ;
2197                 goto end;
2198         }
2199         switch(cmd){
2200                 case CONN_ERROR:
2201                         if ( (!(tcpconn->flags & F_CONN_REMOVED) ||
2202                                         (tcpconn->flags & F_CONN_WRITE_W) ) && (tcpconn->s!=-1)){
2203                                 io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
2204                                 tcpconn->flags|=F_CONN_REMOVED;
2205                                 tcpconn->flags&=~F_CONN_WRITE_W;
2206                         }
2207                         LOG(L_ERR, "handle_ser_child: ERROR: received CON_ERROR for %p"
2208                                         " (id %d), refcnt %d\n", 
2209                                         tcpconn, tcpconn->id, atomic_get(&tcpconn->refcnt));
2210                         tcpconn_destroy(tcpconn); /* will close also the fd */
2211                         break;
2212                 case CONN_GET_FD:
2213                         /* send the requested FD  */
2214                         /* WARNING: take care of setting refcnt properly to
2215                          * avoid race condition */
2216                         if (unlikely(send_fd(p->unix_sock, &tcpconn, sizeof(tcpconn),
2217                                                                 tcpconn->s)<=0)){
2218                                 LOG(L_ERR, "ERROR: handle_ser_child: send_fd failed\n");
2219                         }
2220                         break;
2221                 case CONN_NEW:
2222                         /* update the fd in the requested tcpconn*/
2223                         /* WARNING: take care of setting refcnt properly to
2224                          * avoid race condition */
2225                         if (unlikely(fd==-1)){
2226                                 LOG(L_CRIT, "BUG: handle_ser_child: CONN_NEW:"
2227                                                         " no fd received\n");
2228                                 break;
2229                         }
2230                         (*tcp_connections_no)++;
2231                         tcpconn->s=fd;
2232                         /* add tcpconn to the list*/
2233                         tcpconn_add(tcpconn);
2234                         /* update the timeout*/
2235                         t=get_ticks_raw();
2236                         tcpconn->timeout=t+tcp_con_lifetime;
2237                         /* activate the timer (already properly init. in tcpconn_new() */
2238                         local_timer_add(&tcp_main_ltimer, &tcpconn->timer, 
2239                                                                 tcp_con_lifetime, t);
2240                         tcpconn->flags&=~F_CONN_REMOVED;
2241                         flags=POLLIN 
2242 #ifdef TCP_BUF_WRITE
2243                                         /* not used for now, the connection is sent to tcp_main
2244                                          * before knowing if we can write on it or we should 
2245                                          * wait */
2246                                         | (((int)!(tcpconn->flags & F_CONN_WRITE_W)-1) & POLLOUT)
2247 #endif /* TCP_BUF_WRITE */
2248                                         ;
2249                         if (unlikely(
2250                                         io_watch_add(&io_h, tcpconn->s, flags,
2251                                                                                                 F_TCPCONN, tcpconn)<0)){
2252                                 LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child: failed to add"
2253                                                 " new socket to the fd list\n");
2254                                 tcpconn->flags|=F_CONN_REMOVED;
2255                                 tcpconn->flags&=~F_CONN_WRITE_W;
2256                                 tcpconn_destroy(tcpconn); /* closes also the fd */
2257                         }
2258                         break;
2259 #ifdef TCP_BUF_WRITE
2260                 case CONN_QUEUED_WRITE:
2261                         if (!(tcpconn->flags & F_CONN_WRITE_W)){
2262                                 if (tcpconn->flags& F_CONN_REMOVED){
2263                                         if (unlikely(io_watch_add(&io_h, tcpconn->s, POLLOUT,
2264                                                                                                 F_TCPCONN, tcpconn)<0)){
2265                                                 LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child: failed"
2266                                                                     " to enable write watch on socket\n");
2267                                                 tcpconn_destroy(tcpconn);
2268                                                 break;
2269                                         }
2270                                 }else{
2271                                         if (unlikely(io_watch_chg(&io_h, tcpconn->s,
2272                                                                                                 POLLIN|POLLOUT, -1)<0)){
2273                                                 LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child: failed"
2274                                                                     " to change socket watch events\n");
2275                                                 io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
2276                                                 tcpconn->flags|=F_CONN_REMOVED;
2277                                                 tcpconn_destroy(tcpconn);
2278                                                 break;
2279                                         }
2280                                 }
2281                                 tcpconn->flags|=F_CONN_WRITE_W;
2282                         }else{
2283                                 LOG(L_WARN, "tcp_main: hanlder_ser_child: connection %p"
2284                                                         " already watched for write\n", tcpconn);
2285                         }
2286                         break;
2287 #endif /* TCP_BUF_WRITE */
2288                 default:
2289                         LOG(L_CRIT, "BUG: handle_ser_child: unknown cmd %d\n", cmd);
2290         }
2291 end:
2292         return ret;
2293 error:
2294         return -1;
2295 }
2296
2297
2298
2299 /* sends a tcpconn + fd to a choosen child */
2300 inline static int send2child(struct tcp_connection* tcpconn)
2301 {
2302         int i;
2303         int min_busy;
2304         int idx;
2305         static int crt=0; /* current child */
2306         int last;
2307         
2308         min_busy=tcp_children[0].busy;
2309         idx=0;
2310         last=crt+tcp_children_no;
2311         for (; crt<last; crt++){
2312                 i=crt%tcp_children_no;
2313                 if (!tcp_children[i].busy){
2314                         idx=i;
2315                         min_busy=0;
2316                         break;
2317                 }else if (min_busy>tcp_children[i].busy){
2318                         min_busy=tcp_children[i].busy;
2319                         idx=i;
2320                 }
2321         }
2322         crt=idx+1; /* next time we start with crt%tcp_children_no */
2323         
2324         tcp_children[idx].busy++;
2325         tcp_children[idx].n_reqs++;
2326         if (unlikely(min_busy)){
2327                 DBG("WARNING: send2child: no free tcp receiver, "
2328                                 " connection passed to the least busy one (%d)\n",
2329                                 min_busy);
2330         }
2331         DBG("send2child: to tcp child %d %d(%d), %p\n", idx, 
2332                                         tcp_children[idx].proc_no,
2333                                         tcp_children[idx].pid, tcpconn);
2334         /* first make sure this child doesn't have pending request for
2335          * tcp_main (to avoid a possible deadlock: e.g. child wants to
2336          * send a release command, but the master fills its socket buffer
2337          * with new connection commands => deadlock) */
2338         /* answer tcp_send requests first */
2339         while(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0);
2340         /* process tcp readers requests */
2341         while(handle_tcp_child(&tcp_children[idx], -1)>0);
2342                 
2343 #ifdef SEND_FD_QUEUE
2344         /* if queue full, try to queue the io */
2345         if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn,
2346                                                         sizeof(tcpconn), tcpconn->s)<=0)){
2347                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK)){
2348                         /* FIXME: remove after debugging */
2349                          LOG(L_CRIT, "INFO: tcp child %d, socket %d: queue full,"
2350                                                 " %d requests queued (total handled %d)\n",
2351                                         idx, tcp_children[idx].unix_sock, min_busy,
2352                                         tcp_children[idx].n_reqs-1);
2353                         if (send_fd_queue_add(&send2child_q, tcp_children[idx].unix_sock, 
2354                                                 tcpconn)!=0){
2355                                 LOG(L_ERR, "ERROR: send2child: queue send op. failed\n");
2356                                 return -1;
2357                         }
2358                 }else{
2359                         LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
2360                         return -1;
2361                 }
2362         }
2363 #else
2364         if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn,
2365                                                 sizeof(tcpconn), tcpconn->s)<=0)){
2366                 LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
2367                 return -1;
2368         }
2369 #endif
2370         
2371         return 0;
2372 }
2373
2374
2375
2376 /* handles a new connection, called internally by tcp_main_loop/handle_io.
2377  * params: si - pointer to one of the tcp socket_info structures on which
2378  *              an io event was detected (connection attempt)
2379  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
2380  *           io events queued), >0 on success. success/error refer only to
2381  *           the accept.
2382  */
2383 static inline int handle_new_connect(struct socket_info* si)
2384 {
2385         union sockaddr_union su;
2386         union sockaddr_union sock_name;
2387         unsigned sock_name_len;
2388         union sockaddr_union* dst_su;
2389         struct tcp_connection* tcpconn;
2390         socklen_t su_len;
2391         int new_sock;
2392         
2393         /* got a connection on r */
2394         su_len=sizeof(su);
2395         new_sock=accept(si->socket, &(su.s), &su_len);
2396         if (unlikely(new_sock==-1)){
2397                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK))
2398                         return 0;
2399                 LOG(L_ERR,  "WARNING: handle_new_connect: error while accepting"
2400                                 " connection(%d): %s\n", errno, strerror(errno));
2401                 return -1;
2402         }
2403         if (unlikely(*tcp_connections_no>=tcp_max_connections)){
2404                 LOG(L_ERR, "ERROR: maximum number of connections exceeded: %d/%d\n",
2405                                         *tcp_connections_no, tcp_max_connections);
2406                 close(new_sock);
2407                 return 1; /* success, because the accept was succesfull */
2408         }
2409         if (unlikely(init_sock_opt_accept(new_sock)<0)){
2410                 LOG(L_ERR, "ERROR: handle_new_connect: init_sock_opt failed\n");
2411                 close(new_sock);
2412                 return 1; /* success, because the accept was succesfull */
2413         }
2414         (*tcp_connections_no)++;
2415         
2416         dst_su=&si->su;
2417         if (unlikely(si->flags & SI_IS_ANY)){
2418                 /* INADDR_ANY => get local dst */
2419                 sock_name_len=sizeof(sock_name);
2420                 if (getsockname(new_sock, &sock_name.s, &sock_name_len)!=0){
2421                         LOG(L_ERR, "ERROR: handle_new_connect:"
2422                                                 " getsockname failed: %s(%d)\n",
2423                                                 strerror(errno), errno);
2424                         /* go on with the 0.0.0.0 dst from the sock_info */
2425                 }else{
2426                         dst_su=&sock_name;
2427                 }
2428         }
2429         /* add socket to list */
2430         tcpconn=tcpconn_new(new_sock, &su, dst_su, si, si->proto, S_CONN_ACCEPT);
2431         if (likely(tcpconn)){
2432 #ifdef TCP_PASS_NEW_CONNECTION_ON_DATA
2433                 tcpconn_add(tcpconn);
2434                 /* activate the timer */
2435                 local_timer_add(&tcp_main_ltimer, &tcpconn->timer, 
2436                                                                 tcp_con_lifetime, get_ticks_raw());
2437                 tcpconn->flags&=~F_CONN_REMOVED;
2438                 if (unlikely(io_watch_add(&io_h, tcpconn->s, POLLIN, 
2439                                                                                                         F_TCPCONN, tcpconn)<0)){
2440                         LOG(L_CRIT, "ERROR: tcp_main: handle_new_connect: failed to add"
2441                                                 " new socket to the fd list\n");
2442                         tcpconn->flags|=F_CONN_REMOVED;
2443                         tcpconn_destroy(tcpconn); /* closes also the fd */
2444                 }
2445 #else
2446                 atomic_set(&tcpconn->refcnt, 1); /* safe, not yet available to the
2447                                                                                         outside world */
2448                 tcpconn_add(tcpconn);
2449                 DBG("handle_new_connect: new connection: %p %d flags: %04x\n",
2450                         tcpconn, tcpconn->s, tcpconn->flags);
2451                 /* pass it to a child */
2452                 tcpconn->flags|=F_CONN_READER;
2453                 if(unlikely(send2child(tcpconn)<0)){
2454                         LOG(L_ERR,"ERROR: handle_new_connect: no children "
2455                                         "available\n");
2456                         tcpconn->flags&=~F_CONN_READER;
2457                         tcpconn_destroy(tcpconn);
2458                 }
2459 #endif
2460         }else{ /*tcpconn==0 */
2461                 LOG(L_ERR, "ERROR: handle_new_connect: tcpconn_new failed, "
2462                                 "closing socket\n");
2463                 close(new_sock);
2464                 (*tcp_connections_no)--;
2465         }
2466         return 1; /* accept() was succesfull */
2467 }
2468
2469
2470
2471 /* handles an io event on one of the watched tcp connections
2472  * 
2473  * params: tcpconn - pointer to the tcp_connection for which we have an io ev.
2474  *         fd_i    - index in the fd_array table (needed for delete)
2475  * returns:  handle_* return convention, but on success it always returns 0
2476  *           (because it's one-shot, after a succesful execution the fd is
2477  *            removed from tcp_main's watch fd list and passed to a child =>
2478  *            tcp_main is not interested in further io events that might be
2479  *            queued for this fd)
2480  */
2481 inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, short ev, 
2482                                                                                 int fd_i)
2483 {
2484 #ifdef TCP_BUF_WRITE
2485         int empty_q;
2486 #endif /* TCP_BUF_WRITE */
2487         /*  is refcnt!=0 really necessary? 
2488          *  No, in fact it's a bug: I can have the following situation: a send only
2489          *   tcp connection used by n processes simultaneously => refcnt = n. In 
2490          *   the same time I can have a read event and this situation is perfectly
2491          *   valid. -- andrei
2492          */
2493 #if 0
2494         if ((tcpconn->refcnt!=0)){
2495                 /* FIXME: might be valid for sigio_rt iff fd flags are not cleared
2496                  *        (there is a short window in which it could generate a sig
2497                  *         that would be catched by tcp_main) */
2498                 LOG(L_CRIT, "BUG: handle_tcpconn_ev: io event on referenced"
2499                                         " tcpconn (%p), refcnt=%d, fd=%d\n",
2500                                         tcpconn, tcpconn->refcnt, tcpconn->s);
2501                 return -1;
2502         }
2503 #endif
2504         /* pass it to child, so remove it from the io watch list  and the local
2505          *  timer */
2506         DBG("handle_tcpconn_ev: ev (%0x) on %p %d\n", ev, tcpconn, tcpconn->s);
2507 #ifdef TCP_BUF_WRITE
2508         if (unlikely((ev & POLLOUT) && (tcpconn->flags & F_CONN_WRITE_W))){
2509                 if (unlikely(wbufq_run(tcpconn->s, tcpconn, &empty_q)<0)){
2510                         io_watch_del(&io_h, tcpconn->s, fd_i, 0);
2511                         tcpconn->flags|=F_CONN_REMOVED;
2512                         tcpconn->flags&=~F_CONN_WRITE_W;
2513                         tcpconn_destroy(tcpconn);
2514                         goto error;
2515                 }
2516                 if (empty_q){
2517                         if (tcpconn->flags & F_CONN_REMOVED){
2518                                 if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1))
2519                                         goto error;
2520                         }else{
2521                                 if (unlikely(io_watch_chg(&io_h, tcpconn->s,
2522                                                                                         POLLIN, fd_i)==-1))
2523                                         goto error;
2524                         }
2525                 }
2526         }
2527         if (likely((ev & POLLIN) && !(tcpconn->flags & F_CONN_REMOVED))){
2528                 if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
2529                         if (unlikely(io_watch_chg(&io_h, tcpconn->s, POLLOUT, fd_i)==-1))
2530                                 goto error;
2531                 }else
2532 #else
2533         {
2534 #endif /* TCP_BUF_WRITE */
2535                         if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1))
2536                                 goto error;
2537                 tcpconn->flags|=F_CONN_REMOVED|F_CONN_READER;
2538                 local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
2539                 tcpconn_ref(tcpconn); /* refcnt ++ */
2540                 if (unlikely(send2child(tcpconn)<0)){
2541                         LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n");
2542                         tcpconn->flags&=~F_CONN_READER;
2543 #ifdef TCP_BUF_WRITE
2544                         if (tcpconn->flags & F_CONN_WRITE_W){
2545                                 io_watch_del(&io_h, tcpconn->s, fd_i, 0);
2546                                 tcpconn->flags&=~F_CONN_WRITE_W;
2547                         }
2548 #endif /* TCP_BUF_WRITE */
2549                         tcpconn_destroy(tcpconn);
2550                 }
2551         }
2552         return 0; /* we are not interested in possibly queued io events, 
2553                                  the fd was either passed to a child, closed, or for writes,
2554                                  everything possible was already written */
2555 error:
2556         return -1;
2557 }
2558
2559
2560
2561 /* generic handle io routine, it will call the appropiate
2562  *  handle_xxx() based on the fd_map type
2563  *
2564  * params:  fm  - pointer to a fd hash entry
2565  *          idx - index in the fd_array (or -1 if not known)
2566  * return: -1 on error
2567  *          0 on EAGAIN or when by some other way it is known that no more 
2568  *            io events are queued on the fd (the receive buffer is empty).
2569  *            Usefull to detect when there are no more io events queued for
2570  *            sigio_rt, epoll_et, kqueue.
2571  *         >0 on successfull read from the fd (when there might be more io
2572  *            queued -- the receive buffer might still be non-empty)
2573  */
2574 inline static int handle_io(struct fd_map* fm, short ev, int idx)
2575 {       
2576         int ret;
2577
2578         /* update the local config */
2579         cfg_update();
2580         
2581         switch(fm->type){
2582                 case F_SOCKINFO:
2583                         ret=handle_new_connect((struct socket_info*)fm->data);
2584                         break;
2585                 case F_TCPCONN:
2586                         ret=handle_tcpconn_ev((struct tcp_connection*)fm->data, ev, idx);
2587                         break;
2588                 case F_TCPCHILD:
2589                         ret=handle_tcp_child((struct tcp_child*)fm->data, idx);
2590                         break;
2591                 case F_PROC:
2592                         ret=handle_ser_child((struct process_table*)fm->data, idx);
2593                         break;
2594                 case F_NONE:
2595                         LOG(L_CRIT, "BUG: handle_io: empty fd map: %p {%d, %d, %p},"
2596                                                 " idx %d\n", fm, fm->fd, fm->type, fm->data, idx);
2597                         goto error;
2598                 default:
2599                         LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type); 
2600                         goto error;
2601         }
2602         return ret;
2603 error:
2604         return -1;
2605 }
2606
2607
2608
2609 /* timer handler for tcpconnection handled by tcp_main */
2610 static ticks_t tcpconn_main_timeout(ticks_t t, struct timer_ln* tl, void* data)
2611 {
2612         struct tcp_connection *c;
2613         int fd;
2614         
2615         c=(struct tcp_connection*)data; 
2616         /* or (struct tcp...*)(tl-offset(c->timer)) */
2617         
2618         if (TICKS_LT(t, c->timeout)){
2619                 /* timeout extended, exit */
2620                 return (ticks_t)(c->timeout - t);
2621         }
2622         if (likely(atomic_get(&c->refcnt)==0)){
2623                 TCPCONN_LOCK;
2624                         /* check again to avoid races with tcp_send() */
2625                         if (likely(atomic_get(&c->refcnt)==0)){
2626                                 /* delete */
2627                                 _tcpconn_detach(c);
2628                                 TCPCONN_UNLOCK; /* unlock as soon as possible */
2629                                 fd=c->s;
2630                                 if (likely(fd>0)){
2631                                         if (likely(!(c->flags & F_CONN_REMOVED)
2632 #ifdef TCP_BUF_WRITE
2633                                                                 || (c->flags & F_CONN_WRITE_W)
2634 #endif /* TCP_BUF_WRITE */
2635                                                                 )){
2636                                                 io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
2637                                                 c->flags|=F_CONN_REMOVED;
2638 #ifdef TCP_BUF_WRITE
2639                                                 c->flags&=~F_CONN_WRITE_W;
2640 #endif /* TCP_BUF_WRITE */
2641                                         }
2642 #ifdef USE_TLS
2643                                         if (unlikely(c->type==PROTO_TLS ))
2644                                                 tls_close(c, fd);
2645 #endif /* USE_TLS */
2646                                         _tcpconn_free(c);
2647 #ifdef TCP_FD_CACHE
2648                                         if (likely(tcp_options.fd_cache)) shutdown(fd, SHUT_RDWR);
2649 #endif /* TCP_FD_CACHE */
2650                                         close(fd);
2651                                 }
2652                                 (*tcp_connections_no)--; /* modified only in tcp_main
2653                                                                                          => no lock needed */
2654                                 return 0; /* don't prolong the timer anymore */
2655                         }
2656                 TCPCONN_UNLOCK;
2657         }
2658         /* if we are here we can't delete the connection, it's still referenced
2659          *  => we just delay deleting it */
2660         return TCPCONN_WAIT_TIMEOUT;
2661 }
2662
2663
2664
2665 static inline void tcp_timer_run()
2666 {
2667         ticks_t ticks;
2668         static ticks_t prev_ticks=0;
2669         
2670         ticks=get_ticks_raw();
2671         if (unlikely((ticks-prev_ticks)<TCPCONN_TIMEOUT_MIN_RUN)) return;
2672         prev_ticks=ticks;
2673         local_timer_run(&tcp_main_ltimer, ticks);
2674 }
2675
2676
2677
2678 /* keep in sync with tcpconn_destroy, the "delete" part should be
2679  * the same except for io_watch_del..
2680  * Note: this function is called only on shutdown by the main ser process via
2681  * cleanup(). However it's also safe to call it from the tcp_main process.
2682  * => with the ser shutdown exception, it cannot execute in parallel
2683  * with tcpconn_add() or tcpconn_destroy()*/
2684 static inline void tcpconn_destroy_all()
2685 {
2686         struct tcp_connection *c, *next;
2687         unsigned h;
2688         int fd;
2689         
2690         
2691         TCPCONN_LOCK; 
2692         for(h=0; h<TCP_ID_HASH_SIZE; h++){
2693                 c=tcpconn_id_hash[h];
2694                 while(c){
2695                         next=c->id_next;
2696                                 if (is_tcp_main){
2697                                         /* we cannot close or remove the fd if we are not in the
2698                                          * tcp main proc.*/
2699                                         if (!(c->flags & F_CONN_READER))
2700                                                 local_timer_del(&tcp_main_ltimer, &c->timer);
2701                                         /* else still in some reader */
2702                                         fd=c->s;
2703                                         if (fd>0 && (!(c->flags & F_CONN_REMOVED)
2704 #ifdef TCP_BUF_WRITE
2705                                                                 || (c->flags & F_CONN_WRITE_W)
2706 #endif /* TCP_BUF_WRITE */
2707                                                                 )){
2708                                                 io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
2709                                                 c->flags|=F_CONN_REMOVED;
2710 #ifdef TCP_BUF_WRITE
2711                                                 c->flags&=~F_CONN_WRITE_W;
2712 #endif /* TCP_BUF_WRITE */
2713                                         }
2714                                 }else{
2715                                         fd=-1;
2716                                 }
2717 #ifdef USE_TLS
2718                                 if (fd>0 && c->type==PROTO_TLS)
2719                                         tls_close(c, fd);
2720 #endif
2721                                 _tcpconn_rm(c);
2722                                 if (fd>0) {
2723 #ifdef TCP_FD_CACHE
2724                                         if (likely(tcp_options.fd_cache)) shutdown(fd, SHUT_RDWR);
2725 #endif /* TCP_FD_CACHE */
2726                                         close(fd);
2727                                 }
2728                                 (*tcp_connections_no)--;
2729                         c=next;
2730                 }
2731         }
2732         TCPCONN_UNLOCK;
2733 }
2734
2735
2736
2737 /* tcp main loop */
2738 void tcp_main_loop()
2739 {
2740
2741         struct socket_info* si;
2742         int r;
2743         
2744         is_tcp_main=1; /* mark this process as tcp main */
2745         
2746         tcp_main_max_fd_no=get_max_open_fds();
2747         /* init send fd queues (here because we want mem. alloc only in the tcp
2748          *  process */
2749 #ifdef SEND_FD_QUEUE
2750         if (init_send_fd_queues()<0){
2751                 LOG(L_CRIT, "ERROR: init_tcp: could not init send fd queues\n");
2752                 goto error;
2753         }
2754 #endif
2755         /* init io_wait (here because we want the memory allocated only in
2756          * the tcp_main process) */
2757         if  (init_io_wait(&io_h, tcp_main_max_fd_no, tcp_poll_method)<0)
2758                 goto error;
2759         /* init: start watching all the fds*/
2760         
2761         /* init local timer */
2762         if (init_local_timer(&tcp_main_ltimer, get_ticks_raw())!=0){
2763                 LOG(L_ERR, "ERROR: init_tcp: failed to init local timer\n");
2764                 goto error;
2765         }
2766 #ifdef TCP_FD_CACHE
2767         if (tcp_options.fd_cache) tcp_fd_cache_init();
2768 #endif /* TCP_FD_CACHE */
2769         
2770         /* add all the sockets we listen on for connections */
2771         for (si=tcp_listen; si; si=si->next){
2772                 if ((si->proto==PROTO_TCP) &&(si->socket!=-1)){
2773                         if (io_watch_add(&io_h, si->socket, POLLIN, F_SOCKINFO, si)<0){
2774                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
2775                                                         "listen socket to the fd list\n");
2776                                 goto error;
2777                         }
2778                 }else{
2779                         LOG(L_CRIT, "BUG: tcp_main_loop: non tcp address in tcp_listen\n");
2780                 }
2781         }
2782 #ifdef USE_TLS
2783         if (!tls_disable && tls_loaded()){
2784                 for (si=tls_listen; si; si=si->next){
2785                         if ((si->proto==PROTO_TLS) && (si->socket!=-1)){
2786                                 if (io_watch_add(&io_h, si->socket, POLLIN, F_SOCKINFO, si)<0){
2787                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
2788                                                         "tls listen socket to the fd list\n");
2789                                         goto error;
2790                                 }
2791                         }else{
2792                                 LOG(L_CRIT, "BUG: tcp_main_loop: non tls address"
2793                                                 " in tls_listen\n");
2794                         }
2795                 }
2796         }
2797 #endif
2798         /* add all the unix sockets used for communcation with other ser processes
2799          *  (get fd, new connection a.s.o) */
2800         for (r=1; r<process_no; r++){
2801                 if (pt[r].unix_sock>0) /* we can't have 0, we never close it!*/
2802                         if (io_watch_add(&io_h, pt[r].unix_sock, POLLIN,F_PROC, &pt[r])<0){
2803                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
2804                                                         "process %d unix socket to the fd list\n", r);
2805                                         goto error;
2806                         }
2807         }
2808         /* add all the unix sokets used for communication with the tcp childs */
2809         for (r=0; r<tcp_children_no; r++){
2810                 if (tcp_children[r].unix_sock>0)/*we can't have 0, we never close it!*/
2811                         if (io_watch_add(&io_h, tcp_children[r].unix_sock, POLLIN,
2812                                                                         F_TCPCHILD, &tcp_children[r]) <0){
2813                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
2814                                                 "tcp child %d unix socket to the fd list\n", r);
2815                                 goto error;
2816                         }
2817         }
2818
2819
2820         /* initialize the cfg framework */
2821         if (cfg_child_init()) goto error;
2822
2823         /* main loop */
2824         switch(io_h.poll_method){
2825                 case POLL_POLL:
2826                         while(1){
2827                                 /* wait and process IO */
2828                                 io_wait_loop_poll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0); 
2829                                 send_fd_queue_run(&send2child_q); /* then new io */
2830                                 /* remove old connections */
2831                                 tcp_timer_run();
2832                         }
2833                         break;
2834 #ifdef HAVE_SELECT
2835                 case POLL_SELECT:
2836                         while(1){
2837                                 io_wait_loop_select(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
2838                                 send_fd_queue_run(&send2child_q); /* then new io */
2839                                 tcp_timer_run();
2840                         }
2841                         break;
2842 #endif
2843 #ifdef HAVE_SIGIO_RT
2844                 case POLL_SIGIO_RT:
2845                         while(1){
2846                                 io_wait_loop_sigio_rt(&io_h, TCP_MAIN_SELECT_TIMEOUT);
2847                                 send_fd_queue_run(&send2child_q); /* then new io */
2848                                 tcp_timer_run();
2849                         }
2850                         break;
2851 #endif
2852 #ifdef HAVE_EPOLL
2853                 case POLL_EPOLL_LT:
2854                         while(1){
2855                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
2856                                 send_fd_queue_run(&send2child_q); /* then new io */
2857                                 tcp_timer_run();
2858                         }
2859                         break;
2860                 case POLL_EPOLL_ET:
2861                         while(1){
2862                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 1);
2863                                 send_fd_queue_run(&send2child_q); /* then new io */
2864                                 tcp_timer_run();
2865                         }
2866                         break;
2867 #endif
2868 #ifdef HAVE_KQUEUE
2869                 case POLL_KQUEUE:
2870                         while(1){
2871                                 io_wait_loop_kqueue(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
2872                                 send_fd_queue_run(&send2child_q); /* then new io */
2873                                 tcp_timer_run();
2874                         }
2875                         break;
2876 #endif
2877 #ifdef HAVE_DEVPOLL
2878                 case POLL_DEVPOLL:
2879                         while(1){
2880                                 io_wait_loop_devpoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
2881                                 send_fd_queue_run(&send2child_q); /* then new io */
2882                                 tcp_timer_run();
2883                         }
2884                         break;
2885 #endif
2886                 default:
2887                         LOG(L_CRIT, "BUG: tcp_main_loop: no support for poll method "
2888                                         " %s (%d)\n", 
2889                                         poll_method_name(io_h.poll_method), io_h.poll_method);
2890                         goto error;
2891         }
2892 error:
2893 #ifdef SEND_FD_QUEUE
2894         destroy_send_fd_queues();
2895 #endif
2896         destroy_io_wait(&io_h);
2897         LOG(L_CRIT, "ERROR: tcp_main_loop: exiting...");
2898         exit(-1);
2899 }
2900
2901
2902
2903 /* cleanup before exit */
2904 void destroy_tcp()
2905 {
2906                 if (tcpconn_id_hash){
2907                         if (tcpconn_lock)
2908                                 TCPCONN_UNLOCK; /* hack: force-unlock the tcp lock in case
2909                                                                    some process was terminated while holding 
2910                                                                    it; this will allow an almost gracious 
2911                                                                    shutdown */
2912                         tcpconn_destroy_all(); 
2913                         shm_free(tcpconn_id_hash);
2914                         tcpconn_id_hash=0;
2915                 }
2916                 if (tcp_connections_no){
2917                         shm_free(tcp_connections_no);
2918                         tcp_connections_no=0;
2919                 }
2920 #ifdef TCP_BUF_WRITE
2921                 if (tcp_total_wq){
2922                         shm_free(tcp_total_wq);
2923                         tcp_total_wq=0;
2924                 }
2925 #endif /* TCP_BUF_WRITE */
2926                 if (connection_id){
2927                         shm_free(connection_id);
2928                         connection_id=0;
2929                 }
2930                 if (tcpconn_aliases_hash){
2931                         shm_free(tcpconn_aliases_hash);
2932                         tcpconn_aliases_hash=0;
2933                 }
2934                 if (tcpconn_lock){
2935                         lock_destroy(tcpconn_lock);
2936                         lock_dealloc((void*)tcpconn_lock);
2937                         tcpconn_lock=0;
2938                 }
2939                 if (tcp_children){
2940                         pkg_free(tcp_children);
2941                         tcp_children=0;
2942                 }
2943                 destroy_local_timer(&tcp_main_ltimer);
2944 }
2945
2946
2947
2948 int init_tcp()
2949 {
2950         char* poll_err;
2951         
2952         tcp_options_check();
2953         /* init lock */
2954         tcpconn_lock=lock_alloc();
2955         if (tcpconn_lock==0){
2956                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc lock\n");
2957                 goto error;
2958         }
2959         if (lock_init(tcpconn_lock)==0){
2960                 LOG(L_CRIT, "ERROR: init_tcp: could not init lock\n");
2961                 lock_dealloc((void*)tcpconn_lock);
2962                 tcpconn_lock=0;
2963                 goto error;
2964         }
2965         /* init globals */
2966         tcp_connections_no=shm_malloc(sizeof(int));
2967         if (tcp_connections_no==0){
2968                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
2969                 goto error;
2970         }
2971         *tcp_connections_no=0;
2972         connection_id=shm_malloc(sizeof(int));
2973         if (connection_id==0){
2974                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
2975                 goto error;
2976         }
2977         *connection_id=1;
2978 #ifdef TCP_BUF_WRITE
2979         tcp_total_wq=shm_malloc(sizeof(*tcp_total_wq));
2980         if (tcp_total_wq==0){
2981                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
2982                 goto error;
2983         }
2984 #endif /* TCP_BUF_WRITE */
2985         /* alloc hashtables*/
2986         tcpconn_aliases_hash=(struct tcp_conn_alias**)
2987                         shm_malloc(TCP_ALIAS_HASH_SIZE* sizeof(struct tcp_conn_alias*));
2988         if (tcpconn_aliases_hash==0){
2989                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc address hashtable\n");
2990                 goto error;
2991         }
2992         tcpconn_id_hash=(struct tcp_connection**)shm_malloc(TCP_ID_HASH_SIZE*
2993                                                                 sizeof(struct tcp_connection*));
2994         if (tcpconn_id_hash==0){
2995                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc id hashtable\n");
2996                 goto error;
2997         }
2998         /* init hashtables*/
2999         memset((void*)tcpconn_aliases_hash, 0, 
3000                         TCP_ALIAS_HASH_SIZE * sizeof(struct tcp_conn_alias*));
3001         memset((void*)tcpconn_id_hash, 0, 
3002                         TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
3003         
3004         /* fix config variables */
3005         if (tcp_connect_timeout<0)
3006                 tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT;
3007         if (tcp_send_timeout<0)
3008                 tcp_send_timeout=DEFAULT_TCP_SEND_TIMEOUT;
3009         if (tcp_con_lifetime<0){
3010                 /* set to max value (~ 1/2 MAX_INT) */
3011                 tcp_con_lifetime=MAX_TCP_CON_LIFETIME;
3012         }else{
3013                 if ((unsigned)tcp_con_lifetime > 
3014                                 (unsigned)TICKS_TO_S(MAX_TCP_CON_LIFETIME)){
3015                         LOG(L_WARN, "init_tcp: tcp_con_lifetime too big (%u s), "
3016                                         " the maximum value is %u\n", tcp_con_lifetime,
3017                                         TICKS_TO_S(MAX_TCP_CON_LIFETIME));
3018                         tcp_con_lifetime=MAX_TCP_CON_LIFETIME;
3019                 }else{
3020                         tcp_con_lifetime=S_TO_TICKS(tcp_con_lifetime);
3021                 }
3022         }
3023         
3024                 poll_err=check_poll_method(tcp_poll_method);
3025         
3026         /* set an appropriate poll method */
3027         if (poll_err || (tcp_poll_method==0)){
3028                 tcp_poll_method=choose_poll_method();