38a4c4baec18bd426e2441f90238483320d06b30
[sip-router] / tcp_main.c
1 /*
2  * $Id$
3  *
4  * Copyright (C) 2001-2003 FhG Fokus
5  *
6  * This file is part of ser, a free SIP server.
7  *
8  * ser is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version
12  *
13  * For a license to use the ser software under conditions
14  * other than those described here, or to purchase support for this
15  * software, please contact iptel.org by e-mail at the following addresses:
16  *    info@iptel.org
17  *
18  * ser is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26  */
27 /*
28  * History:
29  * --------
30  *  2002-11-29  created by andrei
31  *  2002-12-11  added tcp_send (andrei)
32  *  2003-01-20  locking fixes, hashtables (andrei)
33  *  2003-02-20  s/lock_t/gen_lock_t/ to avoid a conflict on solaris (andrei)
34  *  2003-02-25  Nagle is disabled if -DDISABLE_NAGLE (andrei)
35  *  2003-03-29  SO_REUSEADDR before calling bind to allow
 *              server restart, Nagle set on the (hopefully)
37  *              correct socket (jiri)
38  *  2003-03-31  always try to find the corresponding tcp listen socket for
 *               a temp. socket and store it in *->bind_address: added
40  *               find_tcp_si, modified tcpconn_connect (andrei)
41  *  2003-04-14  set sockopts to TOS low delay (andrei)
42  *  2003-06-30  moved tcp new connect checking & handling to
43  *               handle_new_connect (andrei)
44  *  2003-07-09  tls_close called before closing the tcp connection (andrei)
45  *  2003-10-24  converted to the new socket_info lists (andrei)
46  *  2003-10-27  tcp port aliases support added (andrei)
47  *  2003-11-04  always lock before manipulating refcnt; sendchild
48  *              does not inc refcnt by itself anymore (andrei)
49  *  2003-11-07  different unix sockets are used for fd passing
50  *              to/from readers/writers (andrei)
51  *  2003-11-17  handle_new_connect & tcp_connect will close the 
52  *              new socket if tcpconn_new return 0 (e.g. out of mem) (andrei)
53  *  2003-11-28  tcp_blocking_write & tcp_blocking_connect added (andrei)
54  *  2004-11-08  dropped find_tcp_si and replaced with find_si (andrei)
55  *  2005-06-07  new tcp optimized code, supports epoll (LT), sigio + real time
56  *               signals, poll & select (andrei)
57  *  2005-06-26  *bsd kqueue support (andrei)
58  *  2005-07-04  solaris /dev/poll support (andrei)
59  *  2005-07-08  tcp_max_connections, tcp_connection_lifetime, don't accept
60  *               more connections if tcp_max_connections is exceeded (andrei)
61  *  2005-10-21  cleanup all the open connections on exit
 *              decrement the no. of open connections on timeout too (andrei)
 *  2006-01-30  queue send_fd request and execute them at the end of the
63  *              poll loop  (#ifdef) (andrei)
64  *              process all children requests, before attempting to send
65  *              them new stuff (fixes some deadlocks) (andrei)
66  *  2006-02-03  timers are run only once per s (andrei)
67  *              tcp children fds can be non-blocking; send fds are queued on
68  *              EAGAIN; lots of bug fixes (andrei)
69  *  2006-02-06  better tcp_max_connections checks, tcp_connections_no moved to
70  *              shm (andrei)
71  *  2006-04-12  tcp_send() changed to use struct dest_info (andrei)
72  *  2006-11-02  switched to atomic ops for refcnt, locking improvements 
73  *               (andrei)
74  *  2006-11-04  switched to raw ticks (to fix conversion errors which could
75  *               result in inf. lifetime) (andrei)
76  *  2007-07-25  tcpconn_connect can now bind the socket on a specified
77  *                source addr/port (andrei)
78  */
79
80
81 #ifdef USE_TCP
82
83
84 #ifndef SHM_MEM
85 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
86 #endif
87
88 #include <sys/time.h>
89 #include <sys/types.h>
90 #include <sys/select.h>
91 #include <sys/socket.h>
92 #include <netinet/in.h>
93 #include <netinet/in_systm.h>
94 #include <netinet/ip.h>
95 #include <netinet/tcp.h>
96 #include <sys/uio.h>  /* writev*/
97 #include <netdb.h>
98 #include <stdlib.h> /*exit() */
99
100 #include <unistd.h>
101
102 #include <errno.h>
103 #include <string.h>
104
105 #ifdef HAVE_SELECT
106 #include <sys/select.h>
107 #endif
108 #include <sys/poll.h>
109
110
111 #include "ip_addr.h"
112 #include "pass_fd.h"
113 #include "tcp_conn.h"
114 #include "globals.h"
115 #include "pt.h"
116 #include "locking.h"
117 #include "mem/mem.h"
118 #include "mem/shm_mem.h"
119 #include "timer.h"
120 #include "sr_module.h"
121 #include "tcp_server.h"
122 #include "tcp_init.h"
123 #include "tsend.h"
124 #include "timer_ticks.h"
125 #ifdef CORE_TLS
126 #include "tls/tls_server.h"
127 #define tls_loaded() 1
128 #else
129 #include "tls_hooks_init.h"
130 #include "tls_hooks.h"
131 #endif
132
133 #include "tcp_info.h"
134
135 #define local_malloc pkg_malloc
136 #define local_free   pkg_free
137
138 #define HANDLE_IO_INLINE
139 #include "io_wait.h"
140 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
141
/* Compile-time switches and tunables for the tcp main process. */
#define TCP_PASS_NEW_CONNECTION_ON_DATA /* don't pass a new connection
										   immediately to a child, wait for
										   some data on it first */
/* NOTE(review): presumably the backlog handed to listen(); the use site is
 * not visible in this part of the file */
#define TCP_LISTEN_BACKLOG 1024
#define SEND_FD_QUEUE /* queue send fd requests on EAGAIN, instead of sending 
						 them immediately */
#define TCP_CHILD_NON_BLOCKING 
#ifdef SEND_FD_QUEUE
/* queueing send fd requests requires TCP_CHILD_NON_BLOCKING, so force it on
 * if it was not already defined above */
#ifndef TCP_CHILD_NON_BLOCKING
#define TCP_CHILD_NON_BLOCKING
#endif
/* upper bound of the queue size: follows the tcp_main_max_fd_no variable */
#define MAX_SEND_FD_QUEUE_SIZE	tcp_main_max_fd_no
#define SEND_FD_QUEUE_SIZE		128  /* initial size */
#define MAX_SEND_FD_RETRIES		96	 /* FIXME: not used for now */
#define SEND_FD_QUEUE_TIMEOUT	MS_TO_TICKS(2000)  /* 2 s */
#endif

/* maximum accepted lifetime (maximum possible is  ~ MAXINT/2);
 * computed as the largest value that fits in all but the top bit of
 * ticks_t */
#define MAX_TCP_CON_LIFETIME	((1U<<(sizeof(ticks_t)*8-1))-1)
/* minimum interval tcpconn_timeout() is allowed to run, in ticks */
#define TCPCONN_TIMEOUT_MIN_RUN S_TO_TICKS(1)  /* once per s */
163
/* kinds of file descriptors distinguished by this file
 * (NOTE(review): presumably the type tag stored with each fd registered in
 *  the io_wait handler -- confirm against the handle_* code) */
enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
				F_TCPCONN, F_TCPCHILD, F_PROC };

/* NOTE(review): presumably non-zero only in the tcp main process; the code
 * that sets it is not visible in this part of the file */
static int is_tcp_main=0;

int tcp_accept_aliases=0; /* by default don't accept aliases */
/* timeout used by tcp_blocking_connect() */
int tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT;
int tcp_send_timeout=DEFAULT_TCP_SEND_TIMEOUT;
/* added to get_ticks_raw() to compute a connection's timeout */
int tcp_con_lifetime=DEFAULT_TCP_CONNECTION_LIFETIME;
enum poll_types tcp_poll_method=0; /* by default choose the best method */
/* tcpconn_connect() refuses to open new connections once
 * *tcp_connections_no reaches this value */
int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
/* also used as MAX_SEND_FD_QUEUE_SIZE */
int tcp_main_max_fd_no=0;

/* default source addresses for outgoing connections; the pointers stay 0
 * until tcp_set_src_addr() is called for the corresponding family */
static union sockaddr_union tcp_source_ipv4_addr; /* saved bind/srv v4 addr. */
static union sockaddr_union* tcp_source_ipv4=0;
#ifdef USE_IPV6
static union sockaddr_union tcp_source_ipv6_addr; /* saved bind/src v6 addr. */
static union sockaddr_union* tcp_source_ipv6=0;
#endif

static int* tcp_connections_no=0; /* current open connections */

/* connection hash table (after ip&port) , includes also aliases */
struct tcp_conn_alias** tcpconn_aliases_hash=0;
/* connection hash table (after connection id) */
struct tcp_connection** tcpconn_id_hash=0;
/* NOTE(review): presumably the lock behind TCPCONN_LOCK/TCPCONN_UNLOCK that
 * protects the two hash tables above -- confirm in tcp_conn.h */
gen_lock_t* tcpconn_lock=0;

struct tcp_child* tcp_children;
static int* connection_id=0; /*  unique for each connection, used for 
								quickly finding the corresponding connection
								for a reply */
int unix_tcp_sock;

static int tcp_proto_no=-1; /* tcp protocol number as returned by
							   getprotobyname */

/* NOTE(review): presumably the io_wait handle of the tcp main loop; it is
 * not used in the visible part of the file */
static io_wait_h io_h;
202
203
204
205 /* sets source address used when opening new sockets and no source is specified
206  *  (by default the address is choosen by the kernel)
207  * Should be used only on init.
208  * returns -1 on error */
209 int tcp_set_src_addr(struct ip_addr* ip)
210 {
211         switch (ip->af){
212                 case AF_INET:
213                         ip_addr2su(&tcp_source_ipv4_addr, ip, 0);
214                         tcp_source_ipv4=&tcp_source_ipv4_addr;
215                         break;
216                 #ifdef USE_IPV6
217                 case AF_INET6:
218                         ip_addr2su(&tcp_source_ipv6_addr, ip, 0);
219                         tcp_source_ipv6=&tcp_source_ipv6_addr;
220                         break;
221                 #endif
222                 default:
223                         return -1;
224         }
225         return 0;
226 }
227
228
229
230 /* set all socket/fd options:  disable nagle, tos lowdelay, non-blocking
231  * return -1 on error */
232 static int init_sock_opt(int s)
233 {
234         int flags;
235         int optval;
236         
237 #ifdef DISABLE_NAGLE
238         flags=1;
239         if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
240                                         &flags, sizeof(flags))<0) ){
241                 LOG(L_WARN, "WARNING: init_sock_opt: could not disable Nagle: %s\n",
242                                 strerror(errno));
243         }
244 #endif
245         /* tos*/
246         optval = tos;
247         if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,sizeof(optval)) ==-1){
248                 LOG(L_WARN, "WARNING: init_sock_opt: setsockopt tos: %s\n",
249                                 strerror(errno));
250                 /* continue since this is not critical */
251         }
252         /* non-blocking */
253         flags=fcntl(s, F_GETFL);
254         if (flags==-1){
255                 LOG(L_ERR, "ERROR: init_sock_opt: fnctl failed: (%d) %s\n",
256                                 errno, strerror(errno));
257                 goto error;
258         }
259         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
260                 LOG(L_ERR, "ERROR: init_sock_opt: fcntl: set non-blocking failed:"
261                                 " (%d) %s\n", errno, strerror(errno));
262                 goto error;
263         }
264         return 0;
265 error:
266         return -1;
267 }
268
269
270
271 /* blocking connect on a non-blocking fd; it will timeout after
272  * tcp_connect_timeout 
273  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
274  * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
275  */
276 static int tcp_blocking_connect(int fd, const struct sockaddr *servaddr,
277                                                                 socklen_t addrlen)
278 {
279         int n;
280 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
281         fd_set sel_set;
282         fd_set orig_set;
283         struct timeval timeout;
284 #else
285         struct pollfd pf;
286 #endif
287         int elapsed;
288         int to;
289         int ticks;
290         int err;
291         unsigned int err_len;
292         int poll_err;
293         
294         poll_err=0;
295         to=tcp_connect_timeout;
296         ticks=get_ticks();
297 again:
298         n=connect(fd, servaddr, addrlen);
299         if (n==-1){
300                 if (errno==EINTR){
301                         elapsed=(get_ticks()-ticks)*TIMER_TICK;
302                         if (elapsed<to)         goto again;
303                         else goto error_timeout;
304                 }
305                 if (errno!=EINPROGRESS && errno!=EALREADY){
306                         LOG(L_ERR, "ERROR: tcp_blocking_connect: (%d) %s\n",
307                                         errno, strerror(errno));
308                         goto error;
309                 }
310         }else goto end;
311         
312         /* poll/select loop */
313 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
314                 FD_ZERO(&orig_set);
315                 FD_SET(fd, &orig_set);
316 #else
317                 pf.fd=fd;
318                 pf.events=POLLOUT;
319 #endif
320         while(1){
321                 elapsed=(get_ticks()-ticks)*TIMER_TICK;
322                 if (elapsed<to)
323                         to-=elapsed;
324                 else 
325                         goto error_timeout;
326 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
327                 sel_set=orig_set;
328                 timeout.tv_sec=to;
329                 timeout.tv_usec=0;
330                 n=select(fd+1, 0, &sel_set, 0, &timeout);
331 #else
332                 n=poll(&pf, 1, to*1000);
333 #endif
334                 if (n<0){
335                         if (errno==EINTR) continue;
336                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll/select failed:"
337                                         " (%d) %s\n", errno, strerror(errno));
338                         goto error;
339                 }else if (n==0) /* timeout */ continue;
340 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
341                 if (FD_ISSET(fd, &sel_set))
342 #else
343                 if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){ 
344                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll error: flags %x\n",
345                                         pf.revents);
346                         poll_err=1;
347                 }
348 #endif
349                 {
350                         err_len=sizeof(err);
351                         getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
352                         if ((err==0) && (poll_err==0)) goto end;
353                         if (err!=EINPROGRESS && err!=EALREADY){
354                                 LOG(L_ERR, "ERROR: tcp_blocking_connect: SO_ERROR (%d) %s\n",
355                                                 err, strerror(err));
356                                 goto error;
357                         }
358                 }
359         }
360 error_timeout:
361         /* timeout */
362         LOG(L_ERR, "ERROR: tcp_blocking_connect: timeout %d s elapsed from %d s\n",
363                         elapsed, tcp_connect_timeout);
364 error:
365         return -1;
366 end:
367         return 0;
368 }
369
370
371
#if 0
/* NOTE: this whole function is compiled out (#if 0).
 *
 * blocking write even on non-blocking sockets 
 * if TCP_TIMEOUT will return with error
 *
 * c   - connection (not used by the body)
 * fd  - socket to write to
 * buf - data to send
 * len - number of bytes to send
 *
 * returns the initial len after everything was sent, -1 on error/timeout */
static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
								unsigned int len)
{
	int n;
	fd_set sel_set;
	struct timeval timeout;
	int ticks;
	int initial_len;
	
	/* remember the requested size: len is decreased on partial writes */
	initial_len=len;
again:
	
	n=send(fd, buf, len,
#ifdef HAVE_MSG_NOSIGNAL
			MSG_NOSIGNAL
#else
			0
#endif
		);
	if (n<0){
		if (errno==EINTR)	goto again;
		else if (errno!=EAGAIN && errno!=EWOULDBLOCK){
			LOG(L_ERR, "tcp_blocking_write: failed to send: (%d) %s\n",
					errno, strerror(errno));
			goto error;
		}
		/* EAGAIN/EWOULDBLOCK: fall through and wait for the fd */
	}else if (n<len){
		/* partial write */
		buf+=n;
		len-=n;
	}else{
		/* success: full write */
		goto end;
	}
	/* wait until the socket becomes writable, then retry the send */
	while(1){
		FD_ZERO(&sel_set);
		FD_SET(fd, &sel_set);
		timeout.tv_sec=tcp_send_timeout;
		timeout.tv_usec=0;
		/* the start time is re-read on every iteration, so the timeout
		 * check below is relative to the current select() call */
		ticks=get_ticks();
		n=select(fd+1, 0, &sel_set, 0, &timeout);
		if (n<0){
			if (errno==EINTR) continue; /* signal, ignore */
			LOG(L_ERR, "ERROR: tcp_blocking_write: select failed: "
					" (%d) %s\n", errno, strerror(errno));
			goto error;
		}else if (n==0){
			/* timeout */
			if (get_ticks()-ticks>=tcp_send_timeout){
				LOG(L_ERR, "ERROR: tcp_blocking_write: send timeout (%d)\n",
						tcp_send_timeout);
				goto error;
			}
			continue;
		}
		if (FD_ISSET(fd, &sel_set)){
			/* we can write again */
			goto again;
		}
	}
error:
		return -1;
end:
		return initial_len;
}
#endif
441
442
443
444 struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
445                                                                         union sockaddr_union* local_addr,
446                                                                         struct socket_info* ba, int type, 
447                                                                         int state)
448 {
449         struct tcp_connection *c;
450         
451         c=(struct tcp_connection*)shm_malloc(sizeof(struct tcp_connection));
452         if (c==0){
453                 LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
454                 goto error;
455         }
456         memset(c, 0, sizeof(struct tcp_connection)); /* zero init */
457         c->s=sock;
458         c->fd=-1; /* not initialized */
459         if (lock_init(&c->write_lock)==0){
460                 LOG(L_ERR, "ERROR: tcpconn_new: init lock failed\n");
461                 goto error;
462         }
463         
464         c->rcv.src_su=*su;
465         
466         atomic_set(&c->refcnt, 0);
467         su2ip_addr(&c->rcv.src_ip, su);
468         c->rcv.src_port=su_getport(su);
469         c->rcv.bind_address=ba;
470         if (likely(local_addr)){
471                 su2ip_addr(&c->rcv.dst_ip, local_addr);
472                 c->rcv.dst_port=su_getport(local_addr);
473         }else if (ba){
474                 c->rcv.dst_ip=ba->address;
475                 c->rcv.dst_port=ba->port_no;
476         }
477         print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
478         DBG(     "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
479         init_tcp_req(&c->req);
480         c->id=(*connection_id)++;
481         c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
482         c->rcv.proto_reserved2=0;
483         c->state=state;
484         c->extra_data=0;
485 #ifdef USE_TLS
486         if (type==PROTO_TLS){
487                 if (tls_tcpconn_init(c, sock)==-1) goto error;
488         }else
489 #endif /* USE_TLS*/
490         {
491                 c->type=PROTO_TCP;
492                 c->rcv.proto=PROTO_TCP;
493                 c->timeout=get_ticks_raw()+tcp_con_lifetime;
494         }
495         c->flags|=F_CONN_REMOVED;
496         
497         return c;
498         
499 error:
500         if (c) shm_free(c);
501         return 0;
502 }
503
504
505
506 struct tcp_connection* tcpconn_connect( union sockaddr_union* server, 
507                                                                                 union sockaddr_union* from,
508                                                                                 int type)
509 {
510         int s;
511         struct socket_info* si;
512         union sockaddr_union my_name;
513         socklen_t my_name_len;
514         struct tcp_connection* con;
515         struct ip_addr ip;
516
517         s=-1;
518         
519         if (*tcp_connections_no >= tcp_max_connections){
520                 LOG(L_ERR, "ERROR: tcpconn_connect: maximum number of connections"
521                                         " exceeded (%d/%d)\n",
522                                         *tcp_connections_no, tcp_max_connections);
523                 goto error;
524         }
525         s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
526         if (s==-1){
527                 LOG(L_ERR, "ERROR: tcpconn_connect: socket: (%d) %s\n",
528                                 errno, strerror(errno));
529                 goto error;
530         }
531         if (init_sock_opt(s)<0){
532                 LOG(L_ERR, "ERROR: tcpconn_connect: init_sock_opt failed\n");
533                 goto error;
534         }
535         
536         if (from && bind(s, &from->s, sockaddru_len(*from)) != 0)
537                 LOG(L_WARN, "WARNING: tcpconn_connect: binding to source address"
538                                         " failed: %s [%d]\n", strerror(errno), errno);
539
540         if (tcp_blocking_connect(s, &server->s, sockaddru_len(*server))<0){
541                 LOG(L_ERR, "ERROR: tcpconn_connect: tcp_blocking_connect failed\n");
542                 goto error;
543         }
544         if (from){
545                 su2ip_addr(&ip, from);
546                 if (!ip_addr_any(&ip))
547                         /* we already know the source ip, skip the sys. call */
548                         goto find_socket;
549         }
550         my_name_len=sizeof(my_name);
551         if (getsockname(s, &my_name.s, &my_name_len)!=0){
552                 LOG(L_ERR, "ERROR: tcp_connect: getsockname failed: %s(%d)\n",
553                                 strerror(errno), errno);
554                 si=0; /* try to go on */
555                 goto skip;
556         }
557         from=&my_name; /* update from with the real "from" address */
558         su2ip_addr(&ip, &my_name);
559 find_socket:
560 #ifdef USE_TLS
561         if (type==PROTO_TLS)
562                 si=find_si(&ip, 0, PROTO_TLS);
563         else
564 #endif
565                 si=find_si(&ip, 0, PROTO_TCP);
566 skip:
567         if (si==0){
568                 LOG(L_WARN, "WARNING: tcp_connect: could not find corresponding"
569                                 " listening socket, using default...\n");
570                 if (server->s.sa_family==AF_INET) si=sendipv4_tcp;
571 #ifdef USE_IPV6
572                 else si=sendipv6_tcp;
573 #endif
574         }
575         con=tcpconn_new(s, server, from, si,  type, S_CONN_CONNECT);
576         if (con==0){
577                 LOG(L_ERR, "ERROR: tcp_connect: tcpconn_new failed, closing the "
578                                  " socket\n");
579                 goto error;
580         }
581         return con;
582         /*FIXME: set sock idx! */
583 error:
584         if (s!=-1) close(s); /* close the opened socket */
585         return 0;
586 }
587
588
589
590 /* adds a tcp connection to the tcpconn hashes
591  * Note: it's called _only_ from the tcp_main process */
592 struct tcp_connection*  tcpconn_add(struct tcp_connection *c)
593 {
594
595         if (c){
596                 c->id_hash=tcp_id_hash(c->id);
597                 c->con_aliases[0].hash=tcp_addr_hash(&c->rcv.src_ip, c->rcv.src_port);
598                 TCPCONN_LOCK;
599                 /* add it at the begining of the list*/
600                 tcpconn_listadd(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
601                 /* set the first alias */
602                 c->con_aliases[0].port=c->rcv.src_port;
603                 c->con_aliases[0].parent=c;
604                 tcpconn_listadd(tcpconn_aliases_hash[c->con_aliases[0].hash],
605                                                         &c->con_aliases[0], next, prev);
606                 c->aliases++;
607                 TCPCONN_UNLOCK;
608                 DBG("tcpconn_add: hashes: %d, %d\n", c->con_aliases[0].hash,
609                                                                                                 c->id_hash);
610                 return c;
611         }else{
612                 LOG(L_CRIT, "tcpconn_add: BUG: null connection pointer\n");
613                 return 0;
614         }
615 }
616
617
618 /* unsafe tcpconn_rm version (nolocks) */
619 void _tcpconn_rm(struct tcp_connection* c)
620 {
621         int r;
622         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
623         /* remove all the aliases */
624         for (r=0; r<c->aliases; r++)
625                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
626                                                 &c->con_aliases[r], next, prev);
627         lock_destroy(&c->write_lock);
628 #ifdef USE_TLS
629         if (c->type==PROTO_TLS) tls_tcpconn_clean(c);
630 #endif
631         shm_free(c);
632 }
633
634
635
636 void tcpconn_rm(struct tcp_connection* c)
637 {
638         int r;
639         TCPCONN_LOCK;
640         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
641         /* remove all the aliases */
642         for (r=0; r<c->aliases; r++)
643                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
644                                                 &c->con_aliases[r], next, prev);
645         TCPCONN_UNLOCK;
646         lock_destroy(&c->write_lock);
647 #ifdef USE_TLS
648         if ((c->type==PROTO_TLS)&&(c->extra_data)) tls_tcpconn_clean(c);
649 #endif
650         shm_free(c);
651 }
652
653
654 /* finds a connection, if id=0 uses the ip addr & port (host byte order)
655  * WARNING: unprotected (locks) use tcpconn_get unless you really
656  * know what you are doing */
657 struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port)
658 {
659
660         struct tcp_connection *c;
661         struct tcp_conn_alias* a;
662         unsigned hash;
663         
664 #ifdef EXTRA_DEBUG
665         DBG("tcpconn_find: %d  port %d\n",id, port);
666         if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
667 #endif
668         if (id){
669                 hash=tcp_id_hash(id);
670                 for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
671 #ifdef EXTRA_DEBUG
672                         DBG("c=%p, c->id=%d, port=%d\n",c, c->id, c->rcv.src_port);
673                         print_ip("ip=", &c->rcv.src_ip, "\n");
674 #endif
675                         if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
676                 }
677         }else if (ip){
678                 hash=tcp_addr_hash(ip, port);
679                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
680 #ifdef EXTRA_DEBUG
681                         DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
682                                         a->parent->id, a->port, a->parent->rcv.src_port);
683                         print_ip("ip=",&a->parent->rcv.src_ip,"\n");
684 #endif
685                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
686                                         (ip_addr_cmp(ip, &a->parent->rcv.src_ip)) )
687                                 return a->parent;
688                 }
689         }
690         return 0;
691 }
692
693
694
695 /* _tcpconn_find with locks and timeout */
696 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
697                                                                         ticks_t timeout)
698 {
699         struct tcp_connection* c;
700         TCPCONN_LOCK;
701         c=_tcpconn_find(id, ip, port);
702         if (c){ 
703                         atomic_inc(&c->refcnt);
704                         c->timeout=get_ticks_raw()+timeout;
705         }
706         TCPCONN_UNLOCK;
707         return c;
708 }
709
710
711
712 /* add port as an alias for the "id" connection
713  * returns 0 on success,-1 on failure */
714 int tcpconn_add_alias(int id, int port, int proto)
715 {
716         struct tcp_connection* c;
717         unsigned hash;
718         struct tcp_conn_alias* a;
719         
720         a=0;
721         /* fix the port */
722         port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
723         TCPCONN_LOCK;
724         /* check if alias already exists */
725         c=_tcpconn_find(id, 0, 0);
726         if (c){
727                 hash=tcp_addr_hash(&c->rcv.src_ip, port);
728                 /* search the aliases for an already existing one */
729                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
730                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
731                                         (ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) ){
732                                 /* found */
733                                 if (a->parent!=c) goto error_sec;
734                                 else goto ok;
735                         }
736                 }
737                 if (c->aliases>=TCP_CON_MAX_ALIASES) goto error_aliases;
738                 c->con_aliases[c->aliases].parent=c;
739                 c->con_aliases[c->aliases].port=port;
740                 c->con_aliases[c->aliases].hash=hash;
741                 tcpconn_listadd(tcpconn_aliases_hash[hash], 
742                                                                 &c->con_aliases[c->aliases], next, prev);
743                 c->aliases++;
744         }else goto error_not_found;
745 ok:
746         TCPCONN_UNLOCK;
747 #ifdef EXTRA_DEBUG
748         if (a) DBG("tcpconn_add_alias: alias already present\n");
749         else   DBG("tcpconn_add_alias: alias port %d for hash %d, id %d\n",
750                         port, hash, c->id);
751 #endif
752         return 0;
753 error_aliases:
754         TCPCONN_UNLOCK;
755         LOG(L_ERR, "ERROR: tcpconn_add_alias: too many aliases for connection %p"
756                                 " (%d)\n", c, c->id);
757         return -1;
758 error_not_found:
759         TCPCONN_UNLOCK;
760         LOG(L_ERR, "ERROR: tcpconn_add_alias: no connection found for id %d\n",id);
761         return -1;
762 error_sec:
763         TCPCONN_UNLOCK;
764         LOG(L_ERR, "ERROR: tcpconn_add_alias: possible port hijack attempt\n");
765         LOG(L_ERR, "ERROR: tcpconn_add_alias: alias already present and points"
766                         " to another connection (%d : %d and %d : %d)\n",
767                         a->parent->id,  port, c->id, port);
768         return -1;
769 }
770
771
772
773 /* finds a tcpconn & sends on it
774  * uses the dst members to, proto (TCP|TLS) and id
775  * returns: number of bytes written (>=0) on success
776  *          <0 on error */
777 int tcp_send(struct dest_info* dst, char* buf, unsigned len)
778 {
779         struct tcp_connection *c;
780         struct tcp_connection *tmp;
781         struct ip_addr ip;
782         int port;
783         int fd;
784         long response[2];
785         int n;
786         union sockaddr_union* from;
787         
788         port=su_getport(&dst->to);
789         if (port){
790                 su2ip_addr(&ip, &dst->to);
791                 c=tcpconn_get(dst->id, &ip, port, tcp_con_lifetime); 
792         }else if (dst->id){
793                 c=tcpconn_get(dst->id, 0, 0, tcp_con_lifetime);
794         }else{
795                 LOG(L_CRIT, "BUG: tcp_send called with null id & to\n");
796                 return -1;
797         }
798         
799         if (dst->id){
800                 if (c==0) {
801                         if (port){
802                                 /* try again w/o id */
803                                 c=tcpconn_get(0, &ip, port, tcp_con_lifetime);
804                                 goto no_id;
805                         }else{
806                                 LOG(L_ERR, "ERROR: tcp_send: id %d not found, dropping\n",
807                                                 dst->id);
808                                 return -1;
809                         }
810                 }else goto get_fd;
811         }
812 no_id:
813                 if (c==0){
814                         DBG("tcp_send: no open tcp connection found, opening new one\n");
815                         /* create tcp connection */
816                                 from=0;
817                                 /* check to see if we have to use a specific source addr. */
818                                 switch (dst->to.s.sa_family) {
819                                         case AF_INET:
820                                                         from = tcp_source_ipv4;
821                                                 break;
822 #ifdef USE_IPV6
823                                         case AF_INET6:
824                                                         from = tcp_source_ipv6;
825                                                 break;
826 #endif
827                                         default:
828                                                 /* error, bad af, ignore ... */
829                                                 break;
830                                 }
831                         if ((c=tcpconn_connect(&dst->to, from, dst->proto))==0){
832                                 LOG(L_ERR, "ERROR: tcp_send: connect failed\n");
833                                 return -1;
834                         }
835                         atomic_set(&c->refcnt, 1); /* ref. only from here for now */
836                         fd=c->s;
837                         
838                         /* send the new tcpconn to "tcp main" */
839                         response[0]=(long)c;
840                         response[1]=CONN_NEW;
841                         n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
842                         if (n<=0){
843                                 LOG(L_ERR, "BUG: tcp_send: failed send_fd: %s (%d)\n",
844                                                 strerror(errno), errno);
845                                 n=-1;
846                                 goto end;
847                         }       
848                         goto send_it;
849                 }
850 get_fd:
851                         /* todo: see if this is not the same process holding
852                          *  c  and if so send directly on c->fd */
853                         DBG("tcp_send: tcp connection found (%p), acquiring fd\n", c);
854                         /* get the fd */
855                         response[0]=(long)c;
856                         response[1]=CONN_GET_FD;
857                         n=send_all(unix_tcp_sock, response, sizeof(response));
858                         if (n<=0){
859                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(write):%s (%d)\n",
860                                                 strerror(errno), errno);
861                                 n=-1;
862                                 goto release_c;
863                         }
864                         DBG("tcp_send, c= %p, n=%d\n", c, n);
865                         n=receive_fd(unix_tcp_sock, &tmp, sizeof(tmp), &fd, MSG_WAITALL);
866                         if (n<=0){
867                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(receive_fd):"
868                                                         " %s (%d)\n", strerror(errno), errno);
869                                 n=-1;
870                                 goto release_c;
871                         }
872                         if (c!=tmp){
873                                 LOG(L_CRIT, "BUG: tcp_send: get_fd: got different connection:"
874                                                 "  %p (id= %d, refcnt=%d state=%d) != "
875                                                 "  %p (n=%d)\n",
876                                                   c,   c->id,   atomic_get(&c->refcnt),   c->state,
877                                                   tmp, n
878                                    );
879                                 n=-1; /* fail */
880                                 goto end;
881                         }
882                         DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
883                 
884         
885         
886 send_it:
887         DBG("tcp_send: sending...\n");
888         lock_get(&c->write_lock);
889 #ifdef USE_TLS
890         if (c->type==PROTO_TLS)
891                 n=tls_blocking_write(c, fd, buf, len);
892         else
893 #endif
894                 /* n=tcp_blocking_write(c, fd, buf, len); */
895                 n=tsend_stream(fd, buf, len, tcp_send_timeout*1000); 
896         lock_release(&c->write_lock);
897         DBG("tcp_send: after write: c= %p n=%d fd=%d\n",c, n, fd);
898         DBG("tcp_send: buf=\n%.*s\n", (int)len, buf);
899         if (n<0){
900                 LOG(L_ERR, "ERROR: tcp_send: failed to send\n");
901                 /* error on the connection , mark it as bad and set 0 timeout */
902                 c->state=S_CONN_BAD;
903                 c->timeout=get_ticks_raw();
904                 /* tell "main" it should drop this (optional it will t/o anyway?)*/
905                 response[0]=(long)c;
906                 response[1]=CONN_ERROR;
907                 if (send_all(unix_tcp_sock, response, sizeof(response))<=0){
908                         LOG(L_ERR, "BUG: tcp_send: error return failed (write):%s (%d)\n",
909                                         strerror(errno), errno);
910                         tcpconn_put(c); /* deref. it manually */
911                         n=-1;
912                 }
913                 /* CONN_ERROR will auto-dec refcnt => we must not call tcpconn_put 
914                  * if it succeeds */
915                 close(fd);
916                 return n; /* error return, no tcpconn_put */
917         }
918 end:
919         close(fd);
920 release_c:
921         tcpconn_put(c); /* release c (lock; dec refcnt; unlock) */
922         return n;
923 }
924
925
926
927 int tcp_init(struct socket_info* sock_info)
928 {
929         union sockaddr_union* addr;
930         int optval;
931 #ifdef DISABLE_NAGLE
932         int flag;
933         struct protoent* pe;
934
935         if (tcp_proto_no==-1){ /* if not already set */
936                 pe=getprotobyname("tcp");
937                 if (pe==0){
938                         LOG(L_ERR, "ERROR: tcp_init: could not get TCP protocol number\n");
939                         tcp_proto_no=-1;
940                 }else{
941                         tcp_proto_no=pe->p_proto;
942                 }
943         }
944 #endif
945         
946         addr=&sock_info->su;
947         /* sock_info->proto=PROTO_TCP; */
948         if (init_su(addr, &sock_info->address, sock_info->port_no)<0){
949                 LOG(L_ERR, "ERROR: tcp_init: could no init sockaddr_union\n");
950                 goto error;
951         }
952         sock_info->socket=socket(AF2PF(addr->s.sa_family), SOCK_STREAM, 0);
953         if (sock_info->socket==-1){
954                 LOG(L_ERR, "ERROR: tcp_init: socket: %s\n", strerror(errno));
955                 goto error;
956         }
957 #ifdef DISABLE_NAGLE
958         flag=1;
959         if ( (tcp_proto_no!=-1) &&
960                  (setsockopt(sock_info->socket, tcp_proto_no , TCP_NODELAY,
961                                          &flag, sizeof(flag))<0) ){
962                 LOG(L_ERR, "ERROR: tcp_init: could not disable Nagle: %s\n",
963                                 strerror(errno));
964         }
965 #endif
966
967
968 #if  !defined(TCP_DONT_REUSEADDR) 
969         /* Stevens, "Network Programming", Section 7.5, "Generic Socket
970      * Options": "...server started,..a child continues..on existing
971          * connection..listening server is restarted...call to bind fails
972          * ... ALL TCP servers should specify the SO_REUSEADDRE option 
973          * to allow the server to be restarted in this situation
974          *
975          * Indeed, without this option, the server can't restart.
976          *   -jiri
977          */
978         optval=1;
979         if (setsockopt(sock_info->socket, SOL_SOCKET, SO_REUSEADDR,
980                                 (void*)&optval, sizeof(optval))==-1) {
981                 LOG(L_ERR, "ERROR: tcp_init: setsockopt %s\n",
982                         strerror(errno));
983                 goto error;
984         }
985 #endif
986         /* tos */
987         optval = tos;
988         if (setsockopt(sock_info->socket, IPPROTO_IP, IP_TOS, (void*)&optval, 
989                                 sizeof(optval)) ==-1){
990                 LOG(L_WARN, "WARNING: tcp_init: setsockopt tos: %s\n", strerror(errno));
991                 /* continue since this is not critical */
992         }
993         if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
994                 LOG(L_ERR, "ERROR: tcp_init: bind(%x, %p, %d) on %s:%d : %s\n",
995                                 sock_info->socket,  &addr->s, 
996                                 (unsigned)sockaddru_len(*addr),
997                                 sock_info->address_str.s,
998                                 sock_info->port_no,
999                                 strerror(errno));
1000                 goto error;
1001         }
1002         if (listen(sock_info->socket, TCP_LISTEN_BACKLOG)==-1){
1003                 LOG(L_ERR, "ERROR: tcp_init: listen(%x, %p, %d) on %s: %s\n",
1004                                 sock_info->socket, &addr->s, 
1005                                 (unsigned)sockaddru_len(*addr),
1006                                 sock_info->address_str.s,
1007                                 strerror(errno));
1008                 goto error;
1009         }
1010         
1011         return 0;
1012 error:
1013         if (sock_info->socket!=-1){
1014                 close(sock_info->socket);
1015                 sock_info->socket=-1;
1016         }
1017         return -1;
1018 }
1019
1020
1021
1022 /* used internally by tcp_main_loop()
1023  * tries to destroy a tcp connection (if it cannot it will force a timeout)
1024  * Note: it's called _only_ from the tcp_main process */
1025 static void tcpconn_destroy(struct tcp_connection* tcpconn)
1026 {
1027         int fd;
1028
1029         TCPCONN_LOCK; /*avoid races w/ tcp_send*/
1030         if (atomic_dec_and_test(&tcpconn->refcnt)){ 
1031                 DBG("tcpconn_destroy: destroying connection %p, flags %04x\n",
1032                                 tcpconn, tcpconn->flags);
1033                 fd=tcpconn->s;
1034 #ifdef USE_TLS
1035                 /*FIXME: lock ->writelock ? */
1036                 if (tcpconn->type==PROTO_TLS)
1037                         tls_close(tcpconn, fd);
1038 #endif
1039                 _tcpconn_rm(tcpconn);
1040                 close(fd);
1041                 (*tcp_connections_no)--;
1042         }else{
1043                 /* force timeout */
1044                 tcpconn->timeout=get_ticks_raw();
1045                 tcpconn->state=S_CONN_BAD;
1046                 DBG("tcpconn_destroy: delaying (%p, flags %04x) ...\n",
1047                                 tcpconn, tcpconn->flags);
1048                 
1049         }
1050         TCPCONN_UNLOCK;
1051 }
1052
1053
1054
1055 #ifdef SEND_FD_QUEUE
1056 struct send_fd_info{
1057         struct tcp_connection* tcp_conn;
1058         ticks_t expire;
1059         int unix_sock;
1060         unsigned int retries; /* debugging */
1061 };
1062
/* array based queue of pending send_fd operations; the valid entries are
 * those in [data, crt) */
struct tcp_send_fd_q{
	struct send_fd_info* data; /* start of the buffer */
	struct send_fd_info* crt;  /* next free slot inside the buffer */
	struct send_fd_info* end;  /* one past the last usable slot */
};


/* queue of connections waiting to be sent to the tcp children */
static struct tcp_send_fd_q send2child_q;
1071
1072
1073
1074 static int send_fd_queue_init(struct tcp_send_fd_q *q, unsigned int size)
1075 {
1076         q->data=pkg_malloc(size*sizeof(struct send_fd_info));
1077         if (q->data==0){
1078                 LOG(L_ERR, "ERROR: send_fd_queue_init: out of memory\n");
1079                 return -1;
1080         }
1081         q->crt=&q->data[0];
1082         q->end=&q->data[size];
1083         return 0;
1084 }
1085
1086 static void send_fd_queue_destroy(struct tcp_send_fd_q *q)
1087 {
1088         if (q->data){
1089                 pkg_free(q->data);
1090                 q->data=0;
1091                 q->crt=q->end=0;
1092         }
1093 }
1094
1095
1096
1097 static int init_send_fd_queues()
1098 {
1099         if (send_fd_queue_init(&send2child_q, SEND_FD_QUEUE_SIZE)!=0)
1100                 goto error;
1101         return 0;
1102 error:
1103         LOG(L_ERR, "ERROR: init_send_fd_queues: init failed\n");
1104         return -1;
1105 }
1106
1107
1108
1109 static void destroy_send_fd_queues()
1110 {
1111         send_fd_queue_destroy(&send2child_q);
1112 }
1113
1114
1115
1116
1117 inline static int send_fd_queue_add(    struct tcp_send_fd_q* q, 
1118                                                                                 int unix_sock,
1119                                                                                 struct tcp_connection *t)
1120 {
1121         struct send_fd_info* tmp;
1122         unsigned long new_size;
1123         
1124         if (q->crt>=q->end){
1125                 new_size=q->end-&q->data[0];
1126                 if (new_size< MAX_SEND_FD_QUEUE_SIZE/2){
1127                         new_size*=2;
1128                 }else new_size=MAX_SEND_FD_QUEUE_SIZE;
1129                 if (q->crt>=&q->data[new_size]){
1130                         LOG(L_ERR, "ERROR: send_fd_queue_add: queue full: %ld/%ld\n",
1131                                         (long)(q->crt-&q->data[0]-1), new_size);
1132                         goto error;
1133                 }
1134                 LOG(L_CRIT, "INFO: send_fd_queue: queue full: %ld, extending to %ld\n",
1135                                 (long)(q->end-&q->data[0]), new_size);
1136                 tmp=pkg_realloc(q->data, new_size*sizeof(struct send_fd_info));
1137                 if (tmp==0){
1138                         LOG(L_ERR, "ERROR: send_fd_queue_add: out of memory\n");
1139                         goto error;
1140                 }
1141                 q->crt=(q->crt-&q->data[0])+tmp;
1142                 q->data=tmp;
1143                 q->end=&q->data[new_size];
1144         }
1145         q->crt->tcp_conn=t;
1146         q->crt->unix_sock=unix_sock;
1147         q->crt->expire=get_ticks_raw()+SEND_FD_QUEUE_TIMEOUT;
1148         q->crt->retries=0;
1149         q->crt++;
1150         return 0;
1151 error:
1152         return -1;
1153 }
1154
1155
1156
1157 inline static void send_fd_queue_run(struct tcp_send_fd_q* q)
1158 {
1159         struct send_fd_info* p;
1160         struct send_fd_info* t;
1161         
1162         for (p=t=&q->data[0]; p<q->crt; p++){
1163                 if (send_fd(p->unix_sock, &(p->tcp_conn),
1164                                         sizeof(struct tcp_connection*), p->tcp_conn->s)<=0){
1165                         if ( ((errno==EAGAIN)||(errno==EWOULDBLOCK)) && 
1166                                                         ((s_ticks_t)(p->expire-get_ticks_raw())>0)){
1167                                 /* leave in queue for a future try */
1168                                 *t=*p;
1169                                 t->retries++;
1170                                 t++;
1171                         }else{
1172                                 LOG(L_ERR, "ERROR: run_send_fd_queue: send_fd failed"
1173                                                    " on socket %d , queue entry %ld, retries %d,"
1174                                                    " connection %p, tcp socket %d, errno=%d (%s) \n",
1175                                                    p->unix_sock, (long)(p-&q->data[0]), p->retries,
1176                                                    p->tcp_conn, p->tcp_conn->s, errno,
1177                                                    strerror(errno));
1178                                 tcpconn_destroy(p->tcp_conn);
1179                         }
1180                 }
1181         }
1182         q->crt=t;
1183 }
1184 #else
1185 #define send_fd_queue_run(q)
1186 #endif
1187
1188
1189
1190 /* handles io from a tcp child process
1191  * params: tcp_c - pointer in the tcp_children array, to the entry for
1192  *                 which an io event was detected 
1193  *         fd_i  - fd index in the fd_array (usefull for optimizing
1194  *                 io_watch_deletes)
1195  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
1196  *           io events queued), >0 on success. success/error refer only to
1197  *           the reads from the fd.
1198  */
1199 inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
1200 {
1201         struct tcp_connection* tcpconn;
1202         long response[2];
1203         int cmd;
1204         int bytes;
1205         
1206         if (tcp_c->unix_sock<=0){
1207                 /* (we can't have a fd==0, 0 is never closed )*/
1208                 LOG(L_CRIT, "BUG: handle_tcp_child: fd %d for %d "
1209                                 "(pid %d, ser no %d)\n", tcp_c->unix_sock,
1210                                 (int)(tcp_c-&tcp_children[0]), tcp_c->pid, tcp_c->proc_no);
1211                 goto error;
1212         }
1213         /* read until sizeof(response)
1214          * (this is a SOCK_STREAM so read is not atomic) */
1215         bytes=recv_all(tcp_c->unix_sock, response, sizeof(response), MSG_DONTWAIT);
1216         if (bytes<(int)sizeof(response)){
1217                 if (bytes==0){
1218                         /* EOF -> bad, child has died */
1219                         DBG("DBG: handle_tcp_child: dead tcp child %d (pid %d, no %d)"
1220                                         " (shutting down?)\n", (int)(tcp_c-&tcp_children[0]), 
1221                                         tcp_c->pid, tcp_c->proc_no );
1222                         /* don't listen on it any more */
1223                         io_watch_del(&io_h, tcp_c->unix_sock, fd_i, 0); 
1224                         goto error; /* eof. so no more io here, it's ok to return error */
1225                 }else if (bytes<0){
1226                         /* EAGAIN is ok if we try to empty the buffer
1227                          * e.g.: SIGIO_RT overflow mode or EPOLL ET */
1228                         if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
1229                                 LOG(L_CRIT, "ERROR: handle_tcp_child: read from tcp child %ld "
1230                                                 " (pid %d, no %d) %s [%d]\n",
1231                                                 (long)(tcp_c-&tcp_children[0]), tcp_c->pid,
1232                                                 tcp_c->proc_no, strerror(errno), errno );
1233                         }else{
1234                                 bytes=0;
1235                         }
1236                         /* try to ignore ? */
1237                         goto end;
1238                 }else{
1239                         /* should never happen */
1240                         LOG(L_CRIT, "BUG: handle_tcp_child: too few bytes received (%d)\n",
1241                                         bytes );
1242                         bytes=0; /* something was read so there is no error; otoh if
1243                                           receive_fd returned less then requested => the receive
1244                                           buffer is empty => no more io queued on this fd */
1245                         goto end;
1246                 }
1247         }
1248         
1249         DBG("handle_tcp_child: reader response= %lx, %ld from %d \n",
1250                                         response[0], response[1], (int)(tcp_c-&tcp_children[0]));
1251         cmd=response[1];
1252         tcpconn=(struct tcp_connection*)response[0];
1253         if (tcpconn==0){
1254                 /* should never happen */
1255                 LOG(L_CRIT, "BUG: handle_tcp_child: null tcpconn pointer received"
1256                                  " from tcp child %d (pid %d): %lx, %lx\n",
1257                                         (int)(tcp_c-&tcp_children[0]), tcp_c->pid,
1258                                         response[0], response[1]) ;
1259                 goto end;
1260         }
1261         switch(cmd){
1262                 case CONN_RELEASE:
1263                         tcp_c->busy--;
1264                         if (tcpconn->state==S_CONN_BAD){ 
1265                                 tcpconn_destroy(tcpconn);
1266                                 break;
1267                         }
1268                         /* update the timeout*/
1269                         tcpconn->timeout=get_ticks_raw()+tcp_con_lifetime;
1270                         tcpconn_put(tcpconn);
1271                         /* must be after the de-ref*/
1272                         io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1273                         tcpconn->flags&=~F_CONN_REMOVED;
1274                         DBG("handle_tcp_child: CONN_RELEASE  %p refcnt= %d\n", 
1275                                                         tcpconn, atomic_get(&tcpconn->refcnt));
1276                         break;
1277                 case CONN_ERROR:
1278                 case CONN_DESTROY:
1279                 case CONN_EOF:
1280                         /* WARNING: this will auto-dec. refcnt! */
1281                                 tcp_c->busy--;
1282                                 /* main doesn't listen on it => we don't have to delete it
1283                                  if (tcpconn->s!=-1)
1284                                         io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
1285                                 */
1286                                 tcpconn_destroy(tcpconn); /* closes also the fd */
1287                                 break;
1288                 default:
1289                                 LOG(L_CRIT, "BUG: handle_tcp_child:  unknown cmd %d"
1290                                                                         " from tcp reader %d\n",
1291                                                                         cmd, (int)(tcp_c-&tcp_children[0]));
1292         }
1293 end:
1294         return bytes;
1295 error:
1296         return -1;
1297 }
1298
1299
1300
1301 /* handles io from a "generic" ser process (get fd or new_fd from a tcp_send)
1302  * 
1303  * params: p     - pointer in the ser processes array (pt[]), to the entry for
1304  *                 which an io event was detected
1305  *         fd_i  - fd index in the fd_array (usefull for optimizing
1306  *                 io_watch_deletes)
1307  * returns:  handle_* return convention:
1308  *          -1 on error reading from the fd,
1309  *           0 on EAGAIN  or when no  more io events are queued 
1310  *             (receive buffer empty),
1311  *           >0 on successfull reads from the fd (the receive buffer might
1312  *             be non-empty).
1313  */
1314 inline static int handle_ser_child(struct process_table* p, int fd_i)
1315 {
1316         struct tcp_connection* tcpconn;
1317         long response[2];
1318         int cmd;
1319         int bytes;
1320         int ret;
1321         int fd;
1322         
1323         ret=-1;
1324         if (p->unix_sock<=0){
1325                 /* (we can't have a fd==0, 0 is never closed )*/
1326                 LOG(L_CRIT, "BUG: handle_ser_child: fd %d for %d "
1327                                 "(pid %d)\n", p->unix_sock, (int)(p-&pt[0]), p->pid);
1328                 goto error;
1329         }
1330                         
1331         /* get all bytes and the fd (if transmitted)
1332          * (this is a SOCK_STREAM so read is not atomic) */
1333         bytes=receive_fd(p->unix_sock, response, sizeof(response), &fd,
1334                                                 MSG_DONTWAIT);
1335         if (bytes<(int)sizeof(response)){
1336                 /* too few bytes read */
1337                 if (bytes==0){
1338                         /* EOF -> bad, child has died */
1339                         DBG("DBG: handle_ser_child: dead child %d, pid %d"
1340                                         " (shutting down?)\n", (int)(p-&pt[0]), p->pid);
1341                         /* don't listen on it any more */
1342                         io_watch_del(&io_h, p->unix_sock, fd_i, 0);
1343                         goto error; /* child dead => no further io events from it */
1344                 }else if (bytes<0){
1345                         /* EAGAIN is ok if we try to empty the buffer
1346                          * e.g: SIGIO_RT overflow mode or EPOLL ET */
1347                         if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
1348                                 LOG(L_CRIT, "ERROR: handle_ser_child: read from child %d  "
1349                                                 "(pid %d):  %s [%d]\n", (int)(p-&pt[0]), p->pid,
1350                                                 strerror(errno), errno);
1351                                 ret=-1;
1352                         }else{
1353                                 ret=0;
1354                         }
1355                         /* try to ignore ? */
1356                         goto end;
1357                 }else{
1358                         /* should never happen */
1359                         LOG(L_CRIT, "BUG: handle_ser_child: too few bytes received (%d)\n",
1360                                         bytes );
1361                         ret=0; /* something was read so there is no error; otoh if
1362                                           receive_fd returned less then requested => the receive
1363                                           buffer is empty => no more io queued on this fd */
1364                         goto end;
1365                 }
1366         }
1367         ret=1; /* something was received, there might be more queued */
1368         DBG("handle_ser_child: read response= %lx, %ld, fd %d from %d (%d)\n",
1369                                         response[0], response[1], fd, (int)(p-&pt[0]), p->pid);
1370         cmd=response[1];
1371         tcpconn=(struct tcp_connection*)response[0];
1372         if (tcpconn==0){
1373                 LOG(L_CRIT, "BUG: handle_ser_child: null tcpconn pointer received"
1374                                  " from child %d (pid %d): %lx, %lx\n",
1375                                         (int)(p-&pt[0]), p->pid, response[0], response[1]) ;
1376                 goto end;
1377         }
1378         switch(cmd){
1379                 case CONN_ERROR:
1380                         if (!(tcpconn->flags & F_CONN_REMOVED) && (tcpconn->s!=-1)){
1381                                 io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
1382                                 tcpconn->flags|=F_CONN_REMOVED;
1383                         }
1384                         tcpconn_destroy(tcpconn); /* will close also the fd */
1385                         break;
1386                 case CONN_GET_FD:
1387                         /* send the requested FD  */
1388                         /* WARNING: take care of setting refcnt properly to
1389                          * avoid race condition */
1390                         if (send_fd(p->unix_sock, &tcpconn, sizeof(tcpconn),
1391                                                         tcpconn->s)<=0){
1392                                 LOG(L_ERR, "ERROR: handle_ser_child: send_fd failed\n");
1393                         }
1394                         break;
1395                 case CONN_NEW:
1396                         /* update the fd in the requested tcpconn*/
1397                         /* WARNING: take care of setting refcnt properly to
1398                          * avoid race condition */
1399                         if (fd==-1){
1400                                 LOG(L_CRIT, "BUG: handle_ser_child: CONN_NEW:"
1401                                                         " no fd received\n");
1402                                 break;
1403                         }
1404                         (*tcp_connections_no)++;
1405                         tcpconn->s=fd;
1406                         /* add tcpconn to the list*/
1407                         tcpconn_add(tcpconn);
1408                         /* update the timeout*/
1409                         tcpconn->timeout=get_ticks_raw()+tcp_con_lifetime;
1410                         io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1411                         tcpconn->flags&=~F_CONN_REMOVED;
1412                         break;
1413                 default:
1414                         LOG(L_CRIT, "BUG: handle_ser_child: unknown cmd %d\n", cmd);
1415         }
1416 end:
1417         return ret;
1418 error:
1419         return -1;
1420 }
1421
1422
1423
1424 /* sends a tcpconn + fd to a choosen child */
1425 inline static int send2child(struct tcp_connection* tcpconn)
1426 {
1427         int i;
1428         int min_busy;
1429         int idx;
1430         static int crt=0; /* current child */
1431         int last;
1432         
1433         min_busy=tcp_children[0].busy;
1434         idx=0;
1435         last=crt+tcp_children_no;
1436         for (; crt<last; crt++){
1437                 i=crt%tcp_children_no;
1438                 if (!tcp_children[i].busy){
1439                         idx=i;
1440                         min_busy=0;
1441                         break;
1442                 }else if (min_busy>tcp_children[i].busy){
1443                         min_busy=tcp_children[i].busy;
1444                         idx=i;
1445                 }
1446         }
1447         crt=idx+1; /* next time we start with crt%tcp_children_no */
1448         
1449         tcp_children[idx].busy++;
1450         tcp_children[idx].n_reqs++;
1451         if (min_busy){
1452                 DBG("WARNING: send2child: no free tcp receiver, "
1453                                 " connection passed to the least busy one (%d)\n",
1454                                 min_busy);
1455         }
1456         DBG("send2child: to tcp child %d %d(%d), %p\n", idx, 
1457                                         tcp_children[idx].proc_no,
1458                                         tcp_children[idx].pid, tcpconn);
1459         /* first make sure this child doesn't have pending request for
1460          * tcp_main (to avoid a possible deadlock: e.g. child wants to
1461          * send a release command, but the master fills its socket buffer
1462          * with new connection commands => deadlock) */
1463         /* answer tcp_send requests first */
1464         while(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0);
1465         /* process tcp readers requests */
1466         while(handle_tcp_child(&tcp_children[idx], -1)>0);
1467                 
1468 #ifdef SEND_FD_QUEUE
1469         /* if queue full, try to queue the io */
1470         if (send_fd(tcp_children[idx].unix_sock, &tcpconn, sizeof(tcpconn),
1471                         tcpconn->s)<=0){
1472                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK)){
1473                         /* FIXME: remove after debugging */
1474                          LOG(L_CRIT, "INFO: tcp child %d, socket %d: queue full,"
1475                                                 " %d requests queued (total handled %d)\n",
1476                                         idx, tcp_children[idx].unix_sock, min_busy,
1477                                         tcp_children[idx].n_reqs-1);
1478                         if (send_fd_queue_add(&send2child_q, tcp_children[idx].unix_sock, 
1479                                                 tcpconn)!=0){
1480                                 LOG(L_ERR, "ERROR: send2child: queue send op. failed\n");
1481                                 return -1;
1482                         }
1483                 }else{
1484                         LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
1485                         return -1;
1486                 }
1487         }
1488 #else
1489         if (send_fd(tcp_children[idx].unix_sock, &tcpconn, sizeof(tcpconn),
1490                         tcpconn->s)<=0){
1491                 LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
1492                 return -1;
1493         }
1494 #endif
1495         
1496         return 0;
1497 }
1498
1499
1500
1501 /* handles a new connection, called internally by tcp_main_loop/handle_io.
1502  * params: si - pointer to one of the tcp socket_info structures on which
1503  *              an io event was detected (connection attempt)
1504  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
1505  *           io events queued), >0 on success. success/error refer only to
1506  *           the accept.
1507  */
1508 static inline int handle_new_connect(struct socket_info* si)
1509 {
1510         union sockaddr_union su;
1511         struct tcp_connection* tcpconn;
1512         socklen_t su_len;
1513         int new_sock;
1514         
1515         /* got a connection on r */
1516         su_len=sizeof(su);
1517         new_sock=accept(si->socket, &(su.s), &su_len);
1518         if (new_sock==-1){
1519                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK))
1520                         return 0;
1521                 LOG(L_ERR,  "WARNING: handle_new_connect: error while accepting"
1522                                 " connection(%d): %s\n", errno, strerror(errno));
1523                 return -1;
1524         }
1525         if (*tcp_connections_no>=tcp_max_connections){
1526                 LOG(L_ERR, "ERROR: maximum number of connections exceeded: %d/%d\n",
1527                                         *tcp_connections_no, tcp_max_connections);
1528                 close(new_sock);
1529                 return 1; /* success, because the accept was succesfull */
1530         }
1531         if (init_sock_opt(new_sock)<0){
1532                 LOG(L_ERR, "ERROR: handle_new_connect: init_sock_opt failed\n");
1533                 close(new_sock);
1534                 return 1; /* success, because the accept was succesfull */
1535         }
1536         (*tcp_connections_no)++;
1537         
1538         /* add socket to list */
1539         tcpconn=tcpconn_new(new_sock, &su, &si->su, si, si->proto, S_CONN_ACCEPT);
1540         if (tcpconn){
1541 #ifdef TCP_PASS_NEW_CONNECTION_ON_DATA
1542                 io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1543                 tcpconn->flags&=~F_CONN_REMOVED;
1544                 tcpconn_add(tcpconn);
1545 #else
1546                 atomic_set(&tcpconn->refcnt, 1); /* safe, not yet available to the
1547                                                                                         outside world */
1548                 tcpconn_add(tcpconn);
1549                 DBG("handle_new_connect: new connection: %p %d flags: %04x\n",
1550                         tcpconn, tcpconn->s, tcpconn->flags);
1551                 /* pass it to a child */
1552                 if(send2child(tcpconn)<0){
1553                         LOG(L_ERR,"ERROR: handle_new_connect: no children "
1554                                         "available\n");
1555                         tcpconn_destroy(tcpconn);
1556                 }
1557 #endif
1558         }else{ /*tcpconn==0 */
1559                 LOG(L_ERR, "ERROR: handle_new_connect: tcpconn_new failed, "
1560                                 "closing socket\n");
1561                 close(new_sock);
1562                 (*tcp_connections_no)--;
1563         }
1564         return 1; /* accept() was succesfull */
1565 }
1566
1567
1568
1569 /* handles an io event on one of the watched tcp connections
1570  * 
1571  * params: tcpconn - pointer to the tcp_connection for which we have an io ev.
1572  *         fd_i    - index in the fd_array table (needed for delete)
1573  * returns:  handle_* return convention, but on success it always returns 0
1574  *           (because it's one-shot, after a succesfull execution the fd is
1575  *            removed from tcp_main's watch fd list and passed to a child =>
1576  *            tcp_main is not interested in further io events that might be
1577  *            queued for this fd)
1578  */
1579 inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, int fd_i)
1580 {
1581         /*  is refcnt!=0 really necessary? 
1582          *  No, in fact it's a bug: I can have the following situation: a send only
1583          *   tcp connection used by n processes simultaneously => refcnt = n. In 
1584          *   the same time I can have a read event and this situation is perfectly
1585          *   valid. -- andrei
1586          */
1587 #if 0
1588         if ((tcpconn->refcnt!=0)){
1589                 /* FIXME: might be valid for sigio_rt iff fd flags are not cleared
1590                  *        (there is a short window in which it could generate a sig
1591                  *         that would be catched by tcp_main) */
1592                 LOG(L_CRIT, "BUG: handle_tcpconn_ev: io event on referenced"
1593                                         " tcpconn (%p), refcnt=%d, fd=%d\n",
1594                                         tcpconn, tcpconn->refcnt, tcpconn->s);
1595                 return -1;
1596         }
1597 #endif
1598         /* pass it to child, so remove it from the io watch list */
1599         DBG("handle_tcpconn_ev: data available on %p %d\n", tcpconn, tcpconn->s);
1600         if (io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1) goto error;
1601         tcpconn->flags|=F_CONN_REMOVED;
1602         tcpconn_ref(tcpconn); /* refcnt ++ */
1603         if (send2child(tcpconn)<0){
1604                 LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n");
1605                 tcpconn_destroy(tcpconn);
1606         }
1607         return 0; /* we are not interested in possibly queued io events, 
1608                                  the fd was either passed to a child, or closed */
1609 error:
1610         return -1;
1611 }
1612
1613
1614
1615 /* generic handle io routine, it will call the appropiate
1616  *  handle_xxx() based on the fd_map type
1617  *
1618  * params:  fm  - pointer to a fd hash entry
1619  *          idx - index in the fd_array (or -1 if not known)
1620  * return: -1 on error
1621  *          0 on EAGAIN or when by some other way it is known that no more 
1622  *            io events are queued on the fd (the receive buffer is empty).
1623  *            Usefull to detect when there are no more io events queued for
1624  *            sigio_rt, epoll_et, kqueue.
1625  *         >0 on successfull read from the fd (when there might be more io
1626  *            queued -- the receive buffer might still be non-empty)
1627  */
1628 inline static int handle_io(struct fd_map* fm, int idx)
1629 {       
1630         int ret;
1631         
1632         switch(fm->type){
1633                 case F_SOCKINFO:
1634                         ret=handle_new_connect((struct socket_info*)fm->data);
1635                         break;
1636                 case F_TCPCONN:
1637                         ret=handle_tcpconn_ev((struct tcp_connection*)fm->data, idx);
1638                         break;
1639                 case F_TCPCHILD:
1640                         ret=handle_tcp_child((struct tcp_child*)fm->data, idx);
1641                         break;
1642                 case F_PROC:
1643                         ret=handle_ser_child((struct process_table*)fm->data, idx);
1644                         break;
1645                 case F_NONE:
1646                         LOG(L_CRIT, "BUG: handle_io: empty fd map: %p {%d, %d, %p},"
1647                                                 " idx %d\n", fm, fm->fd, fm->type, fm->data, idx);
1648                         goto error;
1649                 default:
1650                         LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type); 
1651                         goto error;
1652         }
1653         return ret;
1654 error:
1655         return -1;
1656 }
1657
1658
1659
1660 /* very inefficient for now - FIXME
1661  * keep in sync with tcpconn_destroy, the "delete" part should be
1662  * the same except for io_watch_del..
1663  * Note: this function is called only from the tcp_main process with 1 
1664  * exception: on shutdown it's called also by the main ser process via
1665  * cleanup() => with the ser shutdown exception, it cannot execute in parallel
1666  * with tcpconn_add() or tcpconn_destroy()*/
1667 static inline void tcpconn_timeout(int force)
1668 {
1669         static ticks_t prev_ticks=0;
1670         struct tcp_connection *c, *next;
1671         ticks_t ticks;
1672         unsigned h;
1673         int fd;
1674         
1675         
1676         ticks=get_ticks_raw();
1677         if (((ticks-prev_ticks)<TCPCONN_TIMEOUT_MIN_RUN) && !force) return;
1678         prev_ticks=ticks;
1679         TCPCONN_LOCK; /* fixme: we can lock only on delete IMO */
1680         for(h=0; h<TCP_ID_HASH_SIZE; h++){
1681                 c=tcpconn_id_hash[h];
1682                 while(c){
1683                         next=c->id_next;
1684                         if (force ||((atomic_get(&c->refcnt)==0) &&
1685                                                 ((s_ticks_t)(ticks-c->timeout)>=0))){
1686                                 if (!force)
1687                                         DBG("tcpconn_timeout: timeout for hash=%d - %p"
1688                                                         " (%d > %d)\n", h, c, ticks, c->timeout);
1689                                 if (c->s>0 && is_tcp_main){
1690                                         /* we cannot close or remove the fd if we are not in the
1691                                          * tcp main proc.*/
1692                                         fd=c->s;
1693                                         if (!(c->flags & F_CONN_REMOVED)){
1694                                                 io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
1695                                                 c->flags|=F_CONN_REMOVED;
1696                                         }
1697                                 }else{
1698                                         fd=-1;
1699                                 }
1700 #ifdef USE_TLS
1701                                 if (c->type==PROTO_TLS)
1702                                         tls_close(c, fd);
1703 #endif
1704                                 _tcpconn_rm(c);
1705                                 if (fd>0) {
1706                                         close(fd);
1707                                 }
1708                                 (*tcp_connections_no)--;
1709                         }
1710                         c=next;
1711                 }
1712         }
1713         TCPCONN_UNLOCK;
1714 }
1715
1716
1717
1718 /* tcp main loop */
1719 void tcp_main_loop()
1720 {
1721
1722         struct socket_info* si;
1723         int r;
1724         
1725         is_tcp_main=1; /* mark this process as tcp main */
1726         
1727         tcp_main_max_fd_no=get_max_open_fds();
1728         /* init send fd queues (here because we want mem. alloc only in the tcp
1729          *  process */
1730 #ifdef SEND_FD_QUEUE
1731         if (init_send_fd_queues()<0){
1732                 LOG(L_CRIT, "ERROR: init_tcp: could not init send fd queues\n");
1733                 goto error;
1734         }
1735 #endif
1736         /* init io_wait (here because we want the memory allocated only in
1737          * the tcp_main process) */
1738         if  (init_io_wait(&io_h, tcp_main_max_fd_no, tcp_poll_method)<0)
1739                 goto error;
1740         /* init: start watching all the fds*/
1741         
1742         /* add all the sockets we listen on for connections */
1743         for (si=tcp_listen; si; si=si->next){
1744                 if ((si->proto==PROTO_TCP) &&(si->socket!=-1)){
1745                         if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
1746                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1747                                                         "listen socket to the fd list\n");
1748                                 goto error;
1749                         }
1750                 }else{
1751                         LOG(L_CRIT, "BUG: tcp_main_loop: non tcp address in tcp_listen\n");
1752                 }
1753         }
1754 #ifdef USE_TLS
1755         if (!tls_disable && tls_loaded()){
1756                 for (si=tls_listen; si; si=si->next){
1757                         if ((si->proto==PROTO_TLS) && (si->socket!=-1)){
1758                                 if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
1759                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1760                                                         "tls listen socket to the fd list\n");
1761                                         goto error;
1762                                 }
1763                         }else{
1764                                 LOG(L_CRIT, "BUG: tcp_main_loop: non tls address"
1765                                                 " in tls_listen\n");
1766                         }
1767                 }
1768         }
1769 #endif
1770         /* add all the unix sockets used for communcation with other ser processes
1771          *  (get fd, new connection a.s.o) */
1772         for (r=1; r<process_no; r++){
1773                 if (pt[r].unix_sock>0) /* we can't have 0, we never close it!*/
1774                         if (io_watch_add(&io_h, pt[r].unix_sock, F_PROC, &pt[r])<0){
1775                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1776                                                         "process %d unix socket to the fd list\n", r);
1777                                         goto error;
1778                         }
1779         }
1780         /* add all the unix sokets used for communication with the tcp childs */
1781         for (r=0; r<tcp_children_no; r++){
1782                 if (tcp_children[r].unix_sock>0)/*we can't have 0, we never close it!*/
1783                         if (io_watch_add(&io_h, tcp_children[r].unix_sock, F_TCPCHILD,
1784                                                         &tcp_children[r]) <0){
1785                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1786                                                 "tcp child %d unix socket to the fd list\n", r);
1787                                 goto error;
1788                         }
1789         }
1790         
1791         /* main loop */
1792         switch(io_h.poll_method){
1793                 case POLL_POLL:
1794                         while(1){
1795                                 /* wait and process IO */
1796                                 io_wait_loop_poll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0); 
1797                                 send_fd_queue_run(&send2child_q); /* then new io */
1798                                 /* remove old connections */
1799                                 tcpconn_timeout(0);
1800                         }
1801                         break;
1802 #ifdef HAVE_SELECT
1803                 case POLL_SELECT:
1804                         while(1){
1805                                 io_wait_loop_select(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1806                                 send_fd_queue_run(&send2child_q); /* then new io */
1807                                 tcpconn_timeout(0);
1808                         }
1809                         break;
1810 #endif
1811 #ifdef HAVE_SIGIO_RT
1812                 case POLL_SIGIO_RT:
1813                         while(1){
1814                                 io_wait_loop_sigio_rt(&io_h, TCP_MAIN_SELECT_TIMEOUT);
1815                                 send_fd_queue_run(&send2child_q); /* then new io */
1816                                 tcpconn_timeout(0);
1817                         }
1818                         break;
1819 #endif
1820 #ifdef HAVE_EPOLL
1821                 case POLL_EPOLL_LT:
1822                         while(1){
1823                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1824                                 send_fd_queue_run(&send2child_q); /* then new io */
1825                                 tcpconn_timeout(0);
1826                         }
1827                         break;
1828                 case POLL_EPOLL_ET:
1829                         while(1){
1830                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 1);
1831                                 send_fd_queue_run(&send2child_q); /* then new io */
1832                                 tcpconn_timeout(0);
1833                         }
1834                         break;
1835 #endif
1836 #ifdef HAVE_KQUEUE
1837                 case POLL_KQUEUE:
1838                         while(1){
1839                                 io_wait_loop_kqueue(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1840                                 send_fd_queue_run(&send2child_q); /* then new io */
1841                                 tcpconn_timeout(0);
1842                         }
1843                         break;
1844 #endif
1845 #ifdef HAVE_DEVPOLL
1846                 case POLL_DEVPOLL:
1847                         while(1){
1848                                 io_wait_loop_devpoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1849                                 send_fd_queue_run(&send2child_q); /* then new io */
1850                                 tcpconn_timeout(0);
1851                         }
1852                         break;
1853 #endif
1854                 default:
1855                         LOG(L_CRIT, "BUG: tcp_main_loop: no support for poll method "
1856                                         " %s (%d)\n", 
1857                                         poll_method_name(io_h.poll_method), io_h.poll_method);
1858                         goto error;
1859         }
1860 error:
1861 #ifdef SEND_FD_QUEUE
1862         destroy_send_fd_queues();
1863 #endif
1864         destroy_io_wait(&io_h);
1865         LOG(L_CRIT, "ERROR: tcp_main_loop: exiting...");
1866         exit(-1);
1867 }
1868
1869
1870
1871 /* cleanup before exit */
1872 void destroy_tcp()
1873 {
1874                 if (tcpconn_id_hash){
1875                         if (tcpconn_lock)
1876                                 TCPCONN_UNLOCK; /* hack: force-unlock the tcp lock in case
1877                                                                    some process was terminated while holding 
1878                                                                    it; this will allow an almost gracious 
1879                                                                    shutdown */
1880                         tcpconn_timeout(1); /* force close/expire for all active tcpconns*/
1881                         shm_free(tcpconn_id_hash);
1882                         tcpconn_id_hash=0;
1883                 }
1884                 if (tcp_connections_no){
1885                         shm_free(tcp_connections_no);
1886                         tcp_connections_no=0;
1887                 }
1888                 if (connection_id){
1889                         shm_free(connection_id);
1890                         connection_id=0;
1891                 }
1892                 if (tcpconn_aliases_hash){
1893                         shm_free(tcpconn_aliases_hash);
1894                         tcpconn_aliases_hash=0;
1895                 }
1896                 if (tcpconn_lock){
1897                         lock_destroy(tcpconn_lock);
1898                         lock_dealloc((void*)tcpconn_lock);
1899                         tcpconn_lock=0;
1900                 }
1901                 if (tcp_children){
1902                         pkg_free(tcp_children);
1903                         tcp_children=0;
1904                 }
1905 }
1906
1907
1908
1909 int init_tcp()
1910 {
1911         char* poll_err;
1912         
1913         /* init lock */
1914         tcpconn_lock=lock_alloc();
1915         if (tcpconn_lock==0){
1916                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc lock\n");
1917                 goto error;
1918         }
1919         if (lock_init(tcpconn_lock)==0){
1920                 LOG(L_CRIT, "ERROR: init_tcp: could not init lock\n");
1921                 lock_dealloc((void*)tcpconn_lock);
1922                 tcpconn_lock=0;
1923                 goto error;
1924         }
1925         /* init globals */
1926         tcp_connections_no=shm_malloc(sizeof(int));
1927         if (tcp_connections_no==0){
1928                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
1929                 goto error;
1930         }
1931         *tcp_connections_no=0;
1932         connection_id=shm_malloc(sizeof(int));
1933         if (connection_id==0){
1934                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
1935                 goto error;
1936         }
1937         *connection_id=1;
1938         /* alloc hashtables*/
1939         tcpconn_aliases_hash=(struct tcp_conn_alias**)
1940                         shm_malloc(TCP_ALIAS_HASH_SIZE* sizeof(struct tcp_conn_alias*));
1941         if (tcpconn_aliases_hash==0){
1942                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc address hashtable\n");
1943                 goto error;
1944         }
1945         tcpconn_id_hash=(struct tcp_connection**)shm_malloc(TCP_ID_HASH_SIZE*
1946                                                                 sizeof(struct tcp_connection*));
1947         if (tcpconn_id_hash==0){
1948                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc id hashtable\n");
1949                 goto error;
1950         }
1951         /* init hashtables*/
1952         memset((void*)tcpconn_aliases_hash, 0, 
1953                         TCP_ALIAS_HASH_SIZE * sizeof(struct tcp_conn_alias*));
1954         memset((void*)tcpconn_id_hash, 0, 
1955                         TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
1956         
1957         /* fix config variables */
1958         if (tcp_connect_timeout<0)
1959                 tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT;
1960         if (tcp_send_timeout<0)
1961                 tcp_send_timeout=DEFAULT_TCP_SEND_TIMEOUT;
1962         if (tcp_con_lifetime<0){
1963                 /* set to max value (~ 1/2 MAX_INT) */
1964                 tcp_con_lifetime=MAX_TCP_CON_LIFETIME;
1965         }else{
1966                 if ((unsigned)tcp_con_lifetime > 
1967                                 (unsigned)TICKS_TO_S(MAX_TCP_CON_LIFETIME)){
1968                         LOG(L_WARN, "init_tcp: tcp_con_lifetime too big (%u s), "
1969                                         " the maximum value is %u\n", tcp_con_lifetime,
1970                                         TICKS_TO_S(MAX_TCP_CON_LIFETIME));
1971                         tcp_con_lifetime=MAX_TCP_CON_LIFETIME;
1972                 }else{
1973                         tcp_con_lifetime=S_TO_TICKS(tcp_con_lifetime);
1974                 }
1975         }
1976         
1977                 poll_err=check_poll_method(tcp_poll_method);
1978         
1979         /* set an appropriate poll method */
1980         if (poll_err || (tcp_poll_method==0)){
1981                 tcp_poll_method=choose_poll_method();
1982                 if (poll_err){
1983                         LOG(L_ERR, "ERROR: init_tcp: %s, using %s instead\n",
1984                                         poll_err, poll_method_name(tcp_poll_method));
1985                 }else{
1986                         LOG(L_INFO, "init_tcp: using %s as the io watch method"
1987                                         " (auto detected)\n", poll_method_name(tcp_poll_method));
1988                 }
1989         }else{
1990                         LOG(L_INFO, "init_tcp: using %s io watch method (config)\n",
1991                                         poll_method_name(tcp_poll_method));
1992         }
1993         
1994         return 0;
1995 error:
1996         /* clean-up */
1997         destroy_tcp();
1998         return -1;
1999 }
2000
2001
#ifdef TCP_CHILD_NON_BLOCKING
/* Adds O_NONBLOCK to the file status flags of s, keeping the flags that
 * are already set.
 * returns: 0 on success, -1 on error (the failing fcntl is logged) */
static int set_non_blocking(int s)
{
	int cur_flags;

	/* get the current flags, so that only O_NONBLOCK is added */
	cur_flags=fcntl(s, F_GETFL);
	if (cur_flags==-1){
		LOG(L_ERR, "ERROR: set_non_blocking: fnctl failed: (%d) %s\n",
				errno, strerror(errno));
		return -1;
	}
	if (fcntl(s, F_SETFL, cur_flags|O_NONBLOCK)==-1){
		LOG(L_ERR, "ERROR: set_non_blocking: fcntl: set non-blocking failed:"
				" (%d) %s\n", errno, strerror(errno));
		return -1;
	}
	return 0;
}

#endif
2025
2026
/* Prepares the socket pair fd[0], fd[1]: if TCP_CHILD_NON_BLOCKING is
 * defined both sockets are switched to non-blocking mode (fd[0] first; fd[1]
 * is not touched if fd[0] fails), otherwise nothing is done.
 * returns: -1 on error, 0 on success */
int tcp_fix_child_sockets(int* fd)
{
#ifdef TCP_CHILD_NON_BLOCKING
	int i;

	for (i=0; i<2; i++){
		if (set_non_blocking(fd[i])<0)
			return -1;
	}
#endif
	return 0;
}
2038
2039
2040
2041 /* starts the tcp processes */
2042 int tcp_init_children()
2043 {
2044         int r;
2045         int reader_fd_1; /* for comm. with the tcp children read  */
2046         pid_t pid;
2047         struct socket_info *si;
2048         
2049         /* estimate max fd. no:
2050          * 1 tcp send unix socket/all_proc, 
2051          *  + 1 udp sock/udp proc + 1 tcp_child sock/tcp child*
2052          *  + no_listen_tcp */
2053         for(r=0, si=tcp_listen; si; si=si->next, r++);
2054 #ifdef USE_TLS
2055         if (! tls_disable)
2056                 for (si=tls_listen; si; si=si->next, r++);
2057 #endif
2058         
2059         register_fds(r+tcp_max_connections+get_max_procs()-1 /* tcp main */);
2060 #if 0
2061         tcp_max_fd_no=get_max_procs()*2 +r-1 /* timer */ +3; /* stdin/out/err*/
2062         /* max connections can be temporarily exceeded with estimated_process_count
2063          * - tcp_main (tcpconn_connect called simultaneously in all all the 
2064          *  processes) */
2065         tcp_max_fd_no+=tcp_max_connections+get_max_procs()-1 /* tcp main */;
2066 #endif
2067         /* alloc the children array */
2068         tcp_children=pkg_malloc(sizeof(struct tcp_child)*tcp_children_no);
2069         if (tcp_children==0){
2070                         LOG(L_ERR, "ERROR: tcp_init_children: out of memory\n");
2071                         goto error;
2072         }
2073         /* create the tcp sock_info structures */
2074         /* copy the sockets --moved to main_loop*/
2075         
2076         /* fork children & create the socket pairs*/
2077         for(r=0; r<tcp_children_no; r++){
2078                 child_rank++;
2079                 pid=fork_tcp_process(child_rank, "tcp receiver", r, &reader_fd_1);
2080                 if (pid<0){
2081                         LOG(L_ERR, "ERROR: tcp_main: fork failed: %s\n",
2082                                         strerror(errno));
2083                         goto error;
2084                 }else if (pid>0){
2085                         /* parent */
2086                 }else{
2087                         /* child */
2088                         bind_address=0; /* force a SEGFAULT if someone uses a non-init.
2089                                                            bind address on tcp */
2090                         tcp_receive_loop(reader_fd_1);
2091                 }
2092         }
2093         return 0;
2094 error:
2095         return -1;
2096 }
2097
2098
2099
2100 void tcp_get_info(struct tcp_gen_info *ti)
2101 {
2102         ti->tcp_readers=tcp_children_no;
2103         ti->tcp_max_connections=tcp_max_connections;
2104         ti->tcp_connections_no=*tcp_connections_no;
2105 }
2106
2107 #endif