- support for setting the source address in tcp_send() and tcpconn_get()
[sip-router] / tcp_main.c
1 /*
2  * $Id$
3  *
4  * Copyright (C) 2001-2003 FhG Fokus
5  *
6  * This file is part of ser, a free SIP server.
7  *
8  * ser is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version
12  *
13  * For a license to use the ser software under conditions
14  * other than those described here, or to purchase support for this
15  * software, please contact iptel.org by e-mail at the following addresses:
16  *    info@iptel.org
17  *
18  * ser is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26  */
27 /*
28  * History:
29  * --------
30  *  2002-11-29  created by andrei
31  *  2002-12-11  added tcp_send (andrei)
32  *  2003-01-20  locking fixes, hashtables (andrei)
33  *  2003-02-20  s/lock_t/gen_lock_t/ to avoid a conflict on solaris (andrei)
34  *  2003-02-25  Nagle is disabled if -DDISABLE_NAGLE (andrei)
35  *  2003-03-29  SO_REUSEADDR before calling bind to allow
36  *              server restart, Nagle set on the (hopefully)
37  *              correct socket (jiri)
38  *  2003-03-31  always try to find the corresponding tcp listen socket for
39  *               a temp. socket and store it in *->bind_address: added
40  *               find_tcp_si, modified tcpconn_connect (andrei)
41  *  2003-04-14  set sockopts to TOS low delay (andrei)
42  *  2003-06-30  moved tcp new connect checking & handling to
43  *               handle_new_connect (andrei)
44  *  2003-07-09  tls_close called before closing the tcp connection (andrei)
45  *  2003-10-24  converted to the new socket_info lists (andrei)
46  *  2003-10-27  tcp port aliases support added (andrei)
47  *  2003-11-04  always lock before manipulating refcnt; sendchild
48  *              does not inc refcnt by itself anymore (andrei)
49  *  2003-11-07  different unix sockets are used for fd passing
50  *              to/from readers/writers (andrei)
51  *  2003-11-17  handle_new_connect & tcp_connect will close the 
52  *              new socket if tcpconn_new return 0 (e.g. out of mem) (andrei)
53  *  2003-11-28  tcp_blocking_write & tcp_blocking_connect added (andrei)
54  *  2004-11-08  dropped find_tcp_si and replaced with find_si (andrei)
55  *  2005-06-07  new tcp optimized code, supports epoll (LT), sigio + real time
56  *               signals, poll & select (andrei)
57  *  2005-06-26  *bsd kqueue support (andrei)
58  *  2005-07-04  solaris /dev/poll support (andrei)
59  *  2005-07-08  tcp_max_connections, tcp_connection_lifetime, don't accept
60  *               more connections if tcp_max_connections is exceeded (andrei)
61  *  2005-10-21  cleanup all the open connections on exit
62  *              decrement the no. of open connections on timeout too    (andrei)
    *  2006-01-30  queue send_fd request and execute them at the end of the
63  *              poll loop  (#ifdef) (andrei)
64  *              process all children requests, before attempting to send
65  *              them new stuff (fixes some deadlocks) (andrei)
66  *  2006-02-03  timers are run only once per s (andrei)
67  *              tcp children fds can be non-blocking; send fds are queued on
68  *              EAGAIN; lots of bug fixes (andrei)
69  *  2006-02-06  better tcp_max_connections checks, tcp_connections_no moved to
70  *              shm (andrei)
71  *  2006-04-12  tcp_send() changed to use struct dest_info (andrei)
72  *  2006-11-02  switched to atomic ops for refcnt, locking improvements 
73  *               (andrei)
74  *  2006-11-04  switched to raw ticks (to fix conversion errors which could
75  *               result in inf. lifetime) (andrei)
76  *  2007-07-25  tcpconn_connect can now bind the socket on a specified
77  *                source addr/port (andrei)
78  *  2007-07-26   tcp_send() and tcpconn_get() can now use a specified source
79  *                addr./port (andrei)
80  */
81
82
83 #ifdef USE_TCP
84
85
86 #ifndef SHM_MEM
87 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
88 #endif
89
90 #include <sys/time.h>
91 #include <sys/types.h>
92 #include <sys/select.h>
93 #include <sys/socket.h>
94 #include <netinet/in.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #include <netinet/tcp.h>
98 #include <sys/uio.h>  /* writev*/
99 #include <netdb.h>
100 #include <stdlib.h> /*exit() */
101
102 #include <unistd.h>
103
104 #include <errno.h>
105 #include <string.h>
106
107 #ifdef HAVE_SELECT
108 #include <sys/select.h>
109 #endif
110 #include <sys/poll.h>
111
112
113 #include "ip_addr.h"
114 #include "pass_fd.h"
115 #include "tcp_conn.h"
116 #include "globals.h"
117 #include "pt.h"
118 #include "locking.h"
119 #include "mem/mem.h"
120 #include "mem/shm_mem.h"
121 #include "timer.h"
122 #include "sr_module.h"
123 #include "tcp_server.h"
124 #include "tcp_init.h"
125 #include "tsend.h"
126 #include "timer_ticks.h"
127 #ifdef CORE_TLS
128 #include "tls/tls_server.h"
129 #define tls_loaded() 1
130 #else
131 #include "tls_hooks_init.h"
132 #include "tls_hooks.h"
133 #endif
134
135 #include "tcp_info.h"
136
137 #define local_malloc pkg_malloc
138 #define local_free   pkg_free
139
140 #define HANDLE_IO_INLINE
141 #include "io_wait.h"
142 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
143
144 #define TCP_PASS_NEW_CONNECTION_ON_DATA /* don't pass a new connection
145                                                                                    immediately to a child, wait for
146                                                                                    some data on it first */
147 #define TCP_LISTEN_BACKLOG 1024
148 #define SEND_FD_QUEUE /* queue send fd requests on EAGAIN, instead of sending 
149                                                         them immediately */
150 #define TCP_CHILD_NON_BLOCKING 
151 #ifdef SEND_FD_QUEUE
152 #ifndef TCP_CHILD_NON_BLOCKING
153 #define TCP_CHILD_NON_BLOCKING
154 #endif
155 #define MAX_SEND_FD_QUEUE_SIZE  tcp_main_max_fd_no
156 #define SEND_FD_QUEUE_SIZE              128  /* initial size */
157 #define MAX_SEND_FD_RETRIES             96       /* FIXME: not used for now */
158 #define SEND_FD_QUEUE_TIMEOUT   MS_TO_TICKS(2000)  /* 2 s */
159 #endif
160
161 /* maximum accepted lifetime (maximum possible is  ~ MAXINT/2) */
162 #define MAX_TCP_CON_LIFETIME    ((1U<<(sizeof(ticks_t)*8-1))-1)
163 /* minimum interval tcpconn_timeout() is allowed to run, in ticks */
164 #define TCPCONN_TIMEOUT_MIN_RUN S_TO_TICKS(1)  /* once per s */
165
166 enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
167                                 F_TCPCONN, F_TCPCHILD, F_PROC };
168
169 static int is_tcp_main=0; /* NOTE(review): presumably non-zero only in the tcp main process; the setter is not visible here -- confirm */
170
171 int tcp_accept_aliases=0; /* by default don't accept aliases */
172 int tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT; /* in s, upper limit for tcp_blocking_connect() */
173 int tcp_send_timeout=DEFAULT_TCP_SEND_TIMEOUT;
174 int tcp_con_lifetime=DEFAULT_TCP_CONNECTION_LIFETIME; /* added to get_ticks_raw() in tcpconn_new() to compute the connection timeout */
175 enum poll_types tcp_poll_method=0; /* by default choose the best method */
176 int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS; /* tcpconn_connect() refuses to open new connections once *tcp_connections_no reaches it */
177 int tcp_main_max_fd_no=0;
178
179 static union sockaddr_union tcp_source_ipv4_addr; /* saved bind/src v4 addr. */
180 static union sockaddr_union* tcp_source_ipv4=0; /* 0 until tcp_set_src_addr() is called with an AF_INET address */
181 #ifdef USE_IPV6
182 static union sockaddr_union tcp_source_ipv6_addr; /* saved bind/src v6 addr. */
183 static union sockaddr_union* tcp_source_ipv6=0; /* 0 until tcp_set_src_addr() is called with an AF_INET6 address */
184 #endif
185
186 static int* tcp_connections_no=0; /* current open connections */
187
188 /* connection hash table (after ip&port) , includes also aliases */
189 struct tcp_conn_alias** tcpconn_aliases_hash=0;
190 /* connection hash table (after connection id) */
191 struct tcp_connection** tcpconn_id_hash=0;
192 gen_lock_t* tcpconn_lock=0; /* NOTE(review): presumably the lock behind TCPCONN_LOCK/TCPCONN_UNLOCK -- confirm in tcp_conn.h */
193
194 struct tcp_child* tcp_children;
195 static int* connection_id=0; /* counter handed out (post-incremented) by tcpconn_new() as the connection id: unique for each connection, used for quickly finding the corresponding connection for a reply */
198 int unix_tcp_sock;
199
200 static int tcp_proto_no=-1; /* tcp protocol number as returned by getprotobyname; while it is -1 init_sock_opt() does not try to set TCP_NODELAY */
202
203 static io_wait_h io_h;
204
205
206
207 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
208                                                                                 struct ip_addr* l_ip, int l_port);
209
210
211
212 /* sets source address used when opening new sockets and no source is specified
213  *  (by default the address is choosen by the kernel)
214  * Should be used only on init.
215  * returns -1 on error */
216 int tcp_set_src_addr(struct ip_addr* ip)
217 {
218         switch (ip->af){
219                 case AF_INET:
220                         ip_addr2su(&tcp_source_ipv4_addr, ip, 0);
221                         tcp_source_ipv4=&tcp_source_ipv4_addr;
222                         break;
223                 #ifdef USE_IPV6
224                 case AF_INET6:
225                         ip_addr2su(&tcp_source_ipv6_addr, ip, 0);
226                         tcp_source_ipv6=&tcp_source_ipv6_addr;
227                         break;
228                 #endif
229                 default:
230                         return -1;
231         }
232         return 0;
233 }
234
235
236
237 /* set all socket/fd options:  disable nagle, tos lowdelay, non-blocking
238  * return -1 on error */
239 static int init_sock_opt(int s)
240 {
241         int flags;
242         int optval;
243         
244 #ifdef DISABLE_NAGLE
245         flags=1;
246         if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
247                                         &flags, sizeof(flags))<0) ){
248                 LOG(L_WARN, "WARNING: init_sock_opt: could not disable Nagle: %s\n",
249                                 strerror(errno));
250         }
251 #endif
252         /* tos*/
253         optval = tos;
254         if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,sizeof(optval)) ==-1){
255                 LOG(L_WARN, "WARNING: init_sock_opt: setsockopt tos: %s\n",
256                                 strerror(errno));
257                 /* continue since this is not critical */
258         }
259         /* non-blocking */
260         flags=fcntl(s, F_GETFL);
261         if (flags==-1){
262                 LOG(L_ERR, "ERROR: init_sock_opt: fnctl failed: (%d) %s\n",
263                                 errno, strerror(errno));
264                 goto error;
265         }
266         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
267                 LOG(L_ERR, "ERROR: init_sock_opt: fcntl: set non-blocking failed:"
268                                 " (%d) %s\n", errno, strerror(errno));
269                 goto error;
270         }
271         return 0;
272 error:
273         return -1;
274 }
275
276
277
278 /* blocking connect on a non-blocking fd; it will timeout after
279  * tcp_connect_timeout 
280  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
281  * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
282  */
283 static int tcp_blocking_connect(int fd, const struct sockaddr *servaddr,
284                                                                 socklen_t addrlen)
285 {
286         int n;
287 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
288         fd_set sel_set;
289         fd_set orig_set;
290         struct timeval timeout;
291 #else
292         struct pollfd pf;
293 #endif
294         int elapsed;
295         int to;
296         int ticks;
297         int err;
298         unsigned int err_len;
299         int poll_err;
300         
301         poll_err=0;
302         to=tcp_connect_timeout;
303         ticks=get_ticks();
304 again:
305         n=connect(fd, servaddr, addrlen);
306         if (n==-1){
307                 if (errno==EINTR){
308                         elapsed=(get_ticks()-ticks)*TIMER_TICK;
309                         if (elapsed<to)         goto again;
310                         else goto error_timeout;
311                 }
312                 if (errno!=EINPROGRESS && errno!=EALREADY){
313                         LOG(L_ERR, "ERROR: tcp_blocking_connect: (%d) %s\n",
314                                         errno, strerror(errno));
315                         goto error;
316                 }
317         }else goto end;
318         
319         /* poll/select loop */
320 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
321                 FD_ZERO(&orig_set);
322                 FD_SET(fd, &orig_set);
323 #else
324                 pf.fd=fd;
325                 pf.events=POLLOUT;
326 #endif
327         while(1){
328                 elapsed=(get_ticks()-ticks)*TIMER_TICK;
329                 if (elapsed<to)
330                         to-=elapsed;
331                 else 
332                         goto error_timeout;
333 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
334                 sel_set=orig_set;
335                 timeout.tv_sec=to;
336                 timeout.tv_usec=0;
337                 n=select(fd+1, 0, &sel_set, 0, &timeout);
338 #else
339                 n=poll(&pf, 1, to*1000);
340 #endif
341                 if (n<0){
342                         if (errno==EINTR) continue;
343                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll/select failed:"
344                                         " (%d) %s\n", errno, strerror(errno));
345                         goto error;
346                 }else if (n==0) /* timeout */ continue;
347 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
348                 if (FD_ISSET(fd, &sel_set))
349 #else
350                 if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){ 
351                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll error: flags %x\n",
352                                         pf.revents);
353                         poll_err=1;
354                 }
355 #endif
356                 {
357                         err_len=sizeof(err);
358                         getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
359                         if ((err==0) && (poll_err==0)) goto end;
360                         if (err!=EINPROGRESS && err!=EALREADY){
361                                 LOG(L_ERR, "ERROR: tcp_blocking_connect: SO_ERROR (%d) %s\n",
362                                                 err, strerror(err));
363                                 goto error;
364                         }
365                 }
366         }
367 error_timeout:
368         /* timeout */
369         LOG(L_ERR, "ERROR: tcp_blocking_connect: timeout %d s elapsed from %d s\n",
370                         elapsed, tcp_connect_timeout);
371 error:
372         return -1;
373 end:
374         return 0;
375 }
376
377
378
#if 0
/* blocking write even on non-blocking sockets: keeps sending until the
 * whole buffer was written, waiting with select() (at most
 * tcp_send_timeout s per wait) each time the socket cannot take more data.
 * returns the initial len on success, -1 on error or timeout
 * (disabled code, kept for reference) */
static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
								unsigned int len)
{
	int sent;
	int ready;
	int wait_start;
	int total;
	fd_set wr_set;
	struct timeval tv;
	
	total=len;
	for(;;){
		sent=send(fd, buf, len,
#ifdef HAVE_MSG_NOSIGNAL
				MSG_NOSIGNAL
#else
				0
#endif
			);
		if (sent<0){
			if (errno==EINTR) continue; /* interrupted, just retry */
			if (errno!=EAGAIN && errno!=EWOULDBLOCK){
				LOG(L_ERR, "tcp_blocking_write: failed to send: (%d) %s\n",
						errno, strerror(errno));
				return -1;
			}
			/* would block => wait below until writable */
		}else if (sent<len){
			/* partial write: skip over what was already sent */
			buf+=sent;
			len-=sent;
		}else{
			/* success: full write */
			return total;
		}
		/* wait until the socket is writable again */
		for(;;){
			FD_ZERO(&wr_set);
			FD_SET(fd, &wr_set);
			tv.tv_sec=tcp_send_timeout;
			tv.tv_usec=0;
			wait_start=get_ticks();
			ready=select(fd+1, 0, &wr_set, 0, &tv);
			if (ready<0){
				if (errno==EINTR) continue; /* signal, ignore */
				LOG(L_ERR, "ERROR: tcp_blocking_write: select failed: "
						" (%d) %s\n", errno, strerror(errno));
				return -1;
			}
			if (ready==0){
				/* timeout */
				if (get_ticks()-wait_start>=tcp_send_timeout){
					LOG(L_ERR, "ERROR: tcp_blocking_write: send timeout (%d)\n",
							tcp_send_timeout);
					return -1;
				}
				continue;
			}
			if (FD_ISSET(fd, &wr_set))
				break; /* we can write again */
		}
	}
}
#endif
448
449
450
451 struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
452                                                                         union sockaddr_union* local_addr,
453                                                                         struct socket_info* ba, int type, 
454                                                                         int state)
455 {
456         struct tcp_connection *c;
457         
458         c=(struct tcp_connection*)shm_malloc(sizeof(struct tcp_connection));
459         if (c==0){
460                 LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
461                 goto error;
462         }
463         memset(c, 0, sizeof(struct tcp_connection)); /* zero init */
464         c->s=sock;
465         c->fd=-1; /* not initialized */
466         if (lock_init(&c->write_lock)==0){
467                 LOG(L_ERR, "ERROR: tcpconn_new: init lock failed\n");
468                 goto error;
469         }
470         
471         c->rcv.src_su=*su;
472         
473         atomic_set(&c->refcnt, 0);
474         su2ip_addr(&c->rcv.src_ip, su);
475         c->rcv.src_port=su_getport(su);
476         c->rcv.bind_address=ba;
477         if (likely(local_addr)){
478                 su2ip_addr(&c->rcv.dst_ip, local_addr);
479                 c->rcv.dst_port=su_getport(local_addr);
480         }else if (ba){
481                 c->rcv.dst_ip=ba->address;
482                 c->rcv.dst_port=ba->port_no;
483         }
484         print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
485         DBG(     "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
486         init_tcp_req(&c->req);
487         c->id=(*connection_id)++;
488         c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
489         c->rcv.proto_reserved2=0;
490         c->state=state;
491         c->extra_data=0;
492 #ifdef USE_TLS
493         if (type==PROTO_TLS){
494                 if (tls_tcpconn_init(c, sock)==-1) goto error;
495         }else
496 #endif /* USE_TLS*/
497         {
498                 c->type=PROTO_TCP;
499                 c->rcv.proto=PROTO_TCP;
500                 c->timeout=get_ticks_raw()+tcp_con_lifetime;
501         }
502         c->flags|=F_CONN_REMOVED;
503         
504         return c;
505         
506 error:
507         if (c) shm_free(c);
508         return 0;
509 }
510
511
512
513 struct tcp_connection* tcpconn_connect( union sockaddr_union* server, 
514                                                                                 union sockaddr_union* from,
515                                                                                 int type)
516 {
517         int s;
518         struct socket_info* si;
519         union sockaddr_union my_name;
520         socklen_t my_name_len;
521         struct tcp_connection* con;
522         struct ip_addr ip;
523
524         s=-1;
525         
526         if (*tcp_connections_no >= tcp_max_connections){
527                 LOG(L_ERR, "ERROR: tcpconn_connect: maximum number of connections"
528                                         " exceeded (%d/%d)\n",
529                                         *tcp_connections_no, tcp_max_connections);
530                 goto error;
531         }
532         s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
533         if (s==-1){
534                 LOG(L_ERR, "ERROR: tcpconn_connect: socket: (%d) %s\n",
535                                 errno, strerror(errno));
536                 goto error;
537         }
538         if (init_sock_opt(s)<0){
539                 LOG(L_ERR, "ERROR: tcpconn_connect: init_sock_opt failed\n");
540                 goto error;
541         }
542         
543         if (from && bind(s, &from->s, sockaddru_len(*from)) != 0)
544                 LOG(L_WARN, "WARNING: tcpconn_connect: binding to source address"
545                                         " failed: %s [%d]\n", strerror(errno), errno);
546
547         if (tcp_blocking_connect(s, &server->s, sockaddru_len(*server))<0){
548                 LOG(L_ERR, "ERROR: tcpconn_connect: tcp_blocking_connect failed\n");
549                 goto error;
550         }
551         if (from){
552                 su2ip_addr(&ip, from);
553                 if (!ip_addr_any(&ip))
554                         /* we already know the source ip, skip the sys. call */
555                         goto find_socket;
556         }
557         my_name_len=sizeof(my_name);
558         if (getsockname(s, &my_name.s, &my_name_len)!=0){
559                 LOG(L_ERR, "ERROR: tcp_connect: getsockname failed: %s(%d)\n",
560                                 strerror(errno), errno);
561                 si=0; /* try to go on */
562                 goto skip;
563         }
564         from=&my_name; /* update from with the real "from" address */
565         su2ip_addr(&ip, &my_name);
566 find_socket:
567 #ifdef USE_TLS
568         if (type==PROTO_TLS)
569                 si=find_si(&ip, 0, PROTO_TLS);
570         else
571 #endif
572                 si=find_si(&ip, 0, PROTO_TCP);
573 skip:
574         if (si==0){
575                 LOG(L_WARN, "WARNING: tcp_connect: could not find corresponding"
576                                 " listening socket, using default...\n");
577                 if (server->s.sa_family==AF_INET) si=sendipv4_tcp;
578 #ifdef USE_IPV6
579                 else si=sendipv6_tcp;
580 #endif
581         }
582         con=tcpconn_new(s, server, from, si,  type, S_CONN_CONNECT);
583         if (con==0){
584                 LOG(L_ERR, "ERROR: tcp_connect: tcpconn_new failed, closing the "
585                                  " socket\n");
586                 goto error;
587         }
588         return con;
589         /*FIXME: set sock idx! */
590 error:
591         if (s!=-1) close(s); /* close the opened socket */
592         return 0;
593 }
594
595
596
597 /* adds a tcp connection to the tcpconn hashes
598  * Note: it's called _only_ from the tcp_main process */
599 inline static struct tcp_connection*  tcpconn_add(struct tcp_connection *c)
600 {
601         struct ip_addr zero_ip;
602
603         if (likely(c)){
604                 ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
605                 c->id_hash=tcp_id_hash(c->id);
606                 c->aliases=0;
607                 TCPCONN_LOCK;
608                 /* add it at the begining of the list*/
609                 tcpconn_listadd(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
610                 /* set the aliases */
611                 /* first alias is for (peer_ip, peer_port, 0 ,0) -- for finding
612                  *  any connection to peer_ip, peer_port
613                  * the second alias is for (peer_ip, peer_port, local_addr, 0) -- for
614                  *  finding any conenction to peer_ip, peer_port from local_addr 
615                  * the third alias is for (peer_ip, peer_port, local_addr, local_port) 
616                  *   -- for finding if a fully specified connection exists */
617                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &zero_ip, 0);
618                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0);
619                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
620                                                                                                                 c->rcv.dst_port);
621                 /* ignore add_alias errors, there are some valid cases when one
622                  *  of the add_alias would fail (e.g. first add_alias for 2 connections
623                  *   with the same destination but different src. ip*/
624                 TCPCONN_UNLOCK;
625                 DBG("tcpconn_add: hashes: %d:%d:%d, %d\n",
626                                                                                                 c->con_aliases[0].hash,
627                                                                                                 c->con_aliases[1].hash,
628                                                                                                 c->con_aliases[2].hash,
629                                                                                                 c->id_hash);
630                 return c;
631         }else{
632                 LOG(L_CRIT, "tcpconn_add: BUG: null connection pointer\n");
633                 return 0;
634         }
635 }
636
637
638 /* unsafe tcpconn_rm version (nolocks) */
639 void _tcpconn_rm(struct tcp_connection* c)
640 {
641         int r;
642         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
643         /* remove all the aliases */
644         for (r=0; r<c->aliases; r++)
645                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
646                                                 &c->con_aliases[r], next, prev);
647         lock_destroy(&c->write_lock);
648 #ifdef USE_TLS
649         if (c->type==PROTO_TLS) tls_tcpconn_clean(c);
650 #endif
651         shm_free(c);
652 }
653
654
655
656 void tcpconn_rm(struct tcp_connection* c)
657 {
658         int r;
659         TCPCONN_LOCK;
660         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
661         /* remove all the aliases */
662         for (r=0; r<c->aliases; r++)
663                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
664                                                 &c->con_aliases[r], next, prev);
665         TCPCONN_UNLOCK;
666         lock_destroy(&c->write_lock);
667 #ifdef USE_TLS
668         if ((c->type==PROTO_TLS)&&(c->extra_data)) tls_tcpconn_clean(c);
669 #endif
670         shm_free(c);
671 }
672
673
674 /* finds a connection, if id=0 uses the ip addr, port, local_ip and local port
675  *  (host byte order) and tries to find the connection that matches all of
676  *   them. Wild cards can be used for local_ip and local_port (a 0 filled
677  *   ip address and/or a 0 local port).
678  * WARNING: unprotected (locks) use tcpconn_get unless you really
679  * know what you are doing */
680 struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port,
681                                                                                 struct ip_addr* l_ip, int l_port)
682 {
683
684         struct tcp_connection *c;
685         struct tcp_conn_alias* a;
686         unsigned hash;
687         int is_local_ip_any;
688         
689 #ifdef EXTRA_DEBUG
690         DBG("tcpconn_find: %d  port %d\n",id, port);
691         if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
692 #endif
693         if (id){
694                 hash=tcp_id_hash(id);
695                 for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
696 #ifdef EXTRA_DEBUG
697                         DBG("c=%p, c->id=%d, port=%d\n",c, c->id, c->rcv.src_port);
698                         print_ip("ip=", &c->rcv.src_ip, "\n");
699 #endif
700                         if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
701                 }
702         }else if (ip){
703                 hash=tcp_addr_hash(ip, port, l_ip, l_port);
704                 is_local_ip_any=ip_addr_any(l_ip);
705                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
706 #ifdef EXTRA_DEBUG
707                         DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
708                                         a->parent->id, a->port, a->parent->rcv.src_port);
709                         print_ip("ip=",&a->parent->rcv.src_ip,"\n");
710 #endif
711                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
712                                         ((l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
713                                         (ip_addr_cmp(ip, &a->parent->rcv.src_ip)) &&
714                                         (is_local_ip_any ||
715                                                 ip_addr_cmp(l_ip, &a->parent->rcv.dst_ip))
716                                 )
717                                 return a->parent;
718                 }
719         }
720         return 0;
721 }
722
723
724
725 /* _tcpconn_find with locks and timeout
726  * local_addr contains the desired local ip:port. If null any local address 
727  * will be used.  IN*ADDR_ANY or 0 port are wild cards.
728  */
729 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
730                                                                         union sockaddr_union* local_addr,
731                                                                         ticks_t timeout)
732 {
733         struct tcp_connection* c;
734         struct ip_addr local_ip;
735         int local_port;
736         
737         local_port=0;
738         if (ip){
739                 if (local_addr){
740                         su2ip_addr(&local_ip, local_addr);
741                         local_port=su_getport(local_addr);
742                 }else{
743                         ip_addr_mk_any(ip->af, &local_ip);
744                         local_port=0;
745                 }
746         }
747         TCPCONN_LOCK;
748         c=_tcpconn_find(id, ip, port, &local_ip, local_port);
749         if (c){ 
750                         atomic_inc(&c->refcnt);
751                         c->timeout=get_ticks_raw()+timeout;
752         }
753         TCPCONN_UNLOCK;
754         return c;
755 }
756
757
758
759 /* add c->dst:port, local_addr as an alias for the "id" connection, 
760  * returns 0 on success, <0 on failure ( -1  - null c, -2 too many aliases,
761  *  -3 alias already present and pointing to another connection)
762  * WARNING: must be called with TCPCONN_LOCK held */
763 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
764                                                                                 struct ip_addr* l_ip, int l_port)
765 {
766         unsigned hash;
767         struct tcp_conn_alias* a;
768         int is_local_ip_any;
769         
770         a=0;
771         is_local_ip_any=ip_addr_any(l_ip);
772         if (c){
773                 hash=tcp_addr_hash(&c->rcv.src_ip, port, l_ip, l_port);
774                 /* search the aliases for an already existing one */
775                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
776                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
777                                         ( (l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
778                                         (ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) &&
779                                         ( is_local_ip_any || 
780                                           ip_addr_cmp(&a->parent->rcv.dst_ip, l_ip))
781                                         ){
782                                 /* found */
783                                 if (a->parent!=c) goto error_sec;
784                                 else goto ok;
785                         }
786                 }
787                 if (c->aliases>=TCP_CON_MAX_ALIASES) goto error_aliases;
788                 c->con_aliases[c->aliases].parent=c;
789                 c->con_aliases[c->aliases].port=port;
790                 c->con_aliases[c->aliases].hash=hash;
791                 tcpconn_listadd(tcpconn_aliases_hash[hash], 
792                                                                 &c->con_aliases[c->aliases], next, prev);
793                 c->aliases++;
794         }else goto error_not_found;
795 ok:
796 #ifdef EXTRA_DEBUG
797         if (a) DBG("_tcpconn_add_alias_unsafe: alias already present\n");
798         else   DBG("_tcpconn_add_alias_unsafe: alias port %d for hash %d, id %d\n",
799                         port, hash, c->id);
800 #endif
801         return 0;
802 error_aliases:
803         /* too many aliases */
804         return -2;
805 error_not_found:
806         /* null connection */
807         return -1;
808 error_sec:
809         /* alias already present and pointing to a different connection
810          * (hijack attempt?) */
811         return -3;
812 }
813
814
815
816 /* add port as an alias for the "id" connection, 
817  * returns 0 on success,-1 on failure */
818 int tcpconn_add_alias(int id, int port, int proto)
819 {
820         struct tcp_connection* c;
821         int ret;
822         struct ip_addr zero_ip;
823         
824         /* fix the port */
825         port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
826         TCPCONN_LOCK;
827         /* check if alias already exists */
828         c=_tcpconn_find(id, 0, 0, 0, 0);
829         if (c){
830                 ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
831                 
832                 /* alias src_ip:port, 0, 0 */
833                 ret=_tcpconn_add_alias_unsafe(c, port,  &zero_ip, 0);
834                 if (ret<0 && ret!=-3) goto error;
835                 /* alias src_ip:port, local_ip, 0 */
836                 ret=_tcpconn_add_alias_unsafe(c, port,  &c->rcv.dst_ip, 0);
837                 if (ret<0 && ret!=-3) goto error;
838                 /* alias src_ip:port, local_ip, local_port */
839                 ret=_tcpconn_add_alias_unsafe(c, port,  &c->rcv.dst_ip,
840                                                                                                                         c->rcv.dst_port);
841                 if (ret<0) goto error;
842         }else goto error_not_found;
843         TCPCONN_UNLOCK;
844         return 0;
845 error_not_found:
846         TCPCONN_UNLOCK;
847         LOG(L_ERR, "ERROR: tcpconn_add_alias: no connection found for id %d\n",id);
848         return -1;
849 error:
850         TCPCONN_UNLOCK;
851         switch(ret){
852                 case -2:
853                         LOG(L_ERR, "ERROR: tcpconn_add_alias: too many aliases"
854                                         " for connection %p (%d)\n", c, c->id);
855                         break;
856                 case -3:
857                         LOG(L_ERR, "ERROR: tcpconn_add_alias: possible port"
858                                         " hijack attempt\n");
859                         LOG(L_ERR, "ERROR: tcpconn_add_alias: alias for %d port %d already"
860                                                 " present and points to another connection \n",
861                                                 c->id, port);
862                         break;
863                 default:
864                         LOG(L_ERR, "ERROR: tcpconn_add_alias: unkown error %d\n", ret);
865         }
866         return -1;
867 }
868
869
870
871 /* finds a tcpconn & sends on it
872  * uses the dst members to, proto (TCP|TLS) and id and tries to send
873  *  from the "from" address (if non null and id==0)
874  * returns: number of bytes written (>=0) on success
875  *          <0 on error */
876 int tcp_send(struct dest_info* dst, union sockaddr_union* from,
877                                         char* buf, unsigned len)
878 {
879         struct tcp_connection *c;
880         struct tcp_connection *tmp;
881         struct ip_addr ip;
882         int port;
883         int fd;
884         long response[2];
885         int n;
886         
887         port=su_getport(&dst->to);
888         if (port){
889                 su2ip_addr(&ip, &dst->to);
890                 c=tcpconn_get(dst->id, &ip, port, from, tcp_con_lifetime); 
891         }else if (dst->id){
892                 c=tcpconn_get(dst->id, 0, 0, 0, tcp_con_lifetime);
893         }else{
894                 LOG(L_CRIT, "BUG: tcp_send called with null id & to\n");
895                 return -1;
896         }
897         
898         if (dst->id){
899                 if (c==0) {
900                         if (port){
901                                 /* try again w/o id */
902                                 c=tcpconn_get(0, &ip, port, from, tcp_con_lifetime);
903                                 goto no_id;
904                         }else{
905                                 LOG(L_ERR, "ERROR: tcp_send: id %d not found, dropping\n",
906                                                 dst->id);
907                                 return -1;
908                         }
909                 }else goto get_fd;
910         }
911 no_id:
912                 if (c==0){
913                         DBG("tcp_send: no open tcp connection found, opening new one\n");
914                         /* create tcp connection */
915                         if (from==0){
916                                 /* check to see if we have to use a specific source addr. */
917                                 switch (dst->to.s.sa_family) {
918                                         case AF_INET:
919                                                         from = tcp_source_ipv4;
920                                                 break;
921 #ifdef USE_IPV6
922                                         case AF_INET6:
923                                                         from = tcp_source_ipv6;
924                                                 break;
925 #endif
926                                         default:
927                                                 /* error, bad af, ignore ... */
928                                                 break;
929                                 }
930                         }
931                         if ((c=tcpconn_connect(&dst->to, from, dst->proto))==0){
932                                 LOG(L_ERR, "ERROR: tcp_send: connect failed\n");
933                                 return -1;
934                         }
935                         atomic_set(&c->refcnt, 1); /* ref. only from here for now */
936                         fd=c->s;
937                         
938                         /* send the new tcpconn to "tcp main" */
939                         response[0]=(long)c;
940                         response[1]=CONN_NEW;
941                         n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
942                         if (n<=0){
943                                 LOG(L_ERR, "BUG: tcp_send: failed send_fd: %s (%d)\n",
944                                                 strerror(errno), errno);
945                                 n=-1;
946                                 goto end;
947                         }       
948                         goto send_it;
949                 }
950 get_fd:
951                         /* todo: see if this is not the same process holding
952                          *  c  and if so send directly on c->fd */
953                         DBG("tcp_send: tcp connection found (%p), acquiring fd\n", c);
954                         /* get the fd */
955                         response[0]=(long)c;
956                         response[1]=CONN_GET_FD;
957                         n=send_all(unix_tcp_sock, response, sizeof(response));
958                         if (n<=0){
959                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(write):%s (%d)\n",
960                                                 strerror(errno), errno);
961                                 n=-1;
962                                 goto release_c;
963                         }
964                         DBG("tcp_send, c= %p, n=%d\n", c, n);
965                         n=receive_fd(unix_tcp_sock, &tmp, sizeof(tmp), &fd, MSG_WAITALL);
966                         if (n<=0){
967                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(receive_fd):"
968                                                         " %s (%d)\n", strerror(errno), errno);
969                                 n=-1;
970                                 goto release_c;
971                         }
972                         if (c!=tmp){
973                                 LOG(L_CRIT, "BUG: tcp_send: get_fd: got different connection:"
974                                                 "  %p (id= %d, refcnt=%d state=%d) != "
975                                                 "  %p (n=%d)\n",
976                                                   c,   c->id,   atomic_get(&c->refcnt),   c->state,
977                                                   tmp, n
978                                    );
979                                 n=-1; /* fail */
980                                 goto end;
981                         }
982                         DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
983                 
984         
985         
986 send_it:
987         DBG("tcp_send: sending...\n");
988         lock_get(&c->write_lock);
989 #ifdef USE_TLS
990         if (c->type==PROTO_TLS)
991                 n=tls_blocking_write(c, fd, buf, len);
992         else
993 #endif
994                 /* n=tcp_blocking_write(c, fd, buf, len); */
995                 n=tsend_stream(fd, buf, len, tcp_send_timeout*1000); 
996         lock_release(&c->write_lock);
997         DBG("tcp_send: after write: c= %p n=%d fd=%d\n",c, n, fd);
998         DBG("tcp_send: buf=\n%.*s\n", (int)len, buf);
999         if (n<0){
1000                 LOG(L_ERR, "ERROR: tcp_send: failed to send\n");
1001                 /* error on the connection , mark it as bad and set 0 timeout */
1002                 c->state=S_CONN_BAD;
1003                 c->timeout=get_ticks_raw();
1004                 /* tell "main" it should drop this (optional it will t/o anyway?)*/
1005                 response[0]=(long)c;
1006                 response[1]=CONN_ERROR;
1007                 if (send_all(unix_tcp_sock, response, sizeof(response))<=0){
1008                         LOG(L_ERR, "BUG: tcp_send: error return failed (write):%s (%d)\n",
1009                                         strerror(errno), errno);
1010                         tcpconn_put(c); /* deref. it manually */
1011                         n=-1;
1012                 }
1013                 /* CONN_ERROR will auto-dec refcnt => we must not call tcpconn_put 
1014                  * if it succeeds */
1015                 close(fd);
1016                 return n; /* error return, no tcpconn_put */
1017         }
1018 end:
1019         close(fd);
1020 release_c:
1021         tcpconn_put(c); /* release c (lock; dec refcnt; unlock) */
1022         return n;
1023 }
1024
1025
1026
1027 int tcp_init(struct socket_info* sock_info)
1028 {
1029         union sockaddr_union* addr;
1030         int optval;
1031 #ifdef DISABLE_NAGLE
1032         int flag;
1033         struct protoent* pe;
1034
1035         if (tcp_proto_no==-1){ /* if not already set */
1036                 pe=getprotobyname("tcp");
1037                 if (pe==0){
1038                         LOG(L_ERR, "ERROR: tcp_init: could not get TCP protocol number\n");
1039                         tcp_proto_no=-1;
1040                 }else{
1041                         tcp_proto_no=pe->p_proto;
1042                 }
1043         }
1044 #endif
1045         
1046         addr=&sock_info->su;
1047         /* sock_info->proto=PROTO_TCP; */
1048         if (init_su(addr, &sock_info->address, sock_info->port_no)<0){
1049                 LOG(L_ERR, "ERROR: tcp_init: could no init sockaddr_union\n");
1050                 goto error;
1051         }
1052         sock_info->socket=socket(AF2PF(addr->s.sa_family), SOCK_STREAM, 0);
1053         if (sock_info->socket==-1){
1054                 LOG(L_ERR, "ERROR: tcp_init: socket: %s\n", strerror(errno));
1055                 goto error;
1056         }
1057 #ifdef DISABLE_NAGLE
1058         flag=1;
1059         if ( (tcp_proto_no!=-1) &&
1060                  (setsockopt(sock_info->socket, tcp_proto_no , TCP_NODELAY,
1061                                          &flag, sizeof(flag))<0) ){
1062                 LOG(L_ERR, "ERROR: tcp_init: could not disable Nagle: %s\n",
1063                                 strerror(errno));
1064         }
1065 #endif
1066
1067
1068 #if  !defined(TCP_DONT_REUSEADDR) 
1069         /* Stevens, "Network Programming", Section 7.5, "Generic Socket
1070      * Options": "...server started,..a child continues..on existing
1071          * connection..listening server is restarted...call to bind fails
1072          * ... ALL TCP servers should specify the SO_REUSEADDRE option 
1073          * to allow the server to be restarted in this situation
1074          *
1075          * Indeed, without this option, the server can't restart.
1076          *   -jiri
1077          */
1078         optval=1;
1079         if (setsockopt(sock_info->socket, SOL_SOCKET, SO_REUSEADDR,
1080                                 (void*)&optval, sizeof(optval))==-1) {
1081                 LOG(L_ERR, "ERROR: tcp_init: setsockopt %s\n",
1082                         strerror(errno));
1083                 goto error;
1084         }
1085 #endif
1086         /* tos */
1087         optval = tos;
1088         if (setsockopt(sock_info->socket, IPPROTO_IP, IP_TOS, (void*)&optval, 
1089                                 sizeof(optval)) ==-1){
1090                 LOG(L_WARN, "WARNING: tcp_init: setsockopt tos: %s\n", strerror(errno));
1091                 /* continue since this is not critical */
1092         }
1093         if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
1094                 LOG(L_ERR, "ERROR: tcp_init: bind(%x, %p, %d) on %s:%d : %s\n",
1095                                 sock_info->socket,  &addr->s, 
1096                                 (unsigned)sockaddru_len(*addr),
1097                                 sock_info->address_str.s,
1098                                 sock_info->port_no,
1099                                 strerror(errno));
1100                 goto error;
1101         }
1102         if (listen(sock_info->socket, TCP_LISTEN_BACKLOG)==-1){
1103                 LOG(L_ERR, "ERROR: tcp_init: listen(%x, %p, %d) on %s: %s\n",
1104                                 sock_info->socket, &addr->s, 
1105                                 (unsigned)sockaddru_len(*addr),
1106                                 sock_info->address_str.s,
1107                                 strerror(errno));
1108                 goto error;
1109         }
1110         
1111         return 0;
1112 error:
1113         if (sock_info->socket!=-1){
1114                 close(sock_info->socket);
1115                 sock_info->socket=-1;
1116         }
1117         return -1;
1118 }
1119
1120
1121
1122 /* used internally by tcp_main_loop()
1123  * tries to destroy a tcp connection (if it cannot it will force a timeout)
1124  * Note: it's called _only_ from the tcp_main process */
1125 static void tcpconn_destroy(struct tcp_connection* tcpconn)
1126 {
1127         int fd;
1128
1129         TCPCONN_LOCK; /*avoid races w/ tcp_send*/
1130         if (atomic_dec_and_test(&tcpconn->refcnt)){ 
1131                 DBG("tcpconn_destroy: destroying connection %p, flags %04x\n",
1132                                 tcpconn, tcpconn->flags);
1133                 fd=tcpconn->s;
1134 #ifdef USE_TLS
1135                 /*FIXME: lock ->writelock ? */
1136                 if (tcpconn->type==PROTO_TLS)
1137                         tls_close(tcpconn, fd);
1138 #endif
1139                 _tcpconn_rm(tcpconn);
1140                 close(fd);
1141                 (*tcp_connections_no)--;
1142         }else{
1143                 /* force timeout */
1144                 tcpconn->timeout=get_ticks_raw();
1145                 tcpconn->state=S_CONN_BAD;
1146                 DBG("tcpconn_destroy: delaying (%p, flags %04x) ...\n",
1147                                 tcpconn, tcpconn->flags);
1148                 
1149         }
1150         TCPCONN_UNLOCK;
1151 }
1152
1153
1154
1155 #ifdef SEND_FD_QUEUE
1156 struct send_fd_info{
1157         struct tcp_connection* tcp_conn;
1158         ticks_t expire;
1159         int unix_sock;
1160         unsigned int retries; /* debugging */
1161 };
1162
/* array based queue of pending send_fd requests; the used part is
 * [data, crt), the allocated part is [data, end) */
struct tcp_send_fd_q{
	struct send_fd_info *data; /* buffer */
	struct send_fd_info *crt;  /* pointer inside the buffer: next free
								  entry */
	struct send_fd_info *end;  /* points after the last valid position */
};


/* the queue handled by init_send_fd_queues() / destroy_send_fd_queues() */
static struct tcp_send_fd_q send2child_q;
1171
1172
1173
1174 static int send_fd_queue_init(struct tcp_send_fd_q *q, unsigned int size)
1175 {
1176         q->data=pkg_malloc(size*sizeof(struct send_fd_info));
1177         if (q->data==0){
1178                 LOG(L_ERR, "ERROR: send_fd_queue_init: out of memory\n");
1179                 return -1;
1180         }
1181         q->crt=&q->data[0];
1182         q->end=&q->data[size];
1183         return 0;
1184 }
1185
1186 static void send_fd_queue_destroy(struct tcp_send_fd_q *q)
1187 {
1188         if (q->data){
1189                 pkg_free(q->data);
1190                 q->data=0;
1191                 q->crt=q->end=0;
1192         }
1193 }
1194
1195
1196
1197 static int init_send_fd_queues()
1198 {
1199         if (send_fd_queue_init(&send2child_q, SEND_FD_QUEUE_SIZE)!=0)
1200                 goto error;
1201         return 0;
1202 error:
1203         LOG(L_ERR, "ERROR: init_send_fd_queues: init failed\n");
1204         return -1;
1205 }
1206
1207
1208
1209 static void destroy_send_fd_queues()
1210 {
1211         send_fd_queue_destroy(&send2child_q);
1212 }
1213
1214
1215
1216
1217 inline static int send_fd_queue_add(    struct tcp_send_fd_q* q, 
1218                                                                                 int unix_sock,
1219                                                                                 struct tcp_connection *t)
1220 {
1221         struct send_fd_info* tmp;
1222         unsigned long new_size;
1223         
1224         if (q->crt>=q->end){
1225                 new_size=q->end-&q->data[0];
1226                 if (new_size< MAX_SEND_FD_QUEUE_SIZE/2){
1227                         new_size*=2;
1228                 }else new_size=MAX_SEND_FD_QUEUE_SIZE;
1229                 if (q->crt>=&q->data[new_size]){
1230                         LOG(L_ERR, "ERROR: send_fd_queue_add: queue full: %ld/%ld\n",
1231                                         (long)(q->crt-&q->data[0]-1), new_size);
1232                         goto error;
1233                 }
1234                 LOG(L_CRIT, "INFO: send_fd_queue: queue full: %ld, extending to %ld\n",
1235                                 (long)(q->end-&q->data[0]), new_size);
1236                 tmp=pkg_realloc(q->data, new_size*sizeof(struct send_fd_info));
1237                 if (tmp==0){
1238                         LOG(L_ERR, "ERROR: send_fd_queue_add: out of memory\n");
1239                         goto error;
1240                 }
1241                 q->crt=(q->crt-&q->data[0])+tmp;
1242                 q->data=tmp;
1243                 q->end=&q->data[new_size];
1244         }
1245         q->crt->tcp_conn=t;
1246         q->crt->unix_sock=unix_sock;
1247         q->crt->expire=get_ticks_raw()+SEND_FD_QUEUE_TIMEOUT;
1248         q->crt->retries=0;
1249         q->crt++;
1250         return 0;
1251 error:
1252         return -1;
1253 }
1254
1255
1256
1257 inline static void send_fd_queue_run(struct tcp_send_fd_q* q)
1258 {
1259         struct send_fd_info* p;
1260         struct send_fd_info* t;
1261         
1262         for (p=t=&q->data[0]; p<q->crt; p++){
1263                 if (send_fd(p->unix_sock, &(p->tcp_conn),
1264                                         sizeof(struct tcp_connection*), p->tcp_conn->s)<=0){
1265                         if ( ((errno==EAGAIN)||(errno==EWOULDBLOCK)) && 
1266                                                         ((s_ticks_t)(p->expire-get_ticks_raw())>0)){
1267                                 /* leave in queue for a future try */
1268                                 *t=*p;
1269                                 t->retries++;
1270                                 t++;
1271                         }else{
1272                                 LOG(L_ERR, "ERROR: run_send_fd_queue: send_fd failed"
1273                                                    " on socket %d , queue entry %ld, retries %d,"
1274                                                    " connection %p, tcp socket %d, errno=%d (%s) \n",
1275                                                    p->unix_sock, (long)(p-&q->data[0]), p->retries,
1276                                                    p->tcp_conn, p->tcp_conn->s, errno,
1277                                                    strerror(errno));
1278                                 tcpconn_destroy(p->tcp_conn);
1279                         }
1280                 }
1281         }
1282         q->crt=t;
1283 }
1284 #else
1285 #define send_fd_queue_run(q)
1286 #endif
1287
1288
1289
1290 /* handles io from a tcp child process
1291  * params: tcp_c - pointer in the tcp_children array, to the entry for
1292  *                 which an io event was detected 
1293  *         fd_i  - fd index in the fd_array (usefull for optimizing
1294  *                 io_watch_deletes)
1295  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
1296  *           io events queued), >0 on success. success/error refer only to
1297  *           the reads from the fd.
1298  */
1299 inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
1300 {
1301         struct tcp_connection* tcpconn;
1302         long response[2];
1303         int cmd;
1304         int bytes;
1305         
1306         if (tcp_c->unix_sock<=0){
1307                 /* (we can't have a fd==0, 0 is never closed )*/
1308                 LOG(L_CRIT, "BUG: handle_tcp_child: fd %d for %d "
1309                                 "(pid %d, ser no %d)\n", tcp_c->unix_sock,
1310                                 (int)(tcp_c-&tcp_children[0]), tcp_c->pid, tcp_c->proc_no);
1311                 goto error;
1312         }
1313         /* read until sizeof(response)
1314          * (this is a SOCK_STREAM so read is not atomic) */
1315         bytes=recv_all(tcp_c->unix_sock, response, sizeof(response), MSG_DONTWAIT);
1316         if (bytes<(int)sizeof(response)){
1317                 if (bytes==0){
1318                         /* EOF -> bad, child has died */
1319                         DBG("DBG: handle_tcp_child: dead tcp child %d (pid %d, no %d)"
1320                                         " (shutting down?)\n", (int)(tcp_c-&tcp_children[0]), 
1321                                         tcp_c->pid, tcp_c->proc_no );
1322                         /* don't listen on it any more */
1323                         io_watch_del(&io_h, tcp_c->unix_sock, fd_i, 0); 
1324                         goto error; /* eof. so no more io here, it's ok to return error */
1325                 }else if (bytes<0){
1326                         /* EAGAIN is ok if we try to empty the buffer
1327                          * e.g.: SIGIO_RT overflow mode or EPOLL ET */
1328                         if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
1329                                 LOG(L_CRIT, "ERROR: handle_tcp_child: read from tcp child %ld "
1330                                                 " (pid %d, no %d) %s [%d]\n",
1331                                                 (long)(tcp_c-&tcp_children[0]), tcp_c->pid,
1332                                                 tcp_c->proc_no, strerror(errno), errno );
1333                         }else{
1334                                 bytes=0;
1335                         }
1336                         /* try to ignore ? */
1337                         goto end;
1338                 }else{
1339                         /* should never happen */
1340                         LOG(L_CRIT, "BUG: handle_tcp_child: too few bytes received (%d)\n",
1341                                         bytes );
1342                         bytes=0; /* something was read so there is no error; otoh if
1343                                           receive_fd returned less then requested => the receive
1344                                           buffer is empty => no more io queued on this fd */
1345                         goto end;
1346                 }
1347         }
1348         
1349         DBG("handle_tcp_child: reader response= %lx, %ld from %d \n",
1350                                         response[0], response[1], (int)(tcp_c-&tcp_children[0]));
1351         cmd=response[1];
1352         tcpconn=(struct tcp_connection*)response[0];
1353         if (tcpconn==0){
1354                 /* should never happen */
1355                 LOG(L_CRIT, "BUG: handle_tcp_child: null tcpconn pointer received"
1356                                  " from tcp child %d (pid %d): %lx, %lx\n",
1357                                         (int)(tcp_c-&tcp_children[0]), tcp_c->pid,
1358                                         response[0], response[1]) ;
1359                 goto end;
1360         }
1361         switch(cmd){
1362                 case CONN_RELEASE:
1363                         tcp_c->busy--;
1364                         if (tcpconn->state==S_CONN_BAD){ 
1365                                 tcpconn_destroy(tcpconn);
1366                                 break;
1367                         }
1368                         /* update the timeout*/
1369                         tcpconn->timeout=get_ticks_raw()+tcp_con_lifetime;
1370                         tcpconn_put(tcpconn);
1371                         /* must be after the de-ref*/
1372                         io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1373                         tcpconn->flags&=~F_CONN_REMOVED;
1374                         DBG("handle_tcp_child: CONN_RELEASE  %p refcnt= %d\n", 
1375                                                         tcpconn, atomic_get(&tcpconn->refcnt));
1376                         break;
1377                 case CONN_ERROR:
1378                 case CONN_DESTROY:
1379                 case CONN_EOF:
1380                         /* WARNING: this will auto-dec. refcnt! */
1381                                 tcp_c->busy--;
1382                                 /* main doesn't listen on it => we don't have to delete it
1383                                  if (tcpconn->s!=-1)
1384                                         io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
1385                                 */
1386                                 tcpconn_destroy(tcpconn); /* closes also the fd */
1387                                 break;
1388                 default:
1389                                 LOG(L_CRIT, "BUG: handle_tcp_child:  unknown cmd %d"
1390                                                                         " from tcp reader %d\n",
1391                                                                         cmd, (int)(tcp_c-&tcp_children[0]));
1392         }
1393 end:
1394         return bytes;
1395 error:
1396         return -1;
1397 }
1398
1399
1400
/* handles io from a "generic" ser process (get fd or new_fd from a tcp_send)
 * 
 * params: p     - pointer in the ser processes array (pt[]), to the entry for
 *                 which an io event was detected
 *         fd_i  - fd index in the fd_array (useful for optimizing
 *                 io_watch_deletes)
 * returns:  handle_* return convention:
 *          -1 on error reading from the fd,
 *           0 on EAGAIN or when no more io events are queued
 *             (receive buffer empty),
 *           >0 on successful reads from the fd (the receive buffer might
 *             be non-empty).
 */
1414 inline static int handle_ser_child(struct process_table* p, int fd_i)
1415 {
1416         struct tcp_connection* tcpconn;
1417         long response[2];
1418         int cmd;
1419         int bytes;
1420         int ret;
1421         int fd;
1422         
1423         ret=-1;
1424         if (p->unix_sock<=0){
1425                 /* (we can't have a fd==0, 0 is never closed )*/
1426                 LOG(L_CRIT, "BUG: handle_ser_child: fd %d for %d "
1427                                 "(pid %d)\n", p->unix_sock, (int)(p-&pt[0]), p->pid);
1428                 goto error;
1429         }
1430                         
1431         /* get all bytes and the fd (if transmitted)
1432          * (this is a SOCK_STREAM so read is not atomic) */
1433         bytes=receive_fd(p->unix_sock, response, sizeof(response), &fd,
1434                                                 MSG_DONTWAIT);
1435         if (bytes<(int)sizeof(response)){
1436                 /* too few bytes read */
1437                 if (bytes==0){
1438                         /* EOF -> bad, child has died */
1439                         DBG("DBG: handle_ser_child: dead child %d, pid %d"
1440                                         " (shutting down?)\n", (int)(p-&pt[0]), p->pid);
1441                         /* don't listen on it any more */
1442                         io_watch_del(&io_h, p->unix_sock, fd_i, 0);
1443                         goto error; /* child dead => no further io events from it */
1444                 }else if (bytes<0){
1445                         /* EAGAIN is ok if we try to empty the buffer
1446                          * e.g: SIGIO_RT overflow mode or EPOLL ET */
1447                         if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
1448                                 LOG(L_CRIT, "ERROR: handle_ser_child: read from child %d  "
1449                                                 "(pid %d):  %s [%d]\n", (int)(p-&pt[0]), p->pid,
1450                                                 strerror(errno), errno);
1451                                 ret=-1;
1452                         }else{
1453                                 ret=0;
1454                         }
1455                         /* try to ignore ? */
1456                         goto end;
1457                 }else{
1458                         /* should never happen */
1459                         LOG(L_CRIT, "BUG: handle_ser_child: too few bytes received (%d)\n",
1460                                         bytes );
1461                         ret=0; /* something was read so there is no error; otoh if
1462                                           receive_fd returned less then requested => the receive
1463                                           buffer is empty => no more io queued on this fd */
1464                         goto end;
1465                 }
1466         }
1467         ret=1; /* something was received, there might be more queued */
1468         DBG("handle_ser_child: read response= %lx, %ld, fd %d from %d (%d)\n",
1469                                         response[0], response[1], fd, (int)(p-&pt[0]), p->pid);
1470         cmd=response[1];
1471         tcpconn=(struct tcp_connection*)response[0];
1472         if (tcpconn==0){
1473                 LOG(L_CRIT, "BUG: handle_ser_child: null tcpconn pointer received"
1474                                  " from child %d (pid %d): %lx, %lx\n",
1475                                         (int)(p-&pt[0]), p->pid, response[0], response[1]) ;
1476                 goto end;
1477         }
1478         switch(cmd){
1479                 case CONN_ERROR:
1480                         if (!(tcpconn->flags & F_CONN_REMOVED) && (tcpconn->s!=-1)){
1481                                 io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
1482                                 tcpconn->flags|=F_CONN_REMOVED;
1483                         }
1484                         tcpconn_destroy(tcpconn); /* will close also the fd */
1485                         break;
1486                 case CONN_GET_FD:
1487                         /* send the requested FD  */
1488                         /* WARNING: take care of setting refcnt properly to
1489                          * avoid race condition */
1490                         if (send_fd(p->unix_sock, &tcpconn, sizeof(tcpconn),
1491                                                         tcpconn->s)<=0){
1492                                 LOG(L_ERR, "ERROR: handle_ser_child: send_fd failed\n");
1493                         }
1494                         break;
1495                 case CONN_NEW:
1496                         /* update the fd in the requested tcpconn*/
1497                         /* WARNING: take care of setting refcnt properly to
1498                          * avoid race condition */
1499                         if (fd==-1){
1500                                 LOG(L_CRIT, "BUG: handle_ser_child: CONN_NEW:"
1501                                                         " no fd received\n");
1502                                 break;
1503                         }
1504                         (*tcp_connections_no)++;
1505                         tcpconn->s=fd;
1506                         /* add tcpconn to the list*/
1507                         tcpconn_add(tcpconn);
1508                         /* update the timeout*/
1509                         tcpconn->timeout=get_ticks_raw()+tcp_con_lifetime;
1510                         io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1511                         tcpconn->flags&=~F_CONN_REMOVED;
1512                         break;
1513                 default:
1514                         LOG(L_CRIT, "BUG: handle_ser_child: unknown cmd %d\n", cmd);
1515         }
1516 end:
1517         return ret;
1518 error:
1519         return -1;
1520 }
1521
1522
1523
1524 /* sends a tcpconn + fd to a choosen child */
1525 inline static int send2child(struct tcp_connection* tcpconn)
1526 {
1527         int i;
1528         int min_busy;
1529         int idx;
1530         static int crt=0; /* current child */
1531         int last;
1532         
1533         min_busy=tcp_children[0].busy;
1534         idx=0;
1535         last=crt+tcp_children_no;
1536         for (; crt<last; crt++){
1537                 i=crt%tcp_children_no;
1538                 if (!tcp_children[i].busy){
1539                         idx=i;
1540                         min_busy=0;
1541                         break;
1542                 }else if (min_busy>tcp_children[i].busy){
1543                         min_busy=tcp_children[i].busy;
1544                         idx=i;
1545                 }
1546         }
1547         crt=idx+1; /* next time we start with crt%tcp_children_no */
1548         
1549         tcp_children[idx].busy++;
1550         tcp_children[idx].n_reqs++;
1551         if (min_busy){
1552                 DBG("WARNING: send2child: no free tcp receiver, "
1553                                 " connection passed to the least busy one (%d)\n",
1554                                 min_busy);
1555         }
1556         DBG("send2child: to tcp child %d %d(%d), %p\n", idx, 
1557                                         tcp_children[idx].proc_no,
1558                                         tcp_children[idx].pid, tcpconn);
1559         /* first make sure this child doesn't have pending request for
1560          * tcp_main (to avoid a possible deadlock: e.g. child wants to
1561          * send a release command, but the master fills its socket buffer
1562          * with new connection commands => deadlock) */
1563         /* answer tcp_send requests first */
1564         while(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0);
1565         /* process tcp readers requests */
1566         while(handle_tcp_child(&tcp_children[idx], -1)>0);
1567                 
1568 #ifdef SEND_FD_QUEUE
1569         /* if queue full, try to queue the io */
1570         if (send_fd(tcp_children[idx].unix_sock, &tcpconn, sizeof(tcpconn),
1571                         tcpconn->s)<=0){
1572                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK)){
1573                         /* FIXME: remove after debugging */
1574                          LOG(L_CRIT, "INFO: tcp child %d, socket %d: queue full,"
1575                                                 " %d requests queued (total handled %d)\n",
1576                                         idx, tcp_children[idx].unix_sock, min_busy,
1577                                         tcp_children[idx].n_reqs-1);
1578                         if (send_fd_queue_add(&send2child_q, tcp_children[idx].unix_sock, 
1579                                                 tcpconn)!=0){
1580                                 LOG(L_ERR, "ERROR: send2child: queue send op. failed\n");
1581                                 return -1;
1582                         }
1583                 }else{
1584                         LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
1585                         return -1;
1586                 }
1587         }
1588 #else
1589         if (send_fd(tcp_children[idx].unix_sock, &tcpconn, sizeof(tcpconn),
1590                         tcpconn->s)<=0){
1591                 LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
1592                 return -1;
1593         }
1594 #endif
1595         
1596         return 0;
1597 }
1598
1599
1600
1601 /* handles a new connection, called internally by tcp_main_loop/handle_io.
1602  * params: si - pointer to one of the tcp socket_info structures on which
1603  *              an io event was detected (connection attempt)
1604  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
1605  *           io events queued), >0 on success. success/error refer only to
1606  *           the accept.
1607  */
1608 static inline int handle_new_connect(struct socket_info* si)
1609 {
1610         union sockaddr_union su;
1611         struct tcp_connection* tcpconn;
1612         socklen_t su_len;
1613         int new_sock;
1614         
1615         /* got a connection on r */
1616         su_len=sizeof(su);
1617         new_sock=accept(si->socket, &(su.s), &su_len);
1618         if (new_sock==-1){
1619                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK))
1620                         return 0;
1621                 LOG(L_ERR,  "WARNING: handle_new_connect: error while accepting"
1622                                 " connection(%d): %s\n", errno, strerror(errno));
1623                 return -1;
1624         }
1625         if (*tcp_connections_no>=tcp_max_connections){
1626                 LOG(L_ERR, "ERROR: maximum number of connections exceeded: %d/%d\n",
1627                                         *tcp_connections_no, tcp_max_connections);
1628                 close(new_sock);
1629                 return 1; /* success, because the accept was succesfull */
1630         }
1631         if (init_sock_opt(new_sock)<0){
1632                 LOG(L_ERR, "ERROR: handle_new_connect: init_sock_opt failed\n");
1633                 close(new_sock);
1634                 return 1; /* success, because the accept was succesfull */
1635         }
1636         (*tcp_connections_no)++;
1637         
1638         /* add socket to list */
1639         tcpconn=tcpconn_new(new_sock, &su, &si->su, si, si->proto, S_CONN_ACCEPT);
1640         if (tcpconn){
1641 #ifdef TCP_PASS_NEW_CONNECTION_ON_DATA
1642                 io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1643                 tcpconn->flags&=~F_CONN_REMOVED;
1644                 tcpconn_add(tcpconn);
1645 #else
1646                 atomic_set(&tcpconn->refcnt, 1); /* safe, not yet available to the
1647                                                                                         outside world */
1648                 tcpconn_add(tcpconn);
1649                 DBG("handle_new_connect: new connection: %p %d flags: %04x\n",
1650                         tcpconn, tcpconn->s, tcpconn->flags);
1651                 /* pass it to a child */
1652                 if(send2child(tcpconn)<0){
1653                         LOG(L_ERR,"ERROR: handle_new_connect: no children "
1654                                         "available\n");
1655                         tcpconn_destroy(tcpconn);
1656                 }
1657 #endif
1658         }else{ /*tcpconn==0 */
1659                 LOG(L_ERR, "ERROR: handle_new_connect: tcpconn_new failed, "
1660                                 "closing socket\n");
1661                 close(new_sock);
1662                 (*tcp_connections_no)--;
1663         }
1664         return 1; /* accept() was succesfull */
1665 }
1666
1667
1668
1669 /* handles an io event on one of the watched tcp connections
1670  * 
1671  * params: tcpconn - pointer to the tcp_connection for which we have an io ev.
1672  *         fd_i    - index in the fd_array table (needed for delete)
1673  * returns:  handle_* return convention, but on success it always returns 0
1674  *           (because it's one-shot, after a succesfull execution the fd is
1675  *            removed from tcp_main's watch fd list and passed to a child =>
1676  *            tcp_main is not interested in further io events that might be
1677  *            queued for this fd)
1678  */
1679 inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, int fd_i)
1680 {
1681         /*  is refcnt!=0 really necessary? 
1682          *  No, in fact it's a bug: I can have the following situation: a send only
1683          *   tcp connection used by n processes simultaneously => refcnt = n. In 
1684          *   the same time I can have a read event and this situation is perfectly
1685          *   valid. -- andrei
1686          */
1687 #if 0
1688         if ((tcpconn->refcnt!=0)){
1689                 /* FIXME: might be valid for sigio_rt iff fd flags are not cleared
1690                  *        (there is a short window in which it could generate a sig
1691                  *         that would be catched by tcp_main) */
1692                 LOG(L_CRIT, "BUG: handle_tcpconn_ev: io event on referenced"
1693                                         " tcpconn (%p), refcnt=%d, fd=%d\n",
1694                                         tcpconn, tcpconn->refcnt, tcpconn->s);
1695                 return -1;
1696         }
1697 #endif
1698         /* pass it to child, so remove it from the io watch list */
1699         DBG("handle_tcpconn_ev: data available on %p %d\n", tcpconn, tcpconn->s);
1700         if (io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1) goto error;
1701         tcpconn->flags|=F_CONN_REMOVED;
1702         tcpconn_ref(tcpconn); /* refcnt ++ */
1703         if (send2child(tcpconn)<0){
1704                 LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n");
1705                 tcpconn_destroy(tcpconn);
1706         }
1707         return 0; /* we are not interested in possibly queued io events, 
1708                                  the fd was either passed to a child, or closed */
1709 error:
1710         return -1;
1711 }
1712
1713
1714
1715 /* generic handle io routine, it will call the appropiate
1716  *  handle_xxx() based on the fd_map type
1717  *
1718  * params:  fm  - pointer to a fd hash entry
1719  *          idx - index in the fd_array (or -1 if not known)
1720  * return: -1 on error
1721  *          0 on EAGAIN or when by some other way it is known that no more 
1722  *            io events are queued on the fd (the receive buffer is empty).
1723  *            Usefull to detect when there are no more io events queued for
1724  *            sigio_rt, epoll_et, kqueue.
1725  *         >0 on successfull read from the fd (when there might be more io
1726  *            queued -- the receive buffer might still be non-empty)
1727  */
1728 inline static int handle_io(struct fd_map* fm, int idx)
1729 {       
1730         int ret;
1731         
1732         switch(fm->type){
1733                 case F_SOCKINFO:
1734                         ret=handle_new_connect((struct socket_info*)fm->data);
1735                         break;
1736                 case F_TCPCONN:
1737                         ret=handle_tcpconn_ev((struct tcp_connection*)fm->data, idx);
1738                         break;
1739                 case F_TCPCHILD:
1740                         ret=handle_tcp_child((struct tcp_child*)fm->data, idx);
1741                         break;
1742                 case F_PROC:
1743                         ret=handle_ser_child((struct process_table*)fm->data, idx);
1744                         break;
1745                 case F_NONE:
1746                         LOG(L_CRIT, "BUG: handle_io: empty fd map: %p {%d, %d, %p},"
1747                                                 " idx %d\n", fm, fm->fd, fm->type, fm->data, idx);
1748                         goto error;
1749                 default:
1750                         LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type); 
1751                         goto error;
1752         }
1753         return ret;
1754 error:
1755         return -1;
1756 }
1757
1758
1759
1760 /* very inefficient for now - FIXME
1761  * keep in sync with tcpconn_destroy, the "delete" part should be
1762  * the same except for io_watch_del..
1763  * Note: this function is called only from the tcp_main process with 1 
1764  * exception: on shutdown it's called also by the main ser process via
1765  * cleanup() => with the ser shutdown exception, it cannot execute in parallel
1766  * with tcpconn_add() or tcpconn_destroy()*/
1767 static inline void tcpconn_timeout(int force)
1768 {
1769         static ticks_t prev_ticks=0;
1770         struct tcp_connection *c, *next;
1771         ticks_t ticks;
1772         unsigned h;
1773         int fd;
1774         
1775         
1776         ticks=get_ticks_raw();
1777         if (((ticks-prev_ticks)<TCPCONN_TIMEOUT_MIN_RUN) && !force) return;
1778         prev_ticks=ticks;
1779         TCPCONN_LOCK; /* fixme: we can lock only on delete IMO */
1780         for(h=0; h<TCP_ID_HASH_SIZE; h++){
1781                 c=tcpconn_id_hash[h];
1782                 while(c){
1783                         next=c->id_next;
1784                         if (force ||((atomic_get(&c->refcnt)==0) &&
1785                                                 ((s_ticks_t)(ticks-c->timeout)>=0))){
1786                                 if (!force)
1787                                         DBG("tcpconn_timeout: timeout for hash=%d - %p"
1788                                                         " (%d > %d)\n", h, c, ticks, c->timeout);
1789                                 if (c->s>0 && is_tcp_main){
1790                                         /* we cannot close or remove the fd if we are not in the
1791                                          * tcp main proc.*/
1792                                         fd=c->s;
1793                                         if (!(c->flags & F_CONN_REMOVED)){
1794                                                 io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
1795                                                 c->flags|=F_CONN_REMOVED;
1796                                         }
1797                                 }else{
1798                                         fd=-1;
1799                                 }
1800 #ifdef USE_TLS
1801                                 if (c->type==PROTO_TLS)
1802                                         tls_close(c, fd);
1803 #endif
1804                                 _tcpconn_rm(c);
1805                                 if (fd>0) {
1806                                         close(fd);
1807                                 }
1808                                 (*tcp_connections_no)--;
1809                         }
1810                         c=next;
1811                 }
1812         }
1813         TCPCONN_UNLOCK;
1814 }
1815
1816
1817
1818 /* tcp main loop */
1819 void tcp_main_loop()
1820 {
1821
1822         struct socket_info* si;
1823         int r;
1824         
1825         is_tcp_main=1; /* mark this process as tcp main */
1826         
1827         tcp_main_max_fd_no=get_max_open_fds();
1828         /* init send fd queues (here because we want mem. alloc only in the tcp
1829          *  process */
1830 #ifdef SEND_FD_QUEUE
1831         if (init_send_fd_queues()<0){
1832                 LOG(L_CRIT, "ERROR: init_tcp: could not init send fd queues\n");
1833                 goto error;
1834         }
1835 #endif
1836         /* init io_wait (here because we want the memory allocated only in
1837          * the tcp_main process) */
1838         if  (init_io_wait(&io_h, tcp_main_max_fd_no, tcp_poll_method)<0)
1839                 goto error;
1840         /* init: start watching all the fds*/
1841         
1842         /* add all the sockets we listen on for connections */
1843         for (si=tcp_listen; si; si=si->next){
1844                 if ((si->proto==PROTO_TCP) &&(si->socket!=-1)){
1845                         if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
1846                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1847                                                         "listen socket to the fd list\n");
1848                                 goto error;
1849                         }
1850                 }else{
1851                         LOG(L_CRIT, "BUG: tcp_main_loop: non tcp address in tcp_listen\n");
1852                 }
1853         }
1854 #ifdef USE_TLS
1855         if (!tls_disable && tls_loaded()){
1856                 for (si=tls_listen; si; si=si->next){
1857                         if ((si->proto==PROTO_TLS) && (si->socket!=-1)){
1858                                 if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
1859                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1860                                                         "tls listen socket to the fd list\n");
1861                                         goto error;
1862                                 }
1863                         }else{
1864                                 LOG(L_CRIT, "BUG: tcp_main_loop: non tls address"
1865                                                 " in tls_listen\n");
1866                         }
1867                 }
1868         }
1869 #endif
1870         /* add all the unix sockets used for communcation with other ser processes
1871          *  (get fd, new connection a.s.o) */
1872         for (r=1; r<process_no; r++){
1873                 if (pt[r].unix_sock>0) /* we can't have 0, we never close it!*/
1874                         if (io_watch_add(&io_h, pt[r].unix_sock, F_PROC, &pt[r])<0){
1875                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1876                                                         "process %d unix socket to the fd list\n", r);
1877                                         goto error;
1878                         }
1879         }
1880         /* add all the unix sokets used for communication with the tcp childs */
1881         for (r=0; r<tcp_children_no; r++){
1882                 if (tcp_children[r].unix_sock>0)/*we can't have 0, we never close it!*/
1883                         if (io_watch_add(&io_h, tcp_children[r].unix_sock, F_TCPCHILD,
1884                                                         &tcp_children[r]) <0){
1885                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1886                                                 "tcp child %d unix socket to the fd list\n", r);
1887                                 goto error;
1888                         }
1889         }
1890         
1891         /* main loop */
1892         switch(io_h.poll_method){
1893                 case POLL_POLL:
1894                         while(1){
1895                                 /* wait and process IO */
1896                                 io_wait_loop_poll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0); 
1897                                 send_fd_queue_run(&send2child_q); /* then new io */
1898                                 /* remove old connections */
1899                                 tcpconn_timeout(0);
1900                         }
1901                         break;
1902 #ifdef HAVE_SELECT
1903                 case POLL_SELECT:
1904                         while(1){
1905                                 io_wait_loop_select(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1906                                 send_fd_queue_run(&send2child_q); /* then new io */
1907                                 tcpconn_timeout(0);
1908                         }
1909                         break;
1910 #endif
1911 #ifdef HAVE_SIGIO_RT
1912                 case POLL_SIGIO_RT:
1913                         while(1){
1914                                 io_wait_loop_sigio_rt(&io_h, TCP_MAIN_SELECT_TIMEOUT);
1915                                 send_fd_queue_run(&send2child_q); /* then new io */
1916                                 tcpconn_timeout(0);
1917                         }
1918                         break;
1919 #endif
1920 #ifdef HAVE_EPOLL
1921                 case POLL_EPOLL_LT:
1922                         while(1){
1923                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1924                                 send_fd_queue_run(&send2child_q); /* then new io */
1925                                 tcpconn_timeout(0);
1926                         }
1927                         break;
1928                 case POLL_EPOLL_ET:
1929                         while(1){
1930                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 1);
1931                                 send_fd_queue_run(&send2child_q); /* then new io */
1932                                 tcpconn_timeout(0);
1933                         }
1934                         break;
1935 #endif
1936 #ifdef HAVE_KQUEUE
1937                 case POLL_KQUEUE:
1938                         while(1){
1939                                 io_wait_loop_kqueue(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1940                                 send_fd_queue_run(&send2child_q); /* then new io */
1941                                 tcpconn_timeout(0);
1942                         }
1943                         break;
1944 #endif
1945 #ifdef HAVE_DEVPOLL
1946                 case POLL_DEVPOLL:
1947                         while(1){
1948                                 io_wait_loop_devpoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1949                                 send_fd_queue_run(&send2child_q); /* then new io */
1950                                 tcpconn_timeout(0);
1951                         }
1952                         break;
1953 #endif
1954                 default:
1955                         LOG(L_CRIT, "BUG: tcp_main_loop: no support for poll method "
1956                                         " %s (%d)\n", 
1957                                         poll_method_name(io_h.poll_method), io_h.poll_method);
1958                         goto error;
1959         }
1960 error:
1961 #ifdef SEND_FD_QUEUE
1962         destroy_send_fd_queues();
1963 #endif
1964         destroy_io_wait(&io_h);
1965         LOG(L_CRIT, "ERROR: tcp_main_loop: exiting...");
1966         exit(-1);
1967 }
1968
1969
1970
1971 /* cleanup before exit */
1972 void destroy_tcp()
1973 {
1974                 if (tcpconn_id_hash){
1975                         if (tcpconn_lock)
1976                                 TCPCONN_UNLOCK; /* hack: force-unlock the tcp lock in case
1977                                                                    some process was terminated while holding 
1978                                                                    it; this will allow an almost gracious 
1979                                                                    shutdown */
1980                         tcpconn_timeout(1); /* force close/expire for all active tcpconns*/
1981                         shm_free(tcpconn_id_hash);
1982                         tcpconn_id_hash=0;
1983                 }
1984                 if (tcp_connections_no){
1985                         shm_free(tcp_connections_no);
1986                         tcp_connections_no=0;
1987                 }
1988                 if (connection_id){
1989                         shm_free(connection_id);
1990                         connection_id=0;
1991                 }
1992                 if (tcpconn_aliases_hash){
1993                         shm_free(tcpconn_aliases_hash);
1994                         tcpconn_aliases_hash=0;
1995                 }
1996                 if (tcpconn_lock){
1997                         lock_destroy(tcpconn_lock);
1998                         lock_dealloc((void*)tcpconn_lock);
1999                         tcpconn_lock=0;
2000                 }
2001                 if (tcp_children){
2002                         pkg_free(tcp_children);
2003                         tcp_children=0;
2004                 }
2005 }
2006
2007
2008
2009 int init_tcp()
2010 {
2011         char* poll_err;
2012         
2013         /* init lock */
2014         tcpconn_lock=lock_alloc();
2015         if (tcpconn_lock==0){
2016                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc lock\n");
2017                 goto error;
2018         }
2019         if (lock_init(tcpconn_lock)==0){
2020                 LOG(L_CRIT, "ERROR: init_tcp: could not init lock\n");
2021                 lock_dealloc((void*)tcpconn_lock);
2022                 tcpconn_lock=0;
2023                 goto error;
2024         }
2025         /* init globals */
2026         tcp_connections_no=shm_malloc(sizeof(int));
2027         if (tcp_connections_no==0){
2028                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
2029                 goto error;
2030         }
2031         *tcp_connections_no=0;
2032         connection_id=shm_malloc(sizeof(int));
2033         if (connection_id==0){
2034                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
2035                 goto error;
2036         }
2037         *connection_id=1;
2038         /* alloc hashtables*/
2039         tcpconn_aliases_hash=(struct tcp_conn_alias**)
2040                         shm_malloc(TCP_ALIAS_HASH_SIZE* sizeof(struct tcp_conn_alias*));
2041         if (tcpconn_aliases_hash==0){
2042                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc address hashtable\n");
2043                 goto error;
2044         }
2045         tcpconn_id_hash=(struct tcp_connection**)shm_malloc(TCP_ID_HASH_SIZE*
2046                                                                 sizeof(struct tcp_connection*));
2047         if (tcpconn_id_hash==0){
2048                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc id hashtable\n");
2049                 goto error;
2050         }
2051         /* init hashtables*/
2052         memset((void*)tcpconn_aliases_hash, 0, 
2053                         TCP_ALIAS_HASH_SIZE * sizeof(struct tcp_conn_alias*));
2054         memset((void*)tcpconn_id_hash, 0, 
2055                         TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
2056         
2057         /* fix config variables */
2058         if (tcp_connect_timeout<0)
2059                 tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT;
2060         if (tcp_send_timeout<0)
2061                 tcp_send_timeout=DEFAULT_TCP_SEND_TIMEOUT;
2062         if (tcp_con_lifetime<0){
2063                 /* set to max value (~ 1/2 MAX_INT) */
2064                 tcp_con_lifetime=MAX_TCP_CON_LIFETIME;
2065         }else{
2066                 if ((unsigned)tcp_con_lifetime > 
2067                                 (unsigned)TICKS_TO_S(MAX_TCP_CON_LIFETIME)){
2068                         LOG(L_WARN, "init_tcp: tcp_con_lifetime too big (%u s), "
2069                                         " the maximum value is %u\n", tcp_con_lifetime,
2070                                         TICKS_TO_S(MAX_TCP_CON_LIFETIME));
2071                         tcp_con_lifetime=MAX_TCP_CON_LIFETIME;
2072                 }else{
2073                         tcp_con_lifetime=S_TO_TICKS(tcp_con_lifetime);
2074                 }
2075         }
2076         
2077                 poll_err=check_poll_method(tcp_poll_method);
2078         
2079         /* set an appropriate poll method */
2080         if (poll_err || (tcp_poll_method==0)){
2081                 tcp_poll_method=choose_poll_method();
2082                 if (poll_err){
2083                         LOG(L_ERR, "ERROR: init_tcp: %s, using %s instead\n",
2084                                         poll_err, poll_method_name(tcp_poll_method));
2085                 }else{
2086                         LOG(L_INFO, "init_tcp: using %s as the io watch method"
2087                                         " (auto detected)\n", poll_method_name(tcp_poll_method));
2088                 }
2089         }else{
2090                         LOG(L_INFO, "init_tcp: using %s io watch method (config)\n",
2091                                         poll_method_name(tcp_poll_method));
2092         }
2093         
2094         return 0;
2095 error:
2096         /* clean-up */
2097         destroy_tcp();
2098         return -1;
2099 }
2100
2101
#ifdef TCP_CHILD_NON_BLOCKING
/* switches the file descriptor s to non-blocking mode, keeping all its
 * other file status flags unchanged
 * returns: 0 on success, -1 on error (the failed fcntl is logged) */
static int set_non_blocking(int s)
{
	int fl;

	/* read the current flags, O_NONBLOCK will be added to them */
	fl=fcntl(s, F_GETFL);
	if (fl==-1){
		LOG(L_ERR, "ERROR: set_non_blocking: fnctl failed: (%d) %s\n",
				errno, strerror(errno));
		return -1;
	}
	if (fcntl(s, F_SETFL, fl|O_NONBLOCK)==-1){
		LOG(L_ERR, "ERROR: set_non_blocking: fcntl: set non-blocking failed:"
				" (%d) %s\n", errno, strerror(errno));
		return -1;
	}
	return 0;
}

#endif
2125
2126
/* prepares a pair of child communication sockets: if
 * TCP_CHILD_NON_BLOCKING is defined, fd[0] and fd[1] are switched (in this
 * order) to non-blocking mode, stopping at the first failure; otherwise
 * nothing is done.
 * params:  fd - array holding the 2 socket descriptors
 * returns: -1 on error, 0 on success */
int tcp_fix_child_sockets(int* fd)
{
#ifdef TCP_CHILD_NON_BLOCKING
	int i;

	for (i=0; i<2; i++){
		if (set_non_blocking(fd[i])<0)
			return -1;
	}
#endif
	return 0;
}
2138
2139
2140
2141 /* starts the tcp processes */
2142 int tcp_init_children()
2143 {
2144         int r;
2145         int reader_fd_1; /* for comm. with the tcp children read  */
2146         pid_t pid;
2147         struct socket_info *si;
2148         
2149         /* estimate max fd. no:
2150          * 1 tcp send unix socket/all_proc, 
2151          *  + 1 udp sock/udp proc + 1 tcp_child sock/tcp child*
2152          *  + no_listen_tcp */
2153         for(r=0, si=tcp_listen; si; si=si->next, r++);
2154 #ifdef USE_TLS
2155         if (! tls_disable)
2156                 for (si=tls_listen; si; si=si->next, r++);
2157 #endif
2158         
2159         register_fds(r+tcp_max_connections+get_max_procs()-1 /* tcp main */);
2160 #if 0
2161         tcp_max_fd_no=get_max_procs()*2 +r-1 /* timer */ +3; /* stdin/out/err*/
2162         /* max connections can be temporarily exceeded with estimated_process_count
2163          * - tcp_main (tcpconn_connect called simultaneously in all all the 
2164          *  processes) */
2165         tcp_max_fd_no+=tcp_max_connections+get_max_procs()-1 /* tcp main */;
2166 #endif
2167         /* alloc the children array */
2168         tcp_children=pkg_malloc(sizeof(struct tcp_child)*tcp_children_no);
2169         if (tcp_children==0){
2170                         LOG(L_ERR, "ERROR: tcp_init_children: out of memory\n");
2171                         goto error;
2172         }
2173         /* create the tcp sock_info structures */
2174         /* copy the sockets --moved to main_loop*/
2175         
2176         /* fork children & create the socket pairs*/
2177         for(r=0; r<tcp_children_no; r++){
2178                 child_rank++;
2179                 pid=fork_tcp_process(child_rank, "tcp receiver", r, &reader_fd_1);
2180                 if (pid<0){
2181                         LOG(L_ERR, "ERROR: tcp_main: fork failed: %s\n",
2182                                         strerror(errno));
2183                         goto error;
2184                 }else if (pid>0){
2185                         /* parent */
2186                 }else{
2187                         /* child */
2188                         bind_address=0; /* force a SEGFAULT if someone uses a non-init.
2189                                                            bind address on tcp */
2190                         tcp_receive_loop(reader_fd_1);
2191                 }
2192         }
2193         return 0;
2194 error:
2195         return -1;
2196 }
2197
2198
2199
2200 void tcp_get_info(struct tcp_gen_info *ti)
2201 {
2202         ti->tcp_readers=tcp_children_no;
2203         ti->tcp_max_connections=tcp_max_connections;
2204         ti->tcp_connections_no=*tcp_connections_no;
2205 }
2206
2207 #endif