93f21219d62642ac82956c161794b4cd2265fb76
[sip-router] / tcp_main.c
1 /*
2  * $Id$
3  *
4  * Copyright (C) 2001-2003 FhG Fokus
5  *
6  * This file is part of ser, a free SIP server.
7  *
8  * ser is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version
12  *
13  * For a license to use the ser software under conditions
14  * other than those described here, or to purchase support for this
15  * software, please contact iptel.org by e-mail at the following addresses:
16  *    info@iptel.org
17  *
18  * ser is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26  */
27 /*
28  * History:
29  * --------
30  *  2002-11-29  created by andrei
31  *  2002-12-11  added tcp_send (andrei)
32  *  2003-01-20  locking fixes, hashtables (andrei)
33  *  2003-02-20  s/lock_t/gen_lock_t/ to avoid a conflict on solaris (andrei)
34  *  2003-02-25  Nagle is disabled if -DDISABLE_NAGLE (andrei)
35  *  2003-03-29  SO_REUSEADDR before calling bind to allow
 *              server restart, Nagle set on the (hopefully)
37  *              correct socket (jiri)
38  *  2003-03-31  always try to find the corresponding tcp listen socket for
 *               a temp. socket and store it in *->bind_address: added
40  *               find_tcp_si, modified tcpconn_connect (andrei)
41  *  2003-04-14  set sockopts to TOS low delay (andrei)
42  *  2003-06-30  moved tcp new connect checking & handling to
43  *               handle_new_connect (andrei)
44  *  2003-07-09  tls_close called before closing the tcp connection (andrei)
45  *  2003-10-24  converted to the new socket_info lists (andrei)
46  *  2003-10-27  tcp port aliases support added (andrei)
47  *  2003-11-04  always lock before manipulating refcnt; sendchild
48  *              does not inc refcnt by itself anymore (andrei)
49  *  2003-11-07  different unix sockets are used for fd passing
50  *              to/from readers/writers (andrei)
51  *  2003-11-17  handle_new_connect & tcp_connect will close the 
52  *              new socket if tcpconn_new return 0 (e.g. out of mem) (andrei)
53  *  2003-11-28  tcp_blocking_write & tcp_blocking_connect added (andrei)
54  *  2004-11-08  dropped find_tcp_si and replaced with find_si (andrei)
55  *  2005-06-07  new tcp optimized code, supports epoll (LT), sigio + real time
56  *               signals, poll & select (andrei)
57  *  2005-06-26  *bsd kqueue support (andrei)
58  *  2005-07-04  solaris /dev/poll support (andrei)
59  *  2005-07-08  tcp_max_connections, tcp_connection_lifetime, don't accept
60  *               more connections if tcp_max_connections is exceeded (andrei)
61  *  2005-10-21  cleanup all the open connections on exit
 *              decrement the no. of open connections on timeout too (andrei)
 *  2006-01-30  queue send_fd request and execute them at the end of the
63  *              poll loop  (#ifdef) (andrei)
64  *              process all children requests, before attempting to send
65  *              them new stuff (fixes some deadlocks) (andrei)
66  *  2006-02-03  timers are run only once per s (andrei)
67  *              tcp children fds can be non-blocking; send fds are queued on
68  *              EAGAIN; lots of bug fixes (andrei)
69  *  2006-02-06  better tcp_max_connections checks, tcp_connections_no moved to
70  *              shm (andrei)
71  *  2006-04-12  tcp_send() changed to use struct dest_info (andrei)
72  */
73
74
75 #ifdef USE_TCP
76
77
78 #ifndef SHM_MEM
79 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
80 #endif
81
82 #include <sys/time.h>
83 #include <sys/types.h>
84 #include <sys/select.h>
85 #include <sys/socket.h>
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/tcp.h>
90 #include <sys/uio.h>  /* writev*/
91 #include <netdb.h>
92 #include <stdlib.h> /*exit() */
93
94 #include <unistd.h>
95
96 #include <errno.h>
97 #include <string.h>
98
99 #ifdef HAVE_SELECT
100 #include <sys/select.h>
101 #endif
102 #include <sys/poll.h>
103
104
105 #include "ip_addr.h"
106 #include "pass_fd.h"
107 #include "tcp_conn.h"
108 #include "globals.h"
109 #include "pt.h"
110 #include "locking.h"
111 #include "mem/mem.h"
112 #include "mem/shm_mem.h"
113 #include "timer.h"
114 #include "sr_module.h"
115 #include "tcp_server.h"
116 #include "tcp_init.h"
117 #include "tsend.h"
118 #include "timer_ticks.h"
119 #ifdef USE_TLS
120 #include "tls/tls_server.h"
121 #endif 
122 #include "tcp_info.h"
123
124 #define local_malloc pkg_malloc
125 #define local_free   pkg_free
126
127 #define HANDLE_IO_INLINE
128 #include "io_wait.h"
129 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
130
/* don't pass a new connection immediately to a child, wait for some data
 * on it first */
#define TCP_PASS_NEW_CONNECTION_ON_DATA
#define TCP_LISTEN_BACKLOG 1024
/* queue send fd requests on EAGAIN, instead of sending them immediately */
#define SEND_FD_QUEUE
#define TCP_CHILD_NON_BLOCKING
#ifdef SEND_FD_QUEUE
/* the send fd queue requires TCP_CHILD_NON_BLOCKING, so force it on even if
 * the define above gets removed */
#ifndef TCP_CHILD_NON_BLOCKING
#define TCP_CHILD_NON_BLOCKING
#endif
#define MAX_SEND_FD_QUEUE_SIZE	tcp_max_fd_no
#define SEND_FD_QUEUE_SIZE		128  /* initial size */
#define MAX_SEND_FD_RETRIES		96	 /* FIXME: not used for now */
#define SEND_FD_QUEUE_TIMEOUT	MS_TO_TICKS(2000)  /* 2 s */
#endif
147
148
/* file descriptor type tags */
enum fd_types {
	F_NONE,
	F_SOCKINFO,	/* a tcp_listen fd */
	F_TCPCONN,
	F_TCPCHILD,
	F_PROC
};
151
/* per tcp child process record */
struct tcp_child {
	pid_t pid;
	int proc_no;	/* ser proc_no, for debugging */
	int unix_sock;	/* unix "read child" sock fd */
	int busy;
	int n_reqs;		/* number of requests serviced so far */
};
159
160
161
162 int tcp_accept_aliases=0; /* by default don't accept aliases */
163 int tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT;
164 int tcp_send_timeout=DEFAULT_TCP_SEND_TIMEOUT;
165 int tcp_con_lifetime=DEFAULT_TCP_CONNECTION_LIFETIME;
166 enum poll_types tcp_poll_method=0; /* by default choose the best method */
167 int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
168 int tcp_max_fd_no=0;
169
170 static int* tcp_connections_no=0; /* current open connections */
171
172 /* connection hash table (after ip&port) , includes also aliases */
173 struct tcp_conn_alias** tcpconn_aliases_hash=0;
174 /* connection hash table (after connection id) */
175 struct tcp_connection** tcpconn_id_hash=0;
176 gen_lock_t* tcpconn_lock=0;
177
178 static struct tcp_child* tcp_children;
179 static int* connection_id=0; /*  unique for each connection, used for 
180                                                                 quickly finding the corresponding connection
181                                                                 for a reply */
182 int unix_tcp_sock;
183
184 static int tcp_proto_no=-1; /* tcp protocol number as returned by
185                                                            getprotobyname */
186
187 static io_wait_h io_h;
188
189
190
191 /* set all socket/fd options:  disable nagle, tos lowdelay, non-blocking
192  * return -1 on error */
193 static int init_sock_opt(int s)
194 {
195         int flags;
196         int optval;
197         
198 #ifdef DISABLE_NAGLE
199         flags=1;
200         if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
201                                         &flags, sizeof(flags))<0) ){
202                 LOG(L_WARN, "WARNING: init_sock_opt: could not disable Nagle: %s\n",
203                                 strerror(errno));
204         }
205 #endif
206         /* tos*/
207         optval = tos;
208         if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,sizeof(optval)) ==-1){
209                 LOG(L_WARN, "WARNING: init_sock_opt: setsockopt tos: %s\n",
210                                 strerror(errno));
211                 /* continue since this is not critical */
212         }
213         /* non-blocking */
214         flags=fcntl(s, F_GETFL);
215         if (flags==-1){
216                 LOG(L_ERR, "ERROR: init_sock_opt: fnctl failed: (%d) %s\n",
217                                 errno, strerror(errno));
218                 goto error;
219         }
220         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
221                 LOG(L_ERR, "ERROR: init_sock_opt: fcntl: set non-blocking failed:"
222                                 " (%d) %s\n", errno, strerror(errno));
223                 goto error;
224         }
225         return 0;
226 error:
227         return -1;
228 }
229
230
231
232 /* blocking connect on a non-blocking fd; it will timeout after
233  * tcp_connect_timeout 
234  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
235  * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
236  */
237 static int tcp_blocking_connect(int fd, const struct sockaddr *servaddr,
238                                                                 socklen_t addrlen)
239 {
240         int n;
241 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
242         fd_set sel_set;
243         fd_set orig_set;
244         struct timeval timeout;
245 #else
246         struct pollfd pf;
247 #endif
248         int elapsed;
249         int to;
250         int ticks;
251         int err;
252         unsigned int err_len;
253         int poll_err;
254         
255         poll_err=0;
256         to=tcp_connect_timeout;
257         ticks=get_ticks();
258 again:
259         n=connect(fd, servaddr, addrlen);
260         if (n==-1){
261                 if (errno==EINTR){
262                         elapsed=(get_ticks()-ticks)*TIMER_TICK;
263                         if (elapsed<to)         goto again;
264                         else goto error_timeout;
265                 }
266                 if (errno!=EINPROGRESS && errno!=EALREADY){
267                         LOG(L_ERR, "ERROR: tcp_blocking_connect: (%d) %s\n",
268                                         errno, strerror(errno));
269                         goto error;
270                 }
271         }else goto end;
272         
273         /* poll/select loop */
274 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
275                 FD_ZERO(&orig_set);
276                 FD_SET(fd, &orig_set);
277 #else
278                 pf.fd=fd;
279                 pf.events=POLLOUT;
280 #endif
281         while(1){
282                 elapsed=(get_ticks()-ticks)*TIMER_TICK;
283                 if (elapsed<to)
284                         to-=elapsed;
285                 else 
286                         goto error_timeout;
287 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
288                 sel_set=orig_set;
289                 timeout.tv_sec=to;
290                 timeout.tv_usec=0;
291                 n=select(fd+1, 0, &sel_set, 0, &timeout);
292 #else
293                 n=poll(&pf, 1, to*1000);
294 #endif
295                 if (n<0){
296                         if (errno==EINTR) continue;
297                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll/select failed:"
298                                         " (%d) %s\n", errno, strerror(errno));
299                         goto error;
300                 }else if (n==0) /* timeout */ continue;
301 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
302                 if (FD_ISSET(fd, &sel_set))
303 #else
304                 if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){ 
305                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll error: flags %x\n",
306                                         pf.revents);
307                         poll_err=1;
308                 }
309 #endif
310                 {
311                         err_len=sizeof(err);
312                         getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
313                         if ((err==0) && (poll_err==0)) goto end;
314                         if (err!=EINPROGRESS && err!=EALREADY){
315                                 LOG(L_ERR, "ERROR: tcp_blocking_connect: SO_ERROR (%d) %s\n",
316                                                 err, strerror(err));
317                                 goto error;
318                         }
319                 }
320         }
321 error_timeout:
322         /* timeout */
323         LOG(L_ERR, "ERROR: tcp_blocking_connect: timeout %d s elapsed from %d s\n",
324                         elapsed, tcp_connect_timeout);
325 error:
326         return -1;
327 end:
328         return 0;
329 }
330
331
332
#if 0
/* (disabled code, kept for reference)
 * Blocking write, even on non-blocking sockets: sends buf on fd and, when
 * the send would block or is only partial, waits with select() until the
 * fd is writable again and retries.
 * Returns the initial len on a full write, -1 on error or when a select()
 * wait times out (tcp_send_timeout). */
static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
								unsigned int len)
{
	int n;
	fd_set wr_set;
	struct timeval tv;
	int start;
	int total;

	total=len;
	for(;;){
		n=send(fd, buf, len,
#ifdef HAVE_MSG_NOSIGNAL
			MSG_NOSIGNAL
#else
			0
#endif
		);
		if (n<0){
			if (errno==EINTR) continue; /* interrupted, send again */
			if (errno!=EAGAIN && errno!=EWOULDBLOCK){
				LOG(L_ERR, "tcp_blocking_write: failed to send: (%d) %s\n",
						errno, strerror(errno));
				return -1;
			}
			/* would block => wait below */
		}else if (n<len){
			/* partial write: skip over what was already sent */
			buf+=n;
			len-=n;
		}else{
			/* success: full write */
			return total;
		}
		/* wait until fd becomes writable, then go back and send */
		for(;;){
			FD_ZERO(&wr_set);
			FD_SET(fd, &wr_set);
			tv.tv_sec=tcp_send_timeout;
			tv.tv_usec=0;
			start=get_ticks();
			n=select(fd+1, 0, &wr_set, 0, &tv);
			if (n<0){
				if (errno==EINTR) continue; /* signal, ignore */
				LOG(L_ERR, "ERROR: tcp_blocking_write: select failed: "
						" (%d) %s\n", errno, strerror(errno));
				return -1;
			}
			if (n==0){
				/* timeout */
				if (get_ticks()-start>=tcp_send_timeout){
					LOG(L_ERR, "ERROR: tcp_blocking_write: send timeout (%d)\n",
							tcp_send_timeout);
					return -1;
				}
				continue;
			}
			if (FD_ISSET(fd, &wr_set)){
				/* we can write again */
				break;
			}
		}
	}
}
#endif
402
403
404
405 struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
406                                                                         struct socket_info* ba, int type, 
407                                                                         int state)
408 {
409         struct tcp_connection *c;
410         
411         c=(struct tcp_connection*)shm_malloc(sizeof(struct tcp_connection));
412         if (c==0){
413                 LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
414                 goto error;
415         }
416         memset(c, 0, sizeof(struct tcp_connection)); /* zero init */
417         c->s=sock;
418         c->fd=-1; /* not initialized */
419         if (lock_init(&c->write_lock)==0){
420                 LOG(L_ERR, "ERROR: tcpconn_new: init lock failed\n");
421                 goto error;
422         }
423         
424         c->rcv.src_su=*su;
425         
426         c->refcnt=0;
427         su2ip_addr(&c->rcv.src_ip, su);
428         c->rcv.src_port=su_getport(su);
429         c->rcv.bind_address=ba;
430         if (ba){
431                 c->rcv.dst_ip=ba->address;
432                 c->rcv.dst_port=ba->port_no;
433         }
434         print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
435         DBG(     "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
436         init_tcp_req(&c->req);
437         c->id=(*connection_id)++;
438         c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
439         c->rcv.proto_reserved2=0;
440         c->state=state;
441         c->extra_data=0;
442 #ifdef USE_TLS
443         if (type==PROTO_TLS){
444                 if (tls_tcpconn_init(c, sock)==-1) goto error;
445         }else
446 #endif /* USE_TLS*/
447         {
448                 c->type=PROTO_TCP;
449                 c->rcv.proto=PROTO_TCP;
450                 c->timeout=get_ticks()+tcp_con_lifetime;
451         }
452         c->flags|=F_CONN_REMOVED;
453         
454         return c;
455         
456 error:
457         if (c) shm_free(c);
458         return 0;
459 }
460
461
462
463 struct tcp_connection* tcpconn_connect(union sockaddr_union* server, int type)
464 {
465         int s;
466         struct socket_info* si;
467         union sockaddr_union my_name;
468         socklen_t my_name_len;
469         struct tcp_connection* con;
470         struct ip_addr ip;
471
472         s=-1;
473         
474         if (*tcp_connections_no >= tcp_max_connections){
475                 LOG(L_ERR, "ERROR: tcpconn_connect: maximum number of connections"
476                                         " exceeded (%d/%d)\n",
477                                         *tcp_connections_no, tcp_max_connections);
478                 goto error;
479         }
480         s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
481         if (s==-1){
482                 LOG(L_ERR, "ERROR: tcpconn_connect: socket: (%d) %s\n",
483                                 errno, strerror(errno));
484                 goto error;
485         }
486         if (init_sock_opt(s)<0){
487                 LOG(L_ERR, "ERROR: tcpconn_connect: init_sock_opt failed\n");
488                 goto error;
489         }
490         if (tcp_blocking_connect(s, &server->s, sockaddru_len(*server))<0){
491                 LOG(L_ERR, "ERROR: tcpconn_connect: tcp_blocking_connect failed\n");
492                 goto error;
493         }
494         my_name_len=sizeof(my_name);
495         if (getsockname(s, &my_name.s, &my_name_len)!=0){
496                 LOG(L_ERR, "ERROR: tcp_connect: getsockname failed: %s(%d)\n",
497                                 strerror(errno), errno);
498                 si=0; /* try to go on */
499         }
500         su2ip_addr(&ip, &my_name);
501 #ifdef USE_TLS
502         if (type==PROTO_TLS)
503                 si=find_si(&ip, 0, PROTO_TLS);
504         else
505 #endif
506                 si=find_si(&ip, 0, PROTO_TCP);
507
508         if (si==0){
509                 LOG(L_ERR, "ERROR: tcp_connect: could not find corresponding"
510                                 " listening socket, using default...\n");
511                 if (server->s.sa_family==AF_INET) si=sendipv4_tcp;
512 #ifdef USE_IPV6
513                 else si=sendipv6_tcp;
514 #endif
515         }
516         con=tcpconn_new(s, server, si, type, S_CONN_CONNECT);
517         if (con==0){
518                 LOG(L_ERR, "ERROR: tcp_connect: tcpconn_new failed, closing the "
519                                  " socket\n");
520                 goto error;
521         }
522         return con;
523         /*FIXME: set sock idx! */
524 error:
525         if (s!=-1) close(s); /* close the opened socket */
526         return 0;
527 }
528
529
530
531 struct tcp_connection*  tcpconn_add(struct tcp_connection *c)
532 {
533         unsigned hash;
534
535         if (c){
536                 TCPCONN_LOCK;
537                 /* add it at the begining of the list*/
538                 hash=tcp_id_hash(c->id);
539                 c->id_hash=hash;
540                 tcpconn_listadd(tcpconn_id_hash[hash], c, id_next, id_prev);
541                 
542                 hash=tcp_addr_hash(&c->rcv.src_ip, c->rcv.src_port);
543                 /* set the first alias */
544                 c->con_aliases[0].port=c->rcv.src_port;
545                 c->con_aliases[0].hash=hash;
546                 c->con_aliases[0].parent=c;
547                 tcpconn_listadd(tcpconn_aliases_hash[hash], &c->con_aliases[0],
548                                                 next, prev);
549                 c->aliases++;
550                 TCPCONN_UNLOCK;
551                 DBG("tcpconn_add: hashes: %d, %d\n", hash, c->id_hash);
552                 return c;
553         }else{
554                 LOG(L_CRIT, "tcpconn_add: BUG: null connection pointer\n");
555                 return 0;
556         }
557 }
558
559
560 /* unsafe tcpconn_rm version (nolocks) */
561 void _tcpconn_rm(struct tcp_connection* c)
562 {
563         int r;
564         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
565         /* remove all the aliases */
566         for (r=0; r<c->aliases; r++)
567                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
568                                                 &c->con_aliases[r], next, prev);
569         lock_destroy(&c->write_lock);
570 #ifdef USE_TLS
571         if (c->type==PROTO_TLS) tls_tcpconn_clean(c);
572 #endif
573         shm_free(c);
574 }
575
576
577
578 void tcpconn_rm(struct tcp_connection* c)
579 {
580         int r;
581         TCPCONN_LOCK;
582         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
583         /* remove all the aliases */
584         for (r=0; r<c->aliases; r++)
585                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
586                                                 &c->con_aliases[r], next, prev);
587         TCPCONN_UNLOCK;
588         lock_destroy(&c->write_lock);
589 #ifdef USE_TLS
590         if ((c->type==PROTO_TLS)&&(c->extra_data)) tls_tcpconn_clean(c);
591 #endif
592         shm_free(c);
593 }
594
595
596 /* finds a connection, if id=0 uses the ip addr & port (host byte order)
597  * WARNING: unprotected (locks) use tcpconn_get unless you really
598  * know what you are doing */
599 struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port)
600 {
601
602         struct tcp_connection *c;
603         struct tcp_conn_alias* a;
604         unsigned hash;
605         
606 #ifdef EXTRA_DEBUG
607         DBG("tcpconn_find: %d  port %d\n",id, port);
608         if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
609 #endif
610         if (id){
611                 hash=tcp_id_hash(id);
612                 for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
613 #ifdef EXTRA_DEBUG
614                         DBG("c=%p, c->id=%d, port=%d\n",c, c->id, c->rcv.src_port);
615                         print_ip("ip=", &c->rcv.src_ip, "\n");
616 #endif
617                         if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
618                 }
619         }else if (ip){
620                 hash=tcp_addr_hash(ip, port);
621                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
622 #ifdef EXTRA_DEBUG
623                         DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
624                                         a->parent->id, a->port, a->parent->rcv.src_port);
625                         print_ip("ip=",&a->parent->rcv.src_ip,"\n");
626 #endif
627                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
628                                         (ip_addr_cmp(ip, &a->parent->rcv.src_ip)) )
629                                 return a->parent;
630                 }
631         }
632         return 0;
633 }
634
635
636
637 /* _tcpconn_find with locks and timeout */
638 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
639                                                                         int timeout)
640 {
641         struct tcp_connection* c;
642         TCPCONN_LOCK;
643         c=_tcpconn_find(id, ip, port);
644         if (c){ 
645                         c->refcnt++;
646                         c->timeout=get_ticks()+timeout;
647         }
648         TCPCONN_UNLOCK;
649         return c;
650 }
651
652
653
654 /* add port as an alias for the "id" connection
655  * returns 0 on success,-1 on failure */
656 int tcpconn_add_alias(int id, int port, int proto)
657 {
658         struct tcp_connection* c;
659         unsigned hash;
660         struct tcp_conn_alias* a;
661         
662         a=0;
663         /* fix the port */
664         port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
665         TCPCONN_LOCK;
666         /* check if alias already exists */
667         c=_tcpconn_find(id, 0, 0);
668         if (c){
669                 hash=tcp_addr_hash(&c->rcv.src_ip, port);
670                 /* search the aliases for an already existing one */
671                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
672                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
673                                         (ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) ){
674                                 /* found */
675                                 if (a->parent!=c) goto error_sec;
676                                 else goto ok;
677                         }
678                 }
679                 if (c->aliases>=TCP_CON_MAX_ALIASES) goto error_aliases;
680                 c->con_aliases[c->aliases].parent=c;
681                 c->con_aliases[c->aliases].port=port;
682                 c->con_aliases[c->aliases].hash=hash;
683                 tcpconn_listadd(tcpconn_aliases_hash[hash], 
684                                                                 &c->con_aliases[c->aliases], next, prev);
685                 c->aliases++;
686         }else goto error_not_found;
687 ok:
688         TCPCONN_UNLOCK;
689 #ifdef EXTRA_DEBUG
690         if (a) DBG("tcpconn_add_alias: alias already present\n");
691         else   DBG("tcpconn_add_alias: alias port %d for hash %d, id %d\n",
692                         port, hash, c->id);
693 #endif
694         return 0;
695 error_aliases:
696         TCPCONN_UNLOCK;
697         LOG(L_ERR, "ERROR: tcpconn_add_alias: too many aliases for connection %p"
698                                 " (%d)\n", c, c->id);
699         return -1;
700 error_not_found:
701         TCPCONN_UNLOCK;
702         LOG(L_ERR, "ERROR: tcpconn_add_alias: no connection found for id %d\n",id);
703         return -1;
704 error_sec:
705         TCPCONN_UNLOCK;
706         LOG(L_ERR, "ERROR: tcpconn_add_alias: possible port hijack attempt\n");
707         LOG(L_ERR, "ERROR: tcpconn_add_alias: alias already present and points"
708                         " to another connection (%d : %d and %d : %d)\n",
709                         a->parent->id,  port, c->id, port);
710         return -1;
711 }
712
713
714
715 void tcpconn_ref(struct tcp_connection* c)
716 {
717         TCPCONN_LOCK;
718         c->refcnt++; /* FIXME: atomic_dec */
719         TCPCONN_UNLOCK;
720 }
721
722
723
724 void tcpconn_put(struct tcp_connection* c)
725 {
726         TCPCONN_LOCK;
727         c->refcnt--; /* FIXME: atomic_dec */
728         TCPCONN_UNLOCK;
729 }
730
731
732
733 /* finds a tcpconn & sends on it
734  * uses the dst members to, proto (TCP|TLS) and id
735  * returns: number of bytes written (>=0) on success
736  *          <0 on error */
737 int tcp_send(struct dest_info* dst, char* buf, unsigned len)
738 {
739         struct tcp_connection *c;
740         struct tcp_connection *tmp;
741         struct ip_addr ip;
742         int port;
743         int fd;
744         long response[2];
745         int n;
746         
747         port=su_getport(&dst->to);
748         if (port){
749                 su2ip_addr(&ip, &dst->to);
750                 c=tcpconn_get(dst->id, &ip, port, tcp_con_lifetime); 
751         }else if (dst->id){
752                 c=tcpconn_get(dst->id, 0, 0, tcp_con_lifetime);
753         }else{
754                 LOG(L_CRIT, "BUG: tcp_send called with null id & to\n");
755                 return -1;
756         }
757         
758         if (dst->id){
759                 if (c==0) {
760                         if (port){
761                                 /* try again w/o id */
762                                 c=tcpconn_get(0, &ip, port, tcp_con_lifetime);
763                                 goto no_id;
764                         }else{
765                                 LOG(L_ERR, "ERROR: tcp_send: id %d not found, dropping\n",
766                                                 dst->id);
767                                 return -1;
768                         }
769                 }else goto get_fd;
770         }
771 no_id:
772                 if (c==0){
773                         DBG("tcp_send: no open tcp connection found, opening new one\n");
774                         /* create tcp connection */
775                         if ((c=tcpconn_connect(&dst->to, dst->proto))==0){
776                                 LOG(L_ERR, "ERROR: tcp_send: connect failed\n");
777                                 return -1;
778                         }
779                         c->refcnt++; /* safe to do it w/o locking, it's not yet
780                                                         available to the rest of the world */
781                         fd=c->s;
782                         
783                         /* send the new tcpconn to "tcp main" */
784                         response[0]=(long)c;
785                         response[1]=CONN_NEW;
786                         n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
787                         if (n<=0){
788                                 LOG(L_ERR, "BUG: tcp_send: failed send_fd: %s (%d)\n",
789                                                 strerror(errno), errno);
790                                 n=-1;
791                                 goto end;
792                         }       
793                         goto send_it;
794                 }
795 get_fd:
796                         /* todo: see if this is not the same process holding
797                          *  c  and if so send directly on c->fd */
798                         DBG("tcp_send: tcp connection found (%p), acquiring fd\n", c);
799                         /* get the fd */
800                         response[0]=(long)c;
801                         response[1]=CONN_GET_FD;
802                         n=send_all(unix_tcp_sock, response, sizeof(response));
803                         if (n<=0){
804                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(write):%s (%d)\n",
805                                                 strerror(errno), errno);
806                                 n=-1;
807                                 goto release_c;
808                         }
809                         DBG("tcp_send, c= %p, n=%d\n", c, n);
810                         tmp=c;
811                         n=receive_fd(unix_tcp_sock, &c, sizeof(c), &fd, MSG_WAITALL);
812                         if (n<=0){
813                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(receive_fd):"
814                                                         " %s (%d)\n", strerror(errno), errno);
815                                 n=-1;
816                                 goto release_c;
817                         }
818                         if (c!=tmp){
819                                 LOG(L_CRIT, "BUG: tcp_send: get_fd: got different connection:"
820                                                 "  %p (id= %d, refcnt=%d state=%d != "
821                                                 "  %p (id= %d, refcnt=%d state=%d (n=%d)\n",
822                                                   c,   c->id,   c->refcnt,   c->state,
823                                                   tmp, tmp->id, tmp->refcnt, tmp->state, n
824                                    );
825                                 n=-1; /* fail */
826                                 goto end;
827                         }
828                         DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
829                 
830         
831         
832 send_it:
833         DBG("tcp_send: sending...\n");
834         lock_get(&c->write_lock);
835 #ifdef USE_TLS
836         if (c->type==PROTO_TLS)
837                 n=tls_blocking_write(c, fd, buf, len);
838         else
839 #endif
840                 /* n=tcp_blocking_write(c, fd, buf, len); */
841                 n=tsend_stream(fd, buf, len, tcp_send_timeout*1000); 
842         lock_release(&c->write_lock);
843         DBG("tcp_send: after write: c= %p n=%d fd=%d\n",c, n, fd);
844         DBG("tcp_send: buf=\n%.*s\n", (int)len, buf);
845         if (n<0){
846                 LOG(L_ERR, "ERROR: tcp_send: failed to send\n");
847                 /* error on the connection , mark it as bad and set 0 timeout */
848                 c->state=S_CONN_BAD;
849                 c->timeout=0;
850                 /* tell "main" it should drop this (optional it will t/o anyway?)*/
851                 response[0]=(long)c;
852                 response[1]=CONN_ERROR;
853                 if (send_all(unix_tcp_sock, response, sizeof(response))<=0){
854                         LOG(L_ERR, "BUG: tcp_send: error return failed (write):%s (%d)\n",
855                                         strerror(errno), errno);
856                         tcpconn_put(c); /* deref. it manually */
857                         n=-1;
858                 }
859                 /* CONN_ERROR will auto-dec refcnt => we must not call tcpconn_put 
860                  * if it succeeds */
861                 close(fd);
862                 return n; /* error return, no tcpconn_put */
863         }
864 end:
865         close(fd);
866 release_c:
867         tcpconn_put(c); /* release c (lock; dec refcnt; unlock) */
868         return n;
869 }
870
871
872
873 int tcp_init(struct socket_info* sock_info)
874 {
875         union sockaddr_union* addr;
876         int optval;
877 #ifdef DISABLE_NAGLE
878         int flag;
879         struct protoent* pe;
880
881         if (tcp_proto_no==-1){ /* if not already set */
882                 pe=getprotobyname("tcp");
883                 if (pe==0){
884                         LOG(L_ERR, "ERROR: tcp_init: could not get TCP protocol number\n");
885                         tcp_proto_no=-1;
886                 }else{
887                         tcp_proto_no=pe->p_proto;
888                 }
889         }
890 #endif
891         
892         addr=&sock_info->su;
893         /* sock_info->proto=PROTO_TCP; */
894         if (init_su(addr, &sock_info->address, sock_info->port_no)<0){
895                 LOG(L_ERR, "ERROR: tcp_init: could no init sockaddr_union\n");
896                 goto error;
897         }
898         sock_info->socket=socket(AF2PF(addr->s.sa_family), SOCK_STREAM, 0);
899         if (sock_info->socket==-1){
900                 LOG(L_ERR, "ERROR: tcp_init: socket: %s\n", strerror(errno));
901                 goto error;
902         }
903 #ifdef DISABLE_NAGLE
904         flag=1;
905         if ( (tcp_proto_no!=-1) &&
906                  (setsockopt(sock_info->socket, tcp_proto_no , TCP_NODELAY,
907                                          &flag, sizeof(flag))<0) ){
908                 LOG(L_ERR, "ERROR: tcp_init: could not disable Nagle: %s\n",
909                                 strerror(errno));
910         }
911 #endif
912
913
914 #if  !defined(TCP_DONT_REUSEADDR) 
915         /* Stevens, "Network Programming", Section 7.5, "Generic Socket
916      * Options": "...server started,..a child continues..on existing
917          * connection..listening server is restarted...call to bind fails
918          * ... ALL TCP servers should specify the SO_REUSEADDRE option 
919          * to allow the server to be restarted in this situation
920          *
921          * Indeed, without this option, the server can't restart.
922          *   -jiri
923          */
924         optval=1;
925         if (setsockopt(sock_info->socket, SOL_SOCKET, SO_REUSEADDR,
926                                 (void*)&optval, sizeof(optval))==-1) {
927                 LOG(L_ERR, "ERROR: tcp_init: setsockopt %s\n",
928                         strerror(errno));
929                 goto error;
930         }
931 #endif
932         /* tos */
933         optval = tos;
934         if (setsockopt(sock_info->socket, IPPROTO_IP, IP_TOS, (void*)&optval, 
935                                 sizeof(optval)) ==-1){
936                 LOG(L_WARN, "WARNING: tcp_init: setsockopt tos: %s\n", strerror(errno));
937                 /* continue since this is not critical */
938         }
939         if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
940                 LOG(L_ERR, "ERROR: tcp_init: bind(%x, %p, %d) on %s:%d : %s\n",
941                                 sock_info->socket,  &addr->s, 
942                                 (unsigned)sockaddru_len(*addr),
943                                 sock_info->address_str.s,
944                                 sock_info->port_no,
945                                 strerror(errno));
946                 goto error;
947         }
948         if (listen(sock_info->socket, TCP_LISTEN_BACKLOG)==-1){
949                 LOG(L_ERR, "ERROR: tcp_init: listen(%x, %p, %d) on %s: %s\n",
950                                 sock_info->socket, &addr->s, 
951                                 (unsigned)sockaddru_len(*addr),
952                                 sock_info->address_str.s,
953                                 strerror(errno));
954                 goto error;
955         }
956         
957         return 0;
958 error:
959         if (sock_info->socket!=-1){
960                 close(sock_info->socket);
961                 sock_info->socket=-1;
962         }
963         return -1;
964 }
965
966
967
968 /* used internally by tcp_main_loop() */
969 static void tcpconn_destroy(struct tcp_connection* tcpconn)
970 {
971         int fd;
972
973         TCPCONN_LOCK; /*avoid races w/ tcp_send*/
974         tcpconn->refcnt--;
975         if (tcpconn->refcnt==0){ 
976                 DBG("tcpconn_destroy: destroying connection %p, flags %04x\n",
977                                 tcpconn, tcpconn->flags);
978                 fd=tcpconn->s;
979 #ifdef USE_TLS
980                 /*FIXME: lock ->writelock ? */
981                 if (tcpconn->type==PROTO_TLS)
982                         tls_close(tcpconn, fd);
983 #endif
984                 _tcpconn_rm(tcpconn);
985                 close(fd);
986                 (*tcp_connections_no)--;
987         }else{
988                 /* force timeout */
989                 tcpconn->timeout=0;
990                 tcpconn->state=S_CONN_BAD;
991                 DBG("tcpconn_destroy: delaying (%p, flags %04x) ...\n",
992                                 tcpconn, tcpconn->flags);
993                 
994         }
995         TCPCONN_UNLOCK;
996 }
997
998
999
1000 #ifdef SEND_FD_QUEUE
1001 struct send_fd_info{
1002         struct tcp_connection* tcp_conn;
1003         ticks_t expire;
1004         int unix_sock;
1005         unsigned int retries; /* debugging */
1006 };
1007
/* growable array of pending send_fd operations; the used entries are
 * [data, crt) */
struct tcp_send_fd_q{
	struct send_fd_info *data; /* start of the buffer */
	struct send_fd_info *crt;  /* next free slot inside the buffer */
	struct send_fd_info *end;  /* one past the last slot of the buffer */
};


/* fds that could not be passed immediately to a tcp child */
static struct tcp_send_fd_q send2child_q;
1016
1017
1018
1019 static int send_fd_queue_init(struct tcp_send_fd_q *q, unsigned int size)
1020 {
1021         q->data=pkg_malloc(size*sizeof(struct send_fd_info));
1022         if (q->data==0){
1023                 LOG(L_ERR, "ERROR: send_fd_queue_init: out of memory\n");
1024                 return -1;
1025         }
1026         q->crt=&q->data[0];
1027         q->end=&q->data[size];
1028         return 0;
1029 }
1030
1031 static void send_fd_queue_destroy(struct tcp_send_fd_q *q)
1032 {
1033         if (q->data){
1034                 pkg_free(q->data);
1035                 q->data=0;
1036                 q->crt=q->end=0;
1037         }
1038 }
1039
1040
1041
1042 static int init_send_fd_queues()
1043 {
1044         if (send_fd_queue_init(&send2child_q, SEND_FD_QUEUE_SIZE)!=0)
1045                 goto error;
1046         return 0;
1047 error:
1048         LOG(L_ERR, "ERROR: init_send_fd_queues: init failed\n");
1049         return -1;
1050 }
1051
1052
1053
1054 static void destroy_send_fd_queues()
1055 {
1056         send_fd_queue_destroy(&send2child_q);
1057 }
1058
1059
1060
1061
1062 inline static int send_fd_queue_add(    struct tcp_send_fd_q* q, 
1063                                                                                 int unix_sock,
1064                                                                                 struct tcp_connection *t)
1065 {
1066         struct send_fd_info* tmp;
1067         unsigned long new_size;
1068         
1069         if (q->crt>=q->end){
1070                 new_size=q->end-&q->data[0];
1071                 if (new_size< MAX_SEND_FD_QUEUE_SIZE/2){
1072                         new_size*=2;
1073                 }else new_size=MAX_SEND_FD_QUEUE_SIZE;
1074                 if (q->crt>=&q->data[new_size]){
1075                         LOG(L_ERR, "ERROR: send_fd_queue_add: queue full: %ld/%ld\n",
1076                                         (long)(q->crt-&q->data[0]-1), new_size);
1077                         goto error;
1078                 }
1079                 LOG(L_CRIT, "INFO: send_fd_queue: queue full: %ld, extending to %ld\n",
1080                                 (long)(q->end-&q->data[0]), new_size);
1081                 tmp=pkg_realloc(q->data, new_size*sizeof(struct send_fd_info));
1082                 if (tmp==0){
1083                         LOG(L_ERR, "ERROR: send_fd_queue_add: out of memory\n");
1084                         goto error;
1085                 }
1086                 q->crt=(q->crt-&q->data[0])+tmp;
1087                 q->data=tmp;
1088                 q->end=&q->data[new_size];
1089         }
1090         q->crt->tcp_conn=t;
1091         q->crt->unix_sock=unix_sock;
1092         q->crt->expire=get_ticks_raw()+SEND_FD_QUEUE_TIMEOUT;
1093         q->crt->retries=0;
1094         q->crt++;
1095         return 0;
1096 error:
1097         return -1;
1098 }
1099
1100
1101
1102 inline static void send_fd_queue_run(struct tcp_send_fd_q* q)
1103 {
1104         struct send_fd_info* p;
1105         struct send_fd_info* t;
1106         
1107         for (p=t=&q->data[0]; p<q->crt; p++){
1108                 if (send_fd(p->unix_sock, &(p->tcp_conn),
1109                                         sizeof(struct tcp_connection*), p->tcp_conn->s)<=0){
1110                         if ( ((errno==EAGAIN)||(errno==EWOULDBLOCK)) && 
1111                                                         ((s_ticks_t)(p->expire-get_ticks_raw())>0)){
1112                                 /* leave in queue for a future try */
1113                                 *t=*p;
1114                                 t->retries++;
1115                                 t++;
1116                         }else{
1117                                 LOG(L_ERR, "ERROR: run_send_fd_queue: send_fd failed"
1118                                                    " on socket %d , queue entry %ld, retries %d,"
1119                                                    " connection %p, tcp socket %d, errno=%d (%s) \n",
1120                                                    p->unix_sock, (long)(p-&q->data[0]), p->retries,
1121                                                    p->tcp_conn, p->tcp_conn->s, errno,
1122                                                    strerror(errno));
1123                                 tcpconn_destroy(p->tcp_conn);
1124                         }
1125                 }
1126         }
1127         q->crt=t;
1128 }
1129 #else
1130 #define send_fd_queue_run(q)
1131 #endif
1132
1133
1134
1135 /* handles io from a tcp child process
1136  * params: tcp_c - pointer in the tcp_children array, to the entry for
1137  *                 which an io event was detected 
1138  *         fd_i  - fd index in the fd_array (usefull for optimizing
1139  *                 io_watch_deletes)
1140  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
1141  *           io events queued), >0 on success. success/error refer only to
1142  *           the reads from the fd.
1143  */
1144 inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
1145 {
1146         struct tcp_connection* tcpconn;
1147         long response[2];
1148         int cmd;
1149         int bytes;
1150         
1151         if (tcp_c->unix_sock<=0){
1152                 /* (we can't have a fd==0, 0 is never closed )*/
1153                 LOG(L_CRIT, "BUG: handle_tcp_child: fd %d for %d "
1154                                 "(pid %d, ser no %d)\n", tcp_c->unix_sock,
1155                                 (int)(tcp_c-&tcp_children[0]), tcp_c->pid, tcp_c->proc_no);
1156                 goto error;
1157         }
1158         /* read until sizeof(response)
1159          * (this is a SOCK_STREAM so read is not atomic) */
1160         bytes=recv_all(tcp_c->unix_sock, response, sizeof(response), MSG_DONTWAIT);
1161         if (bytes<(int)sizeof(response)){
1162                 if (bytes==0){
1163                         /* EOF -> bad, child has died */
1164                         DBG("DBG: handle_tcp_child: dead tcp child %d (pid %d, no %d)"
1165                                         " (shutting down?)\n", (int)(tcp_c-&tcp_children[0]), 
1166                                         tcp_c->pid, tcp_c->proc_no );
1167                         /* don't listen on it any more */
1168                         io_watch_del(&io_h, tcp_c->unix_sock, fd_i, 0); 
1169                         goto error; /* eof. so no more io here, it's ok to return error */
1170                 }else if (bytes<0){
1171                         /* EAGAIN is ok if we try to empty the buffer
1172                          * e.g.: SIGIO_RT overflow mode or EPOLL ET */
1173                         if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
1174                                 LOG(L_CRIT, "ERROR: handle_tcp_child: read from tcp child %ld "
1175                                                 " (pid %d, no %d) %s [%d]\n",
1176                                                 (long)(tcp_c-&tcp_children[0]), tcp_c->pid,
1177                                                 tcp_c->proc_no, strerror(errno), errno );
1178                         }else{
1179                                 bytes=0;
1180                         }
1181                         /* try to ignore ? */
1182                         goto end;
1183                 }else{
1184                         /* should never happen */
1185                         LOG(L_CRIT, "BUG: handle_tcp_child: too few bytes received (%d)\n",
1186                                         bytes );
1187                         bytes=0; /* something was read so there is no error; otoh if
1188                                           receive_fd returned less then requested => the receive
1189                                           buffer is empty => no more io queued on this fd */
1190                         goto end;
1191                 }
1192         }
1193         
1194         DBG("handle_tcp_child: reader response= %lx, %ld from %d \n",
1195                                         response[0], response[1], (int)(tcp_c-&tcp_children[0]));
1196         cmd=response[1];
1197         tcpconn=(struct tcp_connection*)response[0];
1198         if (tcpconn==0){
1199                 /* should never happen */
1200                 LOG(L_CRIT, "BUG: handle_tcp_child: null tcpconn pointer received"
1201                                  " from tcp child %d (pid %d): %lx, %lx\n",
1202                                         (int)(tcp_c-&tcp_children[0]), tcp_c->pid,
1203                                         response[0], response[1]) ;
1204                 goto end;
1205         }
1206         switch(cmd){
1207                 case CONN_RELEASE:
1208                         tcp_c->busy--;
1209                         if (tcpconn->state==S_CONN_BAD){ 
1210                                 tcpconn_destroy(tcpconn);
1211                                 break;
1212                         }
1213                         /* update the timeout*/
1214                         tcpconn->timeout=get_ticks()+tcp_con_lifetime;
1215                         tcpconn_put(tcpconn);
1216                         /* must be after the de-ref*/
1217                         io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1218                         tcpconn->flags&=~F_CONN_REMOVED;
1219                         DBG("handle_tcp_child: CONN_RELEASE  %p refcnt= %d\n", 
1220                                                                                         tcpconn, tcpconn->refcnt);
1221                         break;
1222                 case CONN_ERROR:
1223                 case CONN_DESTROY:
1224                 case CONN_EOF:
1225                         /* WARNING: this will auto-dec. refcnt! */
1226                                 tcp_c->busy--;
1227                                 /* main doesn't listen on it => we don't have to delete it
1228                                  if (tcpconn->s!=-1)
1229                                         io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
1230                                 */
1231                                 tcpconn_destroy(tcpconn); /* closes also the fd */
1232                                 break;
1233                 default:
1234                                 LOG(L_CRIT, "BUG: handle_tcp_child:  unknown cmd %d"
1235                                                                         " from tcp reader %d\n",
1236                                                                         cmd, (int)(tcp_c-&tcp_children[0]));
1237         }
1238 end:
1239         return bytes;
1240 error:
1241         return -1;
1242 }
1243
1244
1245
1246 /* handles io from a "generic" ser process (get fd or new_fd from a tcp_send)
1247  * 
1248  * params: p     - pointer in the ser processes array (pt[]), to the entry for
1249  *                 which an io event was detected
1250  *         fd_i  - fd index in the fd_array (usefull for optimizing
1251  *                 io_watch_deletes)
1252  * returns:  handle_* return convention:
1253  *          -1 on error reading from the fd,
1254  *           0 on EAGAIN  or when no  more io events are queued 
1255  *             (receive buffer empty),
1256  *           >0 on successfull reads from the fd (the receive buffer might
1257  *             be non-empty).
1258  */
1259 inline static int handle_ser_child(struct process_table* p, int fd_i)
1260 {
1261         struct tcp_connection* tcpconn;
1262         long response[2];
1263         int cmd;
1264         int bytes;
1265         int ret;
1266         int fd;
1267         
1268         ret=-1;
1269         if (p->unix_sock<=0){
1270                 /* (we can't have a fd==0, 0 is never closed )*/
1271                 LOG(L_CRIT, "BUG: handle_ser_child: fd %d for %d "
1272                                 "(pid %d)\n", p->unix_sock, (int)(p-&pt[0]), p->pid);
1273                 goto error;
1274         }
1275                         
1276         /* get all bytes and the fd (if transmitted)
1277          * (this is a SOCK_STREAM so read is not atomic) */
1278         bytes=receive_fd(p->unix_sock, response, sizeof(response), &fd,
1279                                                 MSG_DONTWAIT);
1280         if (bytes<(int)sizeof(response)){
1281                 /* too few bytes read */
1282                 if (bytes==0){
1283                         /* EOF -> bad, child has died */
1284                         DBG("DBG: handle_ser_child: dead child %d, pid %d"
1285                                         " (shutting down?)\n", (int)(p-&pt[0]), p->pid);
1286                         /* don't listen on it any more */
1287                         io_watch_del(&io_h, p->unix_sock, fd_i, 0);
1288                         goto error; /* child dead => no further io events from it */
1289                 }else if (bytes<0){
1290                         /* EAGAIN is ok if we try to empty the buffer
1291                          * e.g: SIGIO_RT overflow mode or EPOLL ET */
1292                         if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
1293                                 LOG(L_CRIT, "ERROR: handle_ser_child: read from child %d  "
1294                                                 "(pid %d):  %s [%d]\n", (int)(p-&pt[0]), p->pid,
1295                                                 strerror(errno), errno);
1296                                 ret=-1;
1297                         }else{
1298                                 ret=0;
1299                         }
1300                         /* try to ignore ? */
1301                         goto end;
1302                 }else{
1303                         /* should never happen */
1304                         LOG(L_CRIT, "BUG: handle_ser_child: too few bytes received (%d)\n",
1305                                         bytes );
1306                         ret=0; /* something was read so there is no error; otoh if
1307                                           receive_fd returned less then requested => the receive
1308                                           buffer is empty => no more io queued on this fd */
1309                         goto end;
1310                 }
1311         }
1312         ret=1; /* something was received, there might be more queued */
1313         DBG("handle_ser_child: read response= %lx, %ld, fd %d from %d (%d)\n",
1314                                         response[0], response[1], fd, (int)(p-&pt[0]), p->pid);
1315         cmd=response[1];
1316         tcpconn=(struct tcp_connection*)response[0];
1317         if (tcpconn==0){
1318                 LOG(L_CRIT, "BUG: handle_ser_child: null tcpconn pointer received"
1319                                  " from child %d (pid %d): %lx, %lx\n",
1320                                         (int)(p-&pt[0]), p->pid, response[0], response[1]) ;
1321                 goto end;
1322         }
1323         switch(cmd){
1324                 case CONN_ERROR:
1325                         if (!(tcpconn->flags & F_CONN_REMOVED) && (tcpconn->s!=-1)){
1326                                 io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
1327                                 tcpconn->flags|=F_CONN_REMOVED;
1328                         }
1329                         tcpconn_destroy(tcpconn); /* will close also the fd */
1330                         break;
1331                 case CONN_GET_FD:
1332                         /* send the requested FD  */
1333                         /* WARNING: take care of setting refcnt properly to
1334                          * avoid race condition */
1335                         if (send_fd(p->unix_sock, &tcpconn, sizeof(tcpconn),
1336                                                         tcpconn->s)<=0){
1337                                 LOG(L_ERR, "ERROR: handle_ser_child: send_fd failed\n");
1338                         }
1339                         break;
1340                 case CONN_NEW:
1341                         /* update the fd in the requested tcpconn*/
1342                         /* WARNING: take care of setting refcnt properly to
1343                          * avoid race condition */
1344                         if (fd==-1){
1345                                 LOG(L_CRIT, "BUG: handle_ser_child: CONN_NEW:"
1346                                                         " no fd received\n");
1347                                 break;
1348                         }
1349                         (*tcp_connections_no)++;
1350                         tcpconn->s=fd;
1351                         /* add tcpconn to the list*/
1352                         tcpconn_add(tcpconn);
1353                         /* update the timeout*/
1354                         tcpconn->timeout=get_ticks()+tcp_con_lifetime;
1355                         io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1356                         tcpconn->flags&=~F_CONN_REMOVED;
1357                         break;
1358                 default:
1359                         LOG(L_CRIT, "BUG: handle_ser_child: unknown cmd %d\n", cmd);
1360         }
1361 end:
1362         return ret;
1363 error:
1364         return -1;
1365 }
1366
1367
1368
1369 /* sends a tcpconn + fd to a choosen child */
1370 inline static int send2child(struct tcp_connection* tcpconn)
1371 {
1372         int i;
1373         int min_busy;
1374         int idx;
1375         static int crt=0; /* current child */
1376         int last;
1377         
1378         min_busy=tcp_children[0].busy;
1379         idx=0;
1380         last=crt+tcp_children_no;
1381         for (; crt<last; crt++){
1382                 i=crt%tcp_children_no;
1383                 if (!tcp_children[i].busy){
1384                         idx=i;
1385                         min_busy=0;
1386                         break;
1387                 }else if (min_busy>tcp_children[i].busy){
1388                         min_busy=tcp_children[i].busy;
1389                         idx=i;
1390                 }
1391         }
1392         crt=idx+1; /* next time we start with crt%tcp_children_no */
1393         
1394         tcp_children[idx].busy++;
1395         tcp_children[idx].n_reqs++;
1396         if (min_busy){
1397                 DBG("WARNING: send2child: no free tcp receiver, "
1398                                 " connection passed to the least busy one (%d)\n",
1399                                 min_busy);
1400         }
1401         DBG("send2child: to tcp child %d %d(%d), %p\n", idx, 
1402                                         tcp_children[idx].proc_no,
1403                                         tcp_children[idx].pid, tcpconn);
1404         /* first make sure this child doesn't have pending request for
1405          * tcp_main (to avoid a possible deadlock: e.g. child wants to
1406          * send a release command, but the master fills its socket buffer
1407          * with new connection commands => deadlock) */
1408         /* answer tcp_send requests first */
1409         while(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0);
1410         /* process tcp readers requests */
1411         while(handle_tcp_child(&tcp_children[idx], -1)>0);
1412                 
1413 #ifdef SEND_FD_QUEUE
1414         /* if queue full, try to queue the io */
1415         if (send_fd(tcp_children[idx].unix_sock, &tcpconn, sizeof(tcpconn),
1416                         tcpconn->s)<=0){
1417                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK)){
1418                         /* FIXME: remove after debugging */
1419                          LOG(L_CRIT, "INFO: tcp child %d, socket %d: queue full,"
1420                                                 " %d requests queued (total handled %d)\n",
1421                                         idx, tcp_children[idx].unix_sock, min_busy,
1422                                         tcp_children[idx].n_reqs-1);
1423                         if (send_fd_queue_add(&send2child_q, tcp_children[idx].unix_sock, 
1424                                                 tcpconn)!=0){
1425                                 LOG(L_ERR, "ERROR: send2child: queue send op. failed\n");
1426                                 return -1;
1427                         }
1428                 }else{
1429                         LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
1430                         return -1;
1431                 }
1432         }
1433 #else
1434         if (send_fd(tcp_children[idx].unix_sock, &tcpconn, sizeof(tcpconn),
1435                         tcpconn->s)<=0){
1436                 LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
1437                 return -1;
1438         }
1439 #endif
1440         
1441         return 0;
1442 }
1443
1444
1445
1446 /* handles a new connection, called internally by tcp_main_loop/handle_io.
1447  * params: si - pointer to one of the tcp socket_info structures on which
1448  *              an io event was detected (connection attempt)
1449  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
1450  *           io events queued), >0 on success. success/error refer only to
1451  *           the accept.
1452  */
1453 static inline int handle_new_connect(struct socket_info* si)
1454 {
1455         union sockaddr_union su;
1456         struct tcp_connection* tcpconn;
1457         socklen_t su_len;
1458         int new_sock;
1459         
1460         /* got a connection on r */
1461         su_len=sizeof(su);
1462         new_sock=accept(si->socket, &(su.s), &su_len);
1463         if (new_sock==-1){
1464                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK))
1465                         return 0;
1466                 LOG(L_ERR,  "WARNING: handle_new_connect: error while accepting"
1467                                 " connection(%d): %s\n", errno, strerror(errno));
1468                 return -1;
1469         }
1470         if (*tcp_connections_no>=tcp_max_connections){
1471                 LOG(L_ERR, "ERROR: maximum number of connections exceeded: %d/%d\n",
1472                                         *tcp_connections_no, tcp_max_connections);
1473                 close(new_sock);
1474                 return 1; /* success, because the accept was succesfull */
1475         }
1476         if (init_sock_opt(new_sock)<0){
1477                 LOG(L_ERR, "ERROR: handle_new_connect: init_sock_opt failed\n");
1478                 close(new_sock);
1479                 return 1; /* success, because the accept was succesfull */
1480         }
1481         (*tcp_connections_no)++;
1482         
1483         /* add socket to list */
1484         tcpconn=tcpconn_new(new_sock, &su, si, si->proto, S_CONN_ACCEPT);
1485         if (tcpconn){
1486 #ifdef TCP_PASS_NEW_CONNECTION_ON_DATA
1487                 io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1488                 tcpconn->flags&=~F_CONN_REMOVED;
1489                 tcpconn_add(tcpconn);
1490 #else
1491                 tcpconn->refcnt++; /* safe, not yet available to the
1492                                                           outside world */
1493                 tcpconn_add(tcpconn);
1494                 DBG("handle_new_connect: new connection: %p %d flags: %04x\n",
1495                         tcpconn, tcpconn->s, tcpconn->flags);
1496                 /* pass it to a child */
1497                 if(send2child(tcpconn)<0){
1498                         LOG(L_ERR,"ERROR: handle_new_connect: no children "
1499                                         "available\n");
1500                         tcpconn_destroy(tcpconn);
1501                 }
1502 #endif
1503         }else{ /*tcpconn==0 */
1504                 LOG(L_ERR, "ERROR: handle_new_connect: tcpconn_new failed, "
1505                                 "closing socket\n");
1506                 close(new_sock);
1507                 (*tcp_connections_no)--;
1508         }
1509         return 1; /* accept() was succesfull */
1510 }
1511
1512
1513
1514 /* handles an io event on one of the watched tcp connections
1515  * 
1516  * params: tcpconn - pointer to the tcp_connection for which we have an io ev.
1517  *         fd_i    - index in the fd_array table (needed for delete)
1518  * returns:  handle_* return convention, but on success it always returns 0
1519  *           (because it's one-shot, after a succesfull execution the fd is
1520  *            removed from tcp_main's watch fd list and passed to a child =>
1521  *            tcp_main is not interested in further io events that might be
1522  *            queued for this fd)
1523  */
1524 inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, int fd_i)
1525 {
1526         /*  is refcnt!=0 really necessary? 
1527          *  No, in fact it's a bug: I can have the following situation: a send only
1528          *   tcp connection used by n processes simultaneously => refcnt = n. In 
1529          *   the same time I can have a read event and this situation is perfectly
1530          *   valid. -- andrei
1531          */
1532 #if 0
1533         if ((tcpconn->refcnt!=0)){
1534                 /* FIXME: might be valid for sigio_rt iff fd flags are not cleared
1535                  *        (there is a short window in which it could generate a sig
1536                  *         that would be catched by tcp_main) */
1537                 LOG(L_CRIT, "BUG: handle_tcpconn_ev: io event on referenced"
1538                                         " tcpconn (%p), refcnt=%d, fd=%d\n",
1539                                         tcpconn, tcpconn->refcnt, tcpconn->s);
1540                 return -1;
1541         }
1542 #endif
1543         /* pass it to child, so remove it from the io watch list */
1544         DBG("handle_tcpconn_ev: data available on %p %d\n", tcpconn, tcpconn->s);
1545         if (io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1) goto error;
1546         tcpconn->flags|=F_CONN_REMOVED;
1547         tcpconn_ref(tcpconn); /* refcnt ++ */
1548         if (send2child(tcpconn)<0){
1549                 LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n");
1550                 tcpconn_destroy(tcpconn);
1551 #if 0
1552                 TCPCONN_LOCK;
1553                 tcpconn->refcnt--;
1554                 if (tcpconn->refcnt==0){
1555                         fd=tcpconn->s;
1556                         _tcpconn_rm(tcpconn);
1557                         close(fd);
1558                 }else tcpconn->timeout=0; /* force expire*/
1559                 TCPCONN_UNLOCK;
1560 #endif
1561         }
1562         return 0; /* we are not interested in possibly queued io events, 
1563                                  the fd was either passed to a child, or closed */
1564 error:
1565         return -1;
1566 }
1567
1568
1569
1570 /* generic handle io routine, it will call the appropiate
1571  *  handle_xxx() based on the fd_map type
1572  *
1573  * params:  fm  - pointer to a fd hash entry
1574  *          idx - index in the fd_array (or -1 if not known)
1575  * return: -1 on error
1576  *          0 on EAGAIN or when by some other way it is known that no more 
1577  *            io events are queued on the fd (the receive buffer is empty).
1578  *            Usefull to detect when there are no more io events queued for
1579  *            sigio_rt, epoll_et, kqueue.
1580  *         >0 on successfull read from the fd (when there might be more io
1581  *            queued -- the receive buffer might still be non-empty)
1582  */
1583 inline static int handle_io(struct fd_map* fm, int idx)
1584 {       
1585         int ret;
1586         
1587         switch(fm->type){
1588                 case F_SOCKINFO:
1589                         ret=handle_new_connect((struct socket_info*)fm->data);
1590                         break;
1591                 case F_TCPCONN:
1592                         ret=handle_tcpconn_ev((struct tcp_connection*)fm->data, idx);
1593                         break;
1594                 case F_TCPCHILD:
1595                         ret=handle_tcp_child((struct tcp_child*)fm->data, idx);
1596                         break;
1597                 case F_PROC:
1598                         ret=handle_ser_child((struct process_table*)fm->data, idx);
1599                         break;
1600                 case F_NONE:
1601                         LOG(L_CRIT, "BUG: handle_io: empty fd map\n");
1602                         goto error;
1603                 default:
1604                         LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type); 
1605                         goto error;
1606         }
1607         return ret;
1608 error:
1609         return -1;
1610 }
1611
1612
1613
1614 /* very inefficient for now - FIXME
1615  * keep in sync with tcpconn_destroy, the "delete" part should be
1616  * the same except for io_watch_del..*/
1617 static inline void tcpconn_timeout(int force)
1618 {
1619         static int prev_ticks=0;
1620         struct tcp_connection *c, *next;
1621         unsigned int ticks;
1622         unsigned h;
1623         int fd;
1624         
1625         
1626         ticks=get_ticks();
1627         if ((ticks==prev_ticks) && !force) return;
1628         prev_ticks=ticks;
1629         TCPCONN_LOCK; /* fixme: we can lock only on delete IMO */
1630         for(h=0; h<TCP_ID_HASH_SIZE; h++){
1631                 c=tcpconn_id_hash[h];
1632                 while(c){
1633                         next=c->id_next;
1634                         if (force ||((c->refcnt==0) && ((int)(ticks-c->timeout)>=0))){
1635                                 if (!force)
1636                                         DBG("tcpconn_timeout: timeout for hash=%d - %p"
1637                                                         " (%d > %d)\n", h, c, ticks, c->timeout);
1638                                 fd=c->s;
1639 #ifdef USE_TLS
1640                                 if (c->type==PROTO_TLS)
1641                                         tls_close(c, fd);
1642 #endif
1643                                 _tcpconn_rm(c);
1644                                 if ((fd>0)&&(c->refcnt==0)) {
1645                                         if (!(c->flags & F_CONN_REMOVED)){
1646                                                 io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
1647                                                 c->flags|=F_CONN_REMOVED;
1648                                         }
1649                                         close(fd);
1650                                 }
1651                                 (*tcp_connections_no)--;
1652                         }
1653                         c=next;
1654                 }
1655         }
1656         TCPCONN_UNLOCK;
1657 }
1658
1659
1660
1661 /* tcp main loop */
1662 void tcp_main_loop()
1663 {
1664
1665         struct socket_info* si;
1666         int r;
1667         
1668         /* init send fd queues (here because we want mem. alloc only in the tcp
1669          *  process */
1670 #ifdef SEND_FD_QUEUE
1671         if (init_send_fd_queues()<0){
1672                 LOG(L_CRIT, "ERROR: init_tcp: could not init send fd queues\n");
1673                 goto error;
1674         }
1675 #endif
1676         /* init io_wait (here because we want the memory allocated only in
1677          * the tcp_main process) */
1678         
1679         /* FIXME: TODO: make tcp_max_fd_no a config param */
1680         if  (init_io_wait(&io_h, tcp_max_fd_no, tcp_poll_method)<0)
1681                 goto error;
1682         /* init: start watching all the fds*/
1683         
1684         /* add all the sockets we listens on for connections */
1685         for (si=tcp_listen; si; si=si->next){
1686                 if ((si->proto==PROTO_TCP) &&(si->socket!=-1)){
1687                         if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
1688                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1689                                                         "listen socket to the fd list\n");
1690                                 goto error;
1691                         }
1692                 }else{
1693                         LOG(L_CRIT, "BUG: tcp_main_loop: non tcp address in tcp_listen\n");
1694                 }
1695         }
1696 #ifdef USE_TLS
1697         if (!tls_disable){
1698                 for (si=tls_listen; si; si=si->next){
1699                         if ((si->proto==PROTO_TLS) && (si->socket!=-1)){
1700                                 if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
1701                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1702                                                         "tls listen socket to the fd list\n");
1703                                         goto error;
1704                                 }
1705                         }else{
1706                                 LOG(L_CRIT, "BUG: tcp_main_loop: non tls address"
1707                                                 " in tls_listen\n");
1708                         }
1709                 }
1710         }
1711 #endif
1712         /* add all the unix sockets used for communcation with other ser processes
1713          *  (get fd, new connection a.s.o) */
1714         for (r=1; r<process_no; r++){
1715                 if (pt[r].unix_sock>0) /* we can't have 0, we never close it!*/
1716                         if (io_watch_add(&io_h, pt[r].unix_sock, F_PROC, &pt[r])<0){
1717                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1718                                                         "process %d unix socket to the fd list\n", r);
1719                                         goto error;
1720                         }
1721         }
1722         /* add all the unix sokets used for communication with the tcp childs */
1723         for (r=0; r<tcp_children_no; r++){
1724                 if (tcp_children[r].unix_sock>0)/*we can't have 0, we never close it!*/
1725                         if (io_watch_add(&io_h, tcp_children[r].unix_sock, F_TCPCHILD,
1726                                                         &tcp_children[r]) <0){
1727                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1728                                                 "tcp child %d unix socket to the fd list\n", r);
1729                                 goto error;
1730                         }
1731         }
1732         
1733         /* main loop */
1734         switch(io_h.poll_method){
1735                 case POLL_POLL:
1736                         while(1){
1737                                 /* wait and process IO */
1738                                 io_wait_loop_poll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0); 
1739                                 send_fd_queue_run(&send2child_q); /* then new io */
1740                                 /* remove old connections */
1741                                 tcpconn_timeout(0);
1742                         }
1743                         break;
1744 #ifdef HAVE_SELECT
1745                 case POLL_SELECT:
1746                         while(1){
1747                                 io_wait_loop_select(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1748                                 send_fd_queue_run(&send2child_q); /* then new io */
1749                                 tcpconn_timeout(0);
1750                         }
1751                         break;
1752 #endif
1753 #ifdef HAVE_SIGIO_RT
1754                 case POLL_SIGIO_RT:
1755                         while(1){
1756                                 io_wait_loop_sigio_rt(&io_h, TCP_MAIN_SELECT_TIMEOUT);
1757                                 send_fd_queue_run(&send2child_q); /* then new io */
1758                                 tcpconn_timeout(0);
1759                         }
1760                         break;
1761 #endif
1762 #ifdef HAVE_EPOLL
1763                 case POLL_EPOLL_LT:
1764                         while(1){
1765                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1766                                 send_fd_queue_run(&send2child_q); /* then new io */
1767                                 tcpconn_timeout(0);
1768                         }
1769                         break;
1770                 case POLL_EPOLL_ET:
1771                         while(1){
1772                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 1);
1773                                 send_fd_queue_run(&send2child_q); /* then new io */
1774                                 tcpconn_timeout(0);
1775                         }
1776                         break;
1777 #endif
1778 #ifdef HAVE_KQUEUE
1779                 case POLL_KQUEUE:
1780                         while(1){
1781                                 io_wait_loop_kqueue(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1782                                 send_fd_queue_run(&send2child_q); /* then new io */
1783                                 tcpconn_timeout(0);
1784                         }
1785                         break;
1786 #endif
1787 #ifdef HAVE_DEVPOLL
1788                 case POLL_DEVPOLL:
1789                         while(1){
1790                                 io_wait_loop_devpoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1791                                 send_fd_queue_run(&send2child_q); /* then new io */
1792                                 tcpconn_timeout(0);
1793                         }
1794                         break;
1795 #endif
1796                 default:
1797                         LOG(L_CRIT, "BUG: tcp_main_loop: no support for poll method "
1798                                         " %s (%d)\n", 
1799                                         poll_method_name(io_h.poll_method), io_h.poll_method);
1800                         goto error;
1801         }
1802 error:
1803 #ifdef SEND_FD_QUEUE
1804         destroy_send_fd_queues();
1805 #endif
1806         destroy_io_wait(&io_h);
1807         LOG(L_CRIT, "ERROR: tcp_main_loop: exiting...");
1808         exit(-1);
1809 }
1810
1811
1812
1813 /* cleanup before exit */
1814 void destroy_tcp()
1815 {
1816                 if (tcpconn_id_hash){
1817                         tcpconn_timeout(1); /* force close/expire for all active tcpconns*/
1818                         shm_free(tcpconn_id_hash);
1819                         tcpconn_id_hash=0;
1820                 }
1821                 if (tcp_connections_no){
1822                         shm_free(tcp_connections_no);
1823                         tcp_connections_no=0;
1824                 }
1825                 if (connection_id){
1826                         shm_free(connection_id);
1827                         connection_id=0;
1828                 }
1829                 if (tcpconn_aliases_hash){
1830                         shm_free(tcpconn_aliases_hash);
1831                         tcpconn_aliases_hash=0;
1832                 }
1833                 if (tcpconn_lock){
1834                         lock_destroy(tcpconn_lock);
1835                         lock_dealloc((void*)tcpconn_lock);
1836                         tcpconn_lock=0;
1837                 }
1838                 if (tcp_children){
1839                         pkg_free(tcp_children);
1840                         tcp_children=0;
1841                 }
1842 }
1843
1844
1845
1846 int init_tcp()
1847 {
1848         char* poll_err;
1849         
1850         /* init lock */
1851         tcpconn_lock=lock_alloc();
1852         if (tcpconn_lock==0){
1853                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc lock\n");
1854                 goto error;
1855         }
1856         if (lock_init(tcpconn_lock)==0){
1857                 LOG(L_CRIT, "ERROR: init_tcp: could not init lock\n");
1858                 lock_dealloc((void*)tcpconn_lock);
1859                 tcpconn_lock=0;
1860                 goto error;
1861         }
1862         /* init globals */
1863         tcp_connections_no=shm_malloc(sizeof(int));
1864         if (tcp_connections_no==0){
1865                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
1866                 goto error;
1867         }
1868         *tcp_connections_no=0;
1869         connection_id=shm_malloc(sizeof(int));
1870         if (connection_id==0){
1871                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
1872                 goto error;
1873         }
1874         *connection_id=1;
1875         /* alloc hashtables*/
1876         tcpconn_aliases_hash=(struct tcp_conn_alias**)
1877                         shm_malloc(TCP_ALIAS_HASH_SIZE* sizeof(struct tcp_conn_alias*));
1878         if (tcpconn_aliases_hash==0){
1879                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc address hashtable\n");
1880                 goto error;
1881         }
1882         tcpconn_id_hash=(struct tcp_connection**)shm_malloc(TCP_ID_HASH_SIZE*
1883                                                                 sizeof(struct tcp_connection*));
1884         if (tcpconn_id_hash==0){
1885                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc id hashtable\n");
1886                 goto error;
1887         }
1888         /* init hashtables*/
1889         memset((void*)tcpconn_aliases_hash, 0, 
1890                         TCP_ALIAS_HASH_SIZE * sizeof(struct tcp_conn_alias*));
1891         memset((void*)tcpconn_id_hash, 0, 
1892                         TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
1893         
1894         /* fix config variables */
1895         /* they can have only positive values due the config parser so we can
1896          * ignore most of them */
1897                 poll_err=check_poll_method(tcp_poll_method);
1898         
1899         /* set an appropiate poll method */
1900         if (poll_err || (tcp_poll_method==0)){
1901                 tcp_poll_method=choose_poll_method();
1902                 if (poll_err){
1903                         LOG(L_ERR, "ERROR: init_tcp: %s, using %s instead\n",
1904                                         poll_err, poll_method_name(tcp_poll_method));
1905                 }else{
1906                         LOG(L_INFO, "init_tcp: using %s as the io watch method"
1907                                         " (auto detected)\n", poll_method_name(tcp_poll_method));
1908                 }
1909         }else{
1910                         LOG(L_INFO, "init_tcp: using %s io watch method (config)\n",
1911                                         poll_method_name(tcp_poll_method));
1912         }
1913         
1914         return 0;
1915 error:
1916         /* clean-up */
1917         destroy_tcp();
1918         return -1;
1919 }
1920
1921
1922
1923 /* returns -1 on error */
1924 static int set_non_blocking(int s)
1925 {
1926         int flags;
1927         /* non-blocking */
1928         flags=fcntl(s, F_GETFL);
1929         if (flags==-1){
1930                 LOG(L_ERR, "ERROR: set_non_blocking: fnctl failed: (%d) %s\n",
1931                                 errno, strerror(errno));
1932                 goto error;
1933         }
1934         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
1935                 LOG(L_ERR, "ERROR: set_non_blocking: fcntl: set non-blocking failed:"
1936                                 " (%d) %s\n", errno, strerror(errno));
1937                 goto error;
1938         }
1939         return 0;
1940 error:
1941         return -1;
1942 }
1943
1944
1945
1946 /* starts the tcp processes */
1947 int tcp_init_children()
1948 {
1949         int r;
1950         int sockfd[2];
1951         int reader_fd[2]; /* for comm. with the tcp children read  */
1952         pid_t pid;
1953         struct socket_info *si;
1954         
1955         /* estimate max fd. no:
1956          * 1 tcp send unix socket/all_proc, 
1957          *  + 1 udp sock/udp proc + 1 tcp_child sock/tcp child*
1958          *  + no_listen_tcp */
1959         for(r=0, si=tcp_listen; si; si=si->next, r++);
1960 #ifdef USE_TLS
1961         if (! tls_disable)
1962                 for (si=tls_listen; si; si=si->next, r++);
1963 #endif
1964         
1965         tcp_max_fd_no=process_count*2 +r-1 /* timer */ +3; /* stdin/out/err*/
1966         /* max connections can be temporarily exceeded with process_count
1967          * - tcp_main (tcpconn_connect called simultaneously in all all the 
1968          *  processes) */
1969         tcp_max_fd_no+=tcp_max_connections+process_count-1 /* tcp main */;
1970         
1971         /* alloc the children array */
1972         tcp_children=pkg_malloc(sizeof(struct tcp_child)*tcp_children_no);
1973         if (tcp_children==0){
1974                         LOG(L_ERR, "ERROR: tcp_init_children: out of memory\n");
1975                         goto error;
1976         }
1977         /* create the tcp sock_info structures */
1978         /* copy the sockets --moved to main_loop*/
1979         
1980         /* fork children & create the socket pairs*/
1981         for(r=0; r<tcp_children_no; r++){
1982                 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockfd)<0){
1983                         LOG(L_ERR, "ERROR: tcp_main: socketpair failed: %s\n",
1984                                         strerror(errno));
1985                         goto error;
1986                 }
1987                 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reader_fd)<0){
1988                         LOG(L_ERR, "ERROR: tcp_main: socketpair failed: %s\n",
1989                                         strerror(errno));
1990                         goto error;
1991                 }
1992 #ifdef TCP_CHILD_NON_BLOCKING
1993                 if ((set_non_blocking(reader_fd[0])<0) || 
1994                         (set_non_blocking(reader_fd[1])<0)){
1995                         LOG(L_ERR, "ERROR: tcp_main: failed to set non blocking"
1996                                                 "on child sockets\n");
1997                         /* continue, it's not critical (it will go slower under
1998                          * very high connection rates) */
1999                 }
2000 #endif
2001                 
2002                 process_no++;
2003                 child_rank++;
2004                 pid=fork();
2005                 if (pid<0){
2006                         LOG(L_ERR, "ERROR: tcp_main: fork failed: %s\n",
2007                                         strerror(errno));
2008                         goto error;
2009                 }else if (pid>0){
2010                         /* parent */
2011                         close(sockfd[1]);
2012                         close(reader_fd[1]);
2013                         tcp_children[r].pid=pid;
2014                         tcp_children[r].proc_no=process_no;
2015                         tcp_children[r].busy=0;
2016                         tcp_children[r].n_reqs=0;
2017                         tcp_children[r].unix_sock=reader_fd[0];
2018                         pt[process_no].pid=pid;
2019                         pt[process_no].unix_sock=sockfd[0];
2020                         pt[process_no].idx=r;
2021                         strncpy(pt[process_no].desc, "tcp receiver", MAX_PT_DESC);
2022                 }else{
2023                         /* child */
2024                         close(sockfd[0]);
2025                         unix_tcp_sock=sockfd[1];
2026                         bind_address=0; /* force a SEGFAULT if someone uses a non-init.
2027                                                            bind address on tcp */
2028                         /* record pid twice to avoid the child using it, before
2029                          * parent gets a chance to set it*/
2030                         pt[process_no].pid=getpid();
2031                         if (init_child(child_rank) < 0) {
2032                                 LOG(L_ERR, "init_children failed\n");
2033                                 goto error;
2034                         }
2035                         tcp_receive_loop(reader_fd[1]);
2036                 }
2037         }
2038         return 0;
2039 error:
2040         return -1;
2041 }
2042
2043
2044
2045 void tcp_get_info(struct tcp_gen_info *ti)
2046 {
2047         ti->tcp_readers=tcp_children_no;
2048         ti->tcp_max_connections=tcp_max_connections;
2049         ti->tcp_connections_no=*tcp_connections_no;
2050 }
2051
2052 #endif