- tcp children control fds are now non-blocking
[sip-router] / tcp_main.c
1 /*
2  * $Id$
3  *
4  * Copyright (C) 2001-2003 FhG Fokus
5  *
6  * This file is part of ser, a free SIP server.
7  *
8  * ser is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version
12  *
13  * For a license to use the ser software under conditions
14  * other than those described here, or to purchase support for this
15  * software, please contact iptel.org by e-mail at the following addresses:
16  *    info@iptel.org
17  *
18  * ser is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26  */
27 /*
28  * History:
29  * --------
30  *  2002-11-29  created by andrei
31  *  2002-12-11  added tcp_send (andrei)
32  *  2003-01-20  locking fixes, hashtables (andrei)
33  *  2003-02-20  s/lock_t/gen_lock_t/ to avoid a conflict on solaris (andrei)
34  *  2003-02-25  Nagle is disabled if -DDISABLE_NAGLE (andrei)
35  *  2003-03-29  SO_REUSEADDR before calling bind to allow
36  *              server restart, Nagle set on the (hopefully) 
37  *              correct socket (jiri)
38  *  2003-03-31  always try to find the corresponding tcp listen socket for
39  *               a temp. socket and store it in *->bind_address: added
40  *               find_tcp_si, modified tcpconn_connect (andrei)
41  *  2003-04-14  set sockopts to TOS low delay (andrei)
42  *  2003-06-30  moved tcp new connect checking & handling to
43  *               handle_new_connect (andrei)
44  *  2003-07-09  tls_close called before closing the tcp connection (andrei)
45  *  2003-10-24  converted to the new socket_info lists (andrei)
46  *  2003-10-27  tcp port aliases support added (andrei)
47  *  2003-11-04  always lock before manipulating refcnt; sendchild
48  *              does not inc refcnt by itself anymore (andrei)
49  *  2003-11-07  different unix sockets are used for fd passing
50  *              to/from readers/writers (andrei)
51  *  2003-11-17  handle_new_connect & tcp_connect will close the 
52  *              new socket if tcpconn_new return 0 (e.g. out of mem) (andrei)
53  *  2003-11-28  tcp_blocking_write & tcp_blocking_connect added (andrei)
54  *  2004-11-08  dropped find_tcp_si and replaced with find_si (andrei)
55  *  2005-06-07  new tcp optimized code, supports epoll (LT), sigio + real time
56  *               signals, poll & select (andrei)
57  *  2005-06-26  *bsd kqueue support (andrei)
58  *  2005-07-04  solaris /dev/poll support (andrei)
59  *  2005-07-08  tcp_max_connections, tcp_connection_lifetime, don't accept
60  *               more connections if tcp_max_connections is exceeded (andrei)
61  *  2005-10-21  cleanup all the open connections on exit
62  *              decrement the no. of open connections on timeout too    (andrei)
    *  2006-01-30  queue send_fd request and execute them at the end of the
63  *              poll loop  (#ifdef) (andrei)
64  *              process all children requests, before attempting to send
65  *              them new stuff (fixes some deadlocks) (andrei)
66  *  2006-02-03  timers are run only once per s (andrei)
67  *              tcp children fds can be non-blocking; send fds are queues on
68  *              EAGAIN (andrei)
69  */
70
71
72 #ifdef USE_TCP
73
74
75 #ifndef SHM_MEM
76 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
77 #endif
78
79 #include <sys/time.h>
80 #include <sys/types.h>
81 #include <sys/select.h>
82 #include <sys/socket.h>
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/ip.h>
86 #include <netinet/tcp.h>
87 #include <sys/uio.h>  /* writev*/
88 #include <netdb.h>
89 #include <stdlib.h> /*exit() */
90
91 #include <unistd.h>
92
93 #include <errno.h>
94 #include <string.h>
95
96 #ifdef HAVE_SELECT
97 #include <sys/select.h>
98 #endif
99 #include <sys/poll.h>
100
101
102 #include "ip_addr.h"
103 #include "pass_fd.h"
104 #include "tcp_conn.h"
105 #include "globals.h"
106 #include "pt.h"
107 #include "locking.h"
108 #include "mem/mem.h"
109 #include "mem/shm_mem.h"
110 #include "timer.h"
111 #include "sr_module.h"
112 #include "tcp_server.h"
113 #include "tcp_init.h"
114 #include "tsend.h"
115 #ifdef USE_TLS
116 #include "tls/tls_server.h"
117 #endif 
118
119 #define local_malloc pkg_malloc
120 #define local_free   pkg_free
121
122 #define HANDLE_IO_INLINE
123 #include "io_wait.h"
124 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
125
126 #define MAX_TCP_CHILDREN 100 /* number of slots in the tcp_children[] array */
127 #define TCP_LISTEN_BACKLOG 1024 /* NOTE(review): presumably the listen()
                                     backlog; not used in this part of the
                                     file -- confirm */
128 #define SEND_FD_QUEUE /* queue send fd requests on EAGAIN, instead of sending 
129                                                         them immediately */
130 #define TCP_CHILD_NON_BLOCKING 
131 #ifdef SEND_FD_QUEUE
132 #ifndef TCP_CHILD_NON_BLOCKING
133 #define TCP_CHILD_NON_BLOCKING /* SEND_FD_QUEUE always turns this on */
134 #endif
135 #define MAX_SEND_FD_QUEUE_SIZE  1024  /* alternative: tcp_max_fd_no */
136 #define SEND_FD_QUEUE_SIZE              128  /* initial size */
137 #define MAX_SEND_FD_RETRIES             3        /* FIXME: increase */
138 #endif
139
140
141 enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
142                                 F_TCPCONN, F_TCPCHILD, F_PROC };
    /* NOTE(review): the remaining values presumably tag tcp connection fds,
     * tcp child control sockets and sockets of other ser processes; their
     * use is outside this part of the file -- confirm */
143
144 struct tcp_child{ /* element type of the tcp_children[] array */
145         pid_t pid; /* NOTE(review): presumably the child's process id -- confirm */
146         int proc_no; /* ser proc_no, for debugging */
147         int unix_sock; /* unix "read child" sock fd */
148         int busy; /* NOTE(review): exact meaning (flag or counter) is not
                        visible in this part of the file -- confirm */
149         int n_reqs; /* number of requests serviced so far */
150 };
151
152
153
154 int tcp_accept_aliases=0; /* by default don't accept aliases */
155 int tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT; /* in s, used by
                                                            tcp_blocking_connect */
156 int tcp_send_timeout=DEFAULT_TCP_SEND_TIMEOUT; /* in s, multiplied by 1000
                                                      before being passed to
                                                      tsend_stream */
157 int tcp_con_lifetime=DEFAULT_TCP_CONNECTION_LIFETIME; /* added to
                                           get_ticks() to compute c->timeout */
158 enum poll_types tcp_poll_method=0; /* by default choose the best method */
159 int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
160 int tcp_max_fd_no=0;
161
162 static int tcp_connections_no=0; /* current open connections */
163
164 /* connection hash table (after ip&port) , includes also aliases */
165 struct tcp_conn_alias** tcpconn_aliases_hash=0;
166 /* connection hash table (after connection id) */
167 struct tcp_connection** tcpconn_id_hash=0;
168 gen_lock_t* tcpconn_lock=0; /* NOTE(review): presumably the lock behind
                                   TCPCONN_LOCK/TCPCONN_UNLOCK -- confirm */
169
170 struct tcp_child tcp_children[MAX_TCP_CHILDREN];
171 static int* connection_id=0; /*  unique for each connection, used for 
172                                                                 quickly finding the corresponding connection
173                                                                 for a reply */
174 int unix_tcp_sock; /* socket over which tcp_send talks to "tcp main"
                          (CONN_NEW, CONN_GET_FD, CONN_ERROR requests) */
175
176 static int tcp_proto_no=-1; /* tcp protocol number as returned by
177                                                            getprotobyname */
178
179 static io_wait_h io_h;
180
181
182
183 /* set all socket/fd options:  disable nagle, tos lowdelay, non-blocking
184  * return -1 on error */
185 static int init_sock_opt(int s)
186 {
187         int flags;
188         int optval;
189         
190 #ifdef DISABLE_NAGLE
191         flags=1;
192         if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
193                                         &flags, sizeof(flags))<0) ){
194                 LOG(L_WARN, "WARNING: init_sock_opt: could not disable Nagle: %s\n",
195                                 strerror(errno));
196         }
197 #endif
198         /* tos*/
199         optval = tos;
200         if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,sizeof(optval)) ==-1){
201                 LOG(L_WARN, "WARNING: init_sock_opt: setsockopt tos: %s\n",
202                                 strerror(errno));
203                 /* continue since this is not critical */
204         }
205         /* non-blocking */
206         flags=fcntl(s, F_GETFL);
207         if (flags==-1){
208                 LOG(L_ERR, "ERROR: init_sock_opt: fnctl failed: (%d) %s\n",
209                                 errno, strerror(errno));
210                 goto error;
211         }
212         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
213                 LOG(L_ERR, "ERROR: init_sock_opt: fcntl: set non-blocking failed:"
214                                 " (%d) %s\n", errno, strerror(errno));
215                 goto error;
216         }
217         return 0;
218 error:
219         return -1;
220 }
221
222
223
224 /* blocking connect on a non-blocking fd; it will timeout after
225  * tcp_connect_timeout 
226  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
227  * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
228  */
229 static int tcp_blocking_connect(int fd, const struct sockaddr *servaddr,
230                                                                 socklen_t addrlen)
231 {
232         int n;
233 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
234         fd_set sel_set;
235         fd_set orig_set;
236         struct timeval timeout;
237 #else
238         struct pollfd pf;
239 #endif
240         int elapsed;
241         int to;
242         int ticks;
243         int err;
244         unsigned int err_len;
245         int poll_err;
246         
247         poll_err=0;
248         to=tcp_connect_timeout;
249         ticks=get_ticks();
250 again:
251         n=connect(fd, servaddr, addrlen);
252         if (n==-1){
253                 if (errno==EINTR){
254                         elapsed=(get_ticks()-ticks)*TIMER_TICK;
255                         if (elapsed<to)         goto again;
256                         else goto error_timeout;
257                 }
258                 if (errno!=EINPROGRESS && errno!=EALREADY){
259                         LOG(L_ERR, "ERROR: tcp_blocking_connect: (%d) %s\n",
260                                         errno, strerror(errno));
261                         goto error;
262                 }
263         }else goto end;
264         
265         /* poll/select loop */
266 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
267                 FD_ZERO(&orig_set);
268                 FD_SET(fd, &orig_set);
269 #else
270                 pf.fd=fd;
271                 pf.events=POLLOUT;
272 #endif
273         while(1){
274                 elapsed=(get_ticks()-ticks)*TIMER_TICK;
275                 if (elapsed<to)
276                         to-=elapsed;
277                 else 
278                         goto error_timeout;
279 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
280                 sel_set=orig_set;
281                 timeout.tv_sec=to;
282                 timeout.tv_usec=0;
283                 n=select(fd+1, 0, &sel_set, 0, &timeout);
284 #else
285                 n=poll(&pf, 1, to*1000);
286 #endif
287                 if (n<0){
288                         if (errno==EINTR) continue;
289                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll/select failed:"
290                                         " (%d) %s\n", errno, strerror(errno));
291                         goto error;
292                 }else if (n==0) /* timeout */ continue;
293 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
294                 if (FD_ISSET(fd, &sel_set))
295 #else
296                 if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){ 
297                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll error: flags %x\n",
298                                         pf.revents);
299                         poll_err=1;
300                 }
301 #endif
302                 {
303                         err_len=sizeof(err);
304                         getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
305                         if ((err==0) && (poll_err==0)) goto end;
306                         if (err!=EINPROGRESS && err!=EALREADY){
307                                 LOG(L_ERR, "ERROR: tcp_blocking_connect: SO_ERROR (%d) %s\n",
308                                                 err, strerror(err));
309                                 goto error;
310                         }
311                 }
312         }
313 error_timeout:
314         /* timeout */
315         LOG(L_ERR, "ERROR: tcp_blocking_connect: timeout %d s elapsed from %d s\n",
316                         elapsed, tcp_connect_timeout);
317 error:
318         return -1;
319 end:
320         return 0;
321 }
322
323
324
#if 0
/* compiled out (tcp_send uses tsend_stream instead).
 * writes all the len bytes from buf on the non-blocking fd, waiting with
 * select() (at most tcp_send_timeout per wait) whenever the socket cannot
 * take more data.
 * returns the initial len on success, -1 on error or send timeout */
static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
								unsigned int len)
{
	int ret;
	fd_set wr_set;
	struct timeval tv;
	int start;
	int total;
	
	total=len;
	for(;;){
		ret=send(fd, buf, len,
#ifdef HAVE_MSG_NOSIGNAL
				MSG_NOSIGNAL
#else
				0
#endif
			);
		if (ret<0){
			if (errno==EINTR) continue; /* interrupted, just retry */
			if (errno!=EAGAIN && errno!=EWOULDBLOCK){
				LOG(L_ERR, "tcp_blocking_write: failed to send: (%d) %s\n",
						errno, strerror(errno));
				return -1;
			}
			/* would block => wait below */
		}else if (ret<len){
			/* partial write: skip over what was already sent */
			buf+=ret;
			len-=ret;
		}else{
			/* everything was written */
			return total;
		}
		/* wait until the socket is writable again */
		for(;;){
			FD_ZERO(&wr_set);
			FD_SET(fd, &wr_set);
			tv.tv_sec=tcp_send_timeout;
			tv.tv_usec=0;
			start=get_ticks();
			ret=select(fd+1, 0, &wr_set, 0, &tv);
			if (ret<0){
				if (errno==EINTR) continue; /* signal, ignore */
				LOG(L_ERR, "ERROR: tcp_blocking_write: select failed: "
						" (%d) %s\n", errno, strerror(errno));
				return -1;
			}
			if (ret==0){
				/* select timed out */
				if (get_ticks()-start>=tcp_send_timeout){
					LOG(L_ERR, "ERROR: tcp_blocking_write: send timeout (%d)\n",
							tcp_send_timeout);
					return -1;
				}
				continue;
			}
			if (FD_ISSET(fd, &wr_set)) break; /* writable, send again */
		}
	}
}
#endif
394
395
396
397 struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
398                                                                         struct socket_info* ba, int type, 
399                                                                         int state)
400 {
401         struct tcp_connection *c;
402         
403         c=(struct tcp_connection*)shm_malloc(sizeof(struct tcp_connection));
404         if (c==0){
405                 LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
406                 goto error;
407         }
408         memset(c, 0, sizeof(struct tcp_connection)); /* zero init */
409         c->s=sock;
410         c->fd=-1; /* not initialized */
411         if (lock_init(&c->write_lock)==0){
412                 LOG(L_ERR, "ERROR: tcpconn_new: init lock failed\n");
413                 goto error;
414         }
415         
416         c->rcv.src_su=*su;
417         
418         c->refcnt=0;
419         su2ip_addr(&c->rcv.src_ip, su);
420         c->rcv.src_port=su_getport(su);
421         c->rcv.bind_address=ba;
422         if (ba){
423                 c->rcv.dst_ip=ba->address;
424                 c->rcv.dst_port=ba->port_no;
425         }
426         print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
427         DBG(     "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
428         init_tcp_req(&c->req);
429         c->id=(*connection_id)++;
430         c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
431         c->rcv.proto_reserved2=0;
432         c->state=state;
433         c->extra_data=0;
434 #ifdef USE_TLS
435         if (type==PROTO_TLS){
436                 if (tls_tcpconn_init(c, sock)==-1) goto error;
437         }else
438 #endif /* USE_TLS*/
439         {
440                 c->type=PROTO_TCP;
441                 c->rcv.proto=PROTO_TCP;
442                 c->timeout=get_ticks()+tcp_con_lifetime;
443         }
444         c->flags|=F_CONN_REMOVED;
445         
446         tcp_connections_no++;
447         return c;
448         
449 error:
450         if (c) shm_free(c);
451         return 0;
452 }
453
454
455
456 struct tcp_connection* tcpconn_connect(union sockaddr_union* server, int type)
457 {
458         int s;
459         struct socket_info* si;
460         union sockaddr_union my_name;
461         socklen_t my_name_len;
462         struct tcp_connection* con;
463         struct ip_addr ip;
464
465         s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
466         if (s==-1){
467                 LOG(L_ERR, "ERROR: tcpconn_connect: socket: (%d) %s\n",
468                                 errno, strerror(errno));
469                 goto error;
470         }
471         if (init_sock_opt(s)<0){
472                 LOG(L_ERR, "ERROR: tcpconn_connect: init_sock_opt failed\n");
473                 goto error;
474         }
475         if (tcp_blocking_connect(s, &server->s, sockaddru_len(*server))<0){
476                 LOG(L_ERR, "ERROR: tcpconn_connect: tcp_blocking_connect failed\n");
477                 goto error;
478         }
479         my_name_len=sizeof(my_name);
480         if (getsockname(s, &my_name.s, &my_name_len)!=0){
481                 LOG(L_ERR, "ERROR: tcp_connect: getsockname failed: %s(%d)\n",
482                                 strerror(errno), errno);
483                 si=0; /* try to go on */
484         }
485         su2ip_addr(&ip, &my_name);
486 #ifdef USE_TLS
487         if (type==PROTO_TLS)
488                 si=find_si(&ip, 0, PROTO_TLS);
489         else
490 #endif
491                 si=find_si(&ip, 0, PROTO_TCP);
492
493         if (si==0){
494                 LOG(L_ERR, "ERROR: tcp_connect: could not find corresponding"
495                                 " listening socket, using default...\n");
496                 if (server->s.sa_family==AF_INET) si=sendipv4_tcp;
497 #ifdef USE_IPV6
498                 else si=sendipv6_tcp;
499 #endif
500         }
501         con=tcpconn_new(s, server, si, type, S_CONN_CONNECT);
502         if (con==0){
503                 LOG(L_ERR, "ERROR: tcp_connect: tcpconn_new failed, closing the "
504                                  " socket\n");
505                 goto error;
506         }
507         return con;
508         /*FIXME: set sock idx! */
509 error:
510         if (s!=-1) close(s); /* close the opened socket */
511         return 0;
512 }
513
514
515
516 struct tcp_connection*  tcpconn_add(struct tcp_connection *c)
517 {
518         unsigned hash;
519
520         if (c){
521                 TCPCONN_LOCK;
522                 /* add it at the begining of the list*/
523                 hash=tcp_id_hash(c->id);
524                 c->id_hash=hash;
525                 tcpconn_listadd(tcpconn_id_hash[hash], c, id_next, id_prev);
526                 
527                 hash=tcp_addr_hash(&c->rcv.src_ip, c->rcv.src_port);
528                 /* set the first alias */
529                 c->con_aliases[0].port=c->rcv.src_port;
530                 c->con_aliases[0].hash=hash;
531                 c->con_aliases[0].parent=c;
532                 tcpconn_listadd(tcpconn_aliases_hash[hash], &c->con_aliases[0],
533                                                 next, prev);
534                 c->aliases++;
535                 TCPCONN_UNLOCK;
536                 DBG("tcpconn_add: hashes: %d, %d\n", hash, c->id_hash);
537                 return c;
538         }else{
539                 LOG(L_CRIT, "tcpconn_add: BUG: null connection pointer\n");
540                 return 0;
541         }
542 }
543
544
545 /* unsafe tcpconn_rm version (nolocks) */
546 void _tcpconn_rm(struct tcp_connection* c)
547 {
548         int r;
549         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
550         /* remove all the aliases */
551         for (r=0; r<c->aliases; r++)
552                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
553                                                 &c->con_aliases[r], next, prev);
554         lock_destroy(&c->write_lock);
555 #ifdef USE_TLS
556         if (c->type==PROTO_TLS) tls_tcpconn_clean(c);
557 #endif
558         shm_free(c);
559 }
560
561
562
563 void tcpconn_rm(struct tcp_connection* c)
564 {
565         int r;
566         TCPCONN_LOCK;
567         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
568         /* remove all the aliases */
569         for (r=0; r<c->aliases; r++)
570                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
571                                                 &c->con_aliases[r], next, prev);
572         TCPCONN_UNLOCK;
573         lock_destroy(&c->write_lock);
574 #ifdef USE_TLS
575         if ((c->type==PROTO_TLS)&&(c->extra_data)) tls_tcpconn_clean(c);
576 #endif
577         shm_free(c);
578 }
579
580
581 /* finds a connection, if id=0 uses the ip addr & port (host byte order)
582  * WARNING: unprotected (locks) use tcpconn_get unless you really
583  * know what you are doing */
584 struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port)
585 {
586
587         struct tcp_connection *c;
588         struct tcp_conn_alias* a;
589         unsigned hash;
590         
591 #ifdef EXTRA_DEBUG
592         DBG("tcpconn_find: %d  port %d\n",id, port);
593         if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
594 #endif
595         if (id){
596                 hash=tcp_id_hash(id);
597                 for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
598 #ifdef EXTRA_DEBUG
599                         DBG("c=%p, c->id=%d, port=%d\n",c, c->id, c->rcv.src_port);
600                         print_ip("ip=", &c->rcv.src_ip, "\n");
601 #endif
602                         if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
603                 }
604         }else if (ip){
605                 hash=tcp_addr_hash(ip, port);
606                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
607 #ifdef EXTRA_DEBUG
608                         DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
609                                         a->parent->id, a->port, a->parent->rcv.src_port);
610                         print_ip("ip=",&a->parent->rcv.src_ip,"\n");
611 #endif
612                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
613                                         (ip_addr_cmp(ip, &a->parent->rcv.src_ip)) )
614                                 return a->parent;
615                 }
616         }
617         return 0;
618 }
619
620
621
622 /* _tcpconn_find with locks and timeout */
623 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
624                                                                         int timeout)
625 {
626         struct tcp_connection* c;
627         TCPCONN_LOCK;
628         c=_tcpconn_find(id, ip, port);
629         if (c){ 
630                         c->refcnt++;
631                         c->timeout=get_ticks()+timeout;
632         }
633         TCPCONN_UNLOCK;
634         return c;
635 }
636
637
638
639 /* add port as an alias for the "id" connection
640  * returns 0 on success,-1 on failure */
641 int tcpconn_add_alias(int id, int port, int proto)
642 {
643         struct tcp_connection* c;
644         unsigned hash;
645         struct tcp_conn_alias* a;
646         
647         a=0;
648         /* fix the port */
649         port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
650         TCPCONN_LOCK;
651         /* check if alias already exists */
652         c=_tcpconn_find(id, 0, 0);
653         if (c){
654                 hash=tcp_addr_hash(&c->rcv.src_ip, port);
655                 /* search the aliases for an already existing one */
656                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
657                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
658                                         (ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) ){
659                                 /* found */
660                                 if (a->parent!=c) goto error_sec;
661                                 else goto ok;
662                         }
663                 }
664                 if (c->aliases>=TCP_CON_MAX_ALIASES) goto error_aliases;
665                 c->con_aliases[c->aliases].parent=c;
666                 c->con_aliases[c->aliases].port=port;
667                 c->con_aliases[c->aliases].hash=hash;
668                 tcpconn_listadd(tcpconn_aliases_hash[hash], 
669                                                                 &c->con_aliases[c->aliases], next, prev);
670                 c->aliases++;
671         }else goto error_not_found;
672 ok:
673         TCPCONN_UNLOCK;
674 #ifdef EXTRA_DEBUG
675         if (a) DBG("tcpconn_add_alias: alias already present\n");
676         else   DBG("tcpconn_add_alias: alias port %d for hash %d, id %d\n",
677                         port, hash, c->id);
678 #endif
679         return 0;
680 error_aliases:
681         TCPCONN_UNLOCK;
682         LOG(L_ERR, "ERROR: tcpconn_add_alias: too many aliases for connection %p"
683                                 " (%d)\n", c, c->id);
684         return -1;
685 error_not_found:
686         TCPCONN_UNLOCK;
687         LOG(L_ERR, "ERROR: tcpconn_add_alias: no connection found for id %d\n",id);
688         return -1;
689 error_sec:
690         TCPCONN_UNLOCK;
691         LOG(L_ERR, "ERROR: tcpconn_add_alias: possible port hijack attempt\n");
692         LOG(L_ERR, "ERROR: tcpconn_add_alias: alias already present and points"
693                         " to another connection (%d : %d and %d : %d)\n",
694                         a->parent->id,  port, c->id, port);
695         return -1;
696 }
697
698
699
700 void tcpconn_ref(struct tcp_connection* c)
701 {
702         TCPCONN_LOCK;
703         c->refcnt++; /* FIXME: atomic_dec */
704         TCPCONN_UNLOCK;
705 }
706
707
708
709 void tcpconn_put(struct tcp_connection* c)
710 {
711         TCPCONN_LOCK;
712         c->refcnt--; /* FIXME: atomic_dec */
713         TCPCONN_UNLOCK;
714 }
715
716
717
718 /* finds a tcpconn & sends on it */
719 int tcp_send(int type, char* buf, unsigned len, union sockaddr_union* to,
720                                 int id)
721 {
722         struct tcp_connection *c;
723         struct tcp_connection *tmp;
724         struct ip_addr ip;
725         int port;
726         int fd;
727         long response[2];
728         int n;
729         
730         port=0;
731         if (to){
732                 su2ip_addr(&ip, to);
733                 port=su_getport(to);
734                 c=tcpconn_get(id, &ip, port, tcp_con_lifetime); 
735         }else if (id){
736                 c=tcpconn_get(id, 0, 0, tcp_con_lifetime);
737         }else{
738                 LOG(L_CRIT, "BUG: tcp_send called with null id & to\n");
739                 return -1;
740         }
741         
742         if (id){
743                 if (c==0) {
744                         if (to){
745                                 /* try again w/o id */
746                                 c=tcpconn_get(0, &ip, port, tcp_con_lifetime);
747                                 goto no_id;
748                         }else{
749                                 LOG(L_ERR, "ERROR: tcp_send: id %d not found, dropping\n",
750                                                 id);
751                                 return -1;
752                         }
753                 }else goto get_fd;
754         }
755 no_id:
756                 if (c==0){
757                         DBG("tcp_send: no open tcp connection found, opening new one\n");
758                         /* create tcp connection */
759                         if ((c=tcpconn_connect(to, type))==0){
760                                 LOG(L_ERR, "ERROR: tcp_send: connect failed\n");
761                                 return -1;
762                         }
763                         c->refcnt++; /* safe to do it w/o locking, it's not yet
764                                                         available to the rest of the world */
765                         fd=c->s;
766                         
767                         /* send the new tcpconn to "tcp main" */
768                         response[0]=(long)c;
769                         response[1]=CONN_NEW;
770                         n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
771                         if (n<=0){
772                                 LOG(L_ERR, "BUG: tcp_send: failed send_fd: %s (%d)\n",
773                                                 strerror(errno), errno);
774                                 n=-1;
775                                 goto end;
776                         }       
777                         goto send_it;
778                 }
779 get_fd:
780                         /* todo: see if this is not the same process holding
781                          *  c  and if so send directly on c->fd */
782                         DBG("tcp_send: tcp connection found (%p), acquiring fd\n", c);
783                         /* get the fd */
784                         response[0]=(long)c;
785                         response[1]=CONN_GET_FD;
786                         n=send_all(unix_tcp_sock, response, sizeof(response));
787                         if (n<=0){
788                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(write):%s (%d)\n",
789                                                 strerror(errno), errno);
790                                 n=-1;
791                                 goto release_c;
792                         }
793                         DBG("tcp_send, c= %p, n=%d\n", c, n);
794                         tmp=c;
795                         n=receive_fd(unix_tcp_sock, &c, sizeof(c), &fd, MSG_WAITALL);
796                         if (n<=0){
797                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(receive_fd):"
798                                                         " %s (%d)\n", strerror(errno), errno);
799                                 n=-1;
800                                 goto release_c;
801                         }
802                         if (c!=tmp){
803                                 LOG(L_CRIT, "BUG: tcp_send: get_fd: got different connection:"
804                                                 "  %p (id= %d, refcnt=%d state=%d != "
805                                                 "  %p (id= %d, refcnt=%d state=%d (n=%d)\n",
806                                                   c,   c->id,   c->refcnt,   c->state,
807                                                   tmp, tmp->id, tmp->refcnt, tmp->state, n
808                                    );
809                                 n=-1; /* fail */
810                                 goto end;
811                         }
812                         DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
813                 
814         
815         
816 send_it:
817         DBG("tcp_send: sending...\n");
818         lock_get(&c->write_lock);
819 #ifdef USE_TLS
820         if (c->type==PROTO_TLS)
821                 n=tls_blocking_write(c, fd, buf, len);
822         else
823 #endif
824                 /* n=tcp_blocking_write(c, fd, buf, len); */
825                 n=tsend_stream(fd, buf, len, tcp_send_timeout*1000); 
826         lock_release(&c->write_lock);
827         DBG("tcp_send: after write: c= %p n=%d fd=%d\n",c, n, fd);
828         DBG("tcp_send: buf=\n%.*s\n", (int)len, buf);
829         if (n<0){
830                 LOG(L_ERR, "ERROR: tcp_send: failed to send\n");
831                 /* error on the connection , mark it as bad and set 0 timeout */
832                 c->state=S_CONN_BAD;
833                 c->timeout=0;
834                 /* tell "main" it should drop this (optional it will t/o anyway?)*/
835                 response[0]=(long)c;
836                 response[1]=CONN_ERROR;
837                 n=send_all(unix_tcp_sock, response, sizeof(response));
838                 /* CONN_ERROR will auto-dec refcnt => we must not call tcpconn_put !!*/
839                 if (n<=0){
840                         LOG(L_ERR, "BUG: tcp_send: error return failed (write):%s (%d)\n",
841                                         strerror(errno), errno);
842                         n=-1;
843                 }
844                 close(fd);
845                 return n; /* error return, no tcpconn_put */
846         }
847 end:
848         close(fd);
849 release_c:
850         tcpconn_put(c); /* release c (lock; dec refcnt; unlock) */
851         return n;
852 }
853
854
855
856 int tcp_init(struct socket_info* sock_info)
857 {
858         union sockaddr_union* addr;
859         int optval;
860 #ifdef DISABLE_NAGLE
861         int flag;
862         struct protoent* pe;
863
864         if (tcp_proto_no==-1){ /* if not already set */
865                 pe=getprotobyname("tcp");
866                 if (pe==0){
867                         LOG(L_ERR, "ERROR: tcp_init: could not get TCP protocol number\n");
868                         tcp_proto_no=-1;
869                 }else{
870                         tcp_proto_no=pe->p_proto;
871                 }
872         }
873 #endif
874         
875         addr=&sock_info->su;
876         /* sock_info->proto=PROTO_TCP; */
877         if (init_su(addr, &sock_info->address, sock_info->port_no)<0){
878                 LOG(L_ERR, "ERROR: tcp_init: could no init sockaddr_union\n");
879                 goto error;
880         }
881         sock_info->socket=socket(AF2PF(addr->s.sa_family), SOCK_STREAM, 0);
882         if (sock_info->socket==-1){
883                 LOG(L_ERR, "ERROR: tcp_init: socket: %s\n", strerror(errno));
884                 goto error;
885         }
886 #ifdef DISABLE_NAGLE
887         flag=1;
888         if ( (tcp_proto_no!=-1) &&
889                  (setsockopt(sock_info->socket, tcp_proto_no , TCP_NODELAY,
890                                          &flag, sizeof(flag))<0) ){
891                 LOG(L_ERR, "ERROR: tcp_init: could not disable Nagle: %s\n",
892                                 strerror(errno));
893         }
894 #endif
895
896
897 #if  !defined(TCP_DONT_REUSEADDR) 
898         /* Stevens, "Network Programming", Section 7.5, "Generic Socket
899      * Options": "...server started,..a child continues..on existing
900          * connection..listening server is restarted...call to bind fails
901          * ... ALL TCP servers should specify the SO_REUSEADDRE option 
902          * to allow the server to be restarted in this situation
903          *
904          * Indeed, without this option, the server can't restart.
905          *   -jiri
906          */
907         optval=1;
908         if (setsockopt(sock_info->socket, SOL_SOCKET, SO_REUSEADDR,
909                                 (void*)&optval, sizeof(optval))==-1) {
910                 LOG(L_ERR, "ERROR: tcp_init: setsockopt %s\n",
911                         strerror(errno));
912                 goto error;
913         }
914 #endif
915         /* tos */
916         optval = tos;
917         if (setsockopt(sock_info->socket, IPPROTO_IP, IP_TOS, (void*)&optval, 
918                                 sizeof(optval)) ==-1){
919                 LOG(L_WARN, "WARNING: tcp_init: setsockopt tos: %s\n", strerror(errno));
920                 /* continue since this is not critical */
921         }
922         if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
923                 LOG(L_ERR, "ERROR: tcp_init: bind(%x, %p, %d) on %s:%d : %s\n",
924                                 sock_info->socket,  &addr->s, 
925                                 (unsigned)sockaddru_len(*addr),
926                                 sock_info->address_str.s,
927                                 sock_info->port_no,
928                                 strerror(errno));
929                 goto error;
930         }
931         if (listen(sock_info->socket, TCP_LISTEN_BACKLOG)==-1){
932                 LOG(L_ERR, "ERROR: tcp_init: listen(%x, %p, %d) on %s: %s\n",
933                                 sock_info->socket, &addr->s, 
934                                 (unsigned)sockaddru_len(*addr),
935                                 sock_info->address_str.s,
936                                 strerror(errno));
937                 goto error;
938         }
939         
940         return 0;
941 error:
942         if (sock_info->socket!=-1){
943                 close(sock_info->socket);
944                 sock_info->socket=-1;
945         }
946         return -1;
947 }
948
949
950
951 #ifdef SEND_FD_QUEUE
/* one queued "send this connection to a child" operation, kept for a
 * later retry */
struct send_fd_info{
        struct tcp_connection* tcp_conn; /* connection to pass (pointer + its fd) */
        int unix_sock; /* unix socket on which the connection is sent */
        int retries; /* incremented each time the entry is kept for a retry */
};
957
/* growable array of pending send_fd operations; the valid entries are
 * data[0] ... crt-1 */
struct tcp_send_fd_q{
        struct send_fd_info* data; /* buffer */
        struct send_fd_info* crt;  /* pointer inside the buffer (next free entry) */
        struct send_fd_info* end;  /* points after the last valid position */
};
963
964
/* connections that could not be passed to a tcp child right away (the
 * send would have blocked) and are waiting for send_fd_queue_run() */
static struct tcp_send_fd_q send2child_q;
966
967
968
969 static int send_fd_queue_init(struct tcp_send_fd_q *q, unsigned int size)
970 {
971         q->data=pkg_malloc(size*sizeof(struct send_fd_info));
972         if (q->data==0){
973                 LOG(L_ERR, "ERROR: send_fd_queue_init: out of memory\n");
974                 return -1;
975         }
976         q->crt=&q->data[0];
977         q->end=&q->data[size];
978         return 0;
979 }
980
981 static void send_fd_queue_destroy(struct tcp_send_fd_q *q)
982 {
983         if (q->data){
984                 pkg_free(q->data);
985                 q->data=0;
986                 q->crt=q->end=0;
987         }
988 }
989
990
991
992 static int init_send_fd_queues()
993 {
994         if (send_fd_queue_init(&send2child_q, SEND_FD_QUEUE_SIZE)!=0)
995                 goto error;
996         return 0;
997 error:
998         LOG(L_ERR, "ERROR: init_send_fd_queues: init failed\n");
999         return -1;
1000 }
1001
1002
1003
1004 static void destroy_send_fd_queues()
1005 {
1006         send_fd_queue_destroy(&send2child_q);
1007 }
1008
1009
1010
1011
1012 inline static int send_fd_queue_add(    struct tcp_send_fd_q* q, 
1013                                                                                 int unix_sock,
1014                                                                                 struct tcp_connection *t)
1015 {
1016         struct send_fd_info* tmp;
1017         unsigned long new_size;
1018         
1019         if (q->crt>=q->end){
1020                 new_size=q->end-&q->data[0];
1021                 if (new_size< MAX_SEND_FD_QUEUE_SIZE/2){
1022                         new_size*=2;
1023                 }else new_size=MAX_SEND_FD_QUEUE_SIZE;
1024                 if (q->crt>=&q->data[new_size]){
1025                         LOG(L_ERR, "ERROR: send_fd_queue_add: queue full: %ld/%ld\n",
1026                                         q->crt-&q->data[0]-1, new_size);
1027                         goto error;
1028                 }
1029                 LOG(L_CRIT, "INFO: send_fd_queue: queue full: %ld, extending to %ld\n",
1030                                 q->end-&q->data[0], new_size);
1031                 tmp=pkg_realloc(q->data, new_size*sizeof(struct send_fd_info));
1032                 if (tmp==0){
1033                         LOG(L_ERR, "ERROR: send_fd_queue_add: out of memory\n");
1034                         goto error;
1035                 }
1036                 q->crt=(q->crt-&q->data[0])+tmp;
1037                 q->data=tmp;
1038                 q->end=&q->data[new_size];
1039         }
1040         q->crt->tcp_conn=t;
1041         q->crt->unix_sock=unix_sock;
1042         q->crt->retries=0;
1043         q->crt++;
1044         return 0;
1045 error:
1046         return -1;
1047 }
1048
1049
1050
1051 inline static void send_fd_queue_run(struct tcp_send_fd_q* q)
1052 {
1053         struct send_fd_info* p;
1054         struct send_fd_info* t;
1055         
1056         for (p=t=&q->data[0]; p<q->crt; p++){
1057                 if (send_fd(p->unix_sock, &(p->tcp_conn),
1058                                         sizeof(struct tcp_connection*), p->tcp_conn->s)<=0){
1059                         if ( ((errno==EAGAIN)||(errno==EWOULDBLOCK)) && 
1060                                                         (p->retries<MAX_SEND_FD_RETRIES)){
1061                                 /* leave in queue for a future try */
1062                                 *t=*p;
1063                                 t->retries++;
1064                                 t++;
1065                         }else{
1066                                 LOG(L_ERR, "ERROR: run_send_fd_queue: send_fd failed"
1067                                                    "on %d socket, %ld queue entry, retries %d \n",
1068                                                    p->unix_sock, p-&q->data[0], p->retries);
1069                         }
1070                 }
1071         }
1072         q->crt=t;
1073 }
1074 #else
1075 #define send_fd_queue_run(q)
1076 #endif
1077
1078
1079
1080 /* used internally by tcp_main_loop() */
1081 static void tcpconn_destroy(struct tcp_connection* tcpconn)
1082 {
1083         int fd;
1084
1085         TCPCONN_LOCK; /*avoid races w/ tcp_send*/
1086         tcpconn->refcnt--;
1087         if (tcpconn->refcnt==0){ 
1088                 DBG("tcpconn_destroy: destroying connection %p, flags %04x\n",
1089                                 tcpconn, tcpconn->flags);
1090                 fd=tcpconn->s;
1091 #ifdef USE_TLS
1092                 /*FIXME: lock ->writelock ? */
1093                 if (tcpconn->type==PROTO_TLS)
1094                         tls_close(tcpconn, fd);
1095 #endif
1096                 _tcpconn_rm(tcpconn);
1097                 close(fd);
1098                 tcp_connections_no--;
1099         }else{
1100                 /* force timeout */
1101                 tcpconn->timeout=0;
1102                 tcpconn->state=S_CONN_BAD;
1103                 DBG("tcpconn_destroy: delaying (%p, flags %04x) ...\n",
1104                                 tcpconn, tcpconn->flags);
1105                 
1106         }
1107         TCPCONN_UNLOCK;
1108 }
1109
1110
1111
/* handles io from a tcp child process: reads one (connection pointer,
 * command) pair from the child's unix socket and executes the command
 * (CONN_RELEASE, CONN_ERROR, CONN_DESTROY or CONN_EOF).
 * params: tcp_c - pointer in the tcp_children array, to the entry for
 *                 which an io event was detected 
 *         fd_i  - fd index in the fd_array (useful for optimizing
 *                 io_watch_deletes)
 * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
 *           io events queued), >0 on success. success/error refer only to
 *           the reads from the fd.
 */
inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
{
        struct tcp_connection* tcpconn;
        long response[2]; /* [0] = connection pointer, [1] = command */
        int cmd;
        int bytes;
        
        if (tcp_c->unix_sock<=0){
                /* (we can't have a fd==0, 0 is never closed )*/
                LOG(L_CRIT, "BUG: handle_tcp_child: fd %d for %d "
                                "(pid %d, ser no %d)\n", tcp_c->unix_sock,
                                (int)(tcp_c-&tcp_children[0]), tcp_c->pid, tcp_c->proc_no);
                goto error;
        }
        /* read until sizeof(response)
         * (this is a SOCK_STREAM so read is not atomic) */
        bytes=recv_all(tcp_c->unix_sock, response, sizeof(response), MSG_DONTWAIT);
        if (bytes<(int)sizeof(response)){
                if (bytes==0){
                        /* EOF -> bad, child has died */
                        DBG("DBG: handle_tcp_child: dead tcp child %d (pid %d, no %d)"
                                        " (shutting down?)\n", (int)(tcp_c-&tcp_children[0]), 
                                        tcp_c->pid, tcp_c->proc_no );
                        /* don't listen on it any more */
                        io_watch_del(&io_h, tcp_c->unix_sock, fd_i, 0); 
                        goto error; /* eof. so no more io here, it's ok to return error */
                }else if (bytes<0){
                        /* EAGAIN is ok if we try to empty the buffer
                         * e.g.: SIGIO_RT overflow mode or EPOLL ET */
                        if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
                                LOG(L_CRIT, "ERROR: handle_tcp_child: read from tcp child %ld "
                                                " (pid %d, no %d) %s [%d]\n",
                                                (long)(tcp_c-&tcp_children[0]), tcp_c->pid,
                                                tcp_c->proc_no, strerror(errno), errno );
                        }else{
                                bytes=0;
                        }
                        /* try to ignore ? (a real read error is returned to the
                         * caller as the negative value in bytes) */
                        goto end;
                }else{
                        /* should never happen */
                        LOG(L_CRIT, "BUG: handle_tcp_child: too few bytes received (%d)\n",
                                        bytes );
                        bytes=0; /* something was read so there is no error; otoh if
                                          recv_all returned less than requested => the receive
                                          buffer is empty => no more io queued on this fd */
                        goto end;
                }
        }
        
        DBG("handle_tcp_child: reader response= %lx, %ld from %d \n",
                                        response[0], response[1], (int)(tcp_c-&tcp_children[0]));
        cmd=response[1];
        tcpconn=(struct tcp_connection*)response[0];
        if (tcpconn==0){
                /* should never happen */
                LOG(L_CRIT, "BUG: handle_tcp_child: null tcpconn pointer received"
                                 " from tcp child %d (pid %d): %lx, %lx\n",
                                        (int)(tcp_c-&tcp_children[0]), tcp_c->pid,
                                        response[0], response[1]) ;
                goto end;
        }
        switch(cmd){
                case CONN_RELEASE:
                        /* the child is done with the connection and gives it back */
                        tcp_c->busy--;
                        if (tcpconn->state==S_CONN_BAD){ 
                                tcpconn_destroy(tcpconn);
                                break;
                        }
                        /* update the timeout*/
                        tcpconn->timeout=get_ticks()+tcp_con_lifetime;
                        tcpconn_put(tcpconn);
                        /* must be after the de-ref*/
                        /* NOTE(review): the result of io_watch_add is not checked;
                         * F_CONN_REMOVED is cleared even if the add failed */
                        io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
                        tcpconn->flags&=~F_CONN_REMOVED;
                        DBG("handle_tcp_child: CONN_RELEASE  %p refcnt= %d\n", 
                                                                                        tcpconn, tcpconn->refcnt);
                        break;
                case CONN_ERROR:
                case CONN_DESTROY:
                case CONN_EOF:
                        /* WARNING: this will auto-dec. refcnt! */
                                tcp_c->busy--;
                                /* main doesn't listen on it => we don't have to delete it
                                 if (tcpconn->s!=-1)
                                        io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
                                */
                                tcpconn_destroy(tcpconn); /* closes also the fd */
                                break;
                default:
                                LOG(L_CRIT, "BUG: handle_tcp_child:  unknown cmd %d"
                                                                        " from tcp reader %d\n",
                                                                        cmd, (int)(tcp_c-&tcp_children[0]));
        }
end:
        return bytes;
error:
        return -1;
}
1220
1221
1222
/* handles io from a "generic" ser process (get fd or new_fd from a tcp_send):
 * reads one (connection pointer, command) pair, plus an optional fd, from
 * the process' unix socket and executes the command (CONN_ERROR,
 * CONN_GET_FD or CONN_NEW).
 * 
 * params: p     - pointer in the ser processes array (pt[]), to the entry for
 *                 which an io event was detected
 *         fd_i  - fd index in the fd_array (useful for optimizing
 *                 io_watch_deletes)
 * returns:  handle_* return convention:
 *          -1 on error reading from the fd,
 *           0 on EAGAIN  or when no  more io events are queued 
 *             (receive buffer empty),
 *           >0 on successful reads from the fd (the receive buffer might
 *             be non-empty).
 */
inline static int handle_ser_child(struct process_table* p, int fd_i)
{
        struct tcp_connection* tcpconn;
        long response[2]; /* [0] = connection pointer, [1] = command */
        int cmd;
        int bytes;
        int ret;
        int fd; /* fd received together with the command (checked for -1 in
                           CONN_NEW) */
        
        ret=-1;
        if (p->unix_sock<=0){
                /* (we can't have a fd==0, 0 is never closed )*/
                LOG(L_CRIT, "BUG: handle_ser_child: fd %d for %d "
                                "(pid %d)\n", p->unix_sock, (int)(p-&pt[0]), p->pid);
                goto error;
        }
                        
        /* get all bytes and the fd (if transmitted)
         * (this is a SOCK_STREAM so read is not atomic) */
        bytes=receive_fd(p->unix_sock, response, sizeof(response), &fd,
                                                MSG_DONTWAIT);
        if (bytes<(int)sizeof(response)){
                /* too few bytes read */
                if (bytes==0){
                        /* EOF -> bad, child has died */
                        DBG("DBG: handle_ser_child: dead child %d, pid %d"
                                        " (shutting down?)\n", (int)(p-&pt[0]), p->pid);
                        /* don't listen on it any more */
                        io_watch_del(&io_h, p->unix_sock, fd_i, 0);
                        goto error; /* child dead => no further io events from it */
                }else if (bytes<0){
                        /* EAGAIN is ok if we try to empty the buffer
                         * e.g: SIGIO_RT overflow mode or EPOLL ET */
                        if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
                                LOG(L_CRIT, "ERROR: handle_ser_child: read from child %d  "
                                                "(pid %d):  %s [%d]\n", (int)(p-&pt[0]), p->pid,
                                                strerror(errno), errno);
                                ret=-1;
                        }else{
                                ret=0;
                        }
                        /* try to ignore ? */
                        goto end;
                }else{
                        /* should never happen */
                        LOG(L_CRIT, "BUG: handle_ser_child: too few bytes received (%d)\n",
                                        bytes );
                        ret=0; /* something was read so there is no error; otoh if
                                          receive_fd returned less than requested => the receive
                                          buffer is empty => no more io queued on this fd */
                        goto end;
                }
        }
        ret=1; /* something was received, there might be more queued */
        DBG("handle_ser_child: read response= %lx, %ld, fd %d from %d (%d)\n",
                                        response[0], response[1], fd, (int)(p-&pt[0]), p->pid);
        cmd=response[1];
        tcpconn=(struct tcp_connection*)response[0];
        if (tcpconn==0){
                LOG(L_CRIT, "BUG: handle_ser_child: null tcpconn pointer received"
                                 " from child %d (pid %d): %lx, %lx\n",
                                        (int)(p-&pt[0]), p->pid, response[0], response[1]) ;
                goto end;
        }
        switch(cmd){
                case CONN_ERROR:
                        /* stop watching the fd (if it is still watched) before the
                         * connection is destroyed */
                        if (!(tcpconn->flags & F_CONN_REMOVED) && (tcpconn->s!=-1)){
                                io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
                                tcpconn->flags|=F_CONN_REMOVED;
                        }
                        tcpconn_destroy(tcpconn); /* will close also the fd */
                        break;
                case CONN_GET_FD:
                        /* send the requested FD  */
                        /* WARNING: take care of setting refcnt properly to
                         * avoid race condition */
                        if (send_fd(p->unix_sock, &tcpconn, sizeof(tcpconn),
                                                        tcpconn->s)<=0){
                                LOG(L_ERR, "ERROR: handle_ser_child: send_fd failed\n");
                        }
                        break;
                case CONN_NEW:
                        /* update the fd in the requested tcpconn*/
                        /* WARNING: take care of setting refcnt properly to
                         * avoid race condition */
                        if (fd==-1){
                                LOG(L_CRIT, "BUG: handle_ser_child: CONN_NEW:"
                                                        " no fd received\n");
                                break;
                        }
                        tcpconn->s=fd;
                        /* add tcpconn to the list*/
                        tcpconn_add(tcpconn);
                        /* update the timeout*/
                        tcpconn->timeout=get_ticks()+tcp_con_lifetime;
                        /* NOTE(review): the result of io_watch_add is not checked;
                         * F_CONN_REMOVED is cleared even if the add failed */
                        io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
                        tcpconn->flags&=~F_CONN_REMOVED;
                        break;
                default:
                        LOG(L_CRIT, "BUG: handle_ser_child: unknown cmd %d\n", cmd);
        }
end:
        return ret;
error:
        return -1;
}
1342
1343
1344
1345 /* sends a tcpconn + fd to a choosen child */
1346 inline static int send2child(struct tcp_connection* tcpconn)
1347 {
1348         int i;
1349         int min_busy;
1350         int idx;
1351         
1352         min_busy=tcp_children[0].busy;
1353         idx=0;
1354         for (i=0; i<tcp_children_no; i++){
1355                 if (!tcp_children[i].busy){
1356                         idx=i;
1357                         min_busy=0;
1358                         break;
1359                 }else if (min_busy>tcp_children[i].busy){
1360                         min_busy=tcp_children[i].busy;
1361                         idx=i;
1362                 }
1363         }
1364         
1365         tcp_children[idx].busy++;
1366         tcp_children[idx].n_reqs++;
1367         if (min_busy){
1368                 DBG("WARNING: send2child: no free tcp receiver, "
1369                                 " connection passed to the least busy one (%d)\n",
1370                                 min_busy);
1371         }
1372         DBG("send2child: to tcp child %d %d(%d), %p\n", idx, 
1373                                         tcp_children[idx].proc_no,
1374                                         tcp_children[idx].pid, tcpconn);
1375         /* first make sure this child doesn't have pending request for
1376          * tcp_main (to avoid a possible deadlock: e.g. child wants to
1377          * send a release command, but the master fills its socket buffer
1378          * with new connection commands => deadlock) */
1379         /* answer tcp_send requests first */
1380         while(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0);
1381         /* process tcp readers requests */
1382         while(handle_tcp_child(&tcp_children[idx], -1)>0);
1383                 
1384 #ifdef SEND_FD_QUEUE
1385         /* if queue full, try to queue the io */
1386         if (send_fd(tcp_children[idx].unix_sock, &tcpconn, sizeof(tcpconn),
1387                         tcpconn->s)<=0){
1388                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK)){
1389                         /* FIXME: remove after debugging */
1390                         LOG(L_WARN, "WARNING: tcp child %d, socket %d: queue full\n",
1391                                         idx, tcp_children[idx].unix_sock);
1392                         if (send_fd_queue_add(&send2child_q, tcp_children[idx].unix_sock, 
1393                                                 tcpconn)!=0){
1394                                 LOG(L_ERR, "ERROR: send2child: queue send op. failed\n");
1395                                 return -1;
1396                         }
1397                 }else{
1398                         LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
1399                 }
1400         }
1401 #else
1402         if (send_fd(tcp_children[idx].unix_sock, &tcpconn, sizeof(tcpconn),
1403                         tcpconn->s)<=0){
1404                 LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
1405                 return -1;
1406         }
1407 #endif
1408         
1409         return 0;
1410 }
1411
1412
1413
1414 /* handles a new connection, called internally by tcp_main_loop/handle_io.
1415  * params: si - pointer to one of the tcp socket_info structures on which
1416  *              an io event was detected (connection attempt)
1417  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
1418  *           io events queued), >0 on success. success/error refer only to
1419  *           the accept.
1420  */
1421 static inline int handle_new_connect(struct socket_info* si)
1422 {
1423         union sockaddr_union su;
1424         struct tcp_connection* tcpconn;
1425         socklen_t su_len;
1426         int new_sock;
1427         
1428         /* got a connection on r */
1429         su_len=sizeof(su);
1430         new_sock=accept(si->socket, &(su.s), &su_len);
1431         if (new_sock==-1){
1432                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK))
1433                         return 0;
1434                 LOG(L_ERR,  "WARNING: handle_new_connect: error while accepting"
1435                                 " connection(%d): %s\n", errno, strerror(errno));
1436                 return -1;
1437         }
1438         if (tcp_connections_no>=tcp_max_connections){
1439                 LOG(L_ERR, "ERROR: maximum number of connections exceeded: %d/%d\n",
1440                                         tcp_connections_no, tcp_max_connections);
1441                 close(new_sock);
1442                 return 1; /* success, because the accept was succesfull */
1443         }
1444         if (init_sock_opt(new_sock)<0){
1445                 LOG(L_ERR, "ERROR: handle_new_connect: init_sock_opt failed\n");
1446                 close(new_sock);
1447                 return 1; /* success, because the accept was succesfull */
1448         }
1449         
1450         /* add socket to list */
1451         tcpconn=tcpconn_new(new_sock, &su, si, si->proto, S_CONN_ACCEPT);
1452         if (tcpconn){
1453                 tcpconn->refcnt++; /* safe, not yet available to the
1454                                                           outside world */
1455                 tcpconn_add(tcpconn);
1456                 DBG("handle_new_connect: new connection: %p %d flags: %04x\n",
1457                         tcpconn, tcpconn->s, tcpconn->flags);
1458                 /* pass it to a child */
1459                 if(send2child(tcpconn)<0){
1460                         LOG(L_ERR,"ERROR: handle_new_connect: no children "
1461                                         "available\n");
1462                         TCPCONN_LOCK;
1463                         tcpconn->refcnt--;
1464                         if (tcpconn->refcnt==0){
1465                                 close(tcpconn->s);
1466                                 _tcpconn_rm(tcpconn);
1467                         }else tcpconn->timeout=0; /* force expire */
1468                         TCPCONN_UNLOCK;
1469                 }
1470         }else{ /*tcpconn==0 */
1471                 LOG(L_ERR, "ERROR: handle_new_connect: tcpconn_new failed, "
1472                                 "closing socket\n");
1473                 close(new_sock);
1474                 
1475         }
1476         return 1; /* accept() was succesfull */
1477 }
1478
1479
1480
1481 /* handles an io event on one of the watched tcp connections
1482  * 
1483  * params: tcpconn - pointer to the tcp_connection for which we have an io ev.
1484  *         fd_i    - index in the fd_array table (needed for delete)
1485  * returns:  handle_* return convention, but on success it always returns 0
1486  *           (because it's one-shot, after a succesfull execution the fd is
1487  *            removed from tcp_main's watch fd list and passed to a child =>
1488  *            tcp_main is not interested in further io events that might be
1489  *            queued for this fd)
1490  */
1491 inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, int fd_i)
1492 {
1493         int fd;
1494         
1495         /*  is refcnt!=0 really necessary? 
1496          *  No, in fact it's a bug: I can have the following situation: a send only
1497          *   tcp connection used by n processes simultaneously => refcnt = n. In 
1498          *   the same time I can have a read event and this situation is perfectly
1499          *   valid. -- andrei
1500          */
1501 #if 0
1502         if ((tcpconn->refcnt!=0)){
1503                 /* FIXME: might be valid for sigio_rt iff fd flags are not cleared
1504                  *        (there is a short window in which it could generate a sig
1505                  *         that would be catched by tcp_main) */
1506                 LOG(L_CRIT, "BUG: handle_tcpconn_ev: io event on referenced"
1507                                         " tcpconn (%p), refcnt=%d, fd=%d\n",
1508                                         tcpconn, tcpconn->refcnt, tcpconn->s);
1509                 return -1;
1510         }
1511 #endif
1512         /* pass it to child, so remove it from the io watch list */
1513         DBG("handle_tcpconn_ev: data available on %p %d\n", tcpconn, tcpconn->s);
1514         if (io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1) goto error;
1515         tcpconn->flags|=F_CONN_REMOVED;
1516         tcpconn_ref(tcpconn); /* refcnt ++ */
1517         if (send2child(tcpconn)<0){
1518                 LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n");
1519                 TCPCONN_LOCK;
1520                 tcpconn->refcnt--;
1521                 if (tcpconn->refcnt==0){
1522                         fd=tcpconn->s;
1523                         _tcpconn_rm(tcpconn);
1524                         close(fd);
1525                 }else tcpconn->timeout=0; /* force expire*/
1526                 TCPCONN_UNLOCK;
1527         }
1528         return 0; /* we are not interested in possibly queued io events, 
1529                                  the fd was either passed to a child, or closed */
1530 error:
1531         return -1;
1532 }
1533
1534
1535
1536 /* generic handle io routine, it will call the appropiate
1537  *  handle_xxx() based on the fd_map type
1538  *
1539  * params:  fm  - pointer to a fd hash entry
1540  *          idx - index in the fd_array (or -1 if not known)
1541  * return: -1 on error
1542  *          0 on EAGAIN or when by some other way it is known that no more 
1543  *            io events are queued on the fd (the receive buffer is empty).
1544  *            Usefull to detect when there are no more io events queued for
1545  *            sigio_rt, epoll_et, kqueue.
1546  *         >0 on successfull read from the fd (when there might be more io
1547  *            queued -- the receive buffer might still be non-empty)
1548  */
1549 inline static int handle_io(struct fd_map* fm, int idx)
1550 {       
1551         int ret;
1552         
1553         switch(fm->type){
1554                 case F_SOCKINFO:
1555                         ret=handle_new_connect((struct socket_info*)fm->data);
1556                         break;
1557                 case F_TCPCONN:
1558                         ret=handle_tcpconn_ev((struct tcp_connection*)fm->data, idx);
1559                         break;
1560                 case F_TCPCHILD:
1561                         ret=handle_tcp_child((struct tcp_child*)fm->data, idx);
1562                         break;
1563                 case F_PROC:
1564                         ret=handle_ser_child((struct process_table*)fm->data, idx);
1565                         break;
1566                 case F_NONE:
1567                         LOG(L_CRIT, "BUG: handle_io: empty fd map\n");
1568                         goto error;
1569                 default:
1570                         LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type); 
1571                         goto error;
1572         }
1573         return ret;
1574 error:
1575         return -1;
1576 }
1577
1578
1579
1580 /* very inefficient for now - FIXME
1581  * keep in sync with tcpconn_destroy, the "delete" part should be
1582  * the same except for io_watch_del..*/
1583 static inline void tcpconn_timeout(int force)
1584 {
1585         static int prev_ticks=0;
1586         struct tcp_connection *c, *next;
1587         unsigned int ticks;
1588         unsigned h;
1589         int fd;
1590         
1591         
1592         ticks=get_ticks();
1593         if ((ticks==prev_ticks) && !force) return;
1594         prev_ticks=ticks;
1595         TCPCONN_LOCK; /* fixme: we can lock only on delete IMO */
1596         for(h=0; h<TCP_ID_HASH_SIZE; h++){
1597                 c=tcpconn_id_hash[h];
1598                 while(c){
1599                         next=c->id_next;
1600                         if (force ||((c->refcnt==0) && ((int)(ticks-c->timeout)>=0))){
1601                                 if (!force)
1602                                         DBG("tcpconn_timeout: timeout for hash=%d - %p"
1603                                                         " (%d > %d)\n", h, c, ticks, c->timeout);
1604                                 fd=c->s;
1605 #ifdef USE_TLS
1606                                 if (c->type==PROTO_TLS)
1607                                         tls_close(c, fd);
1608 #endif
1609                                 _tcpconn_rm(c);
1610                                 if ((fd>0)&&(c->refcnt==0)) {
1611                                         if (!(c->flags & F_CONN_REMOVED)){
1612                                                 io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
1613                                                 c->flags|=F_CONN_REMOVED;
1614                                         }
1615                                         close(fd);
1616                                 }
1617                                 tcp_connections_no--;
1618                         }
1619                         c=next;
1620                 }
1621         }
1622         TCPCONN_UNLOCK;
1623 }
1624
1625
1626
1627 /* tcp main loop */
1628 void tcp_main_loop()
1629 {
1630
1631         struct socket_info* si;
1632         int r;
1633         
1634         /* init io_wait (here because we want the memory allocated only in
1635          * the tcp_main process) */
1636         
1637         /* FIXME: TODO: make tcp_max_fd_no a config param */
1638         if  (init_io_wait(&io_h, tcp_max_fd_no, tcp_poll_method)<0)
1639                 goto error;
1640         /* init: start watching all the fds*/
1641         
1642         /* add all the sockets we listens on for connections */
1643         for (si=tcp_listen; si; si=si->next){
1644                 if ((si->proto==PROTO_TCP) &&(si->socket!=-1)){
1645                         if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
1646                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1647                                                         "listen socket to the fd list\n");
1648                                 goto error;
1649                         }
1650                 }else{
1651                         LOG(L_CRIT, "BUG: tcp_main_loop: non tcp address in tcp_listen\n");
1652                 }
1653         }
1654 #ifdef USE_TLS
1655         if (!tls_disable){
1656                 for (si=tls_listen; si; si=si->next){
1657                         if ((si->proto==PROTO_TLS) && (si->socket!=-1)){
1658                                 if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
1659                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1660                                                         "tls listen socket to the fd list\n");
1661                                         goto error;
1662                                 }
1663                         }else{
1664                                 LOG(L_CRIT, "BUG: tcp_main_loop: non tls address"
1665                                                 " in tls_listen\n");
1666                         }
1667                 }
1668         }
1669 #endif
1670         /* add all the unix sockets used for communcation with other ser processes
1671          *  (get fd, new connection a.s.o) */
1672         for (r=1; r<process_no; r++){
1673                 if (pt[r].unix_sock>0) /* we can't have 0, we never close it!*/
1674                         if (io_watch_add(&io_h, pt[r].unix_sock, F_PROC, &pt[r])<0){
1675                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1676                                                         "process %d unix socket to the fd list\n", r);
1677                                         goto error;
1678                         }
1679         }
1680         /* add all the unix sokets used for communication with the tcp childs */
1681         for (r=0; r<tcp_children_no; r++){
1682                 if (tcp_children[r].unix_sock>0)/*we can't have 0, we never close it!*/
1683                         if (io_watch_add(&io_h, tcp_children[r].unix_sock, F_TCPCHILD,
1684                                                         &tcp_children[r]) <0){
1685                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1686                                                 "tcp child %d unix socket to the fd list\n", r);
1687                                 goto error;
1688                         }
1689         }
1690         
1691         /* main loop */
1692         switch(io_h.poll_method){
1693                 case POLL_POLL:
1694                         while(1){
1695                                 /* wait and process IO */
1696                                 io_wait_loop_poll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0); 
1697                                 send_fd_queue_run(&send2child_q); /* then new io */
1698                                 /* remove old connections */
1699                                 tcpconn_timeout(0);
1700                         }
1701                         break;
1702 #ifdef HAVE_SELECT
1703                 case POLL_SELECT:
1704                         while(1){
1705                                 io_wait_loop_select(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1706                                 send_fd_queue_run(&send2child_q); /* then new io */
1707                                 tcpconn_timeout(0);
1708                         }
1709                         break;
1710 #endif
1711 #ifdef HAVE_SIGIO_RT
1712                 case POLL_SIGIO_RT:
1713                         while(1){
1714                                 io_wait_loop_sigio_rt(&io_h, TCP_MAIN_SELECT_TIMEOUT);
1715                                 send_fd_queue_run(&send2child_q); /* then new io */
1716                                 tcpconn_timeout(0);
1717                         }
1718                         break;
1719 #endif
1720 #ifdef HAVE_EPOLL
1721                 case POLL_EPOLL_LT:
1722                         while(1){
1723                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1724                                 send_fd_queue_run(&send2child_q); /* then new io */
1725                                 tcpconn_timeout(0);
1726                         }
1727                         break;
1728                 case POLL_EPOLL_ET:
1729                         while(1){
1730                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 1);
1731                                 send_fd_queue_run(&send2child_q); /* then new io */
1732                                 tcpconn_timeout(0);
1733                         }
1734                         break;
1735 #endif
1736 #ifdef HAVE_KQUEUE
1737                 case POLL_KQUEUE:
1738                         while(1){
1739                                 io_wait_loop_kqueue(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1740                                 send_fd_queue_run(&send2child_q); /* then new io */
1741                                 tcpconn_timeout(0);
1742                         }
1743                         break;
1744 #endif
1745 #ifdef HAVE_DEVPOLL
1746                 case POLL_DEVPOLL:
1747                         while(1){
1748                                 io_wait_loop_devpoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1749                                 send_fd_queue_run(&send2child_q); /* then new io */
1750                                 tcpconn_timeout(0);
1751                         }
1752                         break;
1753 #endif
1754                 default:
1755                         LOG(L_CRIT, "BUG: tcp_main_loop: no support for poll method "
1756                                         " %s (%d)\n", 
1757                                         poll_method_name(io_h.poll_method), io_h.poll_method);
1758                         goto error;
1759         }
1760 error:
1761         destroy_io_wait(&io_h);
1762         LOG(L_CRIT, "ERROR: tcp_main_loop: exiting...");
1763         exit(-1);
1764 }
1765
1766
1767
1768 /* cleanup before exit */
1769 void destroy_tcp()
1770 {
1771                 if (tcpconn_id_hash){
1772                         tcpconn_timeout(1); /* force close/expire for all active tcpconns*/
1773                         shm_free(tcpconn_id_hash);
1774                         tcpconn_id_hash=0;
1775                 }
1776                 if (connection_id){
1777                         shm_free(connection_id);
1778                         connection_id=0;
1779                 }
1780                 if (tcpconn_aliases_hash){
1781                         shm_free(tcpconn_aliases_hash);
1782                         tcpconn_aliases_hash=0;
1783                 }
1784                 if (tcpconn_lock){
1785                         lock_destroy(tcpconn_lock);
1786                         lock_dealloc((void*)tcpconn_lock);
1787                         tcpconn_lock=0;
1788                 }
1789 #ifdef SEND_FD_QUEUE
1790                 destroy_send_fd_queues();
1791 #endif
1792 }
1793
1794
1795
1796 int init_tcp()
1797 {
1798         char* poll_err;
1799         
1800         /* init lock */
1801         tcpconn_lock=lock_alloc();
1802         if (tcpconn_lock==0){
1803                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc lock\n");
1804                 goto error;
1805         }
1806         if (lock_init(tcpconn_lock)==0){
1807                 LOG(L_CRIT, "ERROR: init_tcp: could not init lock\n");
1808                 lock_dealloc((void*)tcpconn_lock);
1809                 tcpconn_lock=0;
1810                 goto error;
1811         }
1812         /* init globals */
1813         connection_id=(int*)shm_malloc(sizeof(int));
1814         if (connection_id==0){
1815                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
1816                 goto error;
1817         }
1818         *connection_id=1;
1819         /* alloc hashtables*/
1820         tcpconn_aliases_hash=(struct tcp_conn_alias**)
1821                         shm_malloc(TCP_ALIAS_HASH_SIZE* sizeof(struct tcp_conn_alias*));
1822         if (tcpconn_aliases_hash==0){
1823                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc address hashtable\n");
1824                 goto error;
1825         }
1826         tcpconn_id_hash=(struct tcp_connection**)shm_malloc(TCP_ID_HASH_SIZE*
1827                                                                 sizeof(struct tcp_connection*));
1828         if (tcpconn_id_hash==0){
1829                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc id hashtable\n");
1830                 goto error;
1831         }
1832         /* init hashtables*/
1833         memset((void*)tcpconn_aliases_hash, 0, 
1834                         TCP_ALIAS_HASH_SIZE * sizeof(struct tcp_conn_alias*));
1835         memset((void*)tcpconn_id_hash, 0, 
1836                         TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
1837         /* init send fd queues */
1838         
1839 #ifdef SEND_FD_QUEUE
1840         if (init_send_fd_queues()<0){
1841                 LOG(L_CRIT, "ERROR: init_tcp: could not init send fd queues\n");
1842                 goto error;
1843         }
1844 #endif
1845         /* fix config variables */
1846         /* they can have only positive values due the config parser so we can
1847          * ignore most of them */
1848                 poll_err=check_poll_method(tcp_poll_method);
1849         
1850         /* set an appropiate poll method */
1851         if (poll_err || (tcp_poll_method==0)){
1852                 tcp_poll_method=choose_poll_method();
1853                 if (poll_err){
1854                         LOG(L_ERR, "ERROR: init_tcp: %s, using %s instead\n",
1855                                         poll_err, poll_method_name(tcp_poll_method));
1856                 }else{
1857                         LOG(L_INFO, "init_tcp: using %s as the io watch method"
1858                                         " (auto detected)\n", poll_method_name(tcp_poll_method));
1859                 }
1860         }else{
1861                         LOG(L_INFO, "init_tcp: using %s io watch method (config)\n",
1862                                         poll_method_name(tcp_poll_method));
1863         }
1864         
1865         return 0;
1866 error:
1867         /* clean-up */
1868         destroy_tcp();
1869         return -1;
1870 }
1871
1872
1873
1874 /* returns -1 on error */
1875 static int set_non_blocking(int s)
1876 {
1877         int flags;
1878         /* non-blocking */
1879         flags=fcntl(s, F_GETFL);
1880         if (flags==-1){
1881                 LOG(L_ERR, "ERROR: set_non_blocking: fnctl failed: (%d) %s\n",
1882                                 errno, strerror(errno));
1883                 goto error;
1884         }
1885         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
1886                 LOG(L_ERR, "ERROR: set_non_blocking: fcntl: set non-blocking failed:"
1887                                 " (%d) %s\n", errno, strerror(errno));
1888                 goto error;
1889         }
1890         return 0;
1891 error:
1892         return -1;
1893 }
1894
1895
1896
1897 /* starts the tcp processes */
1898 int tcp_init_children()
1899 {
1900         int r;
1901         int sockfd[2];
1902         int reader_fd[2]; /* for comm. with the tcp children read  */
1903         pid_t pid;
1904         struct socket_info *si;
1905         
1906         /* estimate max fd. no:
1907          * 1 tcp send unix socket/all_proc, 
1908          *  + 1 udp sock/udp proc + 1 tcp_child sock/tcp child*
1909          *  + no_listen_tcp */
1910         for(r=0, si=tcp_listen; si; si=si->next, r++);
1911 #ifdef USE_TLS
1912         if (! tls_disable)
1913                 for (si=tls_listen; si; si=si->next, r++);
1914 #endif
1915         
1916         tcp_max_fd_no=process_count*2 +r-1 /* timer */ +3; /* stdin/out/err*/
1917         tcp_max_fd_no+=tcp_max_connections;
1918         
1919         /* create the tcp sock_info structures */
1920         /* copy the sockets --moved to main_loop*/
1921         
1922         /* fork children & create the socket pairs*/
1923         for(r=0; r<tcp_children_no; r++){
1924                 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockfd)<0){
1925                         LOG(L_ERR, "ERROR: tcp_main: socketpair failed: %s\n",
1926                                         strerror(errno));
1927                         goto error;
1928                 }
1929                 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reader_fd)<0){
1930                         LOG(L_ERR, "ERROR: tcp_main: socketpair failed: %s\n",
1931                                         strerror(errno));
1932                         goto error;
1933                 }
1934 #ifdef TCP_CHILD_NON_BLOCKING
1935                 if ((set_non_blocking(reader_fd[0])<0) || 
1936                         (set_non_blocking(reader_fd[1])<0)){
1937                         LOG(L_ERR, "ERROR: tcp_main: failed to set non blocking"
1938                                                 "on child sockets\n");
1939                         /* continue, it's not critical (it will go slower under
1940                          * very high connection rates) */
1941                 }
1942 #endif
1943                 
1944                 process_no++;
1945                 child_rank++;
1946                 pid=fork();
1947                 if (pid<0){
1948                         LOG(L_ERR, "ERROR: tcp_main: fork failed: %s\n",
1949                                         strerror(errno));
1950                         goto error;
1951                 }else if (pid>0){
1952                         /* parent */
1953                         close(sockfd[1]);
1954                         close(reader_fd[1]);
1955                         tcp_children[r].pid=pid;
1956                         tcp_children[r].proc_no=process_no;
1957                         tcp_children[r].busy=0;
1958                         tcp_children[r].n_reqs=0;
1959                         tcp_children[r].unix_sock=reader_fd[0];
1960                         pt[process_no].pid=pid;
1961                         pt[process_no].unix_sock=sockfd[0];
1962                         pt[process_no].idx=r;
1963                         strncpy(pt[process_no].desc, "tcp receiver", MAX_PT_DESC);
1964                 }else{
1965                         /* child */
1966                         close(sockfd[0]);
1967                         unix_tcp_sock=sockfd[1];
1968                         bind_address=0; /* force a SEGFAULT if someone uses a non-init.
1969                                                            bind address on tcp */
1970                         /* record pid twice to avoid the child using it, before
1971                          * parent gets a chance to set it*/
1972                         pt[process_no].pid=getpid();
1973                         if (init_child(child_rank) < 0) {
1974                                 LOG(L_ERR, "init_children failed\n");
1975                                 goto error;
1976                         }
1977                         tcp_receive_loop(reader_fd[1]);
1978                 }
1979         }
1980         return 0;
1981 error:
1982         return -1;
1983 }
1984
1985 #endif