44bc98818630d764c5b6b72469a4645a96e8cfb0
[sip-router] / tcp_main.c
1 /*
2  * $Id$
3  *
4  * Copyright (C) 2001-2003 FhG Fokus
5  *
6  * This file is part of ser, a free SIP server.
7  *
8  * ser is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version
12  *
13  * For a license to use the ser software under conditions
14  * other than those described here, or to purchase support for this
15  * software, please contact iptel.org by e-mail at the following addresses:
16  *    info@iptel.org
17  *
18  * ser is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26  */
27 /*
28  * History:
29  * --------
30  *  2002-11-29  created by andrei
31  *  2002-12-11  added tcp_send (andrei)
32  *  2003-01-20  locking fixes, hashtables (andrei)
33  *  2003-02-20  s/lock_t/gen_lock_t/ to avoid a conflict on solaris (andrei)
34  *  2003-02-25  Nagle is disabled if -DDISABLE_NAGLE (andrei)
35  *  2003-03-29  SO_REUSEADDR before calling bind to allow
36  *              server restart, Nagle set on the (hopefully)
37  *              correct socket (jiri)
38  *  2003-03-31  always try to find the corresponding tcp listen socket for
39  *               a temp. socket and store it in *->bind_address: added
40  *               find_tcp_si, modified tcpconn_connect (andrei)
41  *  2003-04-14  set sockopts to TOS low delay (andrei)
42  *  2003-06-30  moved tcp new connect checking & handling to
43  *               handle_new_connect (andrei)
44  *  2003-07-09  tls_close called before closing the tcp connection (andrei)
45  *  2003-10-24  converted to the new socket_info lists (andrei)
46  *  2003-10-27  tcp port aliases support added (andrei)
47  *  2003-11-04  always lock before manipulating refcnt; sendchild
48  *              does not inc refcnt by itself anymore (andrei)
49  *  2003-11-07  different unix sockets are used for fd passing
50  *              to/from readers/writers (andrei)
51  *  2003-11-17  handle_new_connect & tcp_connect will close the 
52  *              new socket if tcpconn_new return 0 (e.g. out of mem) (andrei)
53  *  2003-11-28  tcp_blocking_write & tcp_blocking_connect added (andrei)
54  *  2004-11-08  dropped find_tcp_si and replaced with find_si (andrei)
55  *  2005-06-07  new tcp optimized code, supports epoll (LT), sigio + real time
56  *               signals, poll & select (andrei)
57  *  2005-06-26  *bsd kqueue support (andrei)
58  *  2005-07-04  solaris /dev/poll support (andrei)
59  *  2005-07-08  tcp_max_connections, tcp_connection_lifetime, don't accept
60  *               more connections if tcp_max_connections is exceeded (andrei)
61  *  2005-10-21  cleanup all the open connections on exit
62  *              decrement the no. of open connections on timeout too    (andrei)
63  */
64
65
66 #ifdef USE_TCP
67
68
69 #ifndef SHM_MEM
70 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
71 #endif
72
73 #include <sys/time.h>
74 #include <sys/types.h>
75 #include <sys/select.h>
76 #include <sys/socket.h>
77 #include <netinet/in.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/ip.h>
80 #include <netinet/tcp.h>
81 #include <sys/uio.h>  /* writev*/
82 #include <netdb.h>
83 #include <stdlib.h> /*exit() */
84
85 #include <unistd.h>
86
87 #include <errno.h>
88 #include <string.h>
89
90 #ifdef HAVE_SELECT
91 #include <sys/select.h>
92 #endif
93 #include <sys/poll.h>
94
95
96 #include "ip_addr.h"
97 #include "pass_fd.h"
98 #include "tcp_conn.h"
99 #include "globals.h"
100 #include "pt.h"
101 #include "locking.h"
102 #include "mem/mem.h"
103 #include "mem/shm_mem.h"
104 #include "timer.h"
105 #include "sr_module.h"
106 #include "tcp_server.h"
107 #include "tcp_init.h"
108 #include "tsend.h"
109 #ifdef USE_TLS
110 #include "tls/tls_server.h"
111 #endif 
112
113 #define local_malloc pkg_malloc
114 #define local_free   pkg_free
115
116 #define HANDLE_IO_INLINE
117 #include "io_wait.h"
118 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
119
120 #define MAX_TCP_CHILDREN 100
121
122
123
/* tags for the kinds of file descriptors handled in this file; the
 * enumerators keep their implicit values 0..4 in declaration order */
enum fd_types {
        F_NONE,
        F_SOCKINFO,  /* a tcp_listen fd */
        F_TCPCONN,
        F_TCPCHILD,
        F_PROC
};
126
/* per-child bookkeeping record (one entry of tcp_children[]) */
struct tcp_child {
        pid_t pid;
        int proc_no;    /* ser proc_no, for debugging */
        int unix_sock;  /* unix "read child" sock fd */
        int busy;
        int n_reqs;     /* number of requests serviced so far */
};
134
135
136
137 int tcp_accept_aliases=0; /* by default don't accept aliases */
138 int tcp_connect_timeout=DEFAULT_TCP_CONNECT_TIMEOUT;
139 int tcp_send_timeout=DEFAULT_TCP_SEND_TIMEOUT;
140 int tcp_con_lifetime=DEFAULT_TCP_CONNECTION_LIFETIME;
141 enum poll_types tcp_poll_method=0; /* by default choose the best method */
142 int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
143 int tcp_max_fd_no=0;
144
145 static int tcp_connections_no=0; /* current open connections */
146
147 /* connection hash table (after ip&port) , includes also aliases */
148 struct tcp_conn_alias** tcpconn_aliases_hash=0;
149 /* connection hash table (after connection id) */
150 struct tcp_connection** tcpconn_id_hash=0;
151 gen_lock_t* tcpconn_lock=0;
152
153 struct tcp_child tcp_children[MAX_TCP_CHILDREN];
154 static int* connection_id=0; /*  unique for each connection, used for 
155                                                                 quickly finding the corresponding connection
156                                                                 for a reply */
157 int unix_tcp_sock;
158
159 static int tcp_proto_no=-1; /* tcp protocol number as returned by
160                                                            getprotobyname */
161
162 static io_wait_h io_h;
163
164
165
166 /* set all socket/fd options:  disable nagle, tos lowdelay, non-blocking
167  * return -1 on error */
168 static int init_sock_opt(int s)
169 {
170         int flags;
171         int optval;
172         
173 #ifdef DISABLE_NAGLE
174         flags=1;
175         if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
176                                         &flags, sizeof(flags))<0) ){
177                 LOG(L_WARN, "WARNING: init_sock_opt: could not disable Nagle: %s\n",
178                                 strerror(errno));
179         }
180 #endif
181         /* tos*/
182         optval = tos;
183         if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,sizeof(optval)) ==-1){
184                 LOG(L_WARN, "WARNING: init_sock_opt: setsockopt tos: %s\n",
185                                 strerror(errno));
186                 /* continue since this is not critical */
187         }
188         /* non-blocking */
189         flags=fcntl(s, F_GETFL);
190         if (flags==-1){
191                 LOG(L_ERR, "ERROR: init_sock_opt: fnctl failed: (%d) %s\n",
192                                 errno, strerror(errno));
193                 goto error;
194         }
195         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
196                 LOG(L_ERR, "ERROR: init_sock_opt: fcntl: set non-blocking failed:"
197                                 " (%d) %s\n", errno, strerror(errno));
198                 goto error;
199         }
200         return 0;
201 error:
202         return -1;
203 }
204
205
206
207 /* blocking connect on a non-blocking fd; it will timeout after
208  * tcp_connect_timeout 
209  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
210  * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
211  */
212 static int tcp_blocking_connect(int fd, const struct sockaddr *servaddr,
213                                                                 socklen_t addrlen)
214 {
215         int n;
216 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
217         fd_set sel_set;
218         fd_set orig_set;
219         struct timeval timeout;
220 #else
221         struct pollfd pf;
222 #endif
223         int elapsed;
224         int to;
225         int ticks;
226         int err;
227         unsigned int err_len;
228         int poll_err;
229         
230         poll_err=0;
231         to=tcp_connect_timeout;
232         ticks=get_ticks();
233 again:
234         n=connect(fd, servaddr, addrlen);
235         if (n==-1){
236                 if (errno==EINTR){
237                         elapsed=(get_ticks()-ticks)*TIMER_TICK;
238                         if (elapsed<to)         goto again;
239                         else goto error_timeout;
240                 }
241                 if (errno!=EINPROGRESS && errno!=EALREADY){
242                         LOG(L_ERR, "ERROR: tcp_blocking_connect: (%d) %s\n",
243                                         errno, strerror(errno));
244                         goto error;
245                 }
246         }else goto end;
247         
248         /* poll/select loop */
249 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
250                 FD_ZERO(&orig_set);
251                 FD_SET(fd, &orig_set);
252 #else
253                 pf.fd=fd;
254                 pf.events=POLLOUT;
255 #endif
256         while(1){
257                 elapsed=(get_ticks()-ticks)*TIMER_TICK;
258                 if (elapsed<to)
259                         to-=elapsed;
260                 else 
261                         goto error_timeout;
262 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
263                 sel_set=orig_set;
264                 timeout.tv_sec=to;
265                 timeout.tv_usec=0;
266                 n=select(fd+1, 0, &sel_set, 0, &timeout);
267 #else
268                 n=poll(&pf, 1, to*1000);
269 #endif
270                 if (n<0){
271                         if (errno==EINTR) continue;
272                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll/select failed:"
273                                         " (%d) %s\n", errno, strerror(errno));
274                         goto error;
275                 }else if (n==0) /* timeout */ continue;
276 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
277                 if (FD_ISSET(fd, &sel_set))
278 #else
279                 if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){ 
280                         LOG(L_ERR, "ERROR: tcp_blocking_connect: poll error: flags %x\n",
281                                         pf.revents);
282                         poll_err=1;
283                 }
284 #endif
285                 {
286                         err_len=sizeof(err);
287                         getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
288                         if ((err==0) && (poll_err==0)) goto end;
289                         if (err!=EINPROGRESS && err!=EALREADY){
290                                 LOG(L_ERR, "ERROR: tcp_blocking_connect: SO_ERROR (%d) %s\n",
291                                                 err, strerror(err));
292                                 goto error;
293                         }
294                 }
295         }
296 error_timeout:
297         /* timeout */
298         LOG(L_ERR, "ERROR: tcp_blocking_connect: timeout %d s elapsed from %d s\n",
299                         elapsed, tcp_connect_timeout);
300 error:
301         return -1;
302 end:
303         return 0;
304 }
305
306
307
#if 0
/* NOTE: disabled code (#if 0), never compiled; tcp_send() uses
 * tsend_stream() instead.
 *
 * Blocking write, even on non-blocking sockets: keeps sending until all
 * len bytes of buf have been written to fd.
 * Params: c   - connection (not used in the body)
 *         fd  - socket to write to
 *         buf - data, len - number of bytes to send
 * Returns the initial len on a complete write, -1 on a send()/select()
 * error or when a select() wait times out (see tcp_send_timeout below).
 */
static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
                                                                unsigned int len)
{
        int n;
        fd_set sel_set;
        struct timeval timeout;
        int ticks;
        int initial_len;
        
        initial_len=len; /* remembered because len shrinks on partial writes */
again:
        
        /* MSG_NOSIGNAL is passed only where the build says it is available */
        n=send(fd, buf, len,
#ifdef HAVE_MSG_NOSIGNAL
                        MSG_NOSIGNAL
#else
                        0
#endif
                );
        if (n<0){
                if (errno==EINTR)       goto again;
                else if (errno!=EAGAIN && errno!=EWOULDBLOCK){
                        LOG(L_ERR, "tcp_blocking_write: failed to send: (%d) %s\n",
                                        errno, strerror(errno));
                        goto error;
                }
                /* EAGAIN/EWOULDBLOCK: fall through and wait for writability */
        }else if (n<len){
                /* partial write */
                buf+=n;
                len-=n;
        }else{
                /* success: full write */
                goto end;
        }
        /* wait until the fd becomes writable again, then retry the send */
        while(1){
                FD_ZERO(&sel_set);
                FD_SET(fd, &sel_set);
                timeout.tv_sec=tcp_send_timeout;
                timeout.tv_usec=0;
                ticks=get_ticks(); /* restarted on every pass of this loop */
                n=select(fd+1, 0, &sel_set, 0, &timeout);
                if (n<0){
                        if (errno==EINTR) continue; /* signal, ignore */
                        LOG(L_ERR, "ERROR: tcp_blocking_write: select failed: "
                                        " (%d) %s\n", errno, strerror(errno));
                        goto error;
                }else if (n==0){
                        /* timeout */
                        /* NOTE(review): compares a tick difference against
                         * tcp_send_timeout, which is used as seconds for tv_sec
                         * above - confirm the units match */
                        if (get_ticks()-ticks>=tcp_send_timeout){
                                LOG(L_ERR, "ERROR: tcp_blocking_write: send timeout (%d)\n",
                                                tcp_send_timeout);
                                goto error;
                        }
                        continue;
                }
                if (FD_ISSET(fd, &sel_set)){
                        /* we can write again */
                        goto again;
                }
        }
error:
                return -1;
end:
                return initial_len;
}
#endif
377
378
379
380 struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
381                                                                         struct socket_info* ba, int type, 
382                                                                         int state)
383 {
384         struct tcp_connection *c;
385         
386         c=(struct tcp_connection*)shm_malloc(sizeof(struct tcp_connection));
387         if (c==0){
388                 LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
389                 goto error;
390         }
391         memset(c, 0, sizeof(struct tcp_connection)); /* zero init */
392         c->s=sock;
393         c->fd=-1; /* not initialized */
394         if (lock_init(&c->write_lock)==0){
395                 LOG(L_ERR, "ERROR: tcpconn_new: init lock failed\n");
396                 goto error;
397         }
398         
399         c->rcv.src_su=*su;
400         
401         c->refcnt=0;
402         su2ip_addr(&c->rcv.src_ip, su);
403         c->rcv.src_port=su_getport(su);
404         c->rcv.bind_address=ba;
405         if (ba){
406                 c->rcv.dst_ip=ba->address;
407                 c->rcv.dst_port=ba->port_no;
408         }
409         print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
410         DBG(     "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
411         init_tcp_req(&c->req);
412         c->id=(*connection_id)++;
413         c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
414         c->rcv.proto_reserved2=0;
415         c->state=state;
416         c->extra_data=0;
417 #ifdef USE_TLS
418         if (type==PROTO_TLS){
419                 if (tls_tcpconn_init(c, sock)==-1) goto error;
420         }else
421 #endif /* USE_TLS*/
422         {
423                 c->type=PROTO_TCP;
424                 c->rcv.proto=PROTO_TCP;
425                 c->timeout=get_ticks()+tcp_con_lifetime;
426         }
427         c->flags|=F_CONN_REMOVED;
428         
429         tcp_connections_no++;
430         return c;
431         
432 error:
433         if (c) shm_free(c);
434         return 0;
435 }
436
437
438
439 struct tcp_connection* tcpconn_connect(union sockaddr_union* server, int type)
440 {
441         int s;
442         struct socket_info* si;
443         union sockaddr_union my_name;
444         socklen_t my_name_len;
445         struct tcp_connection* con;
446         struct ip_addr ip;
447
448         s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
449         if (s==-1){
450                 LOG(L_ERR, "ERROR: tcpconn_connect: socket: (%d) %s\n",
451                                 errno, strerror(errno));
452                 goto error;
453         }
454         if (init_sock_opt(s)<0){
455                 LOG(L_ERR, "ERROR: tcpconn_connect: init_sock_opt failed\n");
456                 goto error;
457         }
458         if (tcp_blocking_connect(s, &server->s, sockaddru_len(*server))<0){
459                 LOG(L_ERR, "ERROR: tcpconn_connect: tcp_blocking_connect failed\n");
460                 goto error;
461         }
462         my_name_len=sizeof(my_name);
463         if (getsockname(s, &my_name.s, &my_name_len)!=0){
464                 LOG(L_ERR, "ERROR: tcp_connect: getsockname failed: %s(%d)\n",
465                                 strerror(errno), errno);
466                 si=0; /* try to go on */
467         }
468         su2ip_addr(&ip, &my_name);
469 #ifdef USE_TLS
470         if (type==PROTO_TLS)
471                 si=find_si(&ip, 0, PROTO_TLS);
472         else
473 #endif
474                 si=find_si(&ip, 0, PROTO_TCP);
475
476         if (si==0){
477                 LOG(L_ERR, "ERROR: tcp_connect: could not find corresponding"
478                                 " listening socket, using default...\n");
479                 if (server->s.sa_family==AF_INET) si=sendipv4_tcp;
480 #ifdef USE_IPV6
481                 else si=sendipv6_tcp;
482 #endif
483         }
484         con=tcpconn_new(s, server, si, type, S_CONN_CONNECT);
485         if (con==0){
486                 LOG(L_ERR, "ERROR: tcp_connect: tcpconn_new failed, closing the "
487                                  " socket\n");
488                 goto error;
489         }
490         return con;
491         /*FIXME: set sock idx! */
492 error:
493         if (s!=-1) close(s); /* close the opened socket */
494         return 0;
495 }
496
497
498
499 struct tcp_connection*  tcpconn_add(struct tcp_connection *c)
500 {
501         unsigned hash;
502
503         if (c){
504                 TCPCONN_LOCK;
505                 /* add it at the begining of the list*/
506                 hash=tcp_id_hash(c->id);
507                 c->id_hash=hash;
508                 tcpconn_listadd(tcpconn_id_hash[hash], c, id_next, id_prev);
509                 
510                 hash=tcp_addr_hash(&c->rcv.src_ip, c->rcv.src_port);
511                 /* set the first alias */
512                 c->con_aliases[0].port=c->rcv.src_port;
513                 c->con_aliases[0].hash=hash;
514                 c->con_aliases[0].parent=c;
515                 tcpconn_listadd(tcpconn_aliases_hash[hash], &c->con_aliases[0],
516                                                 next, prev);
517                 c->aliases++;
518                 TCPCONN_UNLOCK;
519                 DBG("tcpconn_add: hashes: %d, %d\n", hash, c->id_hash);
520                 return c;
521         }else{
522                 LOG(L_CRIT, "tcpconn_add: BUG: null connection pointer\n");
523                 return 0;
524         }
525 }
526
527
528 /* unsafe tcpconn_rm version (nolocks) */
529 void _tcpconn_rm(struct tcp_connection* c)
530 {
531         int r;
532         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
533         /* remove all the aliases */
534         for (r=0; r<c->aliases; r++)
535                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
536                                                 &c->con_aliases[r], next, prev);
537         lock_destroy(&c->write_lock);
538 #ifdef USE_TLS
539         if (c->type==PROTO_TLS) tls_tcpconn_clean(c);
540 #endif
541         shm_free(c);
542 }
543
544
545
546 void tcpconn_rm(struct tcp_connection* c)
547 {
548         int r;
549         TCPCONN_LOCK;
550         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
551         /* remove all the aliases */
552         for (r=0; r<c->aliases; r++)
553                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
554                                                 &c->con_aliases[r], next, prev);
555         TCPCONN_UNLOCK;
556         lock_destroy(&c->write_lock);
557 #ifdef USE_TLS
558         if ((c->type==PROTO_TLS)&&(c->extra_data)) tls_tcpconn_clean(c);
559 #endif
560         shm_free(c);
561 }
562
563
564 /* finds a connection, if id=0 uses the ip addr & port (host byte order)
565  * WARNING: unprotected (locks) use tcpconn_get unless you really
566  * know what you are doing */
567 struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port)
568 {
569
570         struct tcp_connection *c;
571         struct tcp_conn_alias* a;
572         unsigned hash;
573         
574 #ifdef EXTRA_DEBUG
575         DBG("tcpconn_find: %d  port %d\n",id, port);
576         if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
577 #endif
578         if (id){
579                 hash=tcp_id_hash(id);
580                 for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
581 #ifdef EXTRA_DEBUG
582                         DBG("c=%p, c->id=%d, port=%d\n",c, c->id, c->rcv.src_port);
583                         print_ip("ip=", &c->rcv.src_ip, "\n");
584 #endif
585                         if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
586                 }
587         }else if (ip){
588                 hash=tcp_addr_hash(ip, port);
589                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
590 #ifdef EXTRA_DEBUG
591                         DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
592                                         a->parent->id, a->port, a->parent->rcv.src_port);
593                         print_ip("ip=",&a->parent->rcv.src_ip,"\n");
594 #endif
595                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
596                                         (ip_addr_cmp(ip, &a->parent->rcv.src_ip)) )
597                                 return a->parent;
598                 }
599         }
600         return 0;
601 }
602
603
604
605 /* _tcpconn_find with locks and timeout */
606 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
607                                                                         int timeout)
608 {
609         struct tcp_connection* c;
610         TCPCONN_LOCK;
611         c=_tcpconn_find(id, ip, port);
612         if (c){ 
613                         c->refcnt++;
614                         c->timeout=get_ticks()+timeout;
615         }
616         TCPCONN_UNLOCK;
617         return c;
618 }
619
620
621
622 /* add port as an alias for the "id" connection
623  * returns 0 on success,-1 on failure */
624 int tcpconn_add_alias(int id, int port, int proto)
625 {
626         struct tcp_connection* c;
627         unsigned hash;
628         struct tcp_conn_alias* a;
629         
630         a=0;
631         /* fix the port */
632         port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
633         TCPCONN_LOCK;
634         /* check if alias already exists */
635         c=_tcpconn_find(id, 0, 0);
636         if (c){
637                 hash=tcp_addr_hash(&c->rcv.src_ip, port);
638                 /* search the aliases for an already existing one */
639                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
640                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
641                                         (ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) ){
642                                 /* found */
643                                 if (a->parent!=c) goto error_sec;
644                                 else goto ok;
645                         }
646                 }
647                 if (c->aliases>=TCP_CON_MAX_ALIASES) goto error_aliases;
648                 c->con_aliases[c->aliases].parent=c;
649                 c->con_aliases[c->aliases].port=port;
650                 c->con_aliases[c->aliases].hash=hash;
651                 tcpconn_listadd(tcpconn_aliases_hash[hash], 
652                                                                 &c->con_aliases[c->aliases], next, prev);
653                 c->aliases++;
654         }else goto error_not_found;
655 ok:
656         TCPCONN_UNLOCK;
657 #ifdef EXTRA_DEBUG
658         if (a) DBG("tcpconn_add_alias: alias already present\n");
659         else   DBG("tcpconn_add_alias: alias port %d for hash %d, id %d\n",
660                         port, hash, c->id);
661 #endif
662         return 0;
663 error_aliases:
664         TCPCONN_UNLOCK;
665         LOG(L_ERR, "ERROR: tcpconn_add_alias: too many aliases for connection %p"
666                                 " (%d)\n", c, c->id);
667         return -1;
668 error_not_found:
669         TCPCONN_UNLOCK;
670         LOG(L_ERR, "ERROR: tcpconn_add_alias: no connection found for id %d\n",id);
671         return -1;
672 error_sec:
673         TCPCONN_UNLOCK;
674         LOG(L_ERR, "ERROR: tcpconn_add_alias: possible port hijack attempt\n");
675         LOG(L_ERR, "ERROR: tcpconn_add_alias: alias already present and points"
676                         " to another connection (%d : %d and %d : %d)\n",
677                         a->parent->id,  port, c->id, port);
678         return -1;
679 }
680
681
682
683 void tcpconn_ref(struct tcp_connection* c)
684 {
685         TCPCONN_LOCK;
686         c->refcnt++; /* FIXME: atomic_dec */
687         TCPCONN_UNLOCK;
688 }
689
690
691
692 void tcpconn_put(struct tcp_connection* c)
693 {
694         TCPCONN_LOCK;
695         c->refcnt--; /* FIXME: atomic_dec */
696         TCPCONN_UNLOCK;
697 }
698
699
700
701 /* finds a tcpconn & sends on it */
702 int tcp_send(int type, char* buf, unsigned len, union sockaddr_union* to,
703                                 int id)
704 {
705         struct tcp_connection *c;
706         struct tcp_connection *tmp;
707         struct ip_addr ip;
708         int port;
709         int fd;
710         long response[2];
711         int n;
712         
713         port=0;
714         if (to){
715                 su2ip_addr(&ip, to);
716                 port=su_getport(to);
717                 c=tcpconn_get(id, &ip, port, tcp_con_lifetime); 
718         }else if (id){
719                 c=tcpconn_get(id, 0, 0, tcp_con_lifetime);
720         }else{
721                 LOG(L_CRIT, "BUG: tcp_send called with null id & to\n");
722                 return -1;
723         }
724         
725         if (id){
726                 if (c==0) {
727                         if (to){
728                                 /* try again w/o id */
729                                 c=tcpconn_get(0, &ip, port, tcp_con_lifetime);
730                                 goto no_id;
731                         }else{
732                                 LOG(L_ERR, "ERROR: tcp_send: id %d not found, dropping\n",
733                                                 id);
734                                 return -1;
735                         }
736                 }else goto get_fd;
737         }
738 no_id:
739                 if (c==0){
740                         DBG("tcp_send: no open tcp connection found, opening new one\n");
741                         /* create tcp connection */
742                         if ((c=tcpconn_connect(to, type))==0){
743                                 LOG(L_ERR, "ERROR: tcp_send: connect failed\n");
744                                 return -1;
745                         }
746                         c->refcnt++; /* safe to do it w/o locking, it's not yet
747                                                         available to the rest of the world */
748                         fd=c->s;
749                         
750                         /* send the new tcpconn to "tcp main" */
751                         response[0]=(long)c;
752                         response[1]=CONN_NEW;
753                         n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
754                         if (n<=0){
755                                 LOG(L_ERR, "BUG: tcp_send: failed send_fd: %s (%d)\n",
756                                                 strerror(errno), errno);
757                                 n=-1;
758                                 goto end;
759                         }       
760                         goto send_it;
761                 }
762 get_fd:
763                         /* todo: see if this is not the same process holding
764                          *  c  and if so send directly on c->fd */
765                         DBG("tcp_send: tcp connection found (%p), acquiring fd\n", c);
766                         /* get the fd */
767                         response[0]=(long)c;
768                         response[1]=CONN_GET_FD;
769                         n=send_all(unix_tcp_sock, response, sizeof(response));
770                         if (n<=0){
771                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(write):%s (%d)\n",
772                                                 strerror(errno), errno);
773                                 n=-1;
774                                 goto release_c;
775                         }
776                         DBG("tcp_send, c= %p, n=%d\n", c, n);
777                         tmp=c;
778                         n=receive_fd(unix_tcp_sock, &c, sizeof(c), &fd, MSG_WAITALL);
779                         if (n<=0){
780                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(receive_fd):"
781                                                         " %s (%d)\n", strerror(errno), errno);
782                                 n=-1;
783                                 goto release_c;
784                         }
785                         if (c!=tmp){
786                                 LOG(L_CRIT, "BUG: tcp_send: get_fd: got different connection:"
787                                                 "  %p (id= %d, refcnt=%d state=%d != "
788                                                 "  %p (id= %d, refcnt=%d state=%d (n=%d)\n",
789                                                   c,   c->id,   c->refcnt,   c->state,
790                                                   tmp, tmp->id, tmp->refcnt, tmp->state, n
791                                    );
792                                 n=-1; /* fail */
793                                 goto end;
794                         }
795                         DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
796                 
797         
798         
799 send_it:
800         DBG("tcp_send: sending...\n");
801         lock_get(&c->write_lock);
802 #ifdef USE_TLS
803         if (c->type==PROTO_TLS)
804                 n=tls_blocking_write(c, fd, buf, len);
805         else
806 #endif
807                 /* n=tcp_blocking_write(c, fd, buf, len); */
808                 n=tsend_stream(fd, buf, len, tcp_send_timeout*1000); 
809         lock_release(&c->write_lock);
810         DBG("tcp_send: after write: c= %p n=%d fd=%d\n",c, n, fd);
811         DBG("tcp_send: buf=\n%.*s\n", (int)len, buf);
812         if (n<0){
813                 LOG(L_ERR, "ERROR: tcp_send: failed to send\n");
814                 /* error on the connection , mark it as bad and set 0 timeout */
815                 c->state=S_CONN_BAD;
816                 c->timeout=0;
817                 /* tell "main" it should drop this (optional it will t/o anyway?)*/
818                 response[0]=(long)c;
819                 response[1]=CONN_ERROR;
820                 n=send_all(unix_tcp_sock, response, sizeof(response));
821                 /* CONN_ERROR will auto-dec refcnt => we must not call tcpconn_put !!*/
822                 if (n<=0){
823                         LOG(L_ERR, "BUG: tcp_send: error return failed (write):%s (%d)\n",
824                                         strerror(errno), errno);
825                         n=-1;
826                 }
827                 close(fd);
828                 return n; /* error return, no tcpconn_put */
829         }
830 end:
831         close(fd);
832 release_c:
833         tcpconn_put(c); /* release c (lock; dec refcnt; unlock) */
834         return n;
835 }
836
837
838
839 int tcp_init(struct socket_info* sock_info)
840 {
841         union sockaddr_union* addr;
842         int optval;
843 #ifdef DISABLE_NAGLE
844         int flag;
845         struct protoent* pe;
846
847         if (tcp_proto_no==-1){ /* if not already set */
848                 pe=getprotobyname("tcp");
849                 if (pe==0){
850                         LOG(L_ERR, "ERROR: tcp_init: could not get TCP protocol number\n");
851                         tcp_proto_no=-1;
852                 }else{
853                         tcp_proto_no=pe->p_proto;
854                 }
855         }
856 #endif
857         
858         addr=&sock_info->su;
859         /* sock_info->proto=PROTO_TCP; */
860         if (init_su(addr, &sock_info->address, sock_info->port_no)<0){
861                 LOG(L_ERR, "ERROR: tcp_init: could no init sockaddr_union\n");
862                 goto error;
863         }
864         sock_info->socket=socket(AF2PF(addr->s.sa_family), SOCK_STREAM, 0);
865         if (sock_info->socket==-1){
866                 LOG(L_ERR, "ERROR: tcp_init: socket: %s\n", strerror(errno));
867                 goto error;
868         }
869 #ifdef DISABLE_NAGLE
870         flag=1;
871         if ( (tcp_proto_no!=-1) &&
872                  (setsockopt(sock_info->socket, tcp_proto_no , TCP_NODELAY,
873                                          &flag, sizeof(flag))<0) ){
874                 LOG(L_ERR, "ERROR: tcp_init: could not disable Nagle: %s\n",
875                                 strerror(errno));
876         }
877 #endif
878
879
880 #if  !defined(TCP_DONT_REUSEADDR) 
881         /* Stevens, "Network Programming", Section 7.5, "Generic Socket
882      * Options": "...server started,..a child continues..on existing
883          * connection..listening server is restarted...call to bind fails
884          * ... ALL TCP servers should specify the SO_REUSEADDRE option 
885          * to allow the server to be restarted in this situation
886          *
887          * Indeed, without this option, the server can't restart.
888          *   -jiri
889          */
890         optval=1;
891         if (setsockopt(sock_info->socket, SOL_SOCKET, SO_REUSEADDR,
892                                 (void*)&optval, sizeof(optval))==-1) {
893                 LOG(L_ERR, "ERROR: tcp_init: setsockopt %s\n",
894                         strerror(errno));
895                 goto error;
896         }
897 #endif
898         /* tos */
899         optval = tos;
900         if (setsockopt(sock_info->socket, IPPROTO_IP, IP_TOS, (void*)&optval, 
901                                 sizeof(optval)) ==-1){
902                 LOG(L_WARN, "WARNING: tcp_init: setsockopt tos: %s\n", strerror(errno));
903                 /* continue since this is not critical */
904         }
905         if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
906                 LOG(L_ERR, "ERROR: tcp_init: bind(%x, %p, %d) on %s:%d : %s\n",
907                                 sock_info->socket,  &addr->s, 
908                                 (unsigned)sockaddru_len(*addr),
909                                 sock_info->address_str.s,
910                                 sock_info->port_no,
911                                 strerror(errno));
912                 goto error;
913         }
914         if (listen(sock_info->socket, 10)==-1){
915                 LOG(L_ERR, "ERROR: tcp_init: listen(%x, %p, %d) on %s: %s\n",
916                                 sock_info->socket, &addr->s, 
917                                 (unsigned)sockaddru_len(*addr),
918                                 sock_info->address_str.s,
919                                 strerror(errno));
920                 goto error;
921         }
922         
923         return 0;
924 error:
925         if (sock_info->socket!=-1){
926                 close(sock_info->socket);
927                 sock_info->socket=-1;
928         }
929         return -1;
930 }
931
932
933
934 static int send2child(struct tcp_connection* tcpconn)
935 {
936         int i;
937         int min_busy;
938         int idx;
939         
940         min_busy=tcp_children[0].busy;
941         idx=0;
942         for (i=0; i<tcp_children_no; i++){
943                 if (!tcp_children[i].busy){
944                         idx=i;
945                         min_busy=0;
946                         break;
947                 }else if (min_busy>tcp_children[i].busy){
948                         min_busy=tcp_children[i].busy;
949                         idx=i;
950                 }
951         }
952         
953         tcp_children[idx].busy++;
954         tcp_children[idx].n_reqs++;
955         if (min_busy){
956                 DBG("WARNING: send2child: no free tcp receiver, "
957                                 " connection passed to the least busy one (%d)\n",
958                                 min_busy);
959         }
960         DBG("send2child: to tcp child %d %d(%d), %p\n", idx, 
961                                         tcp_children[idx].proc_no,
962                                         tcp_children[idx].pid, tcpconn);
963         if (send_fd(tcp_children[idx].unix_sock, &tcpconn, sizeof(tcpconn),
964                         tcpconn->s)<=0){
965                 LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
966                 return -1;
967         }
968         
969         return 0;
970 }
971
972
973 /* handles a new connection, called internally by tcp_main_loop/handle_io.
974  * params: si - pointer to one of the tcp socket_info structures on which
975  *              an io event was detected (connection attempt)
976  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
977  *           io events queued), >0 on success. success/error refer only to
978  *           the accept.
979  */
980 static inline int handle_new_connect(struct socket_info* si)
981 {
982         union sockaddr_union su;
983         struct tcp_connection* tcpconn;
984         socklen_t su_len;
985         int new_sock;
986         
987         /* got a connection on r */
988         su_len=sizeof(su);
989         new_sock=accept(si->socket, &(su.s), &su_len);
990         if (new_sock==-1){
991                 if ((errno==EAGAIN)||(errno==EWOULDBLOCK))
992                         return 0;
993                 LOG(L_ERR,  "WARNING: handle_new_connect: error while accepting"
994                                 " connection(%d): %s\n", errno, strerror(errno));
995                 return -1;
996         }
997         if (tcp_connections_no>=tcp_max_connections){
998                 LOG(L_ERR, "ERROR: maximum number of connections exceeded: %d/%d\n",
999                                         tcp_connections_no, tcp_max_connections);
1000                 close(new_sock);
1001                 return 1; /* success, because the accept was succesfull */
1002         }
1003         if (init_sock_opt(new_sock)<0){
1004                 LOG(L_ERR, "ERROR: handle_new_connect: init_sock_opt failed\n");
1005                 close(new_sock);
1006                 return 1; /* success, because the accept was succesfull */
1007         }
1008         
1009         /* add socket to list */
1010         tcpconn=tcpconn_new(new_sock, &su, si, si->proto, S_CONN_ACCEPT);
1011         if (tcpconn){
1012                 tcpconn->refcnt++; /* safe, not yet available to the
1013                                                           outside world */
1014                 tcpconn_add(tcpconn);
1015                 DBG("handle_new_connect: new connection: %p %d flags: %04x\n",
1016                         tcpconn, tcpconn->s, tcpconn->flags);
1017                 /* pass it to a child */
1018                 if(send2child(tcpconn)<0){
1019                         LOG(L_ERR,"ERROR: handle_new_connect: no children "
1020                                         "available\n");
1021                         TCPCONN_LOCK;
1022                         tcpconn->refcnt--;
1023                         if (tcpconn->refcnt==0){
1024                                 close(tcpconn->s);
1025                                 _tcpconn_rm(tcpconn);
1026                         }else tcpconn->timeout=0; /* force expire */
1027                         TCPCONN_UNLOCK;
1028                 }
1029         }else{ /*tcpconn==0 */
1030                 LOG(L_ERR, "ERROR: handle_new_connect: tcpconn_new failed, "
1031                                 "closing socket\n");
1032                 close(new_sock);
1033                 
1034         }
1035         return 1; /* accept() was succesfull */
1036 }
1037
1038
1039
1040 /* used internally by tcp_main_loop() */
1041 static void tcpconn_destroy(struct tcp_connection* tcpconn)
1042 {
1043         int fd;
1044
1045         TCPCONN_LOCK; /*avoid races w/ tcp_send*/
1046         tcpconn->refcnt--;
1047         if (tcpconn->refcnt==0){ 
1048                 DBG("tcpconn_destroy: destroying connection %p, flags %04x\n",
1049                                 tcpconn, tcpconn->flags);
1050                 fd=tcpconn->s;
1051 #ifdef USE_TLS
1052                 /*FIXME: lock ->writelock ? */
1053                 if (tcpconn->type==PROTO_TLS)
1054                         tls_close(tcpconn, fd);
1055 #endif
1056                 _tcpconn_rm(tcpconn);
1057                 close(fd);
1058                 tcp_connections_no--;
1059         }else{
1060                 /* force timeout */
1061                 tcpconn->timeout=0;
1062                 tcpconn->state=S_CONN_BAD;
1063                 DBG("tcpconn_destroy: delaying (%p, flags %04x) ...\n",
1064                                 tcpconn, tcpconn->flags);
1065                 
1066         }
1067         TCPCONN_UNLOCK;
1068 }
1069
1070
1071
1072 /* handles an io event on one of the watched tcp connections
1073  * 
1074  * params: tcpconn - pointer to the tcp_connection for which we have an io ev.
1075  *         fd_i    - index in the fd_array table (needed for delete)
1076  * returns:  handle_* return convention, but on success it always returns 0
1077  *           (because it's one-shot, after a succesfull execution the fd is
1078  *            removed from tcp_main's watch fd list and passed to a child =>
1079  *            tcp_main is not interested in further io events that might be
1080  *            queued for this fd)
1081  */
1082 inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, int fd_i)
1083 {
1084         int fd;
1085         
1086         /*  is refcnt!=0 really necessary? 
1087          *  No, in fact it's a bug: I can have the following situation: a send only
1088          *   tcp connection used by n processes simultaneously => refcnt = n. In 
1089          *   the same time I can have a read event and this situation is perfectly
1090          *   valid. -- andrei
1091          */
1092 #if 0
1093         if ((tcpconn->refcnt!=0)){
1094                 /* FIXME: might be valid for sigio_rt iff fd flags are not cleared
1095                  *        (there is a short window in which it could generate a sig
1096                  *         that would be catched by tcp_main) */
1097                 LOG(L_CRIT, "BUG: handle_tcpconn_ev: io event on referenced"
1098                                         " tcpconn (%p), refcnt=%d, fd=%d\n",
1099                                         tcpconn, tcpconn->refcnt, tcpconn->s);
1100                 return -1;
1101         }
1102 #endif
1103         /* pass it to child, so remove it from the io watch list */
1104         DBG("handle_tcpconn_ev: data available on %p %d\n", tcpconn, tcpconn->s);
1105         if (io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1) goto error;
1106         tcpconn->flags|=F_CONN_REMOVED;
1107         tcpconn_ref(tcpconn); /* refcnt ++ */
1108         if (send2child(tcpconn)<0){
1109                 LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n");
1110                 TCPCONN_LOCK;
1111                 tcpconn->refcnt--;
1112                 if (tcpconn->refcnt==0){
1113                         fd=tcpconn->s;
1114                         _tcpconn_rm(tcpconn);
1115                         close(fd);
1116                 }else tcpconn->timeout=0; /* force expire*/
1117                 TCPCONN_UNLOCK;
1118         }
1119         return 0; /* we are not interested in possibly queued io events, 
1120                                  the fd was either passed to a child, or closed */
1121 error:
1122         return -1;
1123 }
1124
1125
1126
1127 /* handles io from a tcp child process
1128  * params: tcp_c - pointer in the tcp_children array, to the entry for
1129  *                 which an io event was detected 
1130  *         fd_i  - fd index in the fd_array (usefull for optimizing
1131  *                 io_watch_deletes)
1132  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
1133  *           io events queued), >0 on success. success/error refer only to
1134  *           the reads from the fd.
1135  */
1136 inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
1137 {
1138         struct tcp_connection* tcpconn;
1139         long response[2];
1140         int cmd;
1141         int bytes;
1142         
1143         if (tcp_c->unix_sock<=0){
1144                 /* (we can't have a fd==0, 0 is never closed )*/
1145                 LOG(L_CRIT, "BUG: handle_tcp_child: fd %d for %d "
1146                                 "(pid %d, ser no %d)\n", tcp_c->unix_sock,
1147                                 (int)(tcp_c-&tcp_children[0]), tcp_c->pid, tcp_c->proc_no);
1148                 goto error;
1149         }
1150         /* read until sizeof(response)
1151          * (this is a SOCK_STREAM so read is not atomic) */
1152         bytes=recv_all(tcp_c->unix_sock, response, sizeof(response), MSG_DONTWAIT);
1153         if (bytes<(int)sizeof(response)){
1154                 if (bytes==0){
1155                         /* EOF -> bad, child has died */
1156                         DBG("DBG: handle_tcp_child: dead tcp child %d (pid %d, no %d)"
1157                                         " (shutting down?)\n", (int)(tcp_c-&tcp_children[0]), 
1158                                         tcp_c->pid, tcp_c->proc_no );
1159                         /* don't listen on it any more */
1160                         io_watch_del(&io_h, tcp_c->unix_sock, fd_i, 0); 
1161                         goto error; /* eof. so no more io here, it's ok to return error */
1162                 }else if (bytes<0){
1163                         /* EAGAIN is ok if we try to empty the buffer
1164                          * e.g.: SIGIO_RT overflow mode or EPOLL ET */
1165                         if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
1166                                 LOG(L_CRIT, "ERROR: handle_tcp_child: read from tcp child %ld "
1167                                                 " (pid %d, no %d) %s [%d]\n",
1168                                                 (long)(tcp_c-&tcp_children[0]), tcp_c->pid,
1169                                                 tcp_c->proc_no, strerror(errno), errno );
1170                         }else{
1171                                 bytes=0;
1172                         }
1173                         /* try to ignore ? */
1174                         goto end;
1175                 }else{
1176                         /* should never happen */
1177                         LOG(L_CRIT, "BUG: handle_tcp_child: too few bytes received (%d)\n",
1178                                         bytes );
1179                         bytes=0; /* something was read so there is no error; otoh if
1180                                           receive_fd returned less then requested => the receive
1181                                           buffer is empty => no more io queued on this fd */
1182                         goto end;
1183                 }
1184         }
1185         
1186         DBG("handle_tcp_child: reader response= %lx, %ld from %d \n",
1187                                         response[0], response[1], (int)(tcp_c-&tcp_children[0]));
1188         cmd=response[1];
1189         tcpconn=(struct tcp_connection*)response[0];
1190         if (tcpconn==0){
1191                 /* should never happen */
1192                 LOG(L_CRIT, "BUG: handle_tcp_child: null tcpconn pointer received"
1193                                  " from tcp child %d (pid %d): %lx, %lx\n",
1194                                         (int)(tcp_c-&tcp_children[0]), tcp_c->pid,
1195                                         response[0], response[1]) ;
1196                 goto end;
1197         }
1198         switch(cmd){
1199                 case CONN_RELEASE:
1200                         tcp_c->busy--;
1201                         if (tcpconn->state==S_CONN_BAD){ 
1202                                 tcpconn_destroy(tcpconn);
1203                                 break;
1204                         }
1205                         /* update the timeout*/
1206                         tcpconn->timeout=get_ticks()+tcp_con_lifetime;
1207                         tcpconn_put(tcpconn);
1208                         /* must be after the de-ref*/
1209                         io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1210                         tcpconn->flags&=~F_CONN_REMOVED;
1211                         DBG("handle_tcp_child: CONN_RELEASE  %p refcnt= %d\n", 
1212                                                                                         tcpconn, tcpconn->refcnt);
1213                         break;
1214                 case CONN_ERROR:
1215                 case CONN_DESTROY:
1216                 case CONN_EOF:
1217                         /* WARNING: this will auto-dec. refcnt! */
1218                                 tcp_c->busy--;
1219                                 /* main doesn't listen on it => we don't have to delete it
1220                                  if (tcpconn->s!=-1)
1221                                         io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
1222                                 */
1223                                 tcpconn_destroy(tcpconn); /* closes also the fd */
1224                                 break;
1225                 default:
1226                                 LOG(L_CRIT, "BUG: handle_tcp_child:  unknown cmd %d"
1227                                                                         " from tcp reader %d\n",
1228                                                                         cmd, (int)(tcp_c-&tcp_children[0]));
1229         }
1230 end:
1231         return bytes;
1232 error:
1233         return -1;
1234 }
1235
1236
1237
1238 /* handles io from a "generic" ser process (get fd or new_fd from a tcp_send)
1239  * 
1240  * params: p     - pointer in the ser processes array (pt[]), to the entry for
1241  *                 which an io event was detected
1242  *         fd_i  - fd index in the fd_array (usefull for optimizing
1243  *                 io_watch_deletes)
1244  * returns:  handle_* return convention:
1245  *          -1 on error reading from the fd,
1246  *           0 on EAGAIN  or when no  more io events are queued 
1247  *             (receive buffer empty),
1248  *           >0 on successfull reads from the fd (the receive buffer might
1249  *             be non-empty).
1250  */
1251 inline static int handle_ser_child(struct process_table* p, int fd_i)
1252 {
1253         struct tcp_connection* tcpconn;
1254         long response[2];
1255         int cmd;
1256         int bytes;
1257         int ret;
1258         int fd;
1259         
1260         ret=-1;
1261         if (p->unix_sock<=0){
1262                 /* (we can't have a fd==0, 0 is never closed )*/
1263                 LOG(L_CRIT, "BUG: handle_ser_child: fd %d for %d "
1264                                 "(pid %d)\n", p->unix_sock, (int)(p-&pt[0]), p->pid);
1265                 goto error;
1266         }
1267                         
1268         /* get all bytes and the fd (if transmitted)
1269          * (this is a SOCK_STREAM so read is not atomic) */
1270         bytes=receive_fd(p->unix_sock, response, sizeof(response), &fd,
1271                                                 MSG_DONTWAIT);
1272         if (bytes<(int)sizeof(response)){
1273                 /* too few bytes read */
1274                 if (bytes==0){
1275                         /* EOF -> bad, child has died */
1276                         DBG("DBG: handle_ser_child: dead child %d, pid %d"
1277                                         " (shutting down?)\n", (int)(p-&pt[0]), p->pid);
1278                         /* don't listen on it any more */
1279                         io_watch_del(&io_h, p->unix_sock, fd_i, 0);
1280                         goto error; /* child dead => no further io events from it */
1281                 }else if (bytes<0){
1282                         /* EAGAIN is ok if we try to empty the buffer
1283                          * e.g: SIGIO_RT overflow mode or EPOLL ET */
1284                         if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
1285                                 LOG(L_CRIT, "ERROR: handle_ser_child: read from child %d  "
1286                                                 "(pid %d):  %s [%d]\n", (int)(p-&pt[0]), p->pid,
1287                                                 strerror(errno), errno);
1288                                 ret=-1;
1289                         }else{
1290                                 ret=0;
1291                         }
1292                         /* try to ignore ? */
1293                         goto end;
1294                 }else{
1295                         /* should never happen */
1296                         LOG(L_CRIT, "BUG: handle_ser_child: too few bytes received (%d)\n",
1297                                         bytes );
1298                         ret=0; /* something was read so there is no error; otoh if
1299                                           receive_fd returned less then requested => the receive
1300                                           buffer is empty => no more io queued on this fd */
1301                         goto end;
1302                 }
1303         }
1304         ret=1; /* something was received, there might be more queued */
1305         DBG("handle_ser_child: read response= %lx, %ld, fd %d from %d (%d)\n",
1306                                         response[0], response[1], fd, (int)(p-&pt[0]), p->pid);
1307         cmd=response[1];
1308         tcpconn=(struct tcp_connection*)response[0];
1309         if (tcpconn==0){
1310                 LOG(L_CRIT, "BUG: handle_ser_child: null tcpconn pointer received"
1311                                  " from child %d (pid %d): %lx, %lx\n",
1312                                         (int)(p-&pt[0]), p->pid, response[0], response[1]) ;
1313                 goto end;
1314         }
1315         switch(cmd){
1316                 case CONN_ERROR:
1317                         if (!(tcpconn->flags & F_CONN_REMOVED) && (tcpconn->s!=-1)){
1318                                 io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
1319                                 tcpconn->flags|=F_CONN_REMOVED;
1320                         }
1321                         tcpconn_destroy(tcpconn); /* will close also the fd */
1322                         break;
1323                 case CONN_GET_FD:
1324                         /* send the requested FD  */
1325                         /* WARNING: take care of setting refcnt properly to
1326                          * avoid race condition */
1327                         if (send_fd(p->unix_sock, &tcpconn, sizeof(tcpconn),
1328                                                         tcpconn->s)<=0){
1329                                 LOG(L_ERR, "ERROR: handle_ser_child: send_fd failed\n");
1330                         }
1331                         break;
1332                 case CONN_NEW:
1333                         /* update the fd in the requested tcpconn*/
1334                         /* WARNING: take care of setting refcnt properly to
1335                          * avoid race condition */
1336                         if (fd==-1){
1337                                 LOG(L_CRIT, "BUG: handle_ser_child: CONN_NEW:"
1338                                                         " no fd received\n");
1339                                 break;
1340                         }
1341                         tcpconn->s=fd;
1342                         /* add tcpconn to the list*/
1343                         tcpconn_add(tcpconn);
1344                         /* update the timeout*/
1345                         tcpconn->timeout=get_ticks()+tcp_con_lifetime;
1346                         io_watch_add(&io_h, tcpconn->s, F_TCPCONN, tcpconn);
1347                         tcpconn->flags&=~F_CONN_REMOVED;
1348                         break;
1349                 default:
1350                         LOG(L_CRIT, "BUG: handle_ser_child: unknown cmd %d\n", cmd);
1351         }
1352 end:
1353         return ret;
1354 error:
1355         return -1;
1356 }
1357
1358
1359
1360 /* generic handle io routine, it will call the appropiate
1361  *  handle_xxx() based on the fd_map type
1362  *
1363  * params:  fm  - pointer to a fd hash entry
1364  *          idx - index in the fd_array (or -1 if not known)
1365  * return: -1 on error
1366  *          0 on EAGAIN or when by some other way it is known that no more 
1367  *            io events are queued on the fd (the receive buffer is empty).
1368  *            Usefull to detect when there are no more io events queued for
1369  *            sigio_rt, epoll_et, kqueue.
1370  *         >0 on successfull read from the fd (when there might be more io
1371  *            queued -- the receive buffer might still be non-empty)
1372  */
1373 inline static int handle_io(struct fd_map* fm, int idx)
1374 {       
1375         int ret;
1376         
1377         switch(fm->type){
1378                 case F_SOCKINFO:
1379                         ret=handle_new_connect((struct socket_info*)fm->data);
1380                         break;
1381                 case F_TCPCONN:
1382                         ret=handle_tcpconn_ev((struct tcp_connection*)fm->data, idx);
1383                         break;
1384                 case F_TCPCHILD:
1385                         ret=handle_tcp_child((struct tcp_child*)fm->data, idx);
1386                         break;
1387                 case F_PROC:
1388                         ret=handle_ser_child((struct process_table*)fm->data, idx);
1389                         break;
1390                 case F_NONE:
1391                         LOG(L_CRIT, "BUG: handle_io: empty fd map\n");
1392                         goto error;
1393                 default:
1394                         LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type); 
1395                         goto error;
1396         }
1397         return ret;
1398 error:
1399         return -1;
1400 }
1401
1402
1403
1404 /* very inefficient for now - FIXME
1405  * keep in sync with tcpconn_destroy, the "delete" part should be
1406  * the same except for io_watch_del..*/
1407 static inline void tcpconn_timeout(int force)
1408 {
1409         struct tcp_connection *c, *next;
1410         int ticks;
1411         unsigned h;
1412         int fd;
1413         
1414         
1415         ticks=get_ticks();
1416         TCPCONN_LOCK; /* fixme: we can lock only on delete IMO */
1417         for(h=0; h<TCP_ID_HASH_SIZE; h++){
1418                 c=tcpconn_id_hash[h];
1419                 while(c){
1420                         next=c->id_next;
1421                         if (force ||((c->refcnt==0) && (ticks>c->timeout))) {
1422                                 if (!force)
1423                                         DBG("tcpconn_timeout: timeout for hash=%d - %p"
1424                                                         " (%d > %d)\n", h, c, ticks, c->timeout);
1425                                 fd=c->s;
1426 #ifdef USE_TLS
1427                                 if (c->type==PROTO_TLS)
1428                                         tls_close(c, fd);
1429 #endif
1430                                 _tcpconn_rm(c);
1431                                 if ((fd>0)&&(c->refcnt==0)) {
1432                                         if (!(c->flags & F_CONN_REMOVED)){
1433                                                 io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
1434                                                 c->flags|=F_CONN_REMOVED;
1435                                         }
1436                                         close(fd);
1437                                 }
1438                                 tcp_connections_no--;
1439                         }
1440                         c=next;
1441                 }
1442         }
1443         TCPCONN_UNLOCK;
1444 }
1445
1446
1447
1448 /* tcp main loop */
1449 void tcp_main_loop()
1450 {
1451
1452         struct socket_info* si;
1453         int r;
1454         
1455         /* init io_wait (here because we want the memory allocated only in
1456          * the tcp_main process) */
1457         
1458         /* FIXME: TODO: make tcp_max_fd_no a config param */
1459         if  (init_io_wait(&io_h, tcp_max_fd_no, tcp_poll_method)<0)
1460                 goto error;
1461         /* init: start watching all the fds*/
1462         
1463         /* add all the sockets we listens on for connections */
1464         for (si=tcp_listen; si; si=si->next){
1465                 if ((si->proto==PROTO_TCP) &&(si->socket!=-1)){
1466                         if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
1467                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1468                                                         "listen socket to the fd list\n");
1469                                 goto error;
1470                         }
1471                 }else{
1472                         LOG(L_CRIT, "BUG: tcp_main_loop: non tcp address in tcp_listen\n");
1473                 }
1474         }
1475 #ifdef USE_TLS
1476         if (!tls_disable){
1477                 for (si=tls_listen; si; si=si->next){
1478                         if ((si->proto==PROTO_TLS) && (si->socket!=-1)){
1479                                 if (io_watch_add(&io_h, si->socket, F_SOCKINFO, si)<0){
1480                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1481                                                         "tls listen socket to the fd list\n");
1482                                         goto error;
1483                                 }
1484                         }else{
1485                                 LOG(L_CRIT, "BUG: tcp_main_loop: non tls address"
1486                                                 " in tls_listen\n");
1487                         }
1488                 }
1489         }
1490 #endif
1491         /* add all the unix sockets used for communcation with other ser processes
1492          *  (get fd, new connection a.s.o) */
1493         for (r=1; r<process_no; r++){
1494                 if (pt[r].unix_sock>0) /* we can't have 0, we never close it!*/
1495                         if (io_watch_add(&io_h, pt[r].unix_sock, F_PROC, &pt[r])<0){
1496                                         LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1497                                                         "process %d unix socket to the fd list\n", r);
1498                                         goto error;
1499                         }
1500         }
1501         /* add all the unix sokets used for communication with the tcp childs */
1502         for (r=0; r<tcp_children_no; r++){
1503                 if (tcp_children[r].unix_sock>0)/*we can't have 0, we never close it!*/
1504                         if (io_watch_add(&io_h, tcp_children[r].unix_sock, F_TCPCHILD,
1505                                                         &tcp_children[r]) <0){
1506                                 LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
1507                                                 "tcp child %d unix socket to the fd list\n", r);
1508                                 goto error;
1509                         }
1510         }
1511         
1512         /* main loop */
1513         switch(io_h.poll_method){
1514                 case POLL_POLL:
1515                         while(1){
1516                                 /* wait and process IO */
1517                                 io_wait_loop_poll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0); 
1518                                 /* remove old connections */
1519                                 tcpconn_timeout(0);
1520                         }
1521                         break;
1522 #ifdef HAVE_SELECT
1523                 case POLL_SELECT:
1524                         while(1){
1525                                 io_wait_loop_select(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1526                                 tcpconn_timeout(0);
1527                         }
1528                         break;
1529 #endif
1530 #ifdef HAVE_SIGIO_RT
1531                 case POLL_SIGIO_RT:
1532                         while(1){
1533                                 io_wait_loop_sigio_rt(&io_h, TCP_MAIN_SELECT_TIMEOUT);
1534                                 tcpconn_timeout(0);
1535                         }
1536                         break;
1537 #endif
1538 #ifdef HAVE_EPOLL
1539                 case POLL_EPOLL_LT:
1540                         while(1){
1541                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1542                                 tcpconn_timeout(0);
1543                         }
1544                         break;
1545                 case POLL_EPOLL_ET:
1546                         while(1){
1547                                 io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 1);
1548                                 tcpconn_timeout(0);
1549                         }
1550                         break;
1551 #endif
1552 #ifdef HAVE_KQUEUE
1553                 case POLL_KQUEUE:
1554                         while(1){
1555                                 io_wait_loop_kqueue(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1556                                 tcpconn_timeout(0);
1557                         }
1558                         break;
1559 #endif
1560 #ifdef HAVE_DEVPOLL
1561                 case POLL_DEVPOLL:
1562                         while(1){
1563                                 io_wait_loop_devpoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
1564                                 tcpconn_timeout(0);
1565                         }
1566                         break;
1567 #endif
1568                 default:
1569                         LOG(L_CRIT, "BUG: tcp_main_loop: no support for poll method "
1570                                         " %s (%d)\n", 
1571                                         poll_method_name(io_h.poll_method), io_h.poll_method);
1572                         goto error;
1573         }
1574 error:
1575         destroy_io_wait(&io_h);
1576         LOG(L_CRIT, "ERROR: tcp_main_loop: exiting...");
1577         exit(-1);
1578 }
1579
1580
1581
1582 /* cleanup before exit */
1583 void destroy_tcp()
1584 {
1585                 if (tcpconn_id_hash){
1586                         tcpconn_timeout(1); /* force close/expire for all active tcpconns*/
1587                         shm_free(tcpconn_id_hash);
1588                         tcpconn_id_hash=0;
1589                 }
1590                 if (connection_id){
1591                         shm_free(connection_id);
1592                         connection_id=0;
1593                 }
1594                 if (tcpconn_aliases_hash){
1595                         shm_free(tcpconn_aliases_hash);
1596                         tcpconn_aliases_hash=0;
1597                 }
1598                 if (tcpconn_lock){
1599                         lock_destroy(tcpconn_lock);
1600                         lock_dealloc((void*)tcpconn_lock);
1601                         tcpconn_lock=0;
1602                 }
1603 }
1604
1605
1606
1607 int init_tcp()
1608 {
1609         char* poll_err;
1610         
1611         /* init lock */
1612         tcpconn_lock=lock_alloc();
1613         if (tcpconn_lock==0){
1614                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc lock\n");
1615                 goto error;
1616         }
1617         if (lock_init(tcpconn_lock)==0){
1618                 LOG(L_CRIT, "ERROR: init_tcp: could not init lock\n");
1619                 lock_dealloc((void*)tcpconn_lock);
1620                 tcpconn_lock=0;
1621                 goto error;
1622         }
1623         /* init globals */
1624         connection_id=(int*)shm_malloc(sizeof(int));
1625         if (connection_id==0){
1626                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
1627                 goto error;
1628         }
1629         *connection_id=1;
1630         /* alloc hashtables*/
1631         tcpconn_aliases_hash=(struct tcp_conn_alias**)
1632                         shm_malloc(TCP_ALIAS_HASH_SIZE* sizeof(struct tcp_conn_alias*));
1633         if (tcpconn_aliases_hash==0){
1634                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc address hashtable\n");
1635                 goto error;
1636         }
1637         tcpconn_id_hash=(struct tcp_connection**)shm_malloc(TCP_ID_HASH_SIZE*
1638                                                                 sizeof(struct tcp_connection*));
1639         if (tcpconn_id_hash==0){
1640                 LOG(L_CRIT, "ERROR: init_tcp: could not alloc id hashtable\n");
1641                 goto error;
1642         }
1643         /* init hashtables*/
1644         memset((void*)tcpconn_aliases_hash, 0, 
1645                         TCP_ALIAS_HASH_SIZE * sizeof(struct tcp_conn_alias*));
1646         memset((void*)tcpconn_id_hash, 0, 
1647                         TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
1648         
1649         /* fix config variables */
1650         /* they can have only positive values due the config parser so we can
1651          * ignore most of them */
1652                 poll_err=check_poll_method(tcp_poll_method);
1653         
1654         /* set an appropiate poll method */
1655         if (poll_err || (tcp_poll_method==0)){
1656                 tcp_poll_method=choose_poll_method();
1657                 if (poll_err){
1658                         LOG(L_ERR, "ERROR: init_tcp: %s, using %s instead\n",
1659                                         poll_err, poll_method_name(tcp_poll_method));
1660                 }else{
1661                         LOG(L_INFO, "init_tcp: using %s as the io watch method"
1662                                         " (auto detected)\n", poll_method_name(tcp_poll_method));
1663                 }
1664         }else{
1665                         LOG(L_INFO, "init_tcp: using %s io watch method (config)\n",
1666                                         poll_method_name(tcp_poll_method));
1667         }
1668         
1669         return 0;
1670 error:
1671         /* clean-up */
1672         destroy_tcp();
1673         return -1;
1674 }
1675
1676
1677
1678 /* starts the tcp processes */
1679 int tcp_init_children()
1680 {
1681         int r;
1682         int sockfd[2];
1683         int reader_fd[2]; /* for comm. with the tcp children read  */
1684         pid_t pid;
1685         struct socket_info *si;
1686         
1687         /* estimate max fd. no:
1688          * 1 tcp send unix socket/all_proc, 
1689          *  + 1 udp sock/udp proc + 1 tcp_child sock/tcp child*
1690          *  + no_listen_tcp */
1691         for(r=0, si=tcp_listen; si; si=si->next, r++);
1692 #ifdef USE_TLS
1693         if (! tls_disable)
1694                 for (si=tls_listen; si; si=si->next, r++);
1695 #endif
1696         
1697         tcp_max_fd_no=process_count()*2 +r-1 /* timer */ +3; /* stdin/out/err*/
1698         tcp_max_fd_no+=tcp_max_connections;
1699         
1700         /* create the tcp sock_info structures */
1701         /* copy the sockets --moved to main_loop*/
1702         
1703         /* fork children & create the socket pairs*/
1704         for(r=0; r<tcp_children_no; r++){
1705                 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockfd)<0){
1706                         LOG(L_ERR, "ERROR: tcp_main: socketpair failed: %s\n",
1707                                         strerror(errno));
1708                         goto error;
1709                 }
1710                 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reader_fd)<0){
1711                         LOG(L_ERR, "ERROR: tcp_main: socketpair failed: %s\n",
1712                                         strerror(errno));
1713                         goto error;
1714                 }
1715                 
1716                 process_no++;
1717                 pid=fork();
1718                 if (pid<0){
1719                         LOG(L_ERR, "ERROR: tcp_main: fork failed: %s\n",
1720                                         strerror(errno));
1721                         goto error;
1722                 }else if (pid>0){
1723                         /* parent */
1724                         close(sockfd[1]);
1725                         close(reader_fd[1]);
1726                         tcp_children[r].pid=pid;
1727                         tcp_children[r].proc_no=process_no;
1728                         tcp_children[r].busy=0;
1729                         tcp_children[r].n_reqs=0;
1730                         tcp_children[r].unix_sock=reader_fd[0];
1731                         pt[process_no].pid=pid;
1732                         pt[process_no].unix_sock=sockfd[0];
1733                         pt[process_no].idx=r;
1734                         strncpy(pt[process_no].desc, "tcp receiver", MAX_PT_DESC);
1735                 }else{
1736                         /* child */
1737                         close(sockfd[0]);
1738                         unix_tcp_sock=sockfd[1];
1739                         bind_address=0; /* force a SEGFAULT if someone uses a non-init.
1740                                                            bind address on tcp */
1741                         /* record pid twice to avoid the child using it, before
1742                          * parent gets a chance to set it*/
1743                         pt[process_no].pid=getpid();
1744                         if (init_child(r+children_no+1) < 0) {
1745                                 LOG(L_ERR, "init_children failed\n");
1746                                 goto error;
1747                         }
1748                         tcp_receive_loop(reader_fd[1]);
1749                 }
1750         }
1751         return 0;
1752 error:
1753         return -1;
1754 }
1755
1756 #endif