modules/ims_qos: added patch for flow-description bug when request originates from...
[sip-router] / tcp_main.c
1 /*
2  * Copyright (C) 2001-2003 FhG Fokus
3  *
4  * This file is part of Kamailio, a free SIP server.
5  *
6  * Kamailio is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version
10  *
11  * Kamailio is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
19  */
20
21 /** Kamailio core: tcp main/dispatcher and tcp send functions.
22  * @file tcp_main.c
23  * @ingroup core
24  * Module: @ref core
25  */
26
27
28 #ifdef USE_TCP
29
30
31 #ifndef SHM_MEM
32 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
33 #endif
34
35 #define HANDLE_IO_INLINE
36 #include "io_wait.h" /* include first to make sure the needed features are
37                                                 turned on (e.g. _GNU_SOURCE for POLLRDHUP) */
38
39 #include <sys/time.h>
40 #include <sys/types.h>
41 #include <sys/select.h>
42 #include <sys/socket.h>
43 #ifdef HAVE_FILIO_H
44 #include <sys/filio.h> /* needed on solaris 2.x for FIONREAD */
45 #elif defined __OS_solaris
46 #define BSD_COMP  /* needed on older solaris for FIONREAD */
47 #endif /* HAVE_FILIO_H / __OS_solaris */
48 #include <sys/ioctl.h>  /* ioctl() used on write error */
49 #include <netinet/in.h>
50 #include <netinet/in_systm.h>
51 #include <netinet/ip.h>
52 #include <netinet/tcp.h>
53 #include <sys/uio.h>  /* writev*/
54 #include <netdb.h>
55 #include <stdlib.h> /*exit() */
56
57 #include <unistd.h>
58
59 #include <errno.h>
60 #include <string.h>
61
62 #ifdef HAVE_SELECT
63 #include <sys/select.h>
64 #endif
65 #include <poll.h>
66
67
68 #include "ip_addr.h"
69 #include "pass_fd.h"
70 #include "tcp_conn.h"
71 #include "globals.h"
72 #include "pt.h"
73 #include "locking.h"
74 #include "mem/mem.h"
75 #include "mem/shm_mem.h"
76 #include "timer.h"
77 #include "sr_module.h"
78 #include "tcp_server.h"
79 #include "tcp_init.h"
80 #include "tcp_int_send.h"
81 #include "tcp_stats.h"
82 #include "tcp_ev.h"
83 #include "tsend.h"
84 #include "timer_ticks.h"
85 #include "local_timer.h"
86 #ifdef CORE_TLS
87 #include "tls/tls_server.h"
88 #define tls_loaded() 1
89 #else
90 #include "tls_hooks_init.h"
91 #include "tls_hooks.h"
92 #endif /* CORE_TLS*/
93 #ifdef USE_DST_BLACKLIST
94 #include "dst_blacklist.h"
95 #endif /* USE_DST_BLACKLIST */
96
97 #include "tcp_info.h"
98 #include "tcp_options.h"
99 #include "ut.h"
100 #include "cfg/cfg_struct.h"
101
102 #define local_malloc pkg_malloc
103 #define local_free   pkg_free
104
105 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
106
107
108 #ifdef NO_MSG_DONTWAIT
109 #ifndef MSG_DONTWAIT
110 /* should work inside tcp_main */
111 #define MSG_DONTWAIT 0
112 #endif
113 #endif /*NO_MSG_DONTWAIT */
114
115
116 #define TCP_PASS_NEW_CONNECTION_ON_DATA /* don't pass a new connection
117                                                                                    immediately to a child, wait for
118                                                                                    some data on it first */
119 #define TCP_LISTEN_BACKLOG 1024
120 #define SEND_FD_QUEUE /* queue send fd requests on EAGAIN, instead of sending 
121                                                         them immediately */
122 #define TCP_CHILD_NON_BLOCKING 
123 #ifdef SEND_FD_QUEUE
124 #ifndef TCP_CHILD_NON_BLOCKING
125 #define TCP_CHILD_NON_BLOCKING
126 #endif
127 #define MAX_SEND_FD_QUEUE_SIZE  tcp_main_max_fd_no
128 #define SEND_FD_QUEUE_SIZE              128  /* initial size */
129 #define SEND_FD_QUEUE_TIMEOUT   MS_TO_TICKS(2000)  /* 2 s */
130 #endif
131
132 /* minimum interval local_timer_run() is allowed to run, in ticks */
133 #define TCPCONN_TIMEOUT_MIN_RUN 1  /* once per tick */
134 #define TCPCONN_WAIT_TIMEOUT 1 /* 1 tick */
135
136 #ifdef TCP_ASYNC
137 static unsigned int* tcp_total_wq=0;
138 #endif
139
140
141 enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
142                                 F_TCPCONN, F_TCPCHILD, F_PROC };
143
144
145 #ifdef TCP_FD_CACHE
146
147 #define TCP_FD_CACHE_SIZE 8
148
149 struct fd_cache_entry{
150         struct tcp_connection* con;
151         int id;
152         int fd;
153 };
154
155
156 static struct fd_cache_entry fd_cache[TCP_FD_CACHE_SIZE];
157 #endif /* TCP_FD_CACHE */
158
159 static int is_tcp_main=0;
160
161
162 enum poll_types tcp_poll_method=0; /* by default choose the best method */
163 int tcp_main_max_fd_no=0;
164 int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
165 int tls_max_connections=DEFAULT_TLS_MAX_CONNECTIONS;
166
167 static union sockaddr_union tcp_source_ipv4_addr; /* saved bind/srv v4 addr. */
168 static union sockaddr_union* tcp_source_ipv4=0;
169 static union sockaddr_union tcp_source_ipv6_addr; /* saved bind/src v6 addr. */
170 static union sockaddr_union* tcp_source_ipv6=0;
171
172 static int* tcp_connections_no=0; /* current tcp (+tls) open connections */
173 static int* tls_connections_no=0; /* current tls open connections */
174
175 /* connection hash table (after ip&port) , includes also aliases */
176 struct tcp_conn_alias** tcpconn_aliases_hash=0;
177 /* connection hash table (after connection id) */
178 struct tcp_connection** tcpconn_id_hash=0;
179 gen_lock_t* tcpconn_lock=0;
180
181 struct tcp_child* tcp_children=0;
182 static int* connection_id=0; /*  unique for each connection, used for 
183                                                                 quickly finding the corresponding connection
184                                                                 for a reply */
185 int unix_tcp_sock;
186
187 static int tcp_proto_no=-1; /* tcp protocol number as returned by
188                                                            getprotobyname */
189
190 static io_wait_h io_h;
191
192 static struct local_timer tcp_main_ltimer;
193 static ticks_t tcp_main_prev_ticks;
194
/* tells whether there are tcp workers that should handle only a specific
 * socket
 * - used to optimize the search for the least loaded worker for a tcp socket
 * - 0 - no per-socket tcp workers have been set
 * - 1 + generic_workers - when there are per-socket tcp workers
 */
200 static int tcp_sockets_gworkers = 0;
201
202 static ticks_t tcpconn_main_timeout(ticks_t , struct timer_ln* , void* );
203
204 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
205                                                                                 struct ip_addr* l_ip, int l_port,
206                                                                                 int flags);
207
208
209
210 /* sets source address used when opening new sockets and no source is specified
211  *  (by default the address is choosen by the kernel)
212  * Should be used only on init.
213  * returns -1 on error */
214 int tcp_set_src_addr(struct ip_addr* ip)
215 {
216         switch (ip->af){
217                 case AF_INET:
218                         ip_addr2su(&tcp_source_ipv4_addr, ip, 0);
219                         tcp_source_ipv4=&tcp_source_ipv4_addr;
220                         break;
221                 case AF_INET6:
222                         ip_addr2su(&tcp_source_ipv6_addr, ip, 0);
223                         tcp_source_ipv6=&tcp_source_ipv6_addr;
224                         break;
225                 default:
226                         return -1;
227         }
228         return 0;
229 }
230
231
232
/* Applies the configured tcp keepalive settings to the socket s.
 * Each option is compiled in only if the corresponding HAVE_* macro is
 * defined; the interval / idle / count values are set only when their
 * config value is non-zero.
 * Returns -1 if SO_KEEPALIVE could not be enabled (the remaining options
 * are then not attempted), 0 otherwise (failures to set the interval,
 * idle time or probe count are only logged). */
static inline int init_sock_keepalive(int s)
{
	int val;
	
#ifdef HAVE_SO_KEEPALIVE
	val=1;
	if (cfg_get(tcp, tcp_cfg, keepalive) &&
			setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val))<0){
		LM_WARN("failed to enable SO_KEEPALIVE: %s\n", strerror(errno));
		return -1;
	}
#endif
#ifdef HAVE_TCP_KEEPINTVL
	val=cfg_get(tcp, tcp_cfg, keepintvl);
	if (val && setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &val,
							sizeof(val))<0){
		LM_WARN("failed to set keepalive probes interval: %s\n", strerror(errno));
	}
#endif
#ifdef HAVE_TCP_KEEPIDLE
	val=cfg_get(tcp, tcp_cfg, keepidle);
	if (val && setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &val,
							sizeof(val))<0){
		LM_WARN("failed to set keepalive idle interval: %s\n", strerror(errno));
	}
#endif
#ifdef HAVE_TCP_KEEPCNT
	val=cfg_get(tcp, tcp_cfg, keepcnt);
	if (val && setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &val,
							sizeof(val))<0){
		LM_WARN("failed to set maximum keepalive count: %s\n", strerror(errno));
	}
#endif
	return 0;
}
273
274
275
276 /* set all socket/fd options for new sockets (e.g. before connect): 
277  *  disable nagle, tos lowdelay, reuseaddr, non-blocking
278  *
279  * return -1 on error */
280 static int init_sock_opt(int s, int af)
281 {
282         int flags;
283         int optval;
284         
285 #ifdef DISABLE_NAGLE
286         flags=1;
287         if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
288                                         &flags, sizeof(flags))<0) ){
289                 LM_WARN("could not disable Nagle: %s\n", strerror(errno));
290         }
291 #endif
292         /* tos*/
293         optval = tos;
294         if(af==AF_INET){
295                 if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,
296                                         sizeof(optval)) ==-1){
297                         LM_WARN("setsockopt tos: %s\n", strerror(errno));
298                         /* continue since this is not critical */
299                 }
300         } else if(af==AF_INET6){
301                 if (setsockopt(s, IPPROTO_IPV6, IPV6_TCLASS,
302                                         (void*)&optval, sizeof(optval)) ==-1) {
303                         LM_WARN("setsockopt v6 tos: %s\n", strerror(errno));
304                         /* continue since this is not critical */
305                 }
306         }
307
308 #if  !defined(TCP_DONT_REUSEADDR) 
309         optval=1;
310         if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
311                                                 (void*)&optval, sizeof(optval))==-1){
312                 LM_ERR("setsockopt SO_REUSEADDR %s\n", strerror(errno));
313                 /* continue, not critical */
314         }
315 #endif /* !TCP_DONT_REUSEADDR */
316 #ifdef HAVE_TCP_SYNCNT
317         if ((optval=cfg_get(tcp, tcp_cfg, syncnt))){
318                 if (setsockopt(s, IPPROTO_TCP, TCP_SYNCNT, &optval,
319                                                 sizeof(optval))<0){
320                         LM_WARN("failed to set maximum SYN retr. count: %s\n", strerror(errno));
321                 }
322         }
323 #endif
324 #ifdef HAVE_TCP_LINGER2
325         if ((optval=cfg_get(tcp, tcp_cfg, linger2))){
326                 if (setsockopt(s, IPPROTO_TCP, TCP_LINGER2, &optval,
327                                                 sizeof(optval))<0){
328                         LM_WARN("failed to set maximum LINGER2 timeout: %s\n", strerror(errno));
329                 }
330         }
331 #endif
332 #ifdef HAVE_TCP_QUICKACK
333         if (cfg_get(tcp, tcp_cfg, delayed_ack)){
334                 optval=0; /* reset quick ack => delayed ack */
335                 if (setsockopt(s, IPPROTO_TCP, TCP_QUICKACK, &optval,
336                                                 sizeof(optval))<0){
337                         LM_WARN("failed to reset TCP_QUICKACK: %s\n", strerror(errno));
338                 }
339         }
340 #endif /* HAVE_TCP_QUICKACK */
341         init_sock_keepalive(s);
342         
343         /* non-blocking */
344         flags=fcntl(s, F_GETFL);
345         if (flags==-1){
346                 LM_ERR("fnctl failed: (%d) %s\n", errno, strerror(errno));
347                 goto error;
348         }
349         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
350                 LM_ERR("fcntl: set non-blocking failed: (%d) %s\n", errno, strerror(errno));
351                 goto error;
352         }
353         return 0;
354 error:
355         return -1;
356 }
357
358
359
/* set all socket/fd options for "accepted" sockets 
 *  only nonblocking is set since the rest is inherited from the
 *  "parent" (listening) socket
 *  Note: setting O_NONBLOCK is required on linux but it's not needed on
 *        BSD and possibly solaris (where the flag is inherited from the 
 *        parent socket). However since there is no standard document 
 *        requiring a specific behaviour in this case it's safer to always set
 *        it (at least for now)  --andrei
 *  TODO: check on which OSes  O_NONBLOCK is inherited and make this 
 *        function a nop.
 *
 * s - the accepted socket
 *
 * return -1 on error (reading or changing the fd flags failed), 0 on
 * success */
static int init_sock_opt_accept(int s)
{
	int flags;
	
	/* non-blocking: add O_NONBLOCK to the current file status flags */
	flags=fcntl(s, F_GETFL);
	if (flags==-1){
		LM_ERR("fcntl failed: (%d) %s\n", errno, strerror(errno));
		goto error;
	}
	if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
		LM_ERR("fcntl: set non-blocking failed: (%d) %s\n", errno, strerror(errno));
		goto error;
	}
	return 0;
error:
	return -1;
}
390
391
392
/** close a socket, handling errno.
 * The close() is repeated for as long as it fails with EINTR.
 * Expected socket level errors (EPIPE, ENOTCONN, ECONNRESET, ECONNREFUSED,
 * ENETUNREACH, EHOSTUNREACH) are filtered: if close() failed because of one
 * of them success is returned. Note that this happens on *BSDs (on linux
 * close() does not fail for socket level errors).
 * @param s - open valid socket.
 * @return - 0 on success, < 0 on error (whatever close() returns). On error
 *           errno is set.
 */
static int tcp_safe_close(int s)
{
	int rc;
	
	/* retry while interrupted by a signal */
	do{
		rc=close(s);
	}while(unlikely(rc<0) && errno==EINTR);
	if (unlikely(rc<0) &&
			(errno==EPIPE || errno==ENOTCONN || errno==ECONNRESET ||
			 errno==ECONNREFUSED || errno==ENETUNREACH ||
			 errno==EHOSTUNREACH)){
		/* on *BSD we really get these errors at close() time 
		   => ignore them */
		rc=0;
	}
	return rc;
}
426
427
428
429 /* blocking connect on a non-blocking fd; it will timeout after
430  * tcp_connect_timeout 
431  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
432  * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
433  */
434 static int tcp_blocking_connect(int fd, int type, snd_flags_t* send_flags,
435                                                                 const struct sockaddr *servaddr,
436                                                                 socklen_t addrlen)
437 {
438         int n;
439 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
440         fd_set sel_set;
441         fd_set orig_set;
442         struct timeval timeout;
443 #else
444         struct pollfd pf;
445 #endif
446         int elapsed;
447         int to;
448         int ticks;
449         int err;
450         unsigned int err_len;
451         int poll_err;
452         
453         poll_err=0;
454         to=cfg_get(tcp, tcp_cfg, connect_timeout_s);
455         ticks=get_ticks();
456 again:
457         n=connect(fd, servaddr, addrlen);
458         if (n==-1){
459                 if (errno==EINTR){
460                         elapsed=(get_ticks()-ticks)*TIMER_TICK;
461                         if (elapsed<to)         goto again;
462                         else goto error_timeout;
463                 }
464                 if (errno!=EINPROGRESS && errno!=EALREADY){
465                         goto error_errno;
466                 }
467         }else goto end;
468         
469         /* poll/select loop */
470 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
471                 FD_ZERO(&orig_set);
472                 FD_SET(fd, &orig_set);
473 #else
474                 pf.fd=fd;
475                 pf.events=POLLOUT;
476 #endif
477         while(1){
478                 elapsed=(get_ticks()-ticks)*TIMER_TICK;
479                 if (elapsed>=to)
480                         goto error_timeout;
481 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
482                 sel_set=orig_set;
483                 timeout.tv_sec=to-elapsed;
484                 timeout.tv_usec=0;
485                 n=select(fd+1, 0, &sel_set, 0, &timeout);
486 #else
487                 n=poll(&pf, 1, (to-elapsed)*1000);
488 #endif
489                 if (n<0){
490                         if (errno==EINTR) continue;
491                         LM_ERR("%s: poll/select failed: (%d) %s\n",
492                                         su2a((union sockaddr_union*)servaddr, addrlen),
493                                         errno, strerror(errno));
494                         goto error;
495                 }else if (n==0) /* timeout */ continue;
496 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
497                 if (FD_ISSET(fd, &sel_set))
498 #else
499                 if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){ 
500                         LM_ERR("%s: poll error: flags %x\n",
501                                         su2a((union sockaddr_union*)servaddr, addrlen),
502                                         pf.revents);
503                         poll_err=1;
504                 }
505 #endif
506                 {
507                         err_len=sizeof(err);
508                         getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
509                         if ((err==0) && (poll_err==0)) goto end;
510                         if (err!=EINPROGRESS && err!=EALREADY){
511                                 LM_ERR("%s: SO_ERROR (%d) %s\n",
512                                                 su2a((union sockaddr_union*)servaddr, addrlen),
513                                                 err, strerror(err));
514                                 errno=err;
515                                 goto error_errno;
516                         }
517                 }
518         }
519 error_errno:
520         switch(errno){
521                 case ENETUNREACH:
522                 case EHOSTUNREACH:
523 #ifdef USE_DST_BLACKLIST
524                         dst_blacklist_su(BLST_ERR_CONNECT, type,
525                                                          (union sockaddr_union*)servaddr, send_flags, 0);
526 #endif /* USE_DST_BLACKLIST */
527                         TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0,
528                                                         (union sockaddr_union*)servaddr, type);
529                         break;
530                 case ETIMEDOUT:
531 #ifdef USE_DST_BLACKLIST
532                         dst_blacklist_su(BLST_ERR_CONNECT, type,
533                                                          (union sockaddr_union*)servaddr, send_flags, 0);
534 #endif /* USE_DST_BLACKLIST */
535                         TCP_EV_CONNECT_TIMEOUT(errno, 0, 0,
536                                                         (union sockaddr_union*)servaddr, type);
537                         break;
538                 case ECONNREFUSED:
539                 case ECONNRESET:
540 #ifdef USE_DST_BLACKLIST
541                         dst_blacklist_su(BLST_ERR_CONNECT, type,
542                                                          (union sockaddr_union*)servaddr, send_flags, 0);
543 #endif /* USE_DST_BLACKLIST */
544                         TCP_EV_CONNECT_RST(errno, 0, 0,
545                                                         (union sockaddr_union*)servaddr, type);
546                         break;
547                 case EAGAIN: /* not posix, but supported on linux and bsd */
548                         TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0,
549                                                         (union sockaddr_union*)servaddr, type);
550                         break;
551                 default:
552                         TCP_EV_CONNECT_ERR(errno, 0, 0,
553                                                                 (union sockaddr_union*)servaddr, type);
554         }
555         LM_ERR("%s: (%d) %s\n",
556                         su2a((union sockaddr_union*)servaddr, addrlen),
557                         errno, strerror(errno));
558         goto error;
559 error_timeout:
560         /* timeout */
561 #ifdef USE_DST_BLACKLIST
562         dst_blacklist_su(BLST_ERR_CONNECT, type,
563                                                 (union sockaddr_union*)servaddr, send_flags, 0);
564 #endif /* USE_DST_BLACKLIST */
565         TCP_EV_CONNECT_TIMEOUT(0, 0, 0, (union sockaddr_union*)servaddr, type);
566         LM_ERR("%s: timeout %d s elapsed from %d s\n",
567                                 su2a((union sockaddr_union*)servaddr, addrlen),
568                                 elapsed, cfg_get(tcp, tcp_cfg, connect_timeout_s));
569 error:
570         TCP_STATS_CONNECT_FAILED();
571         return -1;
572 end:
573         return 0;
574 }
575
576
577
578 #ifdef TCP_ASYNC
579
580
581 /* unsafe version */
582 #define _wbufq_empty(con) ((con)->wbuf_q.first==0)
583 /* unsafe version */
584 #define _wbufq_non_empty(con) ((con)->wbuf_q.first!=0)
585
586
587 /* unsafe version, call while holding the connection write lock */
588 inline static int _wbufq_add(struct  tcp_connection* c, const char* data, 
589                                                         unsigned int size)
590 {
591         struct tcp_wbuffer_queue* q;
592         struct tcp_wbuffer* wb;
593         unsigned int last_free;
594         unsigned int wb_size;
595         unsigned int crt_size;
596         ticks_t t;
597         
598         q=&c->wbuf_q;
599         t=get_ticks_raw();
600         if (unlikely(   ((q->queued+size)>cfg_get(tcp, tcp_cfg, tcpconn_wq_max)) ||
601                                         ((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max)) ||
602                                         (q->first &&
603                                         TICKS_LT(q->wr_timeout, t)) )){
604                 LM_ERR("(%d bytes): write queue full or timeout "
605                                         " (%d, total %d, last write %d s ago)\n",
606                                         size, q->queued, *tcp_total_wq,
607                                         TICKS_TO_S(t-(q->wr_timeout-
608                                                                 cfg_get(tcp, tcp_cfg, send_timeout))));
609                 if (q->first && TICKS_LT(q->wr_timeout, t)){
610                         if (unlikely(c->state==S_CONN_CONNECT)){
611 #ifdef USE_DST_BLACKLIST
612                                 (void)dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
613                                                                                 &c->rcv.src_su, &c->send_flags, 0);
614 #endif /* USE_DST_BLACKLIST */
615                                 TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(c), TCP_LPORT(c),
616                                                                                         TCP_PSU(c), TCP_PROTO(c));
617                                 TCP_STATS_CONNECT_FAILED();
618                         }else{
619 #ifdef USE_DST_BLACKLIST
620                                 (void)dst_blacklist_su( BLST_ERR_SEND, c->rcv.proto,
621                                                                         &c->rcv.src_su, &c->send_flags, 0);
622 #endif /* USE_DST_BLACKLIST */
623                                 TCP_EV_SEND_TIMEOUT(0, &c->rcv);
624                                 TCP_STATS_SEND_TIMEOUT();
625                         }
626                 }else{
627                         /* if it's not a timeout => queue full */
628                         TCP_EV_SENDQ_FULL(0, &c->rcv);
629                         TCP_STATS_SENDQ_FULL();
630                 }
631                 goto error;
632         }
633         
634         if (unlikely(q->last==0)){
635                 wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
636                 wb=shm_malloc(sizeof(*wb)+wb_size-1);
637                 if (unlikely(wb==0))
638                         goto error;
639                 wb->b_size=wb_size;
640                 wb->next=0;
641                 q->last=wb;
642                 q->first=wb;
643                 q->last_used=0;
644                 q->offset=0;
645                 q->wr_timeout=get_ticks_raw()+
646                         ((c->state==S_CONN_CONNECT)?
647                                         S_TO_TICKS(cfg_get(tcp, tcp_cfg, connect_timeout_s)):
648                                         cfg_get(tcp, tcp_cfg, send_timeout));
649         }else{
650                 wb=q->last;
651         }
652         
653         while(size){
654                 last_free=wb->b_size-q->last_used;
655                 if (last_free==0){
656                         wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
657                         wb=shm_malloc(sizeof(*wb)+wb_size-1);
658                         if (unlikely(wb==0))
659                                 goto error;
660                         wb->b_size=wb_size;
661                         wb->next=0;
662                         q->last->next=wb;
663                         q->last=wb;
664                         q->last_used=0;
665                         last_free=wb->b_size;
666                 }
667                 crt_size=MIN_unsigned(last_free, size);
668                 memcpy(wb->buf+q->last_used, data, crt_size);
669                 q->last_used+=crt_size;
670                 size-=crt_size;
671                 data+=crt_size;
672                 q->queued+=crt_size;
673                 atomic_add_int((int*)tcp_total_wq, crt_size);
674         }
675         return 0;
676 error:
677         return -1;
678 }
679
680
681
682 /* unsafe version, call while holding the connection write lock
683  * inserts data at the beginning, it ignores the max queue size checks and
684  * the timeout (use sparingly)
685  * Note: it should never be called on a write buffer after wbufq_run() */
686 inline static int _wbufq_insert(struct  tcp_connection* c, const char* data, 
687                                                         unsigned int size)
688 {
689         struct tcp_wbuffer_queue* q;
690         struct tcp_wbuffer* wb;
691         
692         q=&c->wbuf_q;
693         if (likely(q->first==0)) /* if empty, use wbufq_add */
694                 return _wbufq_add(c, data, size);
695         
696         if (unlikely((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max))){
697                 LM_ERR("(%d bytes): write queue full"
698                                         " (%d, total %d, last write %d s ago)\n",
699                                         size, q->queued, *tcp_total_wq,
700                                         TICKS_TO_S(get_ticks_raw()-q->wr_timeout-
701                                                                         cfg_get(tcp, tcp_cfg, send_timeout)));
702                 goto error;
703         }
704         if (unlikely(q->offset)){
705                 LM_CRIT("non-null offset %d (bad call, should"
706                                 "never be called after the wbufq_run())\n", q->offset);
707                 goto error;
708         }
709         if ((q->first==q->last) && ((q->last->b_size-q->last_used)>=size)){
710                 /* one block with enough space in it for size bytes */
711                 memmove(q->first->buf+size, q->first->buf, q->last_used);
712                 memcpy(q->first->buf, data, size);
713                 q->last_used+=size;
714         }else{
715                 /* create a size bytes block directly */
716                 wb=shm_malloc(sizeof(*wb)+size-1);
717                 if (unlikely(wb==0))
718                         goto error;
719                 wb->b_size=size;
720                 /* insert it */
721                 wb->next=q->first;
722                 q->first=wb;
723                 memcpy(wb->buf, data, size);
724         }
725         
726         q->queued+=size;
727         atomic_add_int((int*)tcp_total_wq, size);
728         return 0;
729 error:
730         return -1;
731 }
732
733
734
735 /* unsafe version, call while holding the connection write lock */
736 inline static void _wbufq_destroy( struct  tcp_wbuffer_queue* q)
737 {
738         struct tcp_wbuffer* wb;
739         struct tcp_wbuffer* next_wb;
740         int unqueued;
741         
742         unqueued=0;
743         if (likely(q->first)){
744                 wb=q->first;
745                 do{
746                         next_wb=wb->next;
747                         unqueued+=(wb==q->last)?q->last_used:wb->b_size;
748                         if (wb==q->first)
749                                 unqueued-=q->offset;
750                         shm_free(wb);
751                         wb=next_wb;
752                 }while(wb);
753         }
754         memset(q, 0, sizeof(*q));
755         atomic_add_int((int*)tcp_total_wq, -unqueued);
756 }
757
758
759
760 /* tries to empty the queue  (safe version, c->write_lock must not be hold)
761  * returns -1 on error, bytes written on success (>=0) 
762  * if the whole queue is emptied => sets *empty*/
763 inline static int wbufq_run(int fd, struct tcp_connection* c, int* empty)
764 {
765         struct tcp_wbuffer_queue* q;
766         struct tcp_wbuffer* wb;
767         int n;
768         int ret;
769         int block_size;
770         char* buf;
771         
772         *empty=0;
773         ret=0;
774         lock_get(&c->write_lock);
775         q=&c->wbuf_q;
776         while(q->first){
777                 block_size=((q->first==q->last)?q->last_used:q->first->b_size)-
778                                                 q->offset;
779                 buf=q->first->buf+q->offset;
780                 n=_tcpconn_write_nb(fd, c, buf, block_size);
781                 if (likely(n>0)){
782                         ret+=n;
783                         if (likely(n==block_size)){
784                                 wb=q->first;
785                                 q->first=q->first->next; 
786                                 shm_free(wb);
787                                 q->offset=0;
788                                 q->queued-=block_size;
789                                 atomic_add_int((int*)tcp_total_wq, -block_size);
790                         }else{
791                                 q->offset+=n;
792                                 q->queued-=n;
793                                 atomic_add_int((int*)tcp_total_wq, -n);
794                                 break;
795                         }
796                 }else{
797                         if (n<0){
798                                 /* EINTR is handled inside _tcpconn_write_nb */
799                                 if (!(errno==EAGAIN || errno==EWOULDBLOCK)){
800                                         if (unlikely(c->state==S_CONN_CONNECT)){
801                                                 switch(errno){
802                                                         case ENETUNREACH:
803                                                         case EHOSTUNREACH: /* not posix for send() */
804 #ifdef USE_DST_BLACKLIST
805                                                                 dst_blacklist_su(BLST_ERR_CONNECT,
806                                                                                                         c->rcv.proto,
807                                                                                                         &c->rcv.src_su,
808                                                                                                         &c->send_flags, 0);
809 #endif /* USE_DST_BLACKLIST */
810                                                                 TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
811                                                                                                         TCP_LPORT(c), TCP_PSU(c),
812                                                                                                         TCP_PROTO(c));
813                                                                 break;
814                                                         case ECONNREFUSED:
815                                                         case ECONNRESET:
816 #ifdef USE_DST_BLACKLIST
817                                                                 dst_blacklist_su(BLST_ERR_CONNECT,
818                                                                                                         c->rcv.proto,
819                                                                                                         &c->rcv.src_su,
820                                                                                                         &c->send_flags, 0);
821 #endif /* USE_DST_BLACKLIST */
822                                                                 TCP_EV_CONNECT_RST(0, TCP_LADDR(c),
823                                                                                                         TCP_LPORT(c), TCP_PSU(c),
824                                                                                                         TCP_PROTO(c));
825                                                                 break;
826                                                         default:
827                                                                 TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
828                                                                                                         TCP_LPORT(c), TCP_PSU(c),
829                                                                                                         TCP_PROTO(c));
830                                                 }
831                                                 TCP_STATS_CONNECT_FAILED();
832                                         }else{
833                                                 switch(errno){
834                                                         case ECONNREFUSED:
835                                                         case ECONNRESET:
836                                                                 TCP_STATS_CON_RESET();
837                                                                 /* no break */
838                                                         case ENETUNREACH:
839                                                         case EHOSTUNREACH: /* not posix for send() */
840 #ifdef USE_DST_BLACKLIST
841                                                                 dst_blacklist_su(BLST_ERR_SEND,
842                                                                                                         c->rcv.proto,
843                                                                                                         &c->rcv.src_su,
844                                                                                                         &c->send_flags, 0);
845 #endif /* USE_DST_BLACKLIST */
846                                                                 break;
847                                                 }
848                                         }
849                                         ret=-1;
850                                         LM_ERR("%s [%d]\n", strerror(errno), errno);
851                                 }
852                         }
853                         break;
854                 }
855         }
856         if (likely(q->first==0)){
857                 q->last=0;
858                 q->last_used=0;
859                 q->offset=0;
860                 *empty=1;
861         }
862         lock_release(&c->write_lock);
863         if (likely(ret>0)){
864                 q->wr_timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, send_timeout);
865                 if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
866                         TCP_STATS_ESTABLISHED(c->state);
867                         c->state=S_CONN_OK;
868                 }
869         }
870         return ret;
871 }
872
873 #endif /* TCP_ASYNC */
874
875
876
877 #if 0
878 /* blocking write even on non-blocking sockets 
879  * if TCP_TIMEOUT will return with error */
880 static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
881                                                                 unsigned int len)
882 {
883         int n;
884         fd_set sel_set;
885         struct timeval timeout;
886         int ticks;
887         int initial_len;
888         
889         initial_len=len;
890 again:
891         
892         n=send(fd, buf, len,
893 #ifdef HAVE_MSG_NOSIGNAL
894                         MSG_NOSIGNAL
895 #else
896                         0
897 #endif
898                 );
899         if (n<0){
900                 if (errno==EINTR)       goto again;
901                 else if (errno!=EAGAIN && errno!=EWOULDBLOCK){
902                         LM_ERR("failed to send: (%d) %s\n", errno, strerror(errno));
903                         TCP_EV_SEND_TIMEOUT(errno, &c->rcv);
904                         TCP_STATS_SEND_TIMEOUT();
905                         goto error;
906                 }
907         }else if (n<len){
908                 /* partial write */
909                 buf+=n;
910                 len-=n;
911         }else{
912                 /* success: full write */
913                 goto end;
914         }
915         while(1){
916                 FD_ZERO(&sel_set);
917                 FD_SET(fd, &sel_set);
918                 timeout.tv_sec=tcp_send_timeout;
919                 timeout.tv_usec=0;
920                 ticks=get_ticks();
921                 n=select(fd+1, 0, &sel_set, 0, &timeout);
922                 if (n<0){
923                         if (errno==EINTR) continue; /* signal, ignore */
924                         LM_ERR("select failed: (%d) %s\n", errno, strerror(errno));
925                         goto error;
926                 }else if (n==0){
927                         /* timeout */
928                         if (get_ticks()-ticks>=tcp_send_timeout){
929                                 LM_ERR("send timeout (%d)\n", tcp_send_timeout);
930                                 goto error;
931                         }
932                         continue;
933                 }
934                 if (FD_ISSET(fd, &sel_set)){
935                         /* we can write again */
936                         goto again;
937                 }
938         }
939 error:
940                 return -1;
941 end:
942                 return initial_len;
943 }
944 #endif
945
946
947
948 struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
949                                                                         union sockaddr_union* local_addr,
950                                                                         struct socket_info* ba, int type, 
951                                                                         int state)
952 {
953         struct tcp_connection *c;
954         int rd_b_size;
955         
956         rd_b_size=cfg_get(tcp, tcp_cfg, rd_buf_size);
957         c=shm_malloc(sizeof(struct tcp_connection) + rd_b_size);
958         if (c==0){
959                 LM_ERR("mem. allocation failure\n");
960                 goto error;
961         }
962         memset(c, 0, sizeof(struct tcp_connection)); /* zero init (skip rd buf)*/
963         c->s=sock;
964         c->fd=-1; /* not initialized */
965         if (lock_init(&c->write_lock)==0){
966                 LM_ERR("init lock failed\n");
967                 goto error;
968         }
969         
970         c->rcv.src_su=*su;
971         
972         atomic_set(&c->refcnt, 0);
973         local_timer_init(&c->timer, tcpconn_main_timeout, c, 0);
974         su2ip_addr(&c->rcv.src_ip, su);
975         c->rcv.src_port=su_getport(su);
976         c->rcv.bind_address=ba;
977         if (likely(local_addr)){
978                 su2ip_addr(&c->rcv.dst_ip, local_addr);
979                 c->rcv.dst_port=su_getport(local_addr);
980         }else if (ba){
981                 c->rcv.dst_ip=ba->address;
982                 c->rcv.dst_port=ba->port_no;
983         }
984         print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
985         LM_DBG("on port %d, type %d\n", c->rcv.src_port, type);
986         init_tcp_req(&c->req, (char*)c+sizeof(struct tcp_connection), rd_b_size);
987         c->id=(*connection_id)++;
988         c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
989         c->rcv.proto_reserved2=0;
990         c->state=state;
991         c->extra_data=0;
992 #ifdef USE_TLS
993         if (type==PROTO_TLS){
994                 if (tls_tcpconn_init(c, sock)==-1) goto error;
995         }else
996 #endif /* USE_TLS*/
997         {
998                 c->type=PROTO_TCP;
999                 c->rcv.proto=PROTO_TCP;
1000                 c->timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, con_lifetime);
1001                 c->lifetime = cfg_get(tcp, tcp_cfg, con_lifetime);
1002         }
1003         
1004         return c;
1005         
1006 error:
1007         if (c) shm_free(c);
1008         return 0;
1009 }
1010
1011
1012
1013 /* do the actual connect, set sock. options a.s.o
1014  * returns socket on success, -1 on error
1015  * sets also *res_local_addr, res_si and state (S_CONN_CONNECT for an
1016  * unfinished connect and S_CONN_OK for a finished one)*/
1017 inline static int tcp_do_connect(       union sockaddr_union* server,
1018                                                                         union sockaddr_union* from,
1019                                                                         int type,
1020                                                                         snd_flags_t* send_flags,
1021                                                                         union sockaddr_union* res_local_addr,
1022                                                                         struct socket_info** res_si,
1023                                                                         enum tcp_conn_states *state
1024                                                                         )
1025 {
1026         int s;
1027         union sockaddr_union my_name;
1028         socklen_t my_name_len;
1029         struct ip_addr ip;
1030 #ifdef TCP_ASYNC
1031         int n;
1032 #endif /* TCP_ASYNC */
1033
1034         s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
1035         if (unlikely(s==-1)){
1036                 LM_ERR("%s: socket: (%d) %s\n",
1037                                 su2a(server, sizeof(*server)), errno, strerror(errno));
1038                 goto error;
1039         }
1040         if (init_sock_opt(s, server->s.sa_family)<0){
1041                 LM_ERR("%s: init_sock_opt failed\n",
1042                                         su2a(server, sizeof(*server)));
1043                 goto error;
1044         }
1045         
1046         if (unlikely(from && bind(s, &from->s, sockaddru_len(*from)) != 0)){
1047                 LM_WARN("binding to source address %s failed: %s [%d]\n",
1048                                         su2a(from, sizeof(*from)),
1049                                         strerror(errno), errno);
1050         }
1051         *state=S_CONN_OK;
1052 #ifdef TCP_ASYNC
1053         if (likely(cfg_get(tcp, tcp_cfg, async))){
1054 again:
1055                 n=connect(s, &server->s, sockaddru_len(*server));
1056                 if (likely(n==-1)){ /*non-blocking => most probable EINPROGRESS*/
1057                         if (likely(errno==EINPROGRESS))
1058                                 *state=S_CONN_CONNECT;
1059                         else if (errno==EINTR) goto again;
1060                         else if (errno!=EALREADY){
1061                                 switch(errno){
1062                                         case ENETUNREACH:
1063                                         case EHOSTUNREACH:
1064 #ifdef USE_DST_BLACKLIST
1065                                                 dst_blacklist_su(BLST_ERR_CONNECT, type, server,
1066                                                                                         send_flags, 0);
1067 #endif /* USE_DST_BLACKLIST */
1068                                                 TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0, server, type);
1069                                                 break;
1070                                         case ETIMEDOUT:
1071 #ifdef USE_DST_BLACKLIST
1072                                                 dst_blacklist_su(BLST_ERR_CONNECT, type, server,
1073                                                                                         send_flags, 0);
1074 #endif /* USE_DST_BLACKLIST */
1075                                                 TCP_EV_CONNECT_TIMEOUT(errno, 0, 0, server, type);
1076                                                 break;
1077                                         case ECONNREFUSED:
1078                                         case ECONNRESET:
1079 #ifdef USE_DST_BLACKLIST
1080                                                 dst_blacklist_su(BLST_ERR_CONNECT, type, server,
1081                                                                                         send_flags, 0);
1082 #endif /* USE_DST_BLACKLIST */
1083                                                 TCP_EV_CONNECT_RST(errno, 0, 0, server, type);
1084                                                 break;
1085                                         case EAGAIN:/* not posix, but supported on linux and bsd */
1086                                                 TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0, server,type);
1087                                                 break;
1088                                         default:
1089                                                 TCP_EV_CONNECT_ERR(errno, 0, 0, server, type);
1090                                 }
1091                                 TCP_STATS_CONNECT_FAILED();
1092                                 LM_ERR("connect %s: (%d) %s\n",
1093                                                         su2a(server, sizeof(*server)),
1094                                                         errno, strerror(errno));
1095                                 goto error;
1096                         }
1097                 }
1098         }else{
1099 #endif /* TCP_ASYNC */
1100                 if (tcp_blocking_connect(s, type,  send_flags, &server->s,
1101                                                                         sockaddru_len(*server))<0){
1102                         LM_ERR("tcp_blocking_connect %s failed\n",
1103                                                 su2a(server, sizeof(*server)));
1104                         goto error;
1105                 }
1106 #ifdef TCP_ASYNC
1107         }
1108 #endif /* TCP_ASYNC */
1109         if (from){
1110                 su2ip_addr(&ip, from);
1111                 if (!ip_addr_any(&ip))
1112                         /* we already know the source ip, skip the sys. call */
1113                         goto find_socket;
1114         }
1115         my_name_len=sizeof(my_name);
1116         if (unlikely(getsockname(s, &my_name.s, &my_name_len)!=0)){
1117                 LM_ERR("getsockname failed: %s(%d)\n", strerror(errno), errno);
1118                 *res_si=0;
1119                 goto error;
1120         }
1121         from=&my_name; /* update from with the real "from" address */
1122         su2ip_addr(&ip, &my_name);
1123 find_socket:
1124 #ifdef USE_TLS
1125         if (unlikely(type==PROTO_TLS))
1126                 *res_si=find_si(&ip, 0, PROTO_TLS);
1127         else
1128 #endif
1129                 *res_si=find_si(&ip, 0, PROTO_TCP);
1130         
1131         if (unlikely(*res_si==0)){
1132                 LM_WARN("%s: could not find corresponding"
1133                                 " listening socket for %s, using default...\n",
1134                                         su2a(server, sizeof(*server)), ip_addr2a(&ip));
1135                 if (server->s.sa_family==AF_INET) *res_si=sendipv4_tcp;
1136                 else *res_si=sendipv6_tcp;
1137         }
1138         *res_local_addr=*from;
1139         return s;
1140 error:
1141         if (s!=-1) tcp_safe_close(s);
1142         return -1;
1143 }
1144
1145
1146
1147 struct tcp_connection* tcpconn_connect( union sockaddr_union* server,
1148                                                                                 union sockaddr_union* from,
1149                                                                                 int type, snd_flags_t* send_flags)
1150 {
1151         int s;
1152         struct socket_info* si;
1153         union sockaddr_union my_name;
1154         struct tcp_connection* con;
1155         enum tcp_conn_states state;
1156
1157         s=-1;
1158         
1159         if (*tcp_connections_no >= cfg_get(tcp, tcp_cfg, max_connections)){
1160                 LM_ERR("maximum number of connections exceeded (%d/%d)\n",
1161                                         *tcp_connections_no,
1162                                         cfg_get(tcp, tcp_cfg, max_connections));
1163                 goto error;
1164         }
1165         if (unlikely(type==PROTO_TLS)) {
1166                 if (*tls_connections_no >= cfg_get(tcp, tcp_cfg, max_tls_connections)){
1167                         LM_ERR("maximum number of tls connections"
1168                                                 " exceeded (%d/%d)\n",
1169                                                 *tls_connections_no,
1170                                                 cfg_get(tcp, tcp_cfg, max_tls_connections));
1171                         goto error;
1172                 }
1173         }
1174
1175         s=tcp_do_connect(server, from, type,  send_flags, &my_name, &si, &state);
1176         if (s==-1){
1177                 LM_ERR("tcp_do_connect %s: failed (%d) %s\n",
1178                                 su2a(server, sizeof(*server)), errno, strerror(errno));
1179                 goto error;
1180         }
1181         con=tcpconn_new(s, server, &my_name, si, type, state);
1182         if (con==0){
1183                 LM_ERR("%s: tcpconn_new failed, closing the "
1184                                  " socket\n", su2a(server, sizeof(*server)));
1185                 goto error;
1186         }
1187         tcpconn_set_send_flags(con, *send_flags);
1188         return con;
1189 error:
1190         if (s!=-1) tcp_safe_close(s); /* close the opened socket */
1191         return 0;
1192 }
1193
1194
1195
1196 #ifdef TCP_CONNECT_WAIT
1197 int tcpconn_finish_connect( struct tcp_connection* c,
1198                                                                                                 union sockaddr_union* from)
1199 {
1200         int s;
1201         int r;
1202         union sockaddr_union local_addr;
1203         struct socket_info* si;
1204         enum tcp_conn_states state;
1205         struct tcp_conn_alias* a;
1206         int new_conn_alias_flags;
1207         
1208         s=tcp_do_connect(&c->rcv.src_su, from, c->type, &c->send_flags,
1209                                                 &local_addr, &si, &state);
1210         if (unlikely(s==-1)){
1211                 LM_ERR("%s: tcp_do_connect for %p failed\n",
1212                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)), c);
1213                 return -1;
1214         }
1215         c->rcv.bind_address=si;
1216         su2ip_addr(&c->rcv.dst_ip, &local_addr);
1217         c->rcv.dst_port=su_getport(&local_addr);
1218         /* update aliases if needed */
1219         if (likely(from==0)){
1220                 new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
1221                 /* add aliases */
1222                 TCPCONN_LOCK;
1223                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
1224                                                                                                         new_conn_alias_flags);
1225                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1226                                                                         c->rcv.dst_port, new_conn_alias_flags);
1227                 TCPCONN_UNLOCK;
1228         }else if (su_cmp(from, &local_addr)!=1){
1229                 new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
1230                 TCPCONN_LOCK;
1231                         /* remove all the aliases except the first one and re-add them
1232                          * (there shouldn't be more then the 3 default aliases at this 
1233                          * stage) */
1234                         if (c->aliases > 1) {
1235                                 for (r=1; r<c->aliases; r++){
1236                                         a=&c->con_aliases[r];
1237                                         tcpconn_listrm(tcpconn_aliases_hash[a->hash],
1238                                                                         a, next, prev);
1239                                 }
1240                                 c->aliases=1;
1241                         }
1242                         /* add the local_ip:0 and local_ip:local_port aliases */
1243                         _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1244                                                                                                 0, new_conn_alias_flags);
1245                         _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1246                                                                         c->rcv.dst_port, new_conn_alias_flags);
1247                 TCPCONN_UNLOCK;
1248         }
1249         
1250         return s;
1251 }
1252 #endif /* TCP_CONNECT_WAIT */
1253
1254
1255
1256 /* adds a tcp connection to the tcpconn hashes
1257  * Note: it's called _only_ from the tcp_main process */
1258 inline static struct tcp_connection*  tcpconn_add(struct tcp_connection *c)
1259 {
1260         struct ip_addr zero_ip;
1261         int new_conn_alias_flags;
1262
1263         if (likely(c)){
1264                 ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
1265                 c->id_hash=tcp_id_hash(c->id);
1266                 c->aliases=0;
1267                 new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
1268                 TCPCONN_LOCK;
1269                 c->flags|=F_CONN_HASHED;
1270                 /* add it at the begining of the list*/
1271                 tcpconn_listadd(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1272                 /* set the aliases */
1273                 /* first alias is for (peer_ip, peer_port, 0 ,0) -- for finding
1274                  *  any connection to peer_ip, peer_port
1275                  * the second alias is for (peer_ip, peer_port, local_addr, 0) -- for
1276                  *  finding any conenction to peer_ip, peer_port from local_addr 
1277                  * the third alias is for (peer_ip, peer_port, local_addr, local_port) 
1278                  *   -- for finding if a fully specified connection exists */
1279                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &zero_ip, 0,
1280                                                                                                         new_conn_alias_flags);
1281                 if (likely(c->rcv.dst_ip.af && ! ip_addr_any(&c->rcv.dst_ip))){
1282                         _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
1283                                                                                                         new_conn_alias_flags);
1284                         _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1285                                                                         c->rcv.dst_port, new_conn_alias_flags);
1286                 }
1287                 /* ignore add_alias errors, there are some valid cases when one
1288                  *  of the add_alias would fail (e.g. first add_alias for 2 connections
1289                  *   with the same destination but different src. ip*/
1290                 TCPCONN_UNLOCK;
1291                 LM_DBG("hashes: %d:%d:%d, %d\n",
1292                                                                                                 c->con_aliases[0].hash,
1293                                                                                                 c->con_aliases[1].hash,
1294                                                                                                 c->con_aliases[2].hash,
1295                                                                                                 c->id_hash);
1296                 return c;
1297         }else{
1298                 LM_CRIT("null connection pointer\n");
1299                 return 0;
1300         }
1301 }
1302
1303
1304 static inline void _tcpconn_detach(struct tcp_connection *c)
1305 {
1306         int r;
1307         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1308         /* remove all the aliases */
1309         for (r=0; r<c->aliases; r++)
1310                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
1311                                                 &c->con_aliases[r], next, prev);
1312         c->aliases = 0;
1313 }
1314
1315
1316
1317 static inline void _tcpconn_free(struct tcp_connection* c)
1318 {
1319 #ifdef TCP_ASYNC
1320         if (unlikely(_wbufq_non_empty(c)))
1321                 _wbufq_destroy(&c->wbuf_q);
1322 #endif
1323         lock_destroy(&c->write_lock);
1324 #ifdef USE_TLS
1325         if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) tls_tcpconn_clean(c);
1326 #endif
1327         shm_free(c);
1328 }
1329
1330
1331
/* Unsafe version of tcpconn_rm (no locking is done here): detaches the
 * connection from the hashes and then frees it. */
void _tcpconn_rm(struct tcp_connection* c)
{
	_tcpconn_detach(c);	/* unlink from the id and alias hashes */
	_tcpconn_free(c);	/* release its resources and memory */
}
1338
1339
1340
1341 void tcpconn_rm(struct tcp_connection* c)
1342 {
1343         int r;
1344         TCPCONN_LOCK;
1345         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1346         /* remove all the aliases */
1347         for (r=0; r<c->aliases; r++)
1348                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
1349                                                 &c->con_aliases[r], next, prev);
1350         c->aliases = 0;
1351         TCPCONN_UNLOCK;
1352         lock_destroy(&c->write_lock);
1353 #ifdef USE_TLS
1354         if ((c->type==PROTO_TLS || c->type==PROTO_WSS)&&(c->extra_data)) tls_tcpconn_clean(c);
1355 #endif
1356         shm_free(c);
1357 }
1358
1359
1360 /* finds a connection, if id=0 uses the ip addr, port, local_ip and local port
1361  *  (host byte order) and tries to find the connection that matches all of
1362  *   them. Wild cards can be used for local_ip and local_port (a 0 filled
1363  *   ip address and/or a 0 local port).
1364  * WARNING: unprotected (locks) use tcpconn_get unless you really
1365  * know what you are doing */
1366 struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port,
1367                                                                                 struct ip_addr* l_ip, int l_port)
1368 {
1369
1370         struct tcp_connection *c;
1371         struct tcp_conn_alias* a;
1372         unsigned hash;
1373         int is_local_ip_any;
1374         
1375 #ifdef EXTRA_DEBUG
1376         LM_DBG("%d  port %d\n",id, port);
1377         if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
1378 #endif
1379         if (likely(id)){
1380                 hash=tcp_id_hash(id);
1381                 for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
1382 #ifdef EXTRA_DEBUG
1383                         LM_DBG("c=%p, c->id=%d, port=%d\n", c, c->id, c->rcv.src_port);
1384                         print_ip("ip=", &c->rcv.src_ip, "\n");
1385 #endif
1386                         if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
1387                 }
1388         }else if (likely(ip)){
1389                 hash=tcp_addr_hash(ip, port, l_ip, l_port);
1390                 is_local_ip_any=ip_addr_any(l_ip);
1391                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
1392 #ifdef EXTRA_DEBUG
1393                         LM_DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
1394                                         a->parent->id, a->port, a->parent->rcv.src_port);
1395                         print_ip("ip=",&a->parent->rcv.src_ip,"\n");
1396 #endif
1397                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
1398                                         ((l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
1399                                         (ip_addr_cmp(ip, &a->parent->rcv.src_ip)) &&
1400                                         (is_local_ip_any ||
1401                                                 ip_addr_cmp(l_ip, &a->parent->rcv.dst_ip))
1402                                 )
1403                                 return a->parent;
1404                 }
1405         }
1406         return 0;
1407 }
1408
1409
1410
1411 /* _tcpconn_find with locks and timeout
1412  * local_addr contains the desired local ip:port. If null any local address 
1413  * will be used.  IN*ADDR_ANY or 0 port are wild cards.
1414  * If found, the connection's reference counter will be incremented, you might
1415  * want to decrement it after use.
1416  */
1417 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
1418                                                                         union sockaddr_union* local_addr,
1419                                                                         ticks_t timeout)
1420 {
1421         struct tcp_connection* c;
1422         struct ip_addr local_ip;
1423         int local_port;
1424         
1425         local_port=0;
1426         if (likely(ip)){
1427                 if (unlikely(local_addr)){
1428                         su2ip_addr(&local_ip, local_addr);
1429                         local_port=su_getport(local_addr);
1430                 }else{
1431                         ip_addr_mk_any(ip->af, &local_ip);
1432                         local_port=0;
1433                 }
1434         }
1435         TCPCONN_LOCK;
1436         c=_tcpconn_find(id, ip, port, &local_ip, local_port);
1437         if (likely(c)){ 
1438                         atomic_inc(&c->refcnt);
1439                         /* update the timeout only if the connection is not handled
1440                          * by a tcp reader _and_the timeout is non-zero  (the tcp
1441                          * reader process uses c->timeout for its own internal
1442                          * timeout and c->timeout will be overwritten * anyway on
1443                          * return to tcp_main) */
1444                         if (likely(c->reader_pid==0 && timeout != 0))
1445                                 c->timeout=get_ticks_raw()+timeout;
1446         }
1447         TCPCONN_UNLOCK;
1448         return c;
1449 }
1450
1451
1452
1453 /* add c->dst:port, local_addr as an alias for the "id" connection, 
1454  * flags: TCP_ALIAS_FORCE_ADD  - add an alias even if a previous one exists
1455  *        TCP_ALIAS_REPLACE    - if a prev. alias exists, replace it with the
1456  *                                new one
1457  * returns 0 on success, <0 on failure ( -1  - null c, -2 too many aliases,
1458  *  -3 alias already present and pointing to another connection)
1459  * WARNING: must be called with TCPCONN_LOCK held */
1460 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
1461                                                                                 struct ip_addr* l_ip, int l_port,
1462                                                                                 int flags)
1463 {
1464         unsigned hash;
1465         struct tcp_conn_alias* a;
1466         struct tcp_conn_alias* nxt;
1467         struct tcp_connection* p;
1468         int is_local_ip_any;
1469         int i;
1470         int r;
1471         
1472         a=0;
1473         is_local_ip_any=ip_addr_any(l_ip);
1474         if (likely(c)){
1475                 hash=tcp_addr_hash(&c->rcv.src_ip, port, l_ip, l_port);
1476                 /* search the aliases for an already existing one */
1477                 for (a=tcpconn_aliases_hash[hash], nxt=0; a; a=nxt){
1478                         nxt=a->next;
1479                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
1480                                         ( (l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
1481                                         (ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) &&
1482                                         ( is_local_ip_any || 
1483                                           ip_addr_cmp(&a->parent->rcv.dst_ip, l_ip))
1484                                         ){
1485                                 /* found */
1486                                 if (unlikely(a->parent!=c)){
1487                                         if (flags & TCP_ALIAS_FORCE_ADD)
1488                                                 /* still have to walk the whole list to check if
1489                                                  * the alias was not already added */
1490                                                 continue;
1491                                         else if (flags & TCP_ALIAS_REPLACE){
1492                                                 /* remove the alias =>
1493                                                  * remove the current alias and all the following
1494                                                  *  ones from the corresponding connection, shift the 
1495                                                  *  connection aliases array and re-add the other 
1496                                                  *  aliases (!= current one) */
1497                                                 p=a->parent;
1498                                                 for (i=0; (i<p->aliases) && (&(p->con_aliases[i])!=a);
1499                                                                 i++);
1500                                                 if (unlikely(i==p->aliases)){
1501                                                         LM_CRIT("alias %p not found in con %p (id %d)\n",
1502                                                                         a, p, p->id);
1503                                                         goto error_not_found;
1504                                                 }
1505                                                 for (r=i; r<p->aliases; r++){
1506                                                         tcpconn_listrm(
1507                                                                 tcpconn_aliases_hash[p->con_aliases[r].hash],
1508                                                                 &p->con_aliases[r], next, prev);
1509                                                 }
1510                                                 if (likely((i+1)<p->aliases)){
1511                                                         memmove(&p->con_aliases[i], &p->con_aliases[i+1],
1512                                                                                         (p->aliases-i-1)*
1513                                                                                                 sizeof(p->con_aliases[0]));
1514                                                 }
1515                                                 p->aliases--;
1516                                                 /* re-add the remaining aliases */
1517                                                 for (r=i; r<p->aliases; r++){
1518                                                         tcpconn_listadd(
1519                                                                 tcpconn_aliases_hash[p->con_aliases[r].hash], 
1520                                                                 &p->con_aliases[r], next, prev);
1521                                                 }
1522                                         }else
1523                                                 goto error_sec;
1524                                 }else goto ok;
1525                         }
1526                 }
1527                 if (unlikely(c->aliases>=TCP_CON_MAX_ALIASES)) goto error_aliases;
1528                 c->con_aliases[c->aliases].parent=c;
1529                 c->con_aliases[c->aliases].port=port;
1530                 c->con_aliases[c->aliases].hash=hash;
1531                 tcpconn_listadd(tcpconn_aliases_hash[hash], 
1532                                                                 &c->con_aliases[c->aliases], next, prev);
1533                 c->aliases++;
1534         }else goto error_not_found;
1535 ok:
1536 #ifdef EXTRA_DEBUG
1537         if (a) LM_DBG("alias already present\n");
1538         else   LM_DBG("alias port %d for hash %d, id %d\n",
1539                         port, hash, c->id);
1540 #endif
1541         return 0;
1542 error_aliases:
1543         /* too many aliases */
1544         return -2;
1545 error_not_found:
1546         /* null connection */
1547         return -1;
1548 error_sec:
1549         /* alias already present and pointing to a different connection
1550          * (hijack attempt?) */
1551         return -3;
1552 }
1553
1554
1555
1556 /* add port as an alias for the "id" connection, 
1557  * returns 0 on success,-1 on failure */
1558 int tcpconn_add_alias(int id, int port, int proto)
1559 {
1560         struct tcp_connection* c;
1561         int ret;
1562         struct ip_addr zero_ip;
1563         int r;
1564         int alias_flags;
1565         
1566         /* fix the port */
1567         port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
1568         TCPCONN_LOCK;
1569         /* check if alias already exists */
1570         c=_tcpconn_find(id, 0, 0, 0, 0);
1571         if (likely(c)){
1572                 ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
1573                 alias_flags=cfg_get(tcp, tcp_cfg, alias_flags);
1574                 /* alias src_ip:port, 0, 0 */
1575                 ret=_tcpconn_add_alias_unsafe(c, port,  &zero_ip, 0, 
1576                                                                                 alias_flags);
1577                 if (ret<0 && ret!=-3) goto error;
1578                 /* alias src_ip:port, local_ip, 0 */
1579                 ret=_tcpconn_add_alias_unsafe(c, port,  &c->rcv.dst_ip, 0, 
1580                                                                                 alias_flags);
1581                 if (ret<0 && ret!=-3) goto error;
1582                 /* alias src_ip:port, local_ip, local_port */
1583                 ret=_tcpconn_add_alias_unsafe(c, port, &c->rcv.dst_ip, c->rcv.dst_port,
1584                                                                                 alias_flags);
1585                 if (unlikely(ret<0)) goto error;
1586         }else goto error_not_found;
1587         TCPCONN_UNLOCK;
1588         return 0;
1589 error_not_found:
1590         TCPCONN_UNLOCK;
1591         LM_ERR("no connection found for id %d\n",id);
1592         return -1;
1593 error:
1594         TCPCONN_UNLOCK;
1595         switch(ret){
1596                 case -2:
1597                         LM_ERR("too many aliases (%d) for connection %p (id %d) %s:%d <- %d\n",
1598                                         c->aliases, c, c->id, ip_addr2a(&c->rcv.src_ip),
1599                                         c->rcv.src_port, port);
1600                         for (r=0; r<c->aliases; r++){
1601                                 LM_ERR("alias %d: for %p (%d) %s:%d <-%d hash %x\n",  r, c, c->id, 
1602                                                 ip_addr2a(&c->rcv.src_ip), c->rcv.src_port, 
1603                                                 c->con_aliases[r].port, c->con_aliases[r].hash);
1604                         }
1605                         break;
1606                 case -3:
1607                         LM_ERR("possible port hijack attempt\n");
1608                         LM_ERR("alias for %d port %d already"
1609                                                 " present and points to another connection \n",
1610                                                 c->id, port);
1611                         break;
1612                 default:
1613                         LM_ERR("unknown error %d\n", ret);
1614         }
1615         return -1;
1616 }
1617
1618
1619
1620 #ifdef TCP_FD_CACHE
1621
1622 static void tcp_fd_cache_init(void)
1623 {
1624         int r;
1625         for (r=0; r<TCP_FD_CACHE_SIZE; r++)
1626                 fd_cache[r].fd=-1;
1627 }
1628
1629
1630 inline static struct fd_cache_entry* tcp_fd_cache_get(struct tcp_connection *c)
1631 {
1632         int h;
1633         
1634         h=c->id%TCP_FD_CACHE_SIZE;
1635         if ((fd_cache[h].fd>0) && (fd_cache[h].id==c->id) && (fd_cache[h].con==c))
1636                 return &fd_cache[h];
1637         return 0;
1638 }
1639
1640
1641 inline static void tcp_fd_cache_rm(struct fd_cache_entry* e)
1642 {
1643         e->fd=-1;
1644 }
1645
1646
1647 inline static void tcp_fd_cache_add(struct tcp_connection *c, int fd)
1648 {
1649         int h;
1650         
1651         h=c->id%TCP_FD_CACHE_SIZE;
1652         if (likely(fd_cache[h].fd>0))
1653                 tcp_safe_close(fd_cache[h].fd);
1654         fd_cache[h].fd=fd;
1655         fd_cache[h].id=c->id;
1656         fd_cache[h].con=c;
1657 }
1658
1659 #endif /* TCP_FD_CACHE */
1660
1661
1662
1663 inline static int tcpconn_chld_put(struct tcp_connection* tcpconn);
1664
1665 static int tcpconn_send_put(struct tcp_connection* c, const char* buf,
1666                                                         unsigned len, snd_flags_t send_flags);
1667 static int tcpconn_do_send(int fd, struct tcp_connection* c,
1668                                                         const char* buf, unsigned len,
1669                                                         snd_flags_t send_flags, long* resp, int locked);
1670
1671 static int tcpconn_1st_send(int fd, struct tcp_connection* c,
1672                                                         const char* buf, unsigned len,
1673                                                         snd_flags_t send_flags, long* resp, int locked);
1674
1675 /* finds a tcpconn & sends on it
1676  * uses the dst members to, proto (TCP|TLS) and id and tries to send
1677  *  from the "from" address (if non null and id==0)
1678  * returns: number of bytes written (>=0) on success
1679  *          <0 on error */
1680 int tcp_send(struct dest_info* dst, union sockaddr_union* from,
1681                                         const char* buf, unsigned len)
1682 {
1683         struct tcp_connection *c;
1684         struct ip_addr ip;
1685         int port;
1686         int fd;
1687         long response[2];
1688         int n;
1689         ticks_t con_lifetime;
1690 #ifdef USE_TLS
1691         const char* rest_buf;
1692         const char* t_buf;
1693         unsigned rest_len, t_len;
1694         long resp;
1695         snd_flags_t t_send_flags;
1696 #endif /* USE_TLS */
1697         
1698         port=su_getport(&dst->to);
1699         con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
1700         if (likely(port)){
1701                 su2ip_addr(&ip, &dst->to);
1702                 c=tcpconn_get(dst->id, &ip, port, from, con_lifetime); 
1703         }else if (likely(dst->id)){
1704                 c=tcpconn_get(dst->id, 0, 0, 0, con_lifetime);
1705         }else{
1706                 LM_CRIT("null id & to\n");
1707                 return -1;
1708         }
1709         
1710         if (likely(dst->id)){
1711                 if (unlikely(c==0)) {
1712                         if (likely(port)){
1713                                 /* try again w/o id */
1714                                 c=tcpconn_get(0, &ip, port, from, con_lifetime);
1715                         }else{
1716                                 LM_ERR("id %d not found, dropping\n", dst->id);
1717                                 return -1;
1718                         }
1719                 }
1720         }
1721         /* connection not found or unusable => open a new one and send on it */
1722         if (unlikely((c==0) || tcpconn_close_after_send(c))){
1723                 if (unlikely(c)){
1724                         /* can't use c if it's marked as close-after-send  =>
1725                            release it and try opening new one */
1726                         tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
1727                         c=0;
1728                 }
1729                 /* check if connect() is disabled */
1730                 if (unlikely((dst->send_flags.f & SND_F_FORCE_CON_REUSE) ||
1731                                                 cfg_get(tcp, tcp_cfg, no_connect)))
1732                         return -1;
1733                 LM_DBG("no open tcp connection found, opening new one\n");
1734                 /* create tcp connection */
1735                 if (likely(from==0)){
1736                         /* check to see if we have to use a specific source addr. */
1737                         switch (dst->to.s.sa_family) {
1738                                 case AF_INET:
1739                                                 from = tcp_source_ipv4;
1740                                         break;
1741                                 case AF_INET6:
1742                                                 from = tcp_source_ipv6;
1743                                         break;
1744                                 default:
1745                                         /* error, bad af, ignore ... */
1746                                         break;
1747                         }
1748                 }
1749 #if defined(TCP_CONNECT_WAIT) && defined(TCP_ASYNC)
1750                 if (likely(cfg_get(tcp, tcp_cfg, tcp_connect_wait) && 
1751                                         cfg_get(tcp, tcp_cfg, async) )){
1752                         if (unlikely(*tcp_connections_no >=
1753                                                         cfg_get(tcp, tcp_cfg, max_connections))){
1754                                 LM_ERR("%s: maximum number of connections exceeded (%d/%d)\n",
1755                                                         su2a(&dst->to, sizeof(dst->to)),
1756                                                         *tcp_connections_no,
1757                                                         cfg_get(tcp, tcp_cfg, max_connections));
1758                                 return -1;
1759                         }
1760                         if (unlikely(dst->proto==PROTO_TLS)) {
1761                                 if (unlikely(*tls_connections_no >=
1762                                                         cfg_get(tcp, tcp_cfg, max_tls_connections))){
1763                                         LM_ERR("%s: maximum number of tls connections exceeded (%d/%d)\n",
1764                                                         su2a(&dst->to, sizeof(dst->to)),
1765                                                         *tls_connections_no,
1766                                                         cfg_get(tcp, tcp_cfg, max_tls_connections));
1767                                         return -1;
1768                                 }
1769                         }
1770                         c=tcpconn_new(-1, &dst->to, from, 0, dst->proto,
1771                                                         S_CONN_CONNECT);
1772                         if (unlikely(c==0)){
1773                                 LM_ERR("%s: could not create new connection\n",
1774                                                 su2a(&dst->to, sizeof(dst->to)));
1775                                 return -1;
1776                         }
1777                         c->flags|=F_CONN_PENDING|F_CONN_FD_CLOSED;
1778                         tcpconn_set_send_flags(c, dst->send_flags);
1779                         atomic_set(&c->refcnt, 2); /* ref from here and from main hash
1780                                                                                  table */
1781                         /* add it to id hash and aliases */
1782                         if (unlikely(tcpconn_add(c)==0)){
1783                                 LM_ERR("%s: could not add connection %p\n",
1784                                                 su2a(&dst->to, sizeof(dst->to)), c);
1785                                 _tcpconn_free(c);
1786                                 n=-1;
1787                                 goto end_no_conn;
1788                         }
1789                         /* do connect and if src ip or port changed, update the 
1790                          * aliases */
1791                         if (unlikely((fd=tcpconn_finish_connect(c, from))<0)){
1792                                 /* tcpconn_finish_connect will automatically blacklist
1793                                    on error => no need to do it here */
1794                                 LM_ERR("%s: tcpconn_finish_connect(%p) failed\n",
1795                                                 su2a(&dst->to, sizeof(dst->to)), c);
1796                                 goto conn_wait_error;
1797                         }
1798                         /* ? TODO: it might be faster just to queue the write directly
1799                          *  and send to main CONN_NEW_PENDING_WRITE */
1800                         /* delay sending the fd to main after the send */
1801                         
1802                         /* NOTE: no lock here, because the connection is marked as
1803                          * pending and nobody else will try to write on it. However
1804                          * this might produce out-of-order writes. If this is not
1805                          * desired either lock before the write or use 
1806                          * _wbufq_insert(...)
1807                          * NOTE2: _wbufq_insert() is used now (no out-of-order).
1808                          */
1809 #ifdef USE_TLS
1810                         if (unlikely(c->type==PROTO_TLS)) {
1811                         /* for TLS the TLS processing and the send must happen
1812                            atomically w/ respect to other sends on the same connection
1813                            (otherwise reordering might occur which would break TLS) =>
1814                            lock. However in this case this send will always be the first.
1815                            We can have the send() outside the lock only if this is the
1816                            first and only send (tls_encode is not called again), or
1817                            this is the last send for a tls_encode() loop and all the
1818                            previous ones did return CONN_NEW_COMPLETE or CONN_EOF.
1819                         */
1820                                 response[1] = CONN_NOP;
1821                                 t_buf = buf;
1822                                 t_len = len;
1823                                 lock_get(&c->write_lock);
1824 redo_tls_encode:
1825                                         t_send_flags = dst->send_flags;
1826                                         n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
1827                                                                         &t_send_flags);
1828                                         /* There are 4 cases:
1829                                            1. entire buffer consumed from the first try
1830                                              (rest_len == rest_buf == 0)
1831                                            2. rest_buf & first call
1832                                            3. rest_buf & not first call
1833                                                   3a. CONN_NEW_COMPLETE or CONN_EOF
1834                                                   3b. CONN_NEW_PENDING_WRITE
1835                                            4. entire buffer consumed, but not first call
1836                                                4a. CONN_NEW_COMPLETE or CONN_EOF
1837                                                    4b. CONN_NEW_PENDING_WRITE
1838                                                 We misuse response[1] == CONN_NOP to test for the
1839                                                 first call.
1840                                         */
1841                                         if (unlikely(n < 0)) {
1842                                                 lock_release(&c->write_lock);
1843                                                 goto conn_wait_error;
1844                                         }
1845                                         if (likely(rest_len == 0)) {
1846                                                 /* 1 or 4*: CONN_NEW_COMPLETE, CONN_EOF,  CONN_NOP
1847                                                     or CONN_NEW_PENDING_WRITE (*rest_len == 0) */
1848                                                 if (likely(response[1] != CONN_NEW_PENDING_WRITE)) {
1849                                                         /* 1 or 4a => it's safe to do the send outside the
1850                                                            lock (it will either send directly or
1851                                                            wbufq_insert())
1852                                                         */
1853                                                         lock_release(&c->write_lock);
1854                                                         if (likely(t_len != 0)) {
1855                                                                 n=tcpconn_1st_send(fd, c, t_buf, t_len,
1856                                                                                                         t_send_flags,
1857                                                                                                         &response[1], 0);
1858                                                         } else { /* t_len == 0 */
1859                                                                 if (response[1] == CONN_NOP) {
1860                                                                         /* nothing to send (e.g  parallel send
1861                                                                            tls_encode queues some data and then
1862                                                                            WANT_READ => this tls_encode will queue
1863                                                                            the cleartext too and will have nothing
1864                                                                            to send right now) and initial send =>
1865                                                                            behave as if the send was successful
1866                                                                            (but never return EOF here) */
1867                                                                         response[1] = CONN_NEW_COMPLETE;
1868                                                                 }
1869                                                         }
1870                                                         /* exit */
1871                                                 } else {
1872                                                         /* CONN_NEW_PENDING_WRITE:  4b: it was a
1873                                                            repeated tls_encode() (or otherwise we would
1874                                                            have here CONN_NOP) => add to the queue */
1875                                                         if (unlikely(t_len &&
1876                                                                                         _wbufq_add(c, t_buf, t_len) < 0)) {
1877                                                                 response[1] = CONN_ERROR;
1878                                                                 n = -1;
1879                                                         }
1880                                                         lock_release(&c->write_lock);
1881                                                         /* exit (no send) */
1882                                                 }
1883                                         } else {  /* rest_len != 0 */
1884                                                 /* 2 or 3*: if tls_encode hasn't finished, we have to
1885                                                    call tcpconn_1st_send() under lock (otherwise if it
1886                                                    returns CONN_NEW_PENDING_WRITE, there is no way
1887                                                    to find the right place to add the new queued
1888                                                    data from the 2nd tls_encode()) */
1889                                                 if (likely((response[1] == CONN_NOP /*2*/ ||
1890                                                                         response[1] == CONN_NEW_COMPLETE /*3a*/ ||
1891                                                                         response[1] == CONN_EOF /*3a*/) && t_len))
1892                                                         n = tcpconn_1st_send(fd, c, t_buf, t_len,
1893                                                                                                         t_send_flags,
1894                                                                                                         &response[1], 1);
1895                                                 else if (unlikely(t_len &&
1896                                                                                         _wbufq_add(c, t_buf, t_len) < 0)) {
1897                                                         /*3b: CONN_NEW_PENDING_WRITE*/
1898                                                         response[1] = CONN_ERROR;
1899                                                         n = -1;
1900                                                 }
1901                                                 if (likely(n >= 0)) {
1902                                                         /* if t_len == 0 => nothing was sent => previous
1903                                                            response will be kept */
1904                                                         t_buf = rest_buf;
1905                                                         t_len = rest_len;
1906                                                         goto redo_tls_encode;
1907                                                 } else {
1908                                                         lock_release(&c->write_lock);
1909                                                         /* error exit */
1910                                                 }
1911                                         }
1912                         } else
1913 #endif /* USE_TLS */
1914                                 n=tcpconn_1st_send(fd, c, buf, len, dst->send_flags,
1915                                                                         &response[1], 0);
1916                         if (unlikely(n<0)) /* this will catch CONN_ERROR too */
1917                                 goto conn_wait_error;
1918                         if (unlikely(response[1]==CONN_EOF)){
1919                                 /* if close-after-send requested, don't bother
1920                                    sending the fd back to tcp_main, try closing it
1921                                    immediately (no other tcp_send should use it,
1922                                    because it is marked as close-after-send before
1923                                    being added to the hash) */
1924                                 goto conn_wait_close;
1925                         }
1926                         /* send to tcp_main */
1927                         response[0]=(long)c;
1928                         if (unlikely(send_fd(unix_tcp_sock, response,
1929                                                                         sizeof(response), fd) <= 0)){
1930                                 LM_ERR("%s: %ld for %p failed:" " %s (%d)\n",
1931                                                         su2a(&dst->to, sizeof(dst->to)),
1932                                                         response[1], c, strerror(errno), errno);
1933                                 goto conn_wait_error;
1934                         }
1935                         goto conn_wait_success;
1936                 }
1937 #endif /* TCP_CONNECT_WAIT  && TCP_ASYNC */
1938                 if (unlikely((c=tcpconn_connect(&dst->to, from, dst->proto,
1939                                                                                 &dst->send_flags))==0)){
1940                         LM_ERR("%s: connect failed\n", su2a(&dst->to, sizeof(dst->to)));
1941                         return -1;
1942                 }
1943                 tcpconn_set_send_flags(c, dst->send_flags);
1944                 if (likely(c->state==S_CONN_OK))
1945                         TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
1946                 atomic_set(&c->refcnt, 2); /* ref. from here and it will also
1947                                                                           be added in the tcp_main hash */
1948                 fd=c->s;
1949                 c->flags|=F_CONN_FD_CLOSED; /* not yet opened in main */
1950                 /* ? TODO: it might be faster just to queue the write and
1951                  * send to main a CONN_NEW_PENDING_WRITE */
1952                 
1953                 /* send the new tcpconn to "tcp main" */
1954                 response[0]=(long)c;
1955                 response[1]=CONN_NEW;
1956                 n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
1957                 if (unlikely(n<=0)){
1958                         LM_ERR("%s: failed send_fd: %s (%d)\n",
1959                                         su2a(&dst->to, sizeof(dst->to)),
1960                                         strerror(errno), errno);
1961                         /* we can safely delete it, it's not referenced by anybody */
1962                         _tcpconn_free(c);
1963                         n=-1;
1964                         goto end_no_conn;
1965                 }
1966                 /* new connection => send on it directly */
1967 #ifdef USE_TLS
1968                 if (unlikely(c->type==PROTO_TLS)) {
1969                         /* for TLS the TLS processing and the send must happen
1970                            atomically w/ respect to other sends on the same connection
1971                            (otherwise reordering might occur which would break TLS) =>
1972                            lock.
1973                         */
1974                         response[1] = CONN_NOP;
1975                         t_buf = buf;
1976                         t_len = len;
1977                         lock_get(&c->write_lock);
1978                                 do {
1979                                         t_send_flags = dst->send_flags;
1980                                         n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
1981                                                                         &t_send_flags);
1982                                         if (likely(n > 0)) {
1983                                                 n = tcpconn_do_send(fd, c, t_buf, t_len, t_send_flags,
1984                                                                                                 &resp, 1);
1985                                                 if (likely(response[1] != CONN_QUEUED_WRITE ||
1986                                                                         resp == CONN_ERROR))
1987                                                         /* don't overwrite a previous CONN_QUEUED_WRITE
1988                                                            unless error */
1989                                                         response[1] = resp;
1990                                         } else  if (unlikely(n < 0)) {
1991                                                 response[1] = CONN_ERROR;
1992                                                 break;
1993                                         }
1994                                         /* else do nothing for n (t_len) == 0, keep
1995                                            the last reponse */
1996                                         t_buf = rest_buf;
1997                                         t_len = rest_len;
1998                                 } while(unlikely(rest_len && n > 0));
1999                         lock_release(&c->write_lock);
2000                 } else
2001 #endif /* USE_TLS */
2002                         n = tcpconn_do_send(fd, c, buf, len, dst->send_flags,
2003                                                                         &response[1], 0);
2004                 if (unlikely(response[1] != CONN_NOP)) {
2005                         response[0]=(long)c;
2006                         if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
2007                                 BUG("tcp_main command %ld sending failed (write):"
2008                                                 "%s (%d)\n", response[1], strerror(errno), errno);
2009                                 /* all commands != CONN_NOP returned by tcpconn_do_send()
2010                                    (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec
2011                                    refcnt => if sending the command fails we have to
2012                                    dec. refcnt by hand */
2013                                 tcpconn_chld_put(c); /* deref. it manually */
2014                                 n=-1;
2015                         }
2016                         /* here refcnt for c is already decremented => c contents can
2017                            no longer be used and refcnt _must_ _not_ be decremented
2018                            again on exit */
2019                         if (unlikely(n < 0 || response[1] == CONN_EOF)) {
2020                                 /* on error or eof, close fd */
2021                                 tcp_safe_close(fd);
2022                         } else if (response[1] == CONN_QUEUED_WRITE) {
2023 #ifdef TCP_FD_CACHE
2024                                 if (cfg_get(tcp, tcp_cfg, fd_cache)) {
2025                                         tcp_fd_cache_add(c, fd);
2026                                 } else
2027 #endif /* TCP_FD_CACHE */
2028                                         tcp_safe_close(fd);
2029                         } else {
2030                                 BUG("unexpected tcpconn_do_send() return & response:"
2031                                                 " %d, %ld\n", n, response[1]);
2032                         }
2033                         goto end_no_deref;
2034                 }
2035 #ifdef TCP_FD_CACHE
2036                 if (cfg_get(tcp, tcp_cfg, fd_cache)) {
2037                         tcp_fd_cache_add(c, fd);
2038                 }else
2039 #endif /* TCP_FD_CACHE */
2040                         tcp_safe_close(fd);
2041         /* here we can have only commands that _do_ _not_ dec refcnt.
2042            (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE are all treated above) */
2043                 goto release_c;
2044         } /* if (c==0 or unusable) new connection */
2045         /* existing connection, send on it */
2046         n = tcpconn_send_put(c, buf, len, dst->send_flags);
2047         /* no deref needed (automatically done inside tcpconn_send_put()) */
2048         return n;
2049 #ifdef TCP_CONNECT_WAIT
2050 conn_wait_success:
2051 #ifdef TCP_FD_CACHE
2052         if (cfg_get(tcp, tcp_cfg, fd_cache)) {
2053                 tcp_fd_cache_add(c, fd);
2054         } else
2055 #endif /* TCP_FD_CACHE */
2056                 if (unlikely (tcp_safe_close(fd) < 0))
2057                         LM_ERR("closing temporary send fd for %p: %s: "
2058                                         "close(%d) failed (flags 0x%x): %s (%d)\n", c,
2059                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2060                                         fd, c->flags, strerror(errno), errno);
2061         tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
2062         return n;
2063 conn_wait_error:
2064         n=-1;
2065 conn_wait_close:
2066         /* connect or send failed or immediate close-after-send was requested on
2067          * newly created connection which was not yet sent to tcp_main (but was
2068          * already hashed) => don't send to main, unhash and destroy directly
2069          * (if refcnt>2 it will be destroyed when the last sender releases the
2070          * connection (tcpconn_chld_put(c))) or when tcp_main receives a
2071          * CONN_ERROR it*/
2072         c->state=S_CONN_BAD;
2073         /* we are here only if we opened a new fd (and not reused a cached or
2074            a reader one) => if the connect was successful close the fd */
2075         if (fd>=0) {
2076                 if (unlikely(tcp_safe_close(fd) < 0 ))
2077                         LM_ERR("closing temporary send fd for %p: %s: "
2078                                         "close(%d) failed (flags 0x%x): %s (%d)\n", c,
2079                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2080                                         fd, c->flags, strerror(errno), errno);
2081         }
2082         /* here the connection is for sure in the hash (tcp_main will not
2083            remove it because it's marked as PENDing) and the refcnt is at least
2084            2
2085          */
2086         TCPCONN_LOCK;
2087                 _tcpconn_detach(c);
2088                 c->flags&=~F_CONN_HASHED;
2089                 tcpconn_put(c);
2090         TCPCONN_UNLOCK;
2091         /* dec refcnt -> mark it for destruction */
2092         tcpconn_chld_put(c);
2093         return n;
2094 #endif /* TCP_CONNECT_WAIT */
2095 release_c:
2096         tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
2097 end_no_deref:
2098 end_no_conn:
2099         return n;
2100 }
2101
2102
2103
2104 /** sends on an existing tcpconn and auto-dec. con. ref counter.
2105  * As opposed to tcp_send(), this function requires an existing
2106  * tcp connection.
2107  * WARNING: the tcp_connection will be de-referenced.
      *
      * Outline:
      *  1. (TCP_ASYNC only) if async mode is on and the connection already has
      *     queued data (or, with TCP_CONNECT_WAIT, has F_CONN_PENDING set), the
      *     new data is appended to the write queue under c->write_lock and no
      *     fd is needed. TLS/WSS data is passed through tls_encode() first.
      *     If encoding or queueing fails, the connection is marked
      *     S_CONN_BAD and CONN_ERROR is sent to tcp_main.
      *  2. otherwise an fd is obtained: c->fd if the caller is the reader
      *     process of c, an fd cache entry (TCP_FD_CACHE) or, failing that,
      *     a CONN_GET_FD request sent to tcp_main over unix_tcp_sock.
      *  3. the data is written with tcpconn_do_send() (for TLS/WSS:
      *     tls_encode() + tcpconn_do_send() in a loop, under c->write_lock).
      *  4. if the resulting command is not CONN_NOP, it is forwarded to
      *     tcp_main, the fd is cached/closed/removed from the cache and the
      *     function returns without calling tcpconn_chld_put(c) (unless
      *     forwarding the command failed); otherwise the fd is cached or
      *     closed and c is released with tcpconn_chld_put(c).
2108  * @param c - existing tcp connection pointer.
2109  * @param buf - data to be sent.
2110  * @param len - data length,
      * @param send_flags - send flags, passed on to tls_encode() and
      *                     tcpconn_do_send().
2111  * @return >=0 on success, -1 on error.
2112  */
2113 static int tcpconn_send_put(struct tcp_connection* c, const char* buf,
2114                                                                 unsigned len, snd_flags_t send_flags)
2115 {
2116         struct tcp_connection *tmp;
2117         int fd;
2118         long response[2];
2119         int n;
2120         int do_close_fd;
2121 #ifdef USE_TLS
2122         const char* rest_buf;
2123         const char* t_buf;
2124         unsigned rest_len, t_len;
2125         long resp;
2126         snd_flags_t t_send_flags;
2127 #endif /* USE_TLS */
2128 #ifdef TCP_FD_CACHE
2129         struct fd_cache_entry* fd_cache_e;
2130         int use_fd_cache;
2131         
2132         use_fd_cache=cfg_get(tcp, tcp_cfg, fd_cache);
2133         fd_cache_e=0;
2134 #endif /* TCP_FD_CACHE */
2135         do_close_fd=1; /* close the fd on exit */
2136         response[1] = CONN_NOP; /* no command for tcp_main so far */
2137 #ifdef TCP_ASYNC
2138         /* if data is already queued, we don't need the fd */
             /* the condition is first tested without the lock and then
                re-tested after c->write_lock is taken */
2139 #ifdef TCP_CONNECT_WAIT
2140                 if (unlikely(cfg_get(tcp, tcp_cfg, async) &&
2141                                                 (_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)) ))
2142 #else /* ! TCP_CONNECT_WAIT */
2143                 if (unlikely(cfg_get(tcp, tcp_cfg, async) && (_wbufq_non_empty(c)) ))
2144 #endif /* TCP_CONNECT_WAIT */
2145                 {
2146                         lock_get(&c->write_lock);
2147 #ifdef TCP_CONNECT_WAIT
2148                                 if (likely(_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)))
2149 #else /* ! TCP_CONNECT_WAIT */
2150                                 if (likely(_wbufq_non_empty(c)))
2151 #endif /* TCP_CONNECT_WAIT */
2152                                 {
2153                                         do_close_fd=0; /* only queueing: no fd was acquired */
2154 #ifdef USE_TLS
2155                                         if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) {
2156                                                 t_buf = buf;
2157                                                 t_len = len;
                                                     /* encode and queue chunk by chunk, as long as
                                                        tls_encode() reports remaining data */
2158                                                 do {
2159                                                         t_send_flags = send_flags;
2160                                                         n = tls_encode(c, &t_buf, &t_len,
2161                                                                                         &rest_buf, &rest_len,
2162                                                                                         &t_send_flags);
2163                                                         if (unlikely((n < 0) || (t_len &&
2164                                                                          (_wbufq_add(c, t_buf, t_len) < 0)))) {
                                                                     /* encode or queue-add failed => mark the
                                                                        connection bad and report CONN_ERROR
                                                                        to tcp_main (see the error label) */
2165                                                                 lock_release(&c->write_lock);
2166                                                                 n=-1;
2167                                                                 response[1] = CONN_ERROR;
2168                                                                 c->state=S_CONN_BAD;
2169                                                                 c->timeout=get_ticks_raw(); /* force timeout */
2170                                                                 goto error;
2171                                                         }
2172                                                         t_buf = rest_buf;
2173                                                         t_len = rest_len;
2174                                                 } while(unlikely(rest_len && n > 0));
2175                                         } else
2176 #endif /* USE_TLS */
2177                                                 if (unlikely(len && (_wbufq_add(c, buf, len)<0))){
                                                             /* same failure handling as in the TLS branch */
2178                                                         lock_release(&c->write_lock);
2179                                                         n=-1;
2180                                                         response[1] = CONN_ERROR;
2181                                                         c->state=S_CONN_BAD;
2182                                                         c->timeout=get_ticks_raw(); /* force timeout */
2183                                                         goto error;
2184                                                 }
2185                                         n=len; /* all the data was queued: report len */
2186                                         lock_release(&c->write_lock);
2187                                         goto release_c;
2188                                 }
2189                         lock_release(&c->write_lock);
2190                 }
2191 #endif /* TCP_ASYNC */
2192                 /* check if this is not the same reader process holding
2193                  *  c  and if so send directly on c->fd */
2194                 if (c->reader_pid==my_pid()){
2195                         LM_DBG("send from reader (%d (%d)), reusing fd\n",
2196                                         my_pid(), process_no);
2197                         fd=c->fd;
2198                         do_close_fd=0; /* don't close the fd on exit, it's in use */
2199 #ifdef TCP_FD_CACHE
2200                         use_fd_cache=0; /* don't cache: problems would arise due to the
2201                                                            close() on cache eviction (if the fd is still 
2202                                                            used). If it has to be cached then dup() _must_ 
2203                                                            be used */
2204                 }else if (likely(use_fd_cache && 
2205                                                         ((fd_cache_e=tcp_fd_cache_get(c))!=0))){
2206                         fd=fd_cache_e->fd;
2207                         do_close_fd=0;
2208                         LM_DBG("found fd in cache (%d, %p, %d)\n", fd, c, fd_cache_e->id);
2209 #endif /* TCP_FD_CACHE */
2210                 }else{
2211                         LM_DBG("tcp connection found (%p), acquiring fd\n", c);
2212                         /* get the fd */
                             /* request: {c, CONN_GET_FD} written to unix_tcp_sock; the reply
                                (connection pointer + fd) is read with receive_fd() */
2213                         response[0]=(long)c;
2214                         response[1]=CONN_GET_FD;
2215                         n=send_all(unix_tcp_sock, response, sizeof(response));
2216                         if (unlikely(n<=0)){
2217                                 LM_ERR("failed to get fd(write):%s (%d)\n", strerror(errno), errno);
2218                                 n=-1;
2219                                 goto release_c;
2220                         }
2221                         LM_DBG("c=%p, n=%d\n", c, n);
2222                         n=receive_fd(unix_tcp_sock, &tmp, sizeof(tmp), &fd, MSG_WAITALL);
2223                         if (unlikely(n<=0)){
2224                                 LM_ERR("failed to get fd(receive_fd): %s (%d)\n",
2225                                                 strerror(errno), errno);
2226                                 n=-1;
2227                                 do_close_fd=0;
2228                                 goto release_c;
2229                         }
2230                         /* handle fd closed or bad connection/error
2231                                 (it's possible that this happened in the time between
2232                                 we found the initial connection and the time when we get
2233                                 the fd)
2234                          */
2235                         if (unlikely(c!=tmp || fd==-1 || c->state==S_CONN_BAD)){
2236                                 if (unlikely(c!=tmp && tmp!=0))
2237                                         BUG("tcp_send: get_fd: got different connection:"
2238                                                 "  %p (id= %d, refcnt=%d state=%d) != "
2239                                                 "  %p (n=%d)\n",
2240                                                   c,   c->id,   atomic_get(&c->refcnt),   c->state,
2241                                                   tmp, n
2242                                                 );
2243                                 n=-1; /* fail */
2244                                 /* don't cache fd & close it */
2245                                 do_close_fd = (fd==-1)?0:1;
2246 #ifdef TCP_FD_CACHE
2247                                 use_fd_cache = 0;
2248 #endif /* TCP_FD_CACHE */
2249                                 goto end;
2250                         }
2251                         LM_DBG("after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
2252                 }
2253         
2254 #ifdef USE_TLS
2255                 if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) {
2256                         /* for TLS the TLS processing and the send must happen
2257                            atomically w/ respect to other sends on the same connection
2258                            (otherwise reordering might occur which would break TLS) =>
2259                            lock.
2260                         */
2261                         response[1] = CONN_NOP;
2262                         t_buf = buf;
2263                         t_len = len;
2264                         lock_get(&c->write_lock);
2265                                 do {
2266                                         t_send_flags = send_flags;
2267                                         n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
2268                                                                         &t_send_flags);
2269                                         if (likely(n > 0)) {
                                                     /* last argument 1: the write lock is already
                                                        held here */
2270                                                 n = tcpconn_do_send(fd, c, t_buf, t_len, t_send_flags,
2271                                                                                                 &resp, 1);
2272                                                 if (likely(response[1] != CONN_QUEUED_WRITE ||
2273                                                                         resp == CONN_ERROR))
2274                                                         /* don't overwrite a previous CONN_QUEUED_WRITE
2275                                                            unless error */
2276                                                         response[1] = resp;
2277                                         } else if (unlikely(n < 0)) {
2278                                                 response[1] = CONN_ERROR;
2279                                                 break;
2280                                         }
2281                                         /* else do nothing for n (t_len) == 0, keep
2282                                            the last response */
2283                                         t_buf = rest_buf;
2284                                         t_len = rest_len;
2285                                 } while(unlikely(rest_len && n > 0));
2286                         lock_release(&c->write_lock);
2287                 } else
2288 #endif
2289                         n = tcpconn_do_send(fd, c, buf, len, send_flags, &response[1], 0);
2290         if (unlikely(response[1] != CONN_NOP)) {
             /* the error label is also entered directly (goto) from the
                write-queue failure paths above, with n=-1,
                response[1]=CONN_ERROR and do_close_fd=0 */
2291 error:
2292                 response[0]=(long)c;
2293                 if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
2294                         BUG("tcp_main command %ld sending failed (write):%s (%d)\n",
2295                                         response[1], strerror(errno), errno);
2296                         /* all commands != CONN_NOP returned by tcpconn_do_send()
2297                            (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec refcnt
2298                            => if sending the command fails we have to dec. refcnt by hand
2299                          */
2300                         tcpconn_chld_put(c); /* deref. it manually */
2301                         n=-1;
2302                 }
2303                 /* here refcnt for c is already decremented => c contents can no
2304                    longer be used and refcnt _must_ _not_ be decremented again
2305                    on exit */
2306                 if (unlikely(n < 0 || response[1] == CONN_EOF)) {
2307                         /* on error or eof, remove from cache or close fd */
2308 #ifdef TCP_FD_CACHE
2309                         if (unlikely(fd_cache_e)){
2310                                 tcp_fd_cache_rm(fd_cache_e);
2311                                 fd_cache_e = 0;
2312                                 tcp_safe_close(fd);
2313                         }else
2314 #endif /* TCP_FD_CACHE */
2315                                 if (do_close_fd) tcp_safe_close(fd);
2316                 } else if (response[1] == CONN_QUEUED_WRITE) {
2317 #ifdef TCP_FD_CACHE
                             /* NOTE(review): c is still passed to tcp_fd_cache_add() here
                                although the comment above says its contents can no longer
                                be used at this point - confirm this is safe */
2318                         if (unlikely((fd_cache_e==0) && use_fd_cache)){
2319                                 tcp_fd_cache_add(c, fd);
2320                         }else
2321 #endif /* TCP_FD_CACHE */
2322                                 if (do_close_fd) tcp_safe_close(fd);
2323                 } else {
2324                         BUG("unexpected tcpconn_do_send() return & response: %d, %ld\n",
2325                                         n, response[1]);
2326                 }
2327                 return n; /* no tcpconn_put */
2328         }
2329 end:
             /* CONN_NOP exit (also used when the fd request returned an unusable
                connection/fd): cache the fd if allowed and not already cached,
                else close it if it was opened only for this send */
2330 #ifdef TCP_FD_CACHE
2331         if (unlikely((fd_cache_e==0) && use_fd_cache)){
2332                 tcp_fd_cache_add(c, fd);
2333         }else
2334 #endif /* TCP_FD_CACHE */
2335         if (do_close_fd) {
2336                 if (unlikely(tcp_safe_close(fd) < 0))
2337                         LM_ERR("closing temporary send fd for %p: %s: "
2338                                         "close(%d) failed (flags 0x%x): %s (%d)\n", c,
2339                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2340                                         fd, c->flags, strerror(errno), errno);
2341         }
2342         /* here we can have only commands that _do_ _not_ dec refcnt.
2343            (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE are all treated above) */
2344 release_c:
2345         tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
2346         return n;
2347 }
2348
2349
2350
2351 /* unsafe send on a known tcp connection.
2352  * Directly send on a known tcp connection with a given fd.
2353  * It is assumed that the connection locks are already held.
2354  * Side effects: if needed it will send state update commands to
2355  *  tcp_main (e.g. CON_EOF, CON_ERROR, CON_QUEUED_WRITE).
2356  * @param fd - fd used for sending.
2357  * @param c - existing tcp connection pointer (state and flags might be
2358  *            changed).
2359  * @param buf - data to be sent.
2360  * @param len - data length.
2361  * @param send_flags
2362  * @return <0 on error, number of bytes sent on success.
2363  */
2364 int tcpconn_send_unsafe(int fd, struct tcp_connection *c,
2365                                                 const char* buf, unsigned len, snd_flags_t send_flags)
2366 {
2367         int n;
2368         long response[2];
2369         
2370         n = tcpconn_do_send(fd, c, buf, len, send_flags, &response[1], 1);
2371         if (unlikely(response[1] != CONN_NOP)) {
2372                 /* all commands != CONN_NOP returned by tcpconn_do_send()
2373                    (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec refcnt
2374                    => increment it (we don't want the connection to be destroyed
2375                    from under us)
2376                  */
2377                 atomic_inc(&c->refcnt);
2378                 response[0]=(long)c;
2379                 if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
2380                         BUG("connection %p command %ld sending failed (write):%s (%d)\n",
2381                                         c, response[1], strerror(errno), errno);
2382                         /* send failed => deref. it back by hand */
2383                         tcpconn_chld_put(c); 
2384                         n=-1;
2385                 }
2386                 /* here refcnt for c is already decremented => c contents can no
2387                    longer be used and refcnt _must_ _not_ be decremented again
2388                    on exit */
2389                 return n;
2390         }
2391         return n;
2392 }
2393
2394
2395
2396 /** lower level send (connection and fd should be known).
2397  * It takes care of possible write-queueing, blacklisting a.s.o.
2398  * It expects a valid tcp connection. It doesn't touch the ref. cnts.
2399  * It will also set the connection flags from send_flags (it's better
2400  * to do it here, because it's guaranteed to be under lock).
2401  * @param fd - fd used for sending.
2402  * @param c - existing tcp connection pointer (state and flags might be
2403  *            changed).
2404  * @param buf - data to be sent.
2405  * @param len - data length.
2406  * @param send_flags
2407  * @param resp - filled with a cmd. for tcp_main:
2408  *                      CONN_NOP - nothing needs to be done (do not send
2409  *                                 anything to tcp_main).
2410  *                      CONN_ERROR - error, connection should be closed.
2411  *                      CONN_EOF - no error, but connection should be closed.
2412  *                      CONN_QUEUED_WRITE - new write queue (connection
2413  *                                 should be watched for write and the wr.
2414  *                                 queue flushed).
2415  * @param locked - if set assume the connection is already locked (call from
2416  *                  tls) and do not lock/unlock the connection.
2417  * @return >=0 on success, < 0 on error && *resp == CONN_ERROR.
2418  *
2419  */
2420 static int tcpconn_do_send(int fd, struct tcp_connection* c,
2421                                                         const char* buf, unsigned len,
2422                                                         snd_flags_t send_flags, long* resp,
2423                                                         int locked)
2424 {
2425         int  n;
2426 #ifdef TCP_ASYNC
2427         int enable_write_watch;
2428 #endif /* TCP_ASYNC */
2429
2430         LM_DBG("sending...\n");
2431         *resp = CONN_NOP;
2432         if (likely(!locked)) lock_get(&c->write_lock);
2433         /* update connection send flags with the current ones */
2434         tcpconn_set_send_flags(c, send_flags);
2435 #ifdef TCP_ASYNC
2436         if (likely(cfg_get(tcp, tcp_cfg, async))){
2437                 if (_wbufq_non_empty(c)
2438 #ifdef TCP_CONNECT_WAIT
2439                         || (c->flags&F_CONN_PENDING) 
2440 #endif /* TCP_CONNECT_WAIT */
2441                         ){
2442                         if (unlikely(_wbufq_add(c, buf, len)<0)){
2443                                 if (likely(!locked)) lock_release(&c->write_lock);
2444                                 n=-1;
2445                                 goto error;
2446                         }
2447                         if (likely(!locked)) lock_release(&c->write_lock);
2448                         n=len;
2449                         goto end;
2450                 }
2451                 n=_tcpconn_write_nb(fd, c, buf, len);
2452         }else{
2453 #endif /* TCP_ASYNC */
2454                 /* n=tcp_blocking_write(c, fd, buf, len); */
2455                 n=tsend_stream(fd, buf, len,
2456                                                 TICKS_TO_S(cfg_get(tcp, tcp_cfg, send_timeout)) *
2457                                                 1000);
2458 #ifdef TCP_ASYNC
2459         }
2460 #else /* ! TCP_ASYNC */
2461         if (likely(!locked)) lock_release(&c->write_lock);
2462 #endif /* TCP_ASYNC */
2463         
2464         LM_DBG("after real write: c= %p n=%d fd=%d\n",c, n, fd);
2465         LM_DBG("buf=\n%.*s\n", (int)len, buf);
2466         if (unlikely(n<(int)len)){
2467 #ifdef TCP_ASYNC
2468                 if (cfg_get(tcp, tcp_cfg, async) &&
2469                                 ((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK)){
2470                         enable_write_watch=_wbufq_empty(c);
2471                         if (n<0) n=0;
2472                         else if (unlikely(c->state==S_CONN_CONNECT ||
2473                                                 c->state==S_CONN_ACCEPT)){
2474                                 TCP_STATS_ESTABLISHED(c->state);
2475                                 c->state=S_CONN_OK; /* something was written */
2476                         }
2477                         if (unlikely(_wbufq_add(c, buf+n, len-n)<0)){
2478                                 if (likely(!locked)) lock_release(&c->write_lock);
2479                                 n=-1;
2480                                 goto error;
2481                         }
2482                         if (likely(!locked)) lock_release(&c->write_lock);
2483                         n=len;
2484                         if (likely(enable_write_watch))
2485                                 *resp=CONN_QUEUED_WRITE;
2486                         goto end;
2487                 }else{
2488                         if (likely(!locked)) lock_release(&c->write_lock);
2489                 }
2490 #endif /* TCP_ASYNC */
2491                 if (unlikely(c->state==S_CONN_CONNECT)){
2492                         switch(errno){
2493                                 case ENETUNREACH:
2494                                 case EHOSTUNREACH: /* not posix for send() */
2495 #ifdef USE_DST_BLACKLIST
2496                                         dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
2497                                                                                 &c->rcv.src_su, &c->send_flags, 0);
2498 #endif /* USE_DST_BLACKLIST */
2499                                         TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
2500                                                                         TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2501                                         break;
2502                                 case ECONNREFUSED:
2503                                 case ECONNRESET:
2504 #ifdef USE_DST_BLACKLIST
2505                                         dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
2506                                                                                 &c->rcv.src_su, &c->send_flags, 0);
2507 #endif /* USE_DST_BLACKLIST */
2508                                         TCP_EV_CONNECT_RST(errno, TCP_LADDR(c), TCP_LPORT(c),
2509                                                                                 TCP_PSU(c), TCP_PROTO(c));
2510                                         break;
2511                                 default:
2512                                         TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c), TCP_LPORT(c),
2513                                                                                 TCP_PSU(c), TCP_PROTO(c));
2514                                 }
2515                         TCP_STATS_CONNECT_FAILED();
2516                 }else{
2517                         switch(errno){
2518                                 case ECONNREFUSED:
2519                                 case ECONNRESET:
2520                                         TCP_STATS_CON_RESET();
2521                                         /* no break */
2522                                 case ENETUNREACH:
2523                                 /*case EHOSTUNREACH: -- not posix */
2524 #ifdef USE_DST_BLACKLIST
2525                                         dst_blacklist_su(BLST_ERR_SEND, c->rcv.proto,
2526                                                                                 &c->rcv.src_su, &c->send_flags, 0);
2527 #endif /* USE_DST_BLACKLIST */
2528                                         break;
2529                         }
2530                 }
2531                 LM_ERR("failed to send on %p (%s:%d->%s): %s (%d)\n",
2532                                         c, ip_addr2a(&c->rcv.dst_ip), c->rcv.dst_port,
2533                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2534                                         strerror(errno), errno);
2535                 n = -1;
2536 #ifdef TCP_ASYNC
2537 error:
2538 #endif /* TCP_ASYNC */
2539                 /* error on the connection , mark it as bad and set 0 timeout */
2540                 c->state=S_CONN_BAD;
2541                 c->timeout=get_ticks_raw();
2542                 /* tell "main" it should drop this (optional it will t/o anyway?)*/
2543                 *resp=CONN_ERROR;
2544                 return n; /* error return, no tcpconn_put */
2545         }
2546         
2547 #ifdef TCP_ASYNC
2548         if (likely(!locked)) lock_release(&c->write_lock);
2549 #endif /* TCP_ASYNC */
2550         /* in non-async mode here we're either in S_CONN_OK or S_CONN_ACCEPT*/
2551         if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
2552                         TCP_STATS_ESTABLISHED(c->state);
2553                         c->state=S_CONN_OK;
2554         }
2555         if (unlikely(send_flags.f & SND_F_CON_CLOSE)){
2556                 /* close after write => send EOF request to tcp_main */
2557                 c->state=S_CONN_BAD;
2558                 c->timeout=get_ticks_raw();
2559                 /* tell "main" it should drop this*/
2560                 *resp=CONN_EOF;
2561                 return n;
2562         }
2563 end:
2564         return n;
2565 }
2566
2567
2568
2569 /** low level 1st send on a new connection.
2570  * It takes care of possible write-queueing, blacklisting a.s.o.
2571  * It expects a valid just-opened tcp connection. It doesn't touch the 
2572  * ref. counters. It's used only in the async first send case.
2573  * @param fd - fd used for sending.
2574  * @param c - existing tcp connection pointer (state and flags might be
2575  *            changed). The connection must be new (no previous send on it).
2576  * @param buf - data to be sent.
2577  * @param len - data length.
2578  * @param send_flags
2579  * @param resp - filled with a fd sending cmd. for tcp_main on success. It
2580  *                      _must_ be one of the commands listed below:
2581  *                      CONN_NEW_PENDING_WRITE - new connection, first write
2582  *                                 was partially successful (or EAGAIN) and
2583  *                                 was queued (connection should be watched
2584  *                                 for write and the write queue flushed).
2585  *                                 The fd should be sent to tcp_main.
2586  *                      CONN_NEW_COMPLETE - new connection, first write
2587  *                                 completed successfully and no data is
2588  *                                 queued. The fd should be sent to tcp_main.
2589  *                      CONN_EOF - no error, but the connection should be
2590  *                                  closed (e.g. SND_F_CON_CLOSE send flag).
2591  *                      CONN_ERROR - error, _must_ return < 0.
2592  * @param locked - if set assume the connection is already locked (call from
2593  *                  tls) and do not lock/unlock the connection.
2594  * @return >=0 on success, < 0 on error (on error *resp is undefined).
2595  *
2596  */
2597 static int tcpconn_1st_send(int fd, struct tcp_connection* c,
2598                                                         const char* buf, unsigned len,
2599                                                         snd_flags_t send_flags, long* resp,
2600                                                         int locked)
2601 {
2602         int n;
2603         
2604         n=_tcpconn_write_nb(fd, c, buf, len);
2605         if (unlikely(n<(int)len)){
2606                 /* on EAGAIN or ENOTCONN return success.
2607                    ENOTCONN appears on newer FreeBSD versions (non-blocking socket,
2608                    connect() & send immediately) */
2609                 if ((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK || errno==ENOTCONN){
2610                         LM_DBG("pending write on new connection %p "
2611                                 "(%d/%d bytes written)\n", c, n, len);
2612                         if (unlikely(n<0)) n=0;
2613                         else{
2614                                 if (likely(c->state == S_CONN_CONNECT))
2615                                         TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
2616                                 c->state=S_CONN_OK; /* partial write => connect()
2617                                                                                                 ended */
2618                         }
2619                         /* add to the write queue */
2620                         if (likely(!locked)) lock_get(&c->write_lock);
2621                                 if (unlikely(_wbufq_insert(c, buf+n, len-n)<0)){
2622                                         if (likely(!locked)) lock_release(&c->write_lock);
2623                                         n=-1;
2624                                         LM_ERR("%s: EAGAIN and write queue full or failed for %p\n",
2625                                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)), c);
2626                                         goto error;
2627                                 }
2628                         if (likely(!locked)) lock_release(&c->write_lock);
2629                         /* send to tcp_main */
2630                         *resp=CONN_NEW_PENDING_WRITE;
2631                         n=len;
2632                         goto end;
2633                 }
2634                 /* n < 0 and not EAGAIN => write error */
2635                 /* if first write failed it's most likely a
2636                    connect error */
2637                 switch(errno){
2638                         case ENETUNREACH:
2639                         case EHOSTUNREACH:  /* not posix for send() */
2640 #ifdef USE_DST_BLACKLIST
2641                                 dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
2642                                                                         &c->rcv.src_su, &c->send_flags, 0);
2643 #endif /* USE_DST_BLACKLIST */
2644                                 TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
2645                                                                 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2646                                 break;
2647                         case ECONNREFUSED:
2648                         case ECONNRESET:
2649 #ifdef USE_DST_BLACKLIST
2650                                 dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
2651                                                                         &c->rcv.src_su, &c->send_flags, 0);
2652 #endif /* USE_DST_BLACKLIST */
2653                                 TCP_EV_CONNECT_RST(errno, TCP_LADDR(c),
2654                                                                 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2655                                 break;
2656                         default:
2657                                 TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
2658                                                                 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2659                 }
2660                 /* error: destroy it directly */
2661                 TCP_STATS_CONNECT_FAILED();
2662                 LM_ERR("%s: connect & send  for %p failed:" " %s (%d)\n",
2663                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2664                                         c, strerror(errno), errno);
2665                 goto error;
2666         }
2667         LM_INFO("quick connect for %p\n", c);
2668         if (likely(c->state == S_CONN_CONNECT))
2669                 TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
2670         if (unlikely(send_flags.f & SND_F_CON_CLOSE)){
2671                 /* close after write =>  EOF => close immediately */
2672                 c->state=S_CONN_BAD;
2673                 /* tell our caller that it should drop this*/
2674                 *resp=CONN_EOF;
2675         }else{
2676                 c->state=S_CONN_OK;
2677                 /* send to tcp_main */
2678                 *resp=CONN_NEW_COMPLETE;
2679         }
2680 end:
2681         return n; /* >= 0 */
2682 error:
2683         *resp=CONN_ERROR;
2684         return -1;
2685 }
2686
2687
2688
2689 int tcp_init(struct socket_info* sock_info)
2690 {
2691         union sockaddr_union* addr;
2692         int optval;
2693 #ifdef HAVE_TCP_ACCEPT_FILTER
2694         struct accept_filter_arg afa;
2695 #endif /* HAVE_TCP_ACCEPT_FILTER */
2696 #ifdef DISABLE_NAGLE
2697         int flag;
2698         struct protoent* pe;
2699
2700         if (tcp_proto_no==-1){ /* if not already set */
2701                 pe=getprotobyname("tcp");
2702                 if (pe==0){
2703                         LM_ERR("could not get TCP protocol number\n");
2704                         tcp_proto_no=-1;
2705                 }else{
2706                         tcp_proto_no=pe->p_proto;
2707                 }
2708         }
2709 #endif
2710
2711         addr=&sock_info->su;
2712         /* sock_info->proto=PROTO_TCP; */
2713         if (init_su(addr, &sock_info->address, sock_info->port_no)<0){
2714                 LM_ERR("could no init sockaddr_union\n");
2715                 goto error;
2716         }
2717         LM_DBG("added %s\n", su2a(addr, sizeof(*addr)));
2718         sock_info->socket=socket(AF2PF(addr->s.sa_family), SOCK_STREAM, 0);
2719         if (sock_info->socket==-1){
2720                 LM_ERR("tcp_init: socket: %s\n", strerror(errno));
2721                 goto error;
2722         }
2723 #ifdef DISABLE_NAGLE
2724         flag=1;
2725         if ( (tcp_proto_no!=-1) &&
2726                  (setsockopt(sock_info->socket, tcp_proto_no , TCP_NODELAY,
2727                                          &flag, sizeof(flag))<0) ){
2728                 LM_ERR("could not disable Nagle: %s\n", strerror(errno));
2729         }
2730 #endif
2731
2732
2733 #if  !defined(TCP_DONT_REUSEADDR) 
2734         /* Stevens, "Network Programming", Section 7.5, "Generic Socket
2735      * Options": "...server started,..a child continues..on existing
2736          * connection..listening server is restarted...call to bind fails
2737          * ... ALL TCP servers should specify the SO_REUSEADDRE option 
2738          * to allow the server to be restarted in this situation
2739          *
2740          * Indeed, without this option, the server can't restart.
2741          *   -jiri
2742          */
2743         optval=1;
2744         if (setsockopt(sock_info->socket, SOL_SOCKET, SO_REUSEADDR,
2745                                 (void*)&optval, sizeof(optval))==-1) {
2746                 LM_ERR("setsockopt %s\n", strerror(errno));
2747                 goto error;
2748         }
2749 #endif
2750         /* tos */
2751         optval = tos;
2752         if(sock_info->address.af==AF_INET){
2753                 if (setsockopt(sock_info->socket, IPPROTO_IP, IP_TOS, (void*)&optval,
2754                                         sizeof(optval)) ==-1){
2755                         LM_WARN("setsockopt tos: %s (%d)\n", strerror(errno), tos);
2756                         /* continue since this is not critical */
2757                 }
2758         } else if(sock_info->address.af==AF_INET6){
2759                 if (setsockopt(sock_info->socket, IPPROTO_IPV6, IPV6_TCLASS,
2760                                         (void*)&optval, sizeof(optval)) ==-1) {
2761                         LM_WARN("setsockopt v6 tos: %s (%d)\n", strerror(errno), tos);
2762                         /* continue since this is not critical */
2763                 }
2764         }
2765 #ifdef HAVE_TCP_DEFER_ACCEPT
2766         /* linux only */
2767         if ((optval=cfg_get(tcp, tcp_cfg, defer_accept))){
2768                 if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_DEFER_ACCEPT,
2769                                         (void*)&optval, sizeof(optval)) ==-1){
2770                         LM_WARN("setsockopt TCP_DEFER_ACCEPT %s\n", strerror(errno));
2771                 /* continue since this is not critical */
2772                 }
2773         }
2774 #endif /* HAVE_TCP_DEFFER_ACCEPT */
2775 #ifdef HAVE_TCP_SYNCNT
2776         if ((optval=cfg_get(tcp, tcp_cfg, syncnt))){
2777                 if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_SYNCNT, &optval,
2778                                                 sizeof(optval))<0){
2779                         LM_WARN("failed to set maximum SYN retr. count: %s\n", strerror(errno));
2780                 }
2781         }
2782 #endif
2783 #ifdef HAVE_TCP_LINGER2
2784         if ((optval=cfg_get(tcp, tcp_cfg, linger2))){
2785                 if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_LINGER2, &optval,
2786                                                 sizeof(optval))<0){
2787                         LM_WARN("failed to set maximum LINGER2 timeout: %s\n", strerror(errno));
2788                 }
2789         }
2790 #endif
2791         init_sock_keepalive(sock_info->socket);
2792         if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
2793                 LM_ERR("bind(%x, %p, %d) on %s:%d : %s\n",
2794                                 sock_info->socket,  &addr->s, 
2795                                 (unsigned)sockaddru_len(*addr),
2796                                 sock_info->address_str.s,
2797                                 sock_info->port_no,
2798                                 strerror(errno));
2799                 goto error;
2800         }
2801         if (listen(sock_info->socket, TCP_LISTEN_BACKLOG)==-1){
2802                 LM_ERR("listen(%x, %p, %d) on %s: %s\n",
2803                                 sock_info->socket, &addr->s, 
2804                                 (unsigned)sockaddru_len(*addr),
2805                                 sock_info->address_str.s,
2806                                 strerror(errno));
2807                 goto error;
2808         }
2809 #ifdef HAVE_TCP_ACCEPT_FILTER
2810         /* freebsd */
2811         if (cfg_get(tcp, tcp_cfg, defer_accept)){
2812                 memset(&afa, 0, sizeof(afa));
2813                 strcpy(afa.af_name, "dataready");
2814                 if (setsockopt(sock_info->socket, SOL_SOCKET, SO_ACCEPTFILTER,