Makefile.defs: version set 3.4.0-dev2
[sip-router] / tcp_main.c
1 /*
2  * $Id$
3  *
4  * Copyright (C) 2001-2003 FhG Fokus
5  *
6  * This file is part of ser, a free SIP server.
7  *
8  * ser is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version
12  *
13  * For a license to use the ser software under conditions
14  * other than those described here, or to purchase support for this
15  * software, please contact iptel.org by e-mail at the following addresses:
16  *    info@iptel.org
17  *
18  * ser is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26  */
27 /*
28  * History:
29  * --------
30  *  2002-11-29  created by andrei
31  *  2002-12-11  added tcp_send (andrei)
32  *  2003-01-20  locking fixes, hashtables (andrei)
33  *  2003-02-20  s/lock_t/gen_lock_t/ to avoid a conflict on solaris (andrei)
34  *  2003-02-25  Nagle is disabled if -DDISABLE_NAGLE (andrei)
35  *  2003-03-29  SO_REUSEADDR before calling bind to allow
36  *              server restart, Nagle set on the (hopefuly) 
37  *              correct socket (jiri)
38  *  2003-03-31  always try to find the corresponding tcp listen socket for
39  *               a temp. socket and store in in *->bind_address: added
40  *               find_tcp_si, modified tcpconn_connect (andrei)
41  *  2003-04-14  set sockopts to TOS low delay (andrei)
42  *  2003-06-30  moved tcp new connect checking & handling to
43  *               handle_new_connect (andrei)
44  *  2003-07-09  tls_close called before closing the tcp connection (andrei)
45  *  2003-10-24  converted to the new socket_info lists (andrei)
46  *  2003-10-27  tcp port aliases support added (andrei)
47  *  2003-11-04  always lock before manipulating refcnt; sendchild
48  *              does not inc refcnt by itself anymore (andrei)
49  *  2003-11-07  different unix sockets are used for fd passing
50  *              to/from readers/writers (andrei)
51  *  2003-11-17  handle_new_connect & tcp_connect will close the 
52  *              new socket if tcpconn_new return 0 (e.g. out of mem) (andrei)
53  *  2003-11-28  tcp_blocking_write & tcp_blocking_connect added (andrei)
54  *  2004-11-08  dropped find_tcp_si and replaced with find_si (andrei)
55  *  2005-06-07  new tcp optimized code, supports epoll (LT), sigio + real time
56  *               signals, poll & select (andrei)
57  *  2005-06-26  *bsd kqueue support (andrei)
58  *  2005-07-04  solaris /dev/poll support (andrei)
59  *  2005-07-08  tcp_max_connections, tcp_connection_lifetime, don't accept
60  *               more connections if tcp_max_connections is exceeded (andrei)
61  *  2005-10-21  cleanup all the open connections on exit
 *              decrement the no. of open connections on timeout too (andrei)
 *  2006-01-30  queue send_fd request and execute them at the end of the
63  *              poll loop  (#ifdef) (andrei)
64  *              process all children requests, before attempting to send
65  *              them new stuff (fixes some deadlocks) (andrei)
66  *  2006-02-03  timers are run only once per s (andrei)
67  *              tcp children fds can be non-blocking; send fds are queued on
68  *              EAGAIN; lots of bug fixes (andrei)
69  *  2006-02-06  better tcp_max_connections checks, tcp_connections_no moved to
70  *              shm (andrei)
71  *  2006-04-12  tcp_send() changed to use struct dest_info (andrei)
72  *  2006-11-02  switched to atomic ops for refcnt, locking improvements 
73  *               (andrei)
74  *  2006-11-04  switched to raw ticks (to fix conversion errors which could
75  *               result in inf. lifetime) (andrei)
76  *  2007-07-25  tcpconn_connect can now bind the socket on a specified
77  *                source addr/port (andrei)
78  *  2007-07-26   tcp_send() and tcpconn_get() can now use a specified source
79  *                addr./port (andrei)
80  *  2007-08-23   getsockname() for INADDR_ANY(SI_IS_ANY) sockets (andrei)
81  *  2007-08-27   split init_sock_opt into a lightweight init_sock_opt_accept() 
82  *               used when accepting connections and init_sock_opt used for 
83  *               connect/ new sockets (andrei)
84  *  2007-11-22  always add the connection & clear the coresponding flags before
85  *               io_watch_add-ing its fd - it's safer this way (andrei)
86  *  2007-11-26  improved tcp timers: switched to local_timer (andrei)
87  *  2007-11-27  added send fd cache and reader fd reuse (andrei)
88  *  2007-11-28  added support for TCP_DEFER_ACCEPT, KEEPALIVE, KEEPINTVL,
89  *               KEEPCNT, QUICKACK, SYNCNT, LINGER2 (andrei)
90  *  2007-12-04  support for queueing write requests (andrei)
91  *  2007-12-12  destroy connection asap on wbuf. timeout (andrei)
92  *  2007-12-13  changed the refcnt and destroy scheme, now refcnt is 1 if
93  *                linked into the hash tables (was 0) (andrei)
94  *  2007-12-21  support for pending connects (connections are added to the
95  *               hash immediately and writes on them are buffered) (andrei)
96  *  2008-02-05  handle POLLRDHUP (if supported), POLLERR and
97  *               POLLHUP (andrei)
98  *              on write error check if there's still data in the socket 
99  *               read buffer and process it first (andrei)
100  *  2009-02-26  direct blacklist support (andrei)
101  *  2009-03-20  s/wq_timeout/send_timeout ; send_timeout is now in ticks
102  *              (andrei)
103  *  2009-04-09  tcp ev and tcp stats macros added (andrei)
104  *  2009-09-15  support for force connection reuse and close after send
105  *               send flags (andrei)
106  *  2010-03-23  tcp_send() split in 3 smaller functions (andrei)
107  */
108
109 /** tcp main/dispatcher and tcp send functions.
110  * @file tcp_main.c
111  * @ingroup core
112  * Module: @ref core
113  */
114
115
116 #ifdef USE_TCP
117
118
119 #ifndef SHM_MEM
120 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
121 #endif
122
123 #define HANDLE_IO_INLINE
124 #include "io_wait.h" /* include first to make sure the needed features are
125                                                 turned on (e.g. _GNU_SOURCE for POLLRDHUP) */
126
127 #include <sys/time.h>
128 #include <sys/types.h>
129 #include <sys/select.h>
130 #include <sys/socket.h>
131 #ifdef HAVE_FILIO_H
132 #include <sys/filio.h> /* needed on solaris 2.x for FIONREAD */
133 #elif defined __OS_solaris
134 #define BSD_COMP  /* needed on older solaris for FIONREAD */
135 #endif /* HAVE_FILIO_H / __OS_solaris */
136 #include <sys/ioctl.h>  /* ioctl() used on write error */
137 #include <netinet/in.h>
138 #include <netinet/in_systm.h>
139 #include <netinet/ip.h>
140 #include <netinet/tcp.h>
141 #include <sys/uio.h>  /* writev*/
142 #include <netdb.h>
143 #include <stdlib.h> /*exit() */
144
145 #include <unistd.h>
146
147 #include <errno.h>
148 #include <string.h>
149
150 #ifdef HAVE_SELECT
151 #include <sys/select.h>
152 #endif
153 #include <sys/poll.h>
154
155
156 #include "ip_addr.h"
157 #include "pass_fd.h"
158 #include "tcp_conn.h"
159 #include "globals.h"
160 #include "pt.h"
161 #include "locking.h"
162 #include "mem/mem.h"
163 #include "mem/shm_mem.h"
164 #include "timer.h"
165 #include "sr_module.h"
166 #include "tcp_server.h"
167 #include "tcp_init.h"
168 #include "tcp_int_send.h"
169 #include "tcp_stats.h"
170 #include "tcp_ev.h"
171 #include "tsend.h"
172 #include "timer_ticks.h"
173 #include "local_timer.h"
174 #ifdef CORE_TLS
175 #include "tls/tls_server.h"
176 #define tls_loaded() 1
177 #else
178 #include "tls_hooks_init.h"
179 #include "tls_hooks.h"
180 #endif /* CORE_TLS*/
181 #ifdef USE_DST_BLACKLIST
182 #include "dst_blacklist.h"
183 #endif /* USE_DST_BLACKLIST */
184
185 #include "tcp_info.h"
186 #include "tcp_options.h"
187 #include "ut.h"
188 #include "cfg/cfg_struct.h"
189
190 #define local_malloc pkg_malloc
191 #define local_free   pkg_free
192
193 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
194
195
196 #ifdef NO_MSG_DONTWAIT
197 #ifndef MSG_DONTWAIT
198 /* should work inside tcp_main */
199 #define MSG_DONTWAIT 0
200 #endif
201 #endif /*NO_MSG_DONTWAIT */
202
203
204 #define TCP_PASS_NEW_CONNECTION_ON_DATA /* don't pass a new connection
205                                                                                    immediately to a child, wait for
206                                                                                    some data on it first */
207 #define TCP_LISTEN_BACKLOG 1024
208 #define SEND_FD_QUEUE /* queue send fd requests on EAGAIN, instead of sending 
209                                                         them immediately */
210 #define TCP_CHILD_NON_BLOCKING 
211 #ifdef SEND_FD_QUEUE
212 #ifndef TCP_CHILD_NON_BLOCKING
213 #define TCP_CHILD_NON_BLOCKING
214 #endif
215 #define MAX_SEND_FD_QUEUE_SIZE  tcp_main_max_fd_no
216 #define SEND_FD_QUEUE_SIZE              128  /* initial size */
217 #define SEND_FD_QUEUE_TIMEOUT   MS_TO_TICKS(2000)  /* 2 s */
218 #endif
219
220 /* minimum interval local_timer_run() is allowed to run, in ticks */
221 #define TCPCONN_TIMEOUT_MIN_RUN 1  /* once per tick */
222 #define TCPCONN_WAIT_TIMEOUT 1 /* 1 tick */
223
224 #ifdef TCP_ASYNC
225 static unsigned int* tcp_total_wq=0;
226 #endif
227
228
229 enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
230                                 F_TCPCONN, F_TCPCHILD, F_PROC };
231
232
233 #ifdef TCP_FD_CACHE
234
235 #define TCP_FD_CACHE_SIZE 8
236
237 struct fd_cache_entry{
238         struct tcp_connection* con;
239         int id;
240         int fd;
241 };
242
243
244 static struct fd_cache_entry fd_cache[TCP_FD_CACHE_SIZE];
245 #endif /* TCP_FD_CACHE */
246
247 static int is_tcp_main=0;
248
249
250 enum poll_types tcp_poll_method=0; /* by default choose the best method */
251 int tcp_main_max_fd_no=0;
252 int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
253 int tls_max_connections=DEFAULT_TLS_MAX_CONNECTIONS;
254
255 static union sockaddr_union tcp_source_ipv4_addr; /* saved bind/srv v4 addr. */
256 static union sockaddr_union* tcp_source_ipv4=0;
257 #ifdef USE_IPV6
258 static union sockaddr_union tcp_source_ipv6_addr; /* saved bind/src v6 addr. */
259 static union sockaddr_union* tcp_source_ipv6=0;
260 #endif
261
262 static int* tcp_connections_no=0; /* current tcp (+tls) open connections */
263 static int* tls_connections_no=0; /* current tls open connections */
264
265 /* connection hash table (after ip&port) , includes also aliases */
266 struct tcp_conn_alias** tcpconn_aliases_hash=0;
267 /* connection hash table (after connection id) */
268 struct tcp_connection** tcpconn_id_hash=0;
269 gen_lock_t* tcpconn_lock=0;
270
271 struct tcp_child* tcp_children=0;
272 static int* connection_id=0; /*  unique for each connection, used for 
273                                                                 quickly finding the corresponding connection
274                                                                 for a reply */
275 int unix_tcp_sock;
276
277 static int tcp_proto_no=-1; /* tcp protocol number as returned by
278                                                            getprotobyname */
279
280 static io_wait_h io_h;
281
282 static struct local_timer tcp_main_ltimer;
283 static ticks_t tcp_main_prev_ticks;
284
285 /* tell if there are tcp workers that should handle only specific socket
286  * - used to optimize the search of least loaded worker for a tcp socket
287  * - 0 - no workers per tcp sockets have been set
288  * - 1 + generic_workers - when there are workers per tcp sockets
289  */
290 static int tcp_sockets_gworkers = 0;
291
292 static ticks_t tcpconn_main_timeout(ticks_t , struct timer_ln* , void* );
293
294 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
295                                                                                 struct ip_addr* l_ip, int l_port,
296                                                                                 int flags);
297
298
299
300 /* sets source address used when opening new sockets and no source is specified
301  *  (by default the address is choosen by the kernel)
302  * Should be used only on init.
303  * returns -1 on error */
304 int tcp_set_src_addr(struct ip_addr* ip)
305 {
306         switch (ip->af){
307                 case AF_INET:
308                         ip_addr2su(&tcp_source_ipv4_addr, ip, 0);
309                         tcp_source_ipv4=&tcp_source_ipv4_addr;
310                         break;
311                 #ifdef USE_IPV6
312                 case AF_INET6:
313                         ip_addr2su(&tcp_source_ipv6_addr, ip, 0);
314                         tcp_source_ipv6=&tcp_source_ipv6_addr;
315                         break;
316                 #endif
317                 default:
318                         return -1;
319         }
320         return 0;
321 }
322
323
324
/** apply the configured tcp keepalive options on a socket.
 * Each option is compiled in only if the corresponding HAVE_* macro is
 * defined and is set only if its config value is non-zero.
 * @param s - socket.
 * @return 0 on success, -1 if SO_KEEPALIVE could not be enabled (failures
 *         to set the interval, idle time or probe count are only logged).
 */
static inline int init_sock_keepalive(int s)
{
	int val;
	
#ifdef HAVE_SO_KEEPALIVE
	if (cfg_get(tcp, tcp_cfg, keepalive)){
		val=1;
		if (setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val))<0){
			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to enable"
						" SO_KEEPALIVE: %s\n", strerror(errno));
			return -1;
		}
	}
#endif
#ifdef HAVE_TCP_KEEPINTVL
	val=cfg_get(tcp, tcp_cfg, keepintvl);
	if (val!=0 &&
			setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val))<0){
		LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
					" keepalive probes interval: %s\n", strerror(errno));
	}
#endif
#ifdef HAVE_TCP_KEEPIDLE
	val=cfg_get(tcp, tcp_cfg, keepidle);
	if (val!=0 &&
			setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val))<0){
		LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
					" keepalive idle interval: %s\n", strerror(errno));
	}
#endif
#ifdef HAVE_TCP_KEEPCNT
	val=cfg_get(tcp, tcp_cfg, keepcnt);
	if (val!=0 &&
			setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val))<0){
		LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
					" maximum keepalive count: %s\n", strerror(errno));
	}
#endif
	return 0;
}
369
370
371
372 /* set all socket/fd options for new sockets (e.g. before connect): 
373  *  disable nagle, tos lowdelay, reuseaddr, non-blocking
374  *
375  * return -1 on error */
376 static int init_sock_opt(int s)
377 {
378         int flags;
379         int optval;
380         
381 #ifdef DISABLE_NAGLE
382         flags=1;
383         if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
384                                         &flags, sizeof(flags))<0) ){
385                 LOG(L_WARN, "WARNING: init_sock_opt: could not disable Nagle: %s\n",
386                                 strerror(errno));
387         }
388 #endif
389         /* tos*/
390         optval = tos;
391         if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,sizeof(optval)) ==-1){
392                 LOG(L_WARN, "WARNING: init_sock_opt: setsockopt tos: %s\n",
393                                 strerror(errno));
394                 /* continue since this is not critical */
395         }
396 #if  !defined(TCP_DONT_REUSEADDR) 
397         optval=1;
398         if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
399                                                 (void*)&optval, sizeof(optval))==-1){
400                 LOG(L_ERR, "ERROR: setsockopt SO_REUSEADDR %s\n",
401                                 strerror(errno));
402                 /* continue, not critical */
403         }
404 #endif /* !TCP_DONT_REUSEADDR */
405 #ifdef HAVE_TCP_SYNCNT
406         if ((optval=cfg_get(tcp, tcp_cfg, syncnt))){
407                 if (setsockopt(s, IPPROTO_TCP, TCP_SYNCNT, &optval,
408                                                 sizeof(optval))<0){
409                         LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
410                                                 " maximum SYN retr. count: %s\n", strerror(errno));
411                 }
412         }
413 #endif
414 #ifdef HAVE_TCP_LINGER2
415         if ((optval=cfg_get(tcp, tcp_cfg, linger2))){
416                 if (setsockopt(s, IPPROTO_TCP, TCP_LINGER2, &optval,
417                                                 sizeof(optval))<0){
418                         LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
419                                                 " maximum LINGER2 timeout: %s\n", strerror(errno));
420                 }
421         }
422 #endif
423 #ifdef HAVE_TCP_QUICKACK
424         if (cfg_get(tcp, tcp_cfg, delayed_ack)){
425                 optval=0; /* reset quick ack => delayed ack */
426                 if (setsockopt(s, IPPROTO_TCP, TCP_QUICKACK, &optval,
427                                                 sizeof(optval))<0){
428                         LOG(L_WARN, "WARNING: init_sock_opt: failed to reset"
429                                                 " TCP_QUICKACK: %s\n", strerror(errno));
430                 }
431         }
432 #endif /* HAVE_TCP_QUICKACK */
433         init_sock_keepalive(s);
434         
435         /* non-blocking */
436         flags=fcntl(s, F_GETFL);
437         if (flags==-1){
438                 LOG(L_ERR, "ERROR: init_sock_opt: fnctl failed: (%d) %s\n",
439                                 errno, strerror(errno));
440                 goto error;
441         }
442         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
443                 LOG(L_ERR, "ERROR: init_sock_opt: fcntl: set non-blocking failed:"
444                                 " (%d) %s\n", errno, strerror(errno));
445                 goto error;
446         }
447         return 0;
448 error:
449         return -1;
450 }
451
452
453
454 /* set all socket/fd options for "accepted" sockets 
455  *  only nonblocking is set since the rest is inherited from the
456  *  "parent" (listening) socket
457  *  Note: setting O_NONBLOCK is required on linux but it's not needed on
458  *        BSD and possibly solaris (where the flag is inherited from the 
459  *        parent socket). However since there is no standard document 
460  *        requiring a specific behaviour in this case it's safer to always set
461  *        it (at least for now)  --andrei
462  *  TODO: check on which OSes  O_NONBLOCK is inherited and make this 
463  *        function a nop.
464  *
465  * return -1 on error */
466 static int init_sock_opt_accept(int s)
467 {
468         int flags;
469         
470         /* non-blocking */
471         flags=fcntl(s, F_GETFL);
472         if (flags==-1){
473                 LOG(L_ERR, "ERROR: init_sock_opt_accept: fnctl failed: (%d) %s\n",
474                                 errno, strerror(errno));
475                 goto error;
476         }
477         if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
478                 LOG(L_ERR, "ERROR: init_sock_opt_accept: "
479                                         "fcntl: set non-blocking failed: (%d) %s\n",
480                                         errno, strerror(errno));
481                 goto error;
482         }
483         return 0;
484 error:
485         return -1;
486 }
487
488
489
/** close a socket, handling errno.
 * On EINTR, repeat the close(). Some systems release the descriptor even
 * if close() is interrupted; there the repeated close() fails with EBADF
 * although the socket was in fact closed, so an EBADF that follows an
 * EINTR is reported as success.
 * Filter expected errors (return success if close() failed because
 * EPIPE, ECONNRESET a.s.o). Note that this happens on *BSDs (on linux
 * close() does not fail for socket level errors).
 * @param s - open valid socket.
 * @return - 0 on success, < 0 on error (whatever close() returns). On error
 *           errno is set.
 */
static int tcp_safe_close(int s)
{
	int ret;
	int interrupted; /* set if a previous close() attempt returned EINTR */
	
	interrupted=0;
	while((ret=close(s))<0){
		switch(errno){
			case EINTR:
				interrupted=1;
				continue; /* retry the close() */
			case EBADF:
				/* after an EINTR this means the fd was already released
				   by the interrupted close() => not an error */
				if (interrupted)
					ret=0;
				break;
			case EPIPE:
			case ENOTCONN:
			case ECONNRESET:
			case ECONNREFUSED:
			case ENETUNREACH:
			case EHOSTUNREACH:
				/* on *BSD we really get these errors at close() time 
				   => ignore them */
				ret=0;
				break;
			default:
				break;
		}
		break;
	}
	return ret;
}
523
524
525
526 /* blocking connect on a non-blocking fd; it will timeout after
527  * tcp_connect_timeout 
528  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
529  * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
530  */
531 static int tcp_blocking_connect(int fd, int type, snd_flags_t* send_flags,
532                                                                 const struct sockaddr *servaddr,
533                                                                 socklen_t addrlen)
534 {
535         int n;
536 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
537         fd_set sel_set;
538         fd_set orig_set;
539         struct timeval timeout;
540 #else
541         struct pollfd pf;
542 #endif
543         int elapsed;
544         int to;
545         int ticks;
546         int err;
547         unsigned int err_len;
548         int poll_err;
549         
550         poll_err=0;
551         to=cfg_get(tcp, tcp_cfg, connect_timeout_s);
552         ticks=get_ticks();
553 again:
554         n=connect(fd, servaddr, addrlen);
555         if (n==-1){
556                 if (errno==EINTR){
557                         elapsed=(get_ticks()-ticks)*TIMER_TICK;
558                         if (elapsed<to)         goto again;
559                         else goto error_timeout;
560                 }
561                 if (errno!=EINPROGRESS && errno!=EALREADY){
562                         goto error_errno;
563                 }
564         }else goto end;
565         
566         /* poll/select loop */
567 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
568                 FD_ZERO(&orig_set);
569                 FD_SET(fd, &orig_set);
570 #else
571                 pf.fd=fd;
572                 pf.events=POLLOUT;
573 #endif
574         while(1){
575                 elapsed=(get_ticks()-ticks)*TIMER_TICK;
576                 if (elapsed>=to)
577                         goto error_timeout;
578 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
579                 sel_set=orig_set;
580                 timeout.tv_sec=to-elapsed;
581                 timeout.tv_usec=0;
582                 n=select(fd+1, 0, &sel_set, 0, &timeout);
583 #else
584                 n=poll(&pf, 1, (to-elapsed)*1000);
585 #endif
586                 if (n<0){
587                         if (errno==EINTR) continue;
588                         LOG(L_ERR, "ERROR: tcp_blocking_connect %s: poll/select failed:"
589                                         " (%d) %s\n",
590                                         su2a((union sockaddr_union*)servaddr, addrlen),
591                                         errno, strerror(errno));
592                         goto error;
593                 }else if (n==0) /* timeout */ continue;
594 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
595                 if (FD_ISSET(fd, &sel_set))
596 #else
597                 if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){ 
598                         LOG(L_ERR, "ERROR: tcp_blocking_connect %s: poll error: "
599                                         "flags %x\n",
600                                         su2a((union sockaddr_union*)servaddr, addrlen),
601                                         pf.revents);
602                         poll_err=1;
603                 }
604 #endif
605                 {
606                         err_len=sizeof(err);
607                         getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
608                         if ((err==0) && (poll_err==0)) goto end;
609                         if (err!=EINPROGRESS && err!=EALREADY){
610                                 LOG(L_ERR, "ERROR: tcp_blocking_connect %s: SO_ERROR (%d) "
611                                                 "%s\n",
612                                                 su2a((union sockaddr_union*)servaddr, addrlen),
613                                                 err, strerror(err));
614                                 errno=err;
615                                 goto error_errno;
616                         }
617                 }
618         }
619 error_errno:
620         switch(errno){
621                 case ENETUNREACH:
622                 case EHOSTUNREACH:
623 #ifdef USE_DST_BLACKLIST
624                         dst_blacklist_su(BLST_ERR_CONNECT, type,
625                                                          (union sockaddr_union*)servaddr, send_flags, 0);
626 #endif /* USE_DST_BLACKLIST */
627                         TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0,
628                                                         (union sockaddr_union*)servaddr, type);
629                         break;
630                 case ETIMEDOUT:
631 #ifdef USE_DST_BLACKLIST
632                         dst_blacklist_su(BLST_ERR_CONNECT, type,
633                                                          (union sockaddr_union*)servaddr, send_flags, 0);
634 #endif /* USE_DST_BLACKLIST */
635                         TCP_EV_CONNECT_TIMEOUT(errno, 0, 0,
636                                                         (union sockaddr_union*)servaddr, type);
637                         break;
638                 case ECONNREFUSED:
639                 case ECONNRESET:
640 #ifdef USE_DST_BLACKLIST
641                         dst_blacklist_su(BLST_ERR_CONNECT, type,
642                                                          (union sockaddr_union*)servaddr, send_flags, 0);
643 #endif /* USE_DST_BLACKLIST */
644                         TCP_EV_CONNECT_RST(errno, 0, 0,
645                                                         (union sockaddr_union*)servaddr, type);
646                         break;
647                 case EAGAIN: /* not posix, but supported on linux and bsd */
648                         TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0,
649                                                         (union sockaddr_union*)servaddr, type);
650                         break;
651                 default:
652                         TCP_EV_CONNECT_ERR(errno, 0, 0,
653                                                                 (union sockaddr_union*)servaddr, type);
654         }
655         LOG(L_ERR, "ERROR: tcp_blocking_connect %s: (%d) %s\n",
656                         su2a((union sockaddr_union*)servaddr, addrlen),
657                         errno, strerror(errno));
658         goto error;
659 error_timeout:
660         /* timeout */
661 #ifdef USE_DST_BLACKLIST
662         dst_blacklist_su(BLST_ERR_CONNECT, type,
663                                                 (union sockaddr_union*)servaddr, send_flags, 0);
664 #endif /* USE_DST_BLACKLIST */
665         TCP_EV_CONNECT_TIMEOUT(0, 0, 0, (union sockaddr_union*)servaddr, type);
666         LOG(L_ERR, "ERROR: tcp_blocking_connect %s: timeout %d s elapsed "
667                                 "from %d s\n", su2a((union sockaddr_union*)servaddr, addrlen),
668                                 elapsed, cfg_get(tcp, tcp_cfg, connect_timeout_s));
669 error:
670         TCP_STATS_CONNECT_FAILED();
671         return -1;
672 end:
673         return 0;
674 }
675
676
677
678 #ifdef TCP_ASYNC
679
680
681 /* unsafe version */
682 #define _wbufq_empty(con) ((con)->wbuf_q.first==0)
683 /* unsafe version */
684 #define _wbufq_non_empty(con) ((con)->wbuf_q.first!=0)
685
686
687 /* unsafe version, call while holding the connection write lock */
688 inline static int _wbufq_add(struct  tcp_connection* c, const char* data, 
689                                                         unsigned int size)
690 {
691         struct tcp_wbuffer_queue* q;
692         struct tcp_wbuffer* wb;
693         unsigned int last_free;
694         unsigned int wb_size;
695         unsigned int crt_size;
696         ticks_t t;
697         
698         q=&c->wbuf_q;
699         t=get_ticks_raw();
700         if (unlikely(   ((q->queued+size)>cfg_get(tcp, tcp_cfg, tcpconn_wq_max)) ||
701                                         ((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max)) ||
702                                         (q->first &&
703                                         TICKS_LT(q->wr_timeout, t)) )){
704                 LOG(L_ERR, "ERROR: wbufq_add(%d bytes): write queue full or timeout "
705                                         " (%d, total %d, last write %d s ago)\n",
706                                         size, q->queued, *tcp_total_wq,
707                                         TICKS_TO_S(t-(q->wr_timeout-
708                                                                 cfg_get(tcp, tcp_cfg, send_timeout))));
709                 if (q->first && TICKS_LT(q->wr_timeout, t)){
710                         if (unlikely(c->state==S_CONN_CONNECT)){
711 #ifdef USE_DST_BLACKLIST
712                                 dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
713                                                                                 &c->rcv.src_su, &c->send_flags, 0);
714 #endif /* USE_DST_BLACKLIST */
715                                 TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(c), TCP_LPORT(c),
716                                                                                         TCP_PSU(c), TCP_PROTO(c));
717                                 TCP_STATS_CONNECT_FAILED();
718                         }else{
719 #ifdef USE_DST_BLACKLIST
720                                 dst_blacklist_su( BLST_ERR_SEND, c->rcv.proto,
721                                                                         &c->rcv.src_su, &c->send_flags, 0);
722 #endif /* USE_DST_BLACKLIST */
723                                 TCP_EV_SEND_TIMEOUT(0, &c->rcv);
724                                 TCP_STATS_SEND_TIMEOUT();
725                         }
726                 }else{
727                         /* if it's not a timeout => queue full */
728                         TCP_EV_SENDQ_FULL(0, &c->rcv);
729                         TCP_STATS_SENDQ_FULL();
730                 }
731                 goto error;
732         }
733         
734         if (unlikely(q->last==0)){
735                 wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
736                 wb=shm_malloc(sizeof(*wb)+wb_size-1);
737                 if (unlikely(wb==0))
738                         goto error;
739                 wb->b_size=wb_size;
740                 wb->next=0;
741                 q->last=wb;
742                 q->first=wb;
743                 q->last_used=0;
744                 q->offset=0;
745                 q->wr_timeout=get_ticks_raw()+
746                         ((c->state==S_CONN_CONNECT)?
747                                         S_TO_TICKS(cfg_get(tcp, tcp_cfg, connect_timeout_s)):
748                                         cfg_get(tcp, tcp_cfg, send_timeout));
749         }else{
750                 wb=q->last;
751         }
752         
753         while(size){
754                 last_free=wb->b_size-q->last_used;
755                 if (last_free==0){
756                         wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
757                         wb=shm_malloc(sizeof(*wb)+wb_size-1);
758                         if (unlikely(wb==0))
759                                 goto error;
760                         wb->b_size=wb_size;
761                         wb->next=0;
762                         q->last->next=wb;
763                         q->last=wb;
764                         q->last_used=0;
765                         last_free=wb->b_size;
766                 }
767                 crt_size=MIN_unsigned(last_free, size);
768                 memcpy(wb->buf+q->last_used, data, crt_size);
769                 q->last_used+=crt_size;
770                 size-=crt_size;
771                 data+=crt_size;
772                 q->queued+=crt_size;
773                 atomic_add_int((int*)tcp_total_wq, crt_size);
774         }
775         return 0;
776 error:
777         return -1;
778 }
779
780
781
782 /* unsafe version, call while holding the connection write lock
783  * inserts data at the beginning, it ignores the max queue size checks and
784  * the timeout (use sparingly)
785  * Note: it should never be called on a write buffer after wbufq_run() */
786 inline static int _wbufq_insert(struct  tcp_connection* c, const char* data, 
787                                                         unsigned int size)
788 {
789         struct tcp_wbuffer_queue* q;
790         struct tcp_wbuffer* wb;
791         
792         q=&c->wbuf_q;
793         if (likely(q->first==0)) /* if empty, use wbufq_add */
794                 return _wbufq_add(c, data, size);
795         
796         if (unlikely((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max))){
797                 LOG(L_ERR, "ERROR: wbufq_insert(%d bytes): write queue full"
798                                         " (%d, total %d, last write %d s ago)\n",
799                                         size, q->queued, *tcp_total_wq,
800                                         TICKS_TO_S(get_ticks_raw()-q->wr_timeout-
801                                                                         cfg_get(tcp, tcp_cfg, send_timeout)));
802                 goto error;
803         }
804         if (unlikely(q->offset)){
805                 LOG(L_CRIT, "BUG: wbufq_insert: non-null offset %d (bad call, should"
806                                 "never be called after the wbufq_run())\n", q->offset);
807                 goto error;
808         }
809         if ((q->first==q->last) && ((q->last->b_size-q->last_used)>=size)){
810                 /* one block with enough space in it for size bytes */
811                 memmove(q->first->buf+size, q->first->buf, size);
812                 memcpy(q->first->buf, data, size);
813                 q->last_used+=size;
814         }else{
815                 /* create a size bytes block directly */
816                 wb=shm_malloc(sizeof(*wb)+size-1);
817                 if (unlikely(wb==0))
818                         goto error;
819                 wb->b_size=size;
820                 /* insert it */
821                 wb->next=q->first;
822                 q->first=wb;
823                 memcpy(wb->buf, data, size);
824         }
825         
826         q->queued+=size;
827         atomic_add_int((int*)tcp_total_wq, size);
828         return 0;
829 error:
830         return -1;
831 }
832
833
834
835 /* unsafe version, call while holding the connection write lock */
836 inline static void _wbufq_destroy( struct  tcp_wbuffer_queue* q)
837 {
838         struct tcp_wbuffer* wb;
839         struct tcp_wbuffer* next_wb;
840         int unqueued;
841         
842         unqueued=0;
843         if (likely(q->first)){
844                 wb=q->first;
845                 do{
846                         next_wb=wb->next;
847                         unqueued+=(wb==q->last)?q->last_used:wb->b_size;
848                         if (wb==q->first)
849                                 unqueued-=q->offset;
850                         shm_free(wb);
851                         wb=next_wb;
852                 }while(wb);
853         }
854         memset(q, 0, sizeof(*q));
855         atomic_add_int((int*)tcp_total_wq, -unqueued);
856 }
857
858
859
860 /* tries to empty the queue  (safe version, c->write_lock must not be hold)
861  * returns -1 on error, bytes written on success (>=0) 
862  * if the whole queue is emptied => sets *empty*/
863 inline static int wbufq_run(int fd, struct tcp_connection* c, int* empty)
864 {
865         struct tcp_wbuffer_queue* q;
866         struct tcp_wbuffer* wb;
867         int n;
868         int ret;
869         int block_size;
870         char* buf;
871         
872         *empty=0;
873         ret=0;
874         lock_get(&c->write_lock);
875         q=&c->wbuf_q;
876         while(q->first){
877                 block_size=((q->first==q->last)?q->last_used:q->first->b_size)-
878                                                 q->offset;
879                 buf=q->first->buf+q->offset;
880                 n=_tcpconn_write_nb(fd, c, buf, block_size);
881                 if (likely(n>0)){
882                         ret+=n;
883                         if (likely(n==block_size)){
884                                 wb=q->first;
885                                 q->first=q->first->next; 
886                                 shm_free(wb);
887                                 q->offset=0;
888                                 q->queued-=block_size;
889                                 atomic_add_int((int*)tcp_total_wq, -block_size);
890                         }else{
891                                 q->offset+=n;
892                                 q->queued-=n;
893                                 atomic_add_int((int*)tcp_total_wq, -n);
894                                 break;
895                         }
896                 }else{
897                         if (n<0){
898                                 /* EINTR is handled inside _tcpconn_write_nb */
899                                 if (!(errno==EAGAIN || errno==EWOULDBLOCK)){
900                                         if (unlikely(c->state==S_CONN_CONNECT)){
901                                                 switch(errno){
902                                                         case ENETUNREACH:
903                                                         case EHOSTUNREACH: /* not posix for send() */
904 #ifdef USE_DST_BLACKLIST
905                                                                 dst_blacklist_su(BLST_ERR_CONNECT,
906                                                                                                         c->rcv.proto,
907                                                                                                         &c->rcv.src_su,
908                                                                                                         &c->send_flags, 0);
909 #endif /* USE_DST_BLACKLIST */
910                                                                 TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
911                                                                                                         TCP_LPORT(c), TCP_PSU(c),
912                                                                                                         TCP_PROTO(c));
913                                                                 break;
914                                                         case ECONNREFUSED:
915                                                         case ECONNRESET:
916 #ifdef USE_DST_BLACKLIST
917                                                                 dst_blacklist_su(BLST_ERR_CONNECT,
918                                                                                                         c->rcv.proto,
919                                                                                                         &c->rcv.src_su,
920                                                                                                         &c->send_flags, 0);
921 #endif /* USE_DST_BLACKLIST */
922                                                                 TCP_EV_CONNECT_RST(0, TCP_LADDR(c),
923                                                                                                         TCP_LPORT(c), TCP_PSU(c),
924                                                                                                         TCP_PROTO(c));
925                                                                 break;
926                                                         default:
927                                                                 TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
928                                                                                                         TCP_LPORT(c), TCP_PSU(c),
929                                                                                                         TCP_PROTO(c));
930                                                 }
931                                                 TCP_STATS_CONNECT_FAILED();
932                                         }else{
933                                                 switch(errno){
934                                                         case ECONNREFUSED:
935                                                         case ECONNRESET:
936                                                                 TCP_STATS_CON_RESET();
937                                                                 /* no break */
938                                                         case ENETUNREACH:
939                                                         case EHOSTUNREACH: /* not posix for send() */
940 #ifdef USE_DST_BLACKLIST
941                                                                 dst_blacklist_su(BLST_ERR_SEND,
942                                                                                                         c->rcv.proto,
943                                                                                                         &c->rcv.src_su,
944                                                                                                         &c->send_flags, 0);
945 #endif /* USE_DST_BLACKLIST */
946                                                                 break;
947                                                 }
948                                         }
949                                         ret=-1;
950                                         LOG(L_ERR, "ERROR: wbuf_runq: %s [%d]\n",
951                                                 strerror(errno), errno);
952                                 }
953                         }
954                         break;
955                 }
956         }
957         if (likely(q->first==0)){
958                 q->last=0;
959                 q->last_used=0;
960                 q->offset=0;
961                 *empty=1;
962         }
963         lock_release(&c->write_lock);
964         if (likely(ret>0)){
965                 q->wr_timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, send_timeout);
966                 if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
967                         TCP_STATS_ESTABLISHED(c->state);
968                         c->state=S_CONN_OK;
969                 }
970         }
971         return ret;
972 }
973
974 #endif /* TCP_ASYNC */
975
976
977
#if 0
/* blocking write even on non-blocking sockets 
 * Tries to send the whole buffer; each time the socket cannot take more
 * data (partial write or EAGAIN/EWOULDBLOCK) it waits with select() until
 * the socket becomes writable again and retries.
 * returns the initial len on success and -1 on error (send error, select
 * error or no write possible for tcp_send_timeout) */
static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
								unsigned int len)
{
	int n;
	fd_set wr_set;
	struct timeval tv;
	int start_ticks;
	int total_len;
	
	total_len=len;
	for(;;){
		n=send(fd, buf, len,
#ifdef HAVE_MSG_NOSIGNAL
			MSG_NOSIGNAL
#else
			0
#endif
		);
		if (n<0){
			if (errno==EINTR)
				continue; /* interrupted, retry the send */
			if (errno!=EAGAIN && errno!=EWOULDBLOCK){
				LOG(L_ERR, "tcp_blocking_write: failed to send: (%d) %s\n",
						errno, strerror(errno));
				TCP_EV_SEND_TIMEOUT(errno, &c->rcv);
				TCP_STATS_SEND_TIMEOUT();
				return -1;
			}
			/* would block => wait below */
		}else if (n<len){
			/* partial write */
			buf+=n;
			len-=n;
		}else{
			/* success: full write */
			return total_len;
		}
		/* wait until the socket is writable again */
		for(;;){
			FD_ZERO(&wr_set);
			FD_SET(fd, &wr_set);
			tv.tv_sec=tcp_send_timeout;
			tv.tv_usec=0;
			start_ticks=get_ticks();
			n=select(fd+1, 0, &wr_set, 0, &tv);
			if (n<0){
				if (errno==EINTR) continue; /* signal, ignore */
				LOG(L_ERR, "ERROR: tcp_blocking_write: select failed: "
						" (%d) %s\n", errno, strerror(errno));
				return -1;
			}
			if (n==0){
				/* timeout */
				if (get_ticks()-start_ticks>=tcp_send_timeout){
					LOG(L_ERR, "ERROR: tcp_blocking_write: send timeout (%d)\n",
							tcp_send_timeout);
					return -1;
				}
				continue;
			}
			if (FD_ISSET(fd, &wr_set))
				break; /* we can write again */
		}
	}
}
#endif
1049
1050
1051
1052 struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
1053                                                                         union sockaddr_union* local_addr,
1054                                                                         struct socket_info* ba, int type, 
1055                                                                         int state)
1056 {
1057         struct tcp_connection *c;
1058         int rd_b_size;
1059         
1060         rd_b_size=cfg_get(tcp, tcp_cfg, rd_buf_size);
1061         c=shm_malloc(sizeof(struct tcp_connection) + rd_b_size);
1062         if (c==0){
1063                 LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
1064                 goto error;
1065         }
1066         memset(c, 0, sizeof(struct tcp_connection)); /* zero init (skip rd buf)*/
1067         c->s=sock;
1068         c->fd=-1; /* not initialized */
1069         if (lock_init(&c->write_lock)==0){
1070                 LOG(L_ERR, "ERROR: tcpconn_new: init lock failed\n");
1071                 goto error;
1072         }
1073         
1074         c->rcv.src_su=*su;
1075         
1076         atomic_set(&c->refcnt, 0);
1077         local_timer_init(&c->timer, tcpconn_main_timeout, c, 0);
1078         su2ip_addr(&c->rcv.src_ip, su);
1079         c->rcv.src_port=su_getport(su);
1080         c->rcv.bind_address=ba;
1081         if (likely(local_addr)){
1082                 su2ip_addr(&c->rcv.dst_ip, local_addr);
1083                 c->rcv.dst_port=su_getport(local_addr);
1084         }else if (ba){
1085                 c->rcv.dst_ip=ba->address;
1086                 c->rcv.dst_port=ba->port_no;
1087         }
1088         print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
1089         DBG(     "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
1090         init_tcp_req(&c->req, (char*)c+sizeof(struct tcp_connection), rd_b_size);
1091         c->id=(*connection_id)++;
1092         c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
1093         c->rcv.proto_reserved2=0;
1094         c->state=state;
1095         c->extra_data=0;
1096 #ifdef USE_TLS
1097         if (type==PROTO_TLS){
1098                 if (tls_tcpconn_init(c, sock)==-1) goto error;
1099         }else
1100 #endif /* USE_TLS*/
1101         {
1102                 c->type=PROTO_TCP;
1103                 c->rcv.proto=PROTO_TCP;
1104                 c->timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, con_lifetime);
1105         }
1106         
1107         return c;
1108         
1109 error:
1110         if (c) shm_free(c);
1111         return 0;
1112 }
1113
1114
1115
1116 /* do the actual connect, set sock. options a.s.o
1117  * returns socket on success, -1 on error
1118  * sets also *res_local_addr, res_si and state (S_CONN_CONNECT for an
1119  * unfinished connect and S_CONN_OK for a finished one)*/
1120 inline static int tcp_do_connect(       union sockaddr_union* server,
1121                                                                         union sockaddr_union* from,
1122                                                                         int type,
1123                                                                         snd_flags_t* send_flags,
1124                                                                         union sockaddr_union* res_local_addr,
1125                                                                         struct socket_info** res_si,
1126                                                                         enum tcp_conn_states *state
1127                                                                         )
1128 {
1129         int s;
1130         union sockaddr_union my_name;
1131         socklen_t my_name_len;
1132         struct ip_addr ip;
1133 #ifdef TCP_ASYNC
1134         int n;
1135 #endif /* TCP_ASYNC */
1136
1137         s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
1138         if (unlikely(s==-1)){
1139                 LOG(L_ERR, "ERROR: tcp_do_connect %s: socket: (%d) %s\n",
1140                                 su2a(server, sizeof(*server)), errno, strerror(errno));
1141                 goto error;
1142         }
1143         if (init_sock_opt(s)<0){
1144                 LOG(L_ERR, "ERROR: tcp_do_connect %s: init_sock_opt failed\n",
1145                                         su2a(server, sizeof(*server)));
1146                 goto error;
1147         }
1148         
1149         if (unlikely(from && bind(s, &from->s, sockaddru_len(*from)) != 0)){
1150                 LOG(L_WARN, "WARNING: tcp_do_connect: binding to source address"
1151                                         " %s failed: %s [%d]\n", su2a(from, sizeof(*from)),
1152                                         strerror(errno), errno);
1153         }
1154         *state=S_CONN_OK;
1155 #ifdef TCP_ASYNC
1156         if (likely(cfg_get(tcp, tcp_cfg, async))){
1157 again:
1158                 n=connect(s, &server->s, sockaddru_len(*server));
1159                 if (likely(n==-1)){ /*non-blocking => most probable EINPROGRESS*/
1160                         if (likely(errno==EINPROGRESS))
1161                                 *state=S_CONN_CONNECT;
1162                         else if (errno==EINTR) goto again;
1163                         else if (errno!=EALREADY){
1164                                 switch(errno){
1165                                         case ENETUNREACH:
1166                                         case EHOSTUNREACH:
1167 #ifdef USE_DST_BLACKLIST
1168                                                 dst_blacklist_su(BLST_ERR_CONNECT, type, server,
1169                                                                                         send_flags, 0);
1170 #endif /* USE_DST_BLACKLIST */
1171                                                 TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0, server, type);
1172                                                 break;
1173                                         case ETIMEDOUT:
1174 #ifdef USE_DST_BLACKLIST
1175                                                 dst_blacklist_su(BLST_ERR_CONNECT, type, server,
1176                                                                                         send_flags, 0);
1177 #endif /* USE_DST_BLACKLIST */
1178                                                 TCP_EV_CONNECT_TIMEOUT(errno, 0, 0, server, type);
1179                                                 break;
1180                                         case ECONNREFUSED:
1181                                         case ECONNRESET:
1182 #ifdef USE_DST_BLACKLIST
1183                                                 dst_blacklist_su(BLST_ERR_CONNECT, type, server,
1184                                                                                         send_flags, 0);
1185 #endif /* USE_DST_BLACKLIST */
1186                                                 TCP_EV_CONNECT_RST(errno, 0, 0, server, type);
1187                                                 break;
1188                                         case EAGAIN:/* not posix, but supported on linux and bsd */
1189                                                 TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0, server,type);
1190                                                 break;
1191                                         default:
1192                                                 TCP_EV_CONNECT_ERR(errno, 0, 0, server, type);
1193                                 }
1194                                 TCP_STATS_CONNECT_FAILED();
1195                                 LOG(L_ERR, "ERROR: tcp_do_connect: connect %s: (%d) %s\n",
1196                                                         su2a(server, sizeof(*server)),
1197                                                         errno, strerror(errno));
1198                                 goto error;
1199                         }
1200                 }
1201         }else{
1202 #endif /* TCP_ASYNC */
1203                 if (tcp_blocking_connect(s, type,  send_flags, &server->s,
1204                                                                         sockaddru_len(*server))<0){
1205                         LOG(L_ERR, "ERROR: tcp_do_connect: tcp_blocking_connect %s"
1206                                                 " failed\n", su2a(server, sizeof(*server)));
1207                         goto error;
1208                 }
1209 #ifdef TCP_ASYNC
1210         }
1211 #endif /* TCP_ASYNC */
1212         if (from){
1213                 su2ip_addr(&ip, from);
1214                 if (!ip_addr_any(&ip))
1215                         /* we already know the source ip, skip the sys. call */
1216                         goto find_socket;
1217         }
1218         my_name_len=sizeof(my_name);
1219         if (unlikely(getsockname(s, &my_name.s, &my_name_len)!=0)){
1220                 LOG(L_ERR, "ERROR: tcp_do_connect: getsockname failed: %s(%d)\n",
1221                                 strerror(errno), errno);
1222                 *res_si=0;
1223                 goto error;
1224         }
1225         from=&my_name; /* update from with the real "from" address */
1226         su2ip_addr(&ip, &my_name);
1227 find_socket:
1228 #ifdef USE_TLS
1229         if (unlikely(type==PROTO_TLS))
1230                 *res_si=find_si(&ip, 0, PROTO_TLS);
1231         else
1232 #endif
1233                 *res_si=find_si(&ip, 0, PROTO_TCP);
1234         
1235         if (unlikely(*res_si==0)){
1236                 LOG(L_WARN, "WARNING: tcp_do_connect %s: could not find corresponding"
1237                                 " listening socket for %s, using default...\n",
1238                                         su2a(server, sizeof(*server)), ip_addr2a(&ip));
1239                 if (server->s.sa_family==AF_INET) *res_si=sendipv4_tcp;
1240 #ifdef USE_IPV6
1241                 else *res_si=sendipv6_tcp;
1242 #endif
1243         }
1244         *res_local_addr=*from;
1245         return s;
1246 error:
1247         if (s!=-1) tcp_safe_close(s);
1248         return -1;
1249 }
1250
1251
1252
1253 struct tcp_connection* tcpconn_connect( union sockaddr_union* server,
1254                                                                                 union sockaddr_union* from,
1255                                                                                 int type, snd_flags_t* send_flags)
1256 {
1257         int s;
1258         struct socket_info* si;
1259         union sockaddr_union my_name;
1260         struct tcp_connection* con;
1261         enum tcp_conn_states state;
1262
1263         s=-1;
1264         
1265         if (*tcp_connections_no >= cfg_get(tcp, tcp_cfg, max_connections)){
1266                 LOG(L_ERR, "ERROR: tcpconn_connect: maximum number of connections"
1267                                         " exceeded (%d/%d)\n",
1268                                         *tcp_connections_no,
1269                                         cfg_get(tcp, tcp_cfg, max_connections));
1270                 goto error;
1271         }
1272         if (unlikely(type==PROTO_TLS)) {
1273                 if (*tls_connections_no >= cfg_get(tcp, tcp_cfg, max_tls_connections)){
1274                         LM_ERR("ERROR: maximum number of tls connections"
1275                                                 " exceeded (%d/%d)\n",
1276                                                 *tls_connections_no,
1277                                                 cfg_get(tcp, tcp_cfg, max_tls_connections));
1278                         goto error;
1279                 }
1280         }
1281
1282         s=tcp_do_connect(server, from, type,  send_flags, &my_name, &si, &state);
1283         if (s==-1){
1284                 LOG(L_ERR, "ERROR: tcp_do_connect %s: failed (%d) %s\n",
1285                                 su2a(server, sizeof(*server)), errno, strerror(errno));
1286                 goto error;
1287         }
1288         con=tcpconn_new(s, server, &my_name, si, type, state);
1289         if (con==0){
1290                 LOG(L_ERR, "ERROR: tcp_connect %s: tcpconn_new failed, closing the "
1291                                  " socket\n", su2a(server, sizeof(*server)));
1292                 goto error;
1293         }
1294         tcpconn_set_send_flags(con, *send_flags);
1295         return con;
1296 error:
1297         if (s!=-1) tcp_safe_close(s); /* close the opened socket */
1298         return 0;
1299 }
1300
1301
1302
1303 #ifdef TCP_CONNECT_WAIT
1304 int tcpconn_finish_connect( struct tcp_connection* c,
1305                                                                                                 union sockaddr_union* from)
1306 {
1307         int s;
1308         int r;
1309         union sockaddr_union local_addr;
1310         struct socket_info* si;
1311         enum tcp_conn_states state;
1312         struct tcp_conn_alias* a;
1313         int new_conn_alias_flags;
1314         
1315         s=tcp_do_connect(&c->rcv.src_su, from, c->type, &c->send_flags,
1316                                                 &local_addr, &si, &state);
1317         if (unlikely(s==-1)){
1318                 LOG(L_ERR, "ERROR: tcpconn_finish_connect %s: tcp_do_connect for %p"
1319                                         " failed\n", su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
1320                                         c);
1321                 return -1;
1322         }
1323         c->rcv.bind_address=si;
1324         su2ip_addr(&c->rcv.dst_ip, &local_addr);
1325         c->rcv.dst_port=su_getport(&local_addr);
1326         /* update aliases if needed */
1327         if (likely(from==0)){
1328                 new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
1329                 /* add aliases */
1330                 TCPCONN_LOCK;
1331                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
1332                                                                                                         new_conn_alias_flags);
1333                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1334                                                                         c->rcv.dst_port, new_conn_alias_flags);
1335                 TCPCONN_UNLOCK;
1336         }else if (su_cmp(from, &local_addr)!=1){
1337                 new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
1338                 TCPCONN_LOCK;
1339                         /* remove all the aliases except the first one and re-add them
1340                          * (there shouldn't be more then the 3 default aliases at this 
1341                          * stage) */
1342                         for (r=1; r<c->aliases; r++){
1343                                 a=&c->con_aliases[r];
1344                                 tcpconn_listrm(tcpconn_aliases_hash[a->hash], a, next, prev);
1345                         }
1346                         c->aliases=1;
1347                         /* add the local_ip:0 and local_ip:local_port aliases */
1348                         _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1349                                                                                                 0, new_conn_alias_flags);
1350                         _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1351                                                                         c->rcv.dst_port, new_conn_alias_flags);
1352                 TCPCONN_UNLOCK;
1353         }
1354         
1355         return s;
1356 }
1357 #endif /* TCP_CONNECT_WAIT */
1358
1359
1360
1361 /* adds a tcp connection to the tcpconn hashes
1362  * Note: it's called _only_ from the tcp_main process */
1363 inline static struct tcp_connection*  tcpconn_add(struct tcp_connection *c)
1364 {
1365         struct ip_addr zero_ip;
1366         int new_conn_alias_flags;
1367
1368         if (likely(c)){
1369                 ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
1370                 c->id_hash=tcp_id_hash(c->id);
1371                 c->aliases=0;
1372                 new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
1373                 TCPCONN_LOCK;
1374                 c->flags|=F_CONN_HASHED;
1375                 /* add it at the begining of the list*/
1376                 tcpconn_listadd(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1377                 /* set the aliases */
1378                 /* first alias is for (peer_ip, peer_port, 0 ,0) -- for finding
1379                  *  any connection to peer_ip, peer_port
1380                  * the second alias is for (peer_ip, peer_port, local_addr, 0) -- for
1381                  *  finding any conenction to peer_ip, peer_port from local_addr 
1382                  * the third alias is for (peer_ip, peer_port, local_addr, local_port) 
1383                  *   -- for finding if a fully specified connection exists */
1384                 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &zero_ip, 0,
1385                                                                                                         new_conn_alias_flags);
1386                 if (likely(c->rcv.dst_ip.af && ! ip_addr_any(&c->rcv.dst_ip))){
1387                         _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
1388                                                                                                         new_conn_alias_flags);
1389                         _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1390                                                                         c->rcv.dst_port, new_conn_alias_flags);
1391                 }
1392                 /* ignore add_alias errors, there are some valid cases when one
1393                  *  of the add_alias would fail (e.g. first add_alias for 2 connections
1394                  *   with the same destination but different src. ip*/
1395                 TCPCONN_UNLOCK;
1396                 DBG("tcpconn_add: hashes: %d:%d:%d, %d\n",
1397                                                                                                 c->con_aliases[0].hash,
1398                                                                                                 c->con_aliases[1].hash,
1399                                                                                                 c->con_aliases[2].hash,
1400                                                                                                 c->id_hash);
1401                 return c;
1402         }else{
1403                 LOG(L_CRIT, "tcpconn_add: BUG: null connection pointer\n");
1404                 return 0;
1405         }
1406 }
1407
1408
1409 static inline void _tcpconn_detach(struct tcp_connection *c)
1410 {
1411         int r;
1412         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1413         /* remove all the aliases */
1414         for (r=0; r<c->aliases; r++)
1415                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
1416                                                 &c->con_aliases[r], next, prev);
1417         c->aliases = 0;
1418 }
1419
1420
1421
1422 static inline void _tcpconn_free(struct tcp_connection* c)
1423 {
1424 #ifdef TCP_ASYNC
1425         if (unlikely(_wbufq_non_empty(c)))
1426                 _wbufq_destroy(&c->wbuf_q);
1427 #endif
1428         lock_destroy(&c->write_lock);
1429 #ifdef USE_TLS
1430         if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) tls_tcpconn_clean(c);
1431 #endif
1432         shm_free(c);
1433 }
1434
1435
1436
/* Lock-free variant of tcpconn_rm(): unlinks c from the id and alias
 * hashes, then destroys and frees it. No lock is taken here. */
void _tcpconn_rm(struct tcp_connection* c)
{
	/* first take it out of the hashes ... */
	_tcpconn_detach(c);
	/* ... then release everything it owns, including c itself */
	_tcpconn_free(c);
}
1443
1444
1445
1446 void tcpconn_rm(struct tcp_connection* c)
1447 {
1448         int r;
1449         TCPCONN_LOCK;
1450         tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1451         /* remove all the aliases */
1452         for (r=0; r<c->aliases; r++)
1453                 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
1454                                                 &c->con_aliases[r], next, prev);
1455         c->aliases = 0;
1456         TCPCONN_UNLOCK;
1457         lock_destroy(&c->write_lock);
1458 #ifdef USE_TLS
1459         if ((c->type==PROTO_TLS || c->type==PROTO_WSS)&&(c->extra_data)) tls_tcpconn_clean(c);
1460 #endif
1461         shm_free(c);
1462 }
1463
1464
1465 /* finds a connection, if id=0 uses the ip addr, port, local_ip and local port
1466  *  (host byte order) and tries to find the connection that matches all of
1467  *   them. Wild cards can be used for local_ip and local_port (a 0 filled
1468  *   ip address and/or a 0 local port).
1469  * WARNING: unprotected (locks) use tcpconn_get unless you really
1470  * know what you are doing */
1471 struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port,
1472                                                                                 struct ip_addr* l_ip, int l_port)
1473 {
1474
1475         struct tcp_connection *c;
1476         struct tcp_conn_alias* a;
1477         unsigned hash;
1478         int is_local_ip_any;
1479         
1480 #ifdef EXTRA_DEBUG
1481         DBG("tcpconn_find: %d  port %d\n",id, port);
1482         if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
1483 #endif
1484         if (likely(id)){
1485                 hash=tcp_id_hash(id);
1486                 for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
1487 #ifdef EXTRA_DEBUG
1488                         DBG("c=%p, c->id=%d, port=%d\n",c, c->id, c->rcv.src_port);
1489                         print_ip("ip=", &c->rcv.src_ip, "\n");
1490 #endif
1491                         if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
1492                 }
1493         }else if (likely(ip)){
1494                 hash=tcp_addr_hash(ip, port, l_ip, l_port);
1495                 is_local_ip_any=ip_addr_any(l_ip);
1496                 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
1497 #ifdef EXTRA_DEBUG
1498                         DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
1499                                         a->parent->id, a->port, a->parent->rcv.src_port);
1500                         print_ip("ip=",&a->parent->rcv.src_ip,"\n");
1501 #endif
1502                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
1503                                         ((l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
1504                                         (ip_addr_cmp(ip, &a->parent->rcv.src_ip)) &&
1505                                         (is_local_ip_any ||
1506                                                 ip_addr_cmp(l_ip, &a->parent->rcv.dst_ip))
1507                                 )
1508                                 return a->parent;
1509                 }
1510         }
1511         return 0;
1512 }
1513
1514
1515
1516 /* _tcpconn_find with locks and timeout
1517  * local_addr contains the desired local ip:port. If null any local address 
1518  * will be used.  IN*ADDR_ANY or 0 port are wild cards.
1519  */
1520 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
1521                                                                         union sockaddr_union* local_addr,
1522                                                                         ticks_t timeout)
1523 {
1524         struct tcp_connection* c;
1525         struct ip_addr local_ip;
1526         int local_port;
1527         
1528         local_port=0;
1529         if (likely(ip)){
1530                 if (unlikely(local_addr)){
1531                         su2ip_addr(&local_ip, local_addr);
1532                         local_port=su_getport(local_addr);
1533                 }else{
1534                         ip_addr_mk_any(ip->af, &local_ip);
1535                         local_port=0;
1536                 }
1537         }
1538         TCPCONN_LOCK;
1539         c=_tcpconn_find(id, ip, port, &local_ip, local_port);
1540         if (likely(c)){ 
1541                         atomic_inc(&c->refcnt);
1542                         /* update the timeout only if the connection is not handled
1543                          * by a tcp reader _and_the timeout is non-zero  (the tcp
1544                          * reader process uses c->timeout for its own internal
1545                          * timeout and c->timeout will be overwritten * anyway on
1546                          * return to tcp_main) */
1547                         if (likely(c->reader_pid==0 && timeout != 0))
1548                                 c->timeout=get_ticks_raw()+timeout;
1549         }
1550         TCPCONN_UNLOCK;
1551         return c;
1552 }
1553
1554
1555
1556 /* add c->dst:port, local_addr as an alias for the "id" connection, 
1557  * flags: TCP_ALIAS_FORCE_ADD  - add an alias even if a previous one exists
1558  *        TCP_ALIAS_REPLACE    - if a prev. alias exists, replace it with the
1559  *                                new one
1560  * returns 0 on success, <0 on failure ( -1  - null c, -2 too many aliases,
1561  *  -3 alias already present and pointing to another connection)
1562  * WARNING: must be called with TCPCONN_LOCK held */
1563 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
1564                                                                                 struct ip_addr* l_ip, int l_port,
1565                                                                                 int flags)
1566 {
1567         unsigned hash;
1568         struct tcp_conn_alias* a;
1569         struct tcp_conn_alias* nxt;
1570         struct tcp_connection* p;
1571         int is_local_ip_any;
1572         int i;
1573         int r;
1574         
1575         a=0;
1576         is_local_ip_any=ip_addr_any(l_ip);
1577         if (likely(c)){
1578                 hash=tcp_addr_hash(&c->rcv.src_ip, port, l_ip, l_port);
1579                 /* search the aliases for an already existing one */
1580                 for (a=tcpconn_aliases_hash[hash], nxt=0; a; a=nxt){
1581                         nxt=a->next;
1582                         if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
1583                                         ( (l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
1584                                         (ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) &&
1585                                         ( is_local_ip_any || 
1586                                           ip_addr_cmp(&a->parent->rcv.dst_ip, l_ip))
1587                                         ){
1588                                 /* found */
1589                                 if (unlikely(a->parent!=c)){
1590                                         if (flags & TCP_ALIAS_FORCE_ADD)
1591                                                 /* still have to walk the whole list to check if
1592                                                  * the alias was not already added */
1593                                                 continue;
1594                                         else if (flags & TCP_ALIAS_REPLACE){
1595                                                 /* remove the alias =>
1596                                                  * remove the current alias and all the following
1597                                                  *  ones from the corresponding connection, shift the 
1598                                                  *  connection aliases array and re-add the other 
1599                                                  *  aliases (!= current one) */
1600                                                 p=a->parent;
1601                                                 for (i=0; (i<p->aliases) && (&(p->con_aliases[i])!=a);
1602                                                                 i++);
1603                                                 if (unlikely(i==p->aliases)){
1604                                                         LOG(L_CRIT, "BUG: _tcpconn_add_alias_unsafe: "
1605                                                                         " alias %p not found in con %p (id %d)\n",
1606                                                                         a, p, p->id);
1607                                                         goto error_not_found;
1608                                                 }
1609                                                 for (r=i; r<p->aliases; r++){
1610                                                         tcpconn_listrm(
1611                                                                 tcpconn_aliases_hash[p->con_aliases[r].hash],
1612                                                                 &p->con_aliases[r], next, prev);
1613                                                 }
1614                                                 if (likely((i+1)<p->aliases)){
1615                                                         memmove(&p->con_aliases[i], &p->con_aliases[i+1],
1616                                                                                         (p->aliases-i-1)*
1617                                                                                                 sizeof(p->con_aliases[0]));
1618                                                 }
1619                                                 p->aliases--;
1620                                                 /* re-add the remaining aliases */
1621                                                 for (r=i; r<p->aliases; r++){
1622                                                         tcpconn_listadd(
1623                                                                 tcpconn_aliases_hash[p->con_aliases[r].hash], 
1624                                                                 &p->con_aliases[r], next, prev);
1625                                                 }
1626                                         }else
1627                                                 goto error_sec;
1628                                 }else goto ok;
1629                         }
1630                 }
1631                 if (unlikely(c->aliases>=TCP_CON_MAX_ALIASES)) goto error_aliases;
1632                 c->con_aliases[c->aliases].parent=c;
1633                 c->con_aliases[c->aliases].port=port;
1634                 c->con_aliases[c->aliases].hash=hash;
1635                 tcpconn_listadd(tcpconn_aliases_hash[hash], 
1636                                                                 &c->con_aliases[c->aliases], next, prev);
1637                 c->aliases++;
1638         }else goto error_not_found;
1639 ok:
1640 #ifdef EXTRA_DEBUG
1641         if (a) DBG("_tcpconn_add_alias_unsafe: alias already present\n");
1642         else   DBG("_tcpconn_add_alias_unsafe: alias port %d for hash %d, id %d\n",
1643                         port, hash, c->id);
1644 #endif
1645         return 0;
1646 error_aliases:
1647         /* too many aliases */
1648         return -2;
1649 error_not_found:
1650         /* null connection */
1651         return -1;
1652 error_sec:
1653         /* alias already present and pointing to a different connection
1654          * (hijack attempt?) */
1655         return -3;
1656 }
1657
1658
1659
1660 /* add port as an alias for the "id" connection, 
1661  * returns 0 on success,-1 on failure */
1662 int tcpconn_add_alias(int id, int port, int proto)
1663 {
1664         struct tcp_connection* c;
1665         int ret;
1666         struct ip_addr zero_ip;
1667         int r;
1668         int alias_flags;
1669         
1670         /* fix the port */
1671         port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
1672         TCPCONN_LOCK;
1673         /* check if alias already exists */
1674         c=_tcpconn_find(id, 0, 0, 0, 0);
1675         if (likely(c)){
1676                 ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
1677                 alias_flags=cfg_get(tcp, tcp_cfg, alias_flags);
1678                 /* alias src_ip:port, 0, 0 */
1679                 ret=_tcpconn_add_alias_unsafe(c, port,  &zero_ip, 0, 
1680                                                                                 alias_flags);
1681                 if (ret<0 && ret!=-3) goto error;
1682                 /* alias src_ip:port, local_ip, 0 */
1683                 ret=_tcpconn_add_alias_unsafe(c, port,  &c->rcv.dst_ip, 0, 
1684                                                                                 alias_flags);
1685                 if (ret<0 && ret!=-3) goto error;
1686                 /* alias src_ip:port, local_ip, local_port */
1687                 ret=_tcpconn_add_alias_unsafe(c, port, &c->rcv.dst_ip, c->rcv.dst_port,
1688                                                                                 alias_flags);
1689                 if (unlikely(ret<0)) goto error;
1690         }else goto error_not_found;
1691         TCPCONN_UNLOCK;
1692         return 0;
1693 error_not_found:
1694         TCPCONN_UNLOCK;
1695         LOG(L_ERR, "ERROR: tcpconn_add_alias: no connection found for id %d\n",id);
1696         return -1;
1697 error:
1698         TCPCONN_UNLOCK;
1699         switch(ret){
1700                 case -2:
1701                         LOG(L_ERR, "ERROR: tcpconn_add_alias: too many aliases (%d)"
1702                                         " for connection %p (id %d) %s:%d <- %d\n",
1703                                         c->aliases, c, c->id, ip_addr2a(&c->rcv.src_ip),
1704                                         c->rcv.src_port, port);
1705                         for (r=0; r<c->aliases; r++){
1706                                 LOG(L_ERR, "ERROR: tcpconn_add_alias: alias %d: for %p (%d)"
1707                                                 " %s:%d <-%d hash %x\n",  r, c, c->id, 
1708                                                  ip_addr2a(&c->rcv.src_ip), c->rcv.src_port, 
1709                                                 c->con_aliases[r].port, c->con_aliases[r].hash);
1710                         }
1711                         break;
1712                 case -3:
1713                         LOG(L_ERR, "ERROR: tcpconn_add_alias: possible port"
1714                                         " hijack attempt\n");
1715                         LOG(L_ERR, "ERROR: tcpconn_add_alias: alias for %d port %d already"
1716                                                 " present and points to another connection \n",
1717                                                 c->id, port);
1718                         break;
1719                 default:
1720                         LOG(L_ERR, "ERROR: tcpconn_add_alias: unknown error %d\n", ret);
1721         }
1722         return -1;
1723 }
1724
1725
1726
1727 #ifdef TCP_FD_CACHE
1728
1729 static void tcp_fd_cache_init(void)
1730 {
1731         int r;
1732         for (r=0; r<TCP_FD_CACHE_SIZE; r++)
1733                 fd_cache[r].fd=-1;
1734 }
1735
1736
1737 inline static struct fd_cache_entry* tcp_fd_cache_get(struct tcp_connection *c)
1738 {
1739         int h;
1740         
1741         h=c->id%TCP_FD_CACHE_SIZE;
1742         if ((fd_cache[h].fd>0) && (fd_cache[h].id==c->id) && (fd_cache[h].con==c))
1743                 return &fd_cache[h];
1744         return 0;
1745 }
1746
1747
1748 inline static void tcp_fd_cache_rm(struct fd_cache_entry* e)
1749 {
1750         e->fd=-1;
1751 }
1752
1753
1754 inline static void tcp_fd_cache_add(struct tcp_connection *c, int fd)
1755 {
1756         int h;
1757         
1758         h=c->id%TCP_FD_CACHE_SIZE;
1759         if (likely(fd_cache[h].fd>0))
1760                 tcp_safe_close(fd_cache[h].fd);
1761         fd_cache[h].fd=fd;
1762         fd_cache[h].id=c->id;
1763         fd_cache[h].con=c;
1764 }
1765
1766 #endif /* TCP_FD_CACHE */
1767
1768
1769
1770 inline static int tcpconn_chld_put(struct tcp_connection* tcpconn);
1771
1772 static int tcpconn_send_put(struct tcp_connection* c, const char* buf,
1773                                                         unsigned len, snd_flags_t send_flags);
1774 static int tcpconn_do_send(int fd, struct tcp_connection* c,
1775                                                         const char* buf, unsigned len,
1776                                                         snd_flags_t send_flags, long* resp, int locked);
1777
1778 static int tcpconn_1st_send(int fd, struct tcp_connection* c,
1779                                                         const char* buf, unsigned len,
1780                                                         snd_flags_t send_flags, long* resp, int locked);
1781
1782 /* finds a tcpconn & sends on it
1783  * uses the dst members to, proto (TCP|TLS) and id and tries to send
1784  *  from the "from" address (if non null and id==0)
1785  * returns: number of bytes written (>=0) on success
1786  *          <0 on error */
1787 int tcp_send(struct dest_info* dst, union sockaddr_union* from,
1788                                         const char* buf, unsigned len)
1789 {
1790         struct tcp_connection *c;
1791         struct ip_addr ip;
1792         int port;
1793         int fd;
1794         long response[2];
1795         int n;
1796         ticks_t con_lifetime;
1797 #ifdef USE_TLS
1798         const char* rest_buf;
1799         const char* t_buf;
1800         unsigned rest_len, t_len;
1801         long resp;
1802         snd_flags_t t_send_flags;
1803 #endif /* USE_TLS */
1804         
1805         port=su_getport(&dst->to);
1806         con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
1807         if (likely(port)){
1808                 su2ip_addr(&ip, &dst->to);
1809                 c=tcpconn_get(dst->id, &ip, port, from, con_lifetime); 
1810         }else if (likely(dst->id)){
1811                 c=tcpconn_get(dst->id, 0, 0, 0, con_lifetime);
1812         }else{
1813                 LOG(L_CRIT, "BUG: tcp_send called with null id & to\n");
1814                 return -1;
1815         }
1816         
1817         if (likely(dst->id)){
1818                 if (unlikely(c==0)) {
1819                         if (likely(port)){
1820                                 /* try again w/o id */
1821                                 c=tcpconn_get(0, &ip, port, from, con_lifetime);
1822                         }else{
1823                                 LOG(L_ERR, "ERROR: tcp_send: id %d not found, dropping\n",
1824                                                 dst->id);
1825                                 return -1;
1826                         }
1827                 }
1828         }
1829         /* connection not found or unusable => open a new one and send on it */
1830         if (unlikely((c==0) || tcpconn_close_after_send(c))){
1831                 if (unlikely(c)){
1832                         /* can't use c if it's marked as close-after-send  =>
1833                            release it and try opening new one */
1834                         tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
1835                         c=0;
1836                 }
1837                 /* check if connect() is disabled */
1838                 if (unlikely((dst->send_flags.f & SND_F_FORCE_CON_REUSE) ||
1839                                                 cfg_get(tcp, tcp_cfg, no_connect)))
1840                         return -1;
1841                 DBG("tcp_send: no open tcp connection found, opening new one\n");
1842                 /* create tcp connection */
1843                 if (likely(from==0)){
1844                         /* check to see if we have to use a specific source addr. */
1845                         switch (dst->to.s.sa_family) {
1846                                 case AF_INET:
1847                                                 from = tcp_source_ipv4;
1848                                         break;
1849 #ifdef USE_IPV6
1850                                 case AF_INET6:
1851                                                 from = tcp_source_ipv6;
1852                                         break;
1853 #endif
1854                                 default:
1855                                         /* error, bad af, ignore ... */
1856                                         break;
1857                         }
1858                 }
1859 #if defined(TCP_CONNECT_WAIT) && defined(TCP_ASYNC)
1860                 if (likely(cfg_get(tcp, tcp_cfg, tcp_connect_wait) && 
1861                                         cfg_get(tcp, tcp_cfg, async) )){
1862                         if (unlikely(*tcp_connections_no >=
1863                                                         cfg_get(tcp, tcp_cfg, max_connections))){
1864                                 LOG(L_ERR, "ERROR: tcp_send %s: maximum number of"
1865                                                         " connections exceeded (%d/%d)\n",
1866                                                         su2a(&dst->to, sizeof(dst->to)),
1867                                                         *tcp_connections_no,
1868                                                         cfg_get(tcp, tcp_cfg, max_connections));
1869                                 return -1;
1870                         }
1871                         if (unlikely(dst->proto==PROTO_TLS)) {
1872                                 if (unlikely(*tls_connections_no >=
1873                                                         cfg_get(tcp, tcp_cfg, max_tls_connections))){
1874                                         LM_ERR("tcp_send %s: maximum number of"
1875                                                         " tls connections exceeded (%d/%d)\n",
1876                                                         su2a(&dst->to, sizeof(dst->to)),
1877                                                         *tls_connections_no,
1878                                                         cfg_get(tcp, tcp_cfg, max_tls_connections));
1879                                         return -1;
1880                                 }
1881                         }
1882                         c=tcpconn_new(-1, &dst->to, from, 0, dst->proto,
1883                                                         S_CONN_CONNECT);
1884                         if (unlikely(c==0)){
1885                                 LOG(L_ERR, "ERROR: tcp_send %s: could not create new"
1886                                                 " connection\n",
1887                                                 su2a(&dst->to, sizeof(dst->to)));
1888                                 return -1;
1889                         }
1890                         c->flags|=F_CONN_PENDING|F_CONN_FD_CLOSED;
1891                         tcpconn_set_send_flags(c, dst->send_flags);
1892                         atomic_set(&c->refcnt, 2); /* ref from here and from main hash
1893                                                                                  table */
1894                         /* add it to id hash and aliases */
1895                         if (unlikely(tcpconn_add(c)==0)){
1896                                 LOG(L_ERR, "ERROR: tcp_send %s: could not add "
1897                                                         "connection %p\n",
1898                                                         su2a(&dst->to, sizeof(dst->to)),
1899                                                                 c);
1900                                 _tcpconn_free(c);
1901                                 n=-1;
1902                                 goto end_no_conn;
1903                         }
1904                         /* do connect and if src ip or port changed, update the 
1905                          * aliases */
1906                         if (unlikely((fd=tcpconn_finish_connect(c, from))<0)){
1907                                 /* tcpconn_finish_connect will automatically blacklist
1908                                    on error => no need to do it here */
1909                                 LOG(L_ERR, "ERROR: tcp_send %s: tcpconn_finish_connect(%p)"
1910                                                 " failed\n", su2a(&dst->to, sizeof(dst->to)),
1911                                                         c);
1912                                 goto conn_wait_error;
1913                         }
1914                         /* ? TODO: it might be faster just to queue the write directly
1915                          *  and send to main CONN_NEW_PENDING_WRITE */
1916                         /* delay sending the fd to main after the send */
1917                         
1918                         /* NOTE: no lock here, because the connection is marked as
1919                          * pending and nobody else will try to write on it. However
1920                          * this might produce out-of-order writes. If this is not
1921                          * desired either lock before the write or use 
1922                          * _wbufq_insert(...)
1923                          * NOTE2: _wbufq_insert() is used now (no out-of-order).
1924                          */
1925 #ifdef USE_TLS
1926                         if (unlikely(c->type==PROTO_TLS)) {
1927                         /* for TLS the TLS processing and the send must happen
1928                            atomically w/ respect to other sends on the same connection
1929                            (otherwise reordering might occur which would break TLS) =>
1930                            lock. However in this case this send will always be the first.
1931                            We can have the send() outside the lock only if this is the
1932                            first and only send (tls_encode is not called again), or
1933                            this is the last send for a tls_encode() loop and all the
1934                            previous ones did return CONN_NEW_COMPLETE or CONN_EOF.
1935                         */
1936                                 response[1] = CONN_NOP;
1937                                 t_buf = buf;
1938                                 t_len = len;
1939                                 lock_get(&c->write_lock);
1940 redo_tls_encode:
1941                                         t_send_flags = dst->send_flags;
1942                                         n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
1943                                                                         &t_send_flags);
1944                                         /* There are 4 cases:
1945                                            1. entire buffer consumed from the first try
1946                                              (rest_len == rest_buf == 0)
1947                                            2. rest_buf & first call
1948                                            3. rest_buf & not first call
1949                                                   3a. CONN_NEW_COMPLETE or CONN_EOF
1950                                                   3b. CONN_NEW_PENDING_WRITE
1951                                            4. entire buffer consumed, but not first call
1952                                                4a. CONN_NEW_COMPLETE or CONN_EOF
1953                                                    4b. CONN_NEW_PENDING_WRITE
1954                                                 We misuse response[1] == CONN_NOP to test for the
1955                                                 first call.
1956                                         */
1957                                         if (unlikely(n < 0)) {
1958                                                 lock_release(&c->write_lock);
1959                                                 goto conn_wait_error;
1960                                         }
1961                                         if (likely(rest_len == 0)) {
1962                                                 /* 1 or 4*: CONN_NEW_COMPLETE, CONN_EOF,  CONN_NOP
1963                                                     or CONN_NEW_PENDING_WRITE (*rest_len == 0) */
1964                                                 if (likely(response[1] != CONN_NEW_PENDING_WRITE)) {
1965                                                         /* 1 or 4a => it's safe to do the send outside the
1966                                                            lock (it will either send directly or
1967                                                            wbufq_insert())
1968                                                         */
1969                                                         lock_release(&c->write_lock);
1970                                                         if (likely(t_len != 0)) {
1971                                                                 n=tcpconn_1st_send(fd, c, t_buf, t_len,
1972                                                                                                         t_send_flags,
1973                                                                                                         &response[1], 0);
1974                                                         } else { /* t_len == 0 */
1975                                                                 if (response[1] == CONN_NOP) {
1976                                                                         /* nothing to send (e.g  parallel send
1977                                                                            tls_encode queues some data and then
1978                                                                            WANT_READ => this tls_encode will queue
1979                                                                            the cleartext too and will have nothing
1980                                                                            to send right now) and initial send =>
1981                                                                            behave as if the send was successful
1982                                                                            (but never return EOF here) */
1983                                                                         response[1] = CONN_NEW_COMPLETE;
1984                                                                 }
1985                                                         }
1986                                                         /* exit */
1987                                                 } else {
1988                                                         /* CONN_NEW_PENDING_WRITE:  4b: it was a
1989                                                            repeated tls_encode() (or otherwise we would
1990                                                            have here CONN_NOP) => add to the queue */
1991                                                         if (unlikely(t_len &&
1992                                                                                         _wbufq_add(c, t_buf, t_len) < 0)) {
1993                                                                 response[1] = CONN_ERROR;
1994                                                                 n = -1;
1995                                                         }
1996                                                         lock_release(&c->write_lock);
1997                                                         /* exit (no send) */
1998                                                 }
1999                                         } else {  /* rest_len != 0 */
2000                                                 /* 2 or 3*: if tls_encode hasn't finished, we have to
2001                                                    call tcpconn_1st_send() under lock (otherwise if it
2002                                                    returns CONN_NEW_PENDING_WRITE, there is no way
2003                                                    to find the right place to add the new queued
2004                                                    data from the 2nd tls_encode()) */
2005                                                 if (likely((response[1] == CONN_NOP /*2*/ ||
2006                                                                         response[1] == CONN_NEW_COMPLETE /*3a*/ ||
2007                                                                         response[1] == CONN_EOF /*3a*/) && t_len))
2008                                                         n = tcpconn_1st_send(fd, c, t_buf, t_len,
2009                                                                                                         t_send_flags,
2010                                                                                                         &response[1], 1);
2011                                                 else if (unlikely(t_len &&
2012                                                                                         _wbufq_add(c, t_buf, t_len) < 0)) {
2013                                                         /*3b: CONN_NEW_PENDING_WRITE*/
2014                                                         response[1] = CONN_ERROR;
2015                                                         n = -1;
2016                                                 }
2017                                                 if (likely(n >= 0)) {
2018                                                         /* if t_len == 0 => nothing was sent => previous
2019                                                            response will be kept */
2020                                                         t_buf = rest_buf;
2021                                                         t_len = rest_len;
2022                                                         goto redo_tls_encode;
2023                                                 } else {
2024                                                         lock_release(&c->write_lock);
2025                                                         /* error exit */
2026                                                 }
2027                                         }
2028                         } else
2029 #endif /* USE_TLS */
2030                                 n=tcpconn_1st_send(fd, c, buf, len, dst->send_flags,
2031                                                                         &response[1], 0);
2032                         if (unlikely(n<0)) /* this will catch CONN_ERROR too */
2033                                 goto conn_wait_error;
2034                         if (unlikely(response[1]==CONN_EOF)){
2035                                 /* if close-after-send requested, don't bother
2036                                    sending the fd back to tcp_main, try closing it
2037                                    immediately (no other tcp_send should use it,
2038                                    because it is marked as close-after-send before
2039                                    being added to the hash) */
2040                                 goto conn_wait_close;
2041                         }
2042                         /* send to tcp_main */
2043                         response[0]=(long)c;
2044                         if (unlikely(send_fd(unix_tcp_sock, response,
2045                                                                         sizeof(response), fd) <= 0)){
2046                                 LOG(L_ERR, "BUG: tcp_send %s: %ld for %p"
2047                                                         " failed:" " %s (%d)\n",
2048                                                         su2a(&dst->to, sizeof(dst->to)),
2049                                                         response[1], c, strerror(errno), errno);
2050                                 goto conn_wait_error;
2051                         }
2052                         goto conn_wait_success;
2053                 }
2054 #endif /* TCP_CONNECT_WAIT  && TCP_ASYNC */
2055                 if (unlikely((c=tcpconn_connect(&dst->to, from, dst->proto,
2056                                                                                 &dst->send_flags))==0)){
2057                         LOG(L_ERR, "ERROR: tcp_send %s: connect failed\n",
2058                                                         su2a(&dst->to, sizeof(dst->to)));
2059                         return -1;
2060                 }
2061                 tcpconn_set_send_flags(c, dst->send_flags);
2062                 if (likely(c->state==S_CONN_OK))
2063                         TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
2064                 atomic_set(&c->refcnt, 2); /* ref. from here and it will also
2065                                                                           be added in the tcp_main hash */
2066                 fd=c->s;
2067                 c->flags|=F_CONN_FD_CLOSED; /* not yet opened in main */
2068                 /* ? TODO: it might be faster just to queue the write and
2069                  * send to main a CONN_NEW_PENDING_WRITE */
2070                 
2071                 /* send the new tcpconn to "tcp main" */
2072                 response[0]=(long)c;
2073                 response[1]=CONN_NEW;
2074                 n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
2075                 if (unlikely(n<=0)){
2076                         LOG(L_ERR, "BUG: tcp_send %s: failed send_fd: %s (%d)\n",
2077                                         su2a(&dst->to, sizeof(dst->to)),
2078                                         strerror(errno), errno);
2079                         /* we can safely delete it, it's not referenced by anybody */
2080                         _tcpconn_free(c);
2081                         n=-1;
2082                         goto end_no_conn;
2083                 }
2084                 /* new connection => send on it directly */
2085 #ifdef USE_TLS
2086                 if (unlikely(c->type==PROTO_TLS)) {
2087                         /* for TLS the TLS processing and the send must happen
2088                            atomically w/ respect to other sends on the same connection
2089                            (otherwise reordering might occur which would break TLS) =>
2090                            lock.
2091                         */
2092                         response[1] = CONN_NOP;
2093                         t_buf = buf;
2094                         t_len = len;
2095                         lock_get(&c->write_lock);
2096                                 do {
2097                                         t_send_flags = dst->send_flags;
2098                                         n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
2099                                                                         &t_send_flags);
2100                                         if (likely(n > 0)) {
2101                                                 n = tcpconn_do_send(fd, c, t_buf, t_len, t_send_flags,
2102                                                                                                 &resp, 1);
2103                                                 if (likely(response[1] != CONN_QUEUED_WRITE ||
2104                                                                         resp == CONN_ERROR))
2105                                                         /* don't overwrite a previous CONN_QUEUED_WRITE
2106                                                            unless error */
2107                                                         response[1] = resp;
2108                                         } else  if (unlikely(n < 0)) {
2109                                                 response[1] = CONN_ERROR;
2110                                                 break;
2111                                         }
2112                                         /* else do nothing for n (t_len) == 0, keep
2113                                            the last response */
2114                                         t_buf = rest_buf;
2115                                         t_len = rest_len;
2116                                 } while(unlikely(rest_len && n > 0));
2117                         lock_release(&c->write_lock);
2118                 } else
2119 #endif /* USE_TLS */
2120                         n = tcpconn_do_send(fd, c, buf, len, dst->send_flags,
2121                                                                         &response[1], 0);
2122                 if (unlikely(response[1] != CONN_NOP)) {
2123                         response[0]=(long)c;
2124                         if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
2125                                 BUG("tcp_main command %ld sending failed (write):"
2126                                                 "%s (%d)\n", response[1], strerror(errno), errno);
2127                                 /* all commands != CONN_NOP returned by tcpconn_do_send()
2128                                    (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec
2129                                    refcnt => if sending the command fails we have to
2130                                    dec. refcnt by hand */
2131                                 tcpconn_chld_put(c); /* deref. it manually */
2132                                 n=-1;
2133                         }
2134                         /* here refcnt for c is already decremented => c contents can
2135                            no longer be used and refcnt _must_ _not_ be decremented
2136                            again on exit */
2137                         if (unlikely(n < 0 || response[1] == CONN_EOF)) {
2138                                 /* on error or eof, close fd */
2139                                 tcp_safe_close(fd);
2140                         } else if (response[1] == CONN_QUEUED_WRITE) {
2141 #ifdef TCP_FD_CACHE
2142                                 if (cfg_get(tcp, tcp_cfg, fd_cache)) {
2143                                         tcp_fd_cache_add(c, fd);
2144                                 } else
2145 #endif /* TCP_FD_CACHE */
2146                                         tcp_safe_close(fd);
2147                         } else {
2148                                 BUG("unexpected tcpconn_do_send() return & response:"
2149                                                 " %d, %ld\n", n, response[1]);
2150                         }
2151                         goto end_no_deref;
2152                 }
2153 #ifdef TCP_FD_CACHE
2154                 if (cfg_get(tcp, tcp_cfg, fd_cache)) {
2155                         tcp_fd_cache_add(c, fd);
2156                 }else
2157 #endif /* TCP_FD_CACHE */
2158                         tcp_safe_close(fd);
2159         /* here we can have only commands that _do_ _not_ dec refcnt.
2160            (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE are all treated above) */
2161                 goto release_c;
2162         } /* if (c==0 or unusable) new connection */
2163         /* existing connection, send on it */
2164         n = tcpconn_send_put(c, buf, len, dst->send_flags);
2165         /* no deref needed (automatically done inside tcpconn_send_put() */
2166         return n;
2167 #ifdef TCP_CONNECT_WAIT
2168 conn_wait_success:
2169 #ifdef TCP_FD_CACHE
2170         if (cfg_get(tcp, tcp_cfg, fd_cache)) {
2171                 tcp_fd_cache_add(c, fd);
2172         } else
2173 #endif /* TCP_FD_CACHE */
2174                 if (unlikely (tcp_safe_close(fd) < 0))
2175                         LOG(L_ERR, "closing temporary send fd for %p: %s: "
2176                                         "close(%d) failed (flags 0x%x): %s (%d)\n", c,
2177                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2178                                         fd, c->flags, strerror(errno), errno);
2179         tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
2180         return n;
2181 conn_wait_error:
2182         n=-1;
2183 conn_wait_close:
2184         /* connect or send failed or immediate close-after-send was requested on
2185          * newly created connection which was not yet sent to tcp_main (but was
2186          * already hashed) => don't send to main, unhash and destroy directly
2187          * (if refcnt>2 it will be destroyed when the last sender releases the
2188          * connection (tcpconn_chld_put(c))) or when tcp_main receives a
2189          * CONN_ERROR it*/
2190         c->state=S_CONN_BAD;
2191         /* we are here only if we opened a new fd (and not reused a cached or
2192            a reader one) => if the connect was successful close the fd */
2193         if (fd>=0) {
2194                 if (unlikely(tcp_safe_close(fd) < 0 ))
2195                         LOG(L_ERR, "closing temporary send fd for %p: %s: "
2196                                         "close(%d) failed (flags 0x%x): %s (%d)\n", c,
2197                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2198                                         fd, c->flags, strerror(errno), errno);
2199         }
2200         /* here the connection is for sure in the hash (tcp_main will not
2201            remove it because it's marked as PENDing) and the refcnt is at least
2202            2
2203          */
2204         TCPCONN_LOCK;
2205                 _tcpconn_detach(c);
2206                 c->flags&=~F_CONN_HASHED;
2207                 tcpconn_put(c);
2208         TCPCONN_UNLOCK;
2209         /* dec refcnt -> mark it for destruction */
2210         tcpconn_chld_put(c);
2211         return n;
2212 #endif /* TCP_CONNECT_WAIT */
2213 release_c:
2214         tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
2215 end_no_deref:
2216 end_no_conn:
2217         return n;
2218 }
2219
2220
2221
2222 /** sends on an existing tcpconn and auto-dec. con. ref counter.
2223  * As opposed to tcp_send(), this function requires an existing
2224  * tcp connection.
2225  * WARNING: the tcp_connection will be de-referenced.
      *
      * Outline of what the body does:
      *  1. (TCP_ASYNC) if async mode is configured and the connection already
      *     has queued data (or, with TCP_CONNECT_WAIT, is flagged
      *     F_CONN_PENDING), the data is appended to the write queue under
      *     c->write_lock and no fd is acquired. For PROTO_TLS/PROTO_WSS the
      *     data is first passed through tls_encode().
      *  2. otherwise an fd is obtained: c->fd if the caller is the reader
      *     process of c, a cached fd (TCP_FD_CACHE) or one requested from
      *     tcp_main with a CONN_GET_FD command over unix_tcp_sock.
      *  3. the data is sent with tcpconn_do_send(); for PROTO_TLS/PROTO_WSS
      *     tls_encode() + tcpconn_do_send() are looped under c->write_lock.
      *  4. if the resulting response is not CONN_NOP it is forwarded to
      *     tcp_main and the function returns without calling
      *     tcpconn_chld_put() (except when forwarding fails); otherwise the
      *     fd is cached/closed and c is released via tcpconn_chld_put().
      *
2226  * @param c - existing tcp connection pointer.
2227  * @param buf - data to be sent.
2228  * @param len - data length,
      * @param send_flags - send flags, handed to tls_encode() and
      *                     tcpconn_do_send().
2229  * @return >=0 on success, -1 on error.
      *         (len when the data was only queued, else the value returned
      *          by tcpconn_do_send()).
2230  */
2231 static int tcpconn_send_put(struct tcp_connection* c, const char* buf,
2232                                                                 unsigned len, snd_flags_t send_flags)
2233 {
2234         struct tcp_connection *tmp; /* filled by receive_fd(), compared to c */
2235         int fd;
2236         long response[2]; /* [0] = connection pointer, [1] = command */
2237         int n;
2238         int do_close_fd; /* non-zero => fd is closed before returning */
2239 #ifdef USE_TLS
2240         const char* rest_buf; /* filled by tls_encode(), input of next pass */
2241         const char* t_buf;
2242         unsigned rest_len, t_len;
2243         long resp; /* per-iteration response of tcpconn_do_send() */
2244         snd_flags_t t_send_flags;
2245 #endif /* USE_TLS */
2246 #ifdef TCP_FD_CACHE
2247         struct fd_cache_entry* fd_cache_e; /* != 0 only if fd came from cache */
2248         int use_fd_cache;
2249         
2250         use_fd_cache=cfg_get(tcp, tcp_cfg, fd_cache);
2251         fd_cache_e=0;
2252 #endif /* TCP_FD_CACHE */
2253         do_close_fd=1; /* close the fd on exit */
2254         response[1] = CONN_NOP;
2255 #ifdef TCP_ASYNC
2256         /* if data is already queued, we don't need the fd */
             /* first test is done without holding write_lock (it is repeated
                below, after lock_get()) */
2257 #ifdef TCP_CONNECT_WAIT
2258                 if (unlikely(cfg_get(tcp, tcp_cfg, async) &&
2259                                                 (_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)) ))
2260 #else /* ! TCP_CONNECT_WAIT */
2261                 if (unlikely(cfg_get(tcp, tcp_cfg, async) && (_wbufq_non_empty(c)) ))
2262 #endif /* TCP_CONNECT_WAIT */
2263                 {
2264                         lock_get(&c->write_lock);
                             /* same test again, this time under write_lock */
2265 #ifdef TCP_CONNECT_WAIT
2266                                 if (likely(_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)))
2267 #else /* ! TCP_CONNECT_WAIT */
2268                                 if (likely(_wbufq_non_empty(c)))
2269 #endif /* TCP_CONNECT_WAIT */
2270                                 {
                                             /* no fd is acquired on this path => nothing
                                                to close */
2271                                         do_close_fd=0;
2272 #ifdef USE_TLS
2273                                         if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) {
2274                                                 t_buf = buf;
2275                                                 t_len = len;
                                                     /* encode and queue; repeat while tls_encode()
                                                        reports leftover input (rest_len != 0) and
                                                        returned > 0 */
2276                                                 do {
2277                                                         t_send_flags = send_flags;
2278                                                         n = tls_encode(c, &t_buf, &t_len,
2279                                                                                         &rest_buf, &rest_len,
2280                                                                                         &t_send_flags);
2281                                                         if (unlikely((n < 0) || (t_len &&
2282                                                                          (_wbufq_add(c, t_buf, t_len) < 0)))) {
                                                                     /* encode or queueing failed: mark the
                                                                        connection bad and report CONN_ERROR
                                                                        through the common error: code */
2283                                                                 lock_release(&c->write_lock);
2284                                                                 n=-1;
2285                                                                 response[1] = CONN_ERROR;
2286                                                                 c->state=S_CONN_BAD;
2287                                                                 c->timeout=get_ticks_raw(); /* force timeout */
2288                                                                 goto error;
2289                                                         }
2290                                                         t_buf = rest_buf;
2291                                                         t_len = rest_len;
2292                                                 } while(unlikely(rest_len && n > 0));
2293                                         } else
2294 #endif /* USE_TLS */
2295                                                 if (unlikely(len && (_wbufq_add(c, buf, len)<0))){
                                                             /* queueing failed: same handling as in the
                                                                TLS branch above */
2296                                                         lock_release(&c->write_lock);
2297                                                         n=-1;
2298                                                         response[1] = CONN_ERROR;
2299                                                         c->state=S_CONN_BAD;
2300                                                         c->timeout=get_ticks_raw(); /* force timeout */
2301                                                         goto error;
2302                                                 }
                                             /* data was queued => return len */
2303                                         n=len;
2304                                         lock_release(&c->write_lock);
2305                                         goto release_c;
2306                                 }
                             /* the test failed under lock => fall through to the
                                direct send code below */
2307                         lock_release(&c->write_lock);
2308                 }
2309 #endif /* TCP_ASYNC */
2310                 /* check if this is not the same reader process holding
2311                  *  c  and if so send directly on c->fd */
2312                 if (c->reader_pid==my_pid()){
2313                         DBG("tcp_send: send from reader (%d (%d)), reusing fd\n",
2314                                         my_pid(), process_no);
2315                         fd=c->fd;
2316                         do_close_fd=0; /* don't close the fd on exit, it's in use */
2317 #ifdef TCP_FD_CACHE
2318                         use_fd_cache=0; /* don't cache: problems would arise due to the
2319                                                            close() on cache eviction (if the fd is still 
2320                                                            used). If it has to be cached then dup() _must_ 
2321                                                            be used */
2322                 }else if (likely(use_fd_cache && 
2323                                                         ((fd_cache_e=tcp_fd_cache_get(c))!=0))){
                             /* cached fd found: use it and don't close it on exit */
2324                         fd=fd_cache_e->fd;
2325                         do_close_fd=0;
2326                         DBG("tcp_send: found fd in cache ( %d, %p, %d)\n",
2327                                         fd, c, fd_cache_e->id);
2328 #endif /* TCP_FD_CACHE */
2329                 }else{
2330                         DBG("tcp_send: tcp connection found (%p), acquiring fd\n", c);
2331                         /* get the fd */
                             /* (CONN_GET_FD request written to unix_tcp_sock, the
                                reply is read back with receive_fd()) */
2332                         response[0]=(long)c;
2333                         response[1]=CONN_GET_FD;
2334                         n=send_all(unix_tcp_sock, response, sizeof(response));
2335                         if (unlikely(n<=0)){
2336                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(write):%s (%d)\n",
2337                                                 strerror(errno), errno);
2338                                 n=-1;
2339                                 goto release_c;
2340                         }
2341                         DBG("tcp_send, c= %p, n=%d\n", c, n);
2342                         n=receive_fd(unix_tcp_sock, &tmp, sizeof(tmp), &fd, MSG_WAITALL);
2343                         if (unlikely(n<=0)){
2344                                 LOG(L_ERR, "BUG: tcp_send: failed to get fd(receive_fd):"
2345                                                         " %s (%d)\n", strerror(errno), errno);
2346                                 n=-1;
2347                                 do_close_fd=0;
2348                                 goto release_c;
2349                         }
2350                         /* handle fd closed or bad connection/error
2351                                 (it's possible that this happened in the time between
2352                                 we found the initial connection and the time when we get
2353                                 the fd)
2354                          */
2355                         if (unlikely(c!=tmp || fd==-1 || c->state==S_CONN_BAD)){
2356                                 if (unlikely(c!=tmp && tmp!=0))
2357                                         BUG("tcp_send: get_fd: got different connection:"
2358                                                 "  %p (id= %d, refcnt=%d state=%d) != "
2359                                                 "  %p (n=%d)\n",
2360                                                   c,   c->id,   atomic_get(&c->refcnt),   c->state,
2361                                                   tmp, n
2362                                                 );
2363                                 n=-1; /* fail */
2364                                 /* don't cache fd & close it */
                                     /* (the close is skipped only when fd == -1) */
2365                                 do_close_fd = (fd==-1)?0:1;
2366 #ifdef TCP_FD_CACHE
2367                                 use_fd_cache = 0;
2368 #endif /* TCP_FD_CACHE */
2369                                 goto end;
2370                         }
2371                         DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
2372                 }
2373         
2374 #ifdef USE_TLS
2375                 if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) {
2376                         /* for TLS the TLS processing and the send must happen
2377                            atomically w/ respect to other sends on the same connection
2378                            (otherwise reordering might occur which would break TLS) =>
2379                            lock.
2380                         */
2381                         response[1] = CONN_NOP;
2382                         t_buf = buf;
2383                         t_len = len;
2384                         lock_get(&c->write_lock);
                                     /* encode + send; repeat while tls_encode() reports
                                        leftover input (rest_len != 0) and the last
                                        tcpconn_do_send() returned > 0 */
2385                                 do {
2386                                         t_send_flags = send_flags;
2387                                         n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
2388                                                                         &t_send_flags);
2389                                         if (likely(n > 0)) {
2390                                                 n = tcpconn_do_send(fd, c, t_buf, t_len, t_send_flags,
2391                                                                                                 &resp, 1);
2392                                                 if (likely(response[1] != CONN_QUEUED_WRITE ||
2393                                                                         resp == CONN_ERROR))
2394                                                         /* don't overwrite a previous CONN_QUEUED_WRITE
2395                                                            unless error */
2396                                                         response[1] = resp;
2397                                         } else if (unlikely(n < 0)) {
2398                                                 response[1] = CONN_ERROR;
2399                                                 break;
2400                                         }
2401                                         /* else do nothing for n (t_len) == 0, keep
2402                                            the last response */
2403                                         t_buf = rest_buf;
2404                                         t_len = rest_len;
2405                                 } while(unlikely(rest_len && n > 0));
2406                         lock_release(&c->write_lock);
2407                 } else
2408 #endif
                             /* non-TLS: single send */
2409                         n = tcpconn_do_send(fd, c, buf, len, send_flags, &response[1], 0);
2410         if (unlikely(response[1] != CONN_NOP)) {
             /* error: is also the target of the goto's in the TCP_ASYNC queueing
                code above; they arrive here with response[1] == CONN_ERROR,
                n == -1, do_close_fd == 0 and fd never assigned */
2411 error:
2412                 response[0]=(long)c;
2413                 if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
2414                         BUG("tcp_main command %ld sending failed (write):%s (%d)\n",
2415                                         response[1], strerror(errno), errno);
2416                         /* all commands != CONN_NOP returned by tcpconn_do_send()
2417                            (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec refcnt
2418                            => if sending the command fails we have to dec. refcnt by hand
2419                          */
2420                         tcpconn_chld_put(c); /* deref. it manually */
2421                         n=-1;
2422                 }
2423                 /* here refcnt for c is already decremented => c contents can no
2424                    longer be used and refcnt _must_ _not_ be decremented again
2425                    on exit */
2426                 if (unlikely(n < 0 || response[1] == CONN_EOF)) {
2427                         /* on error or eof, remove from cache or close fd */
2428 #ifdef TCP_FD_CACHE
2429                         if (unlikely(fd_cache_e)){
2430                                 tcp_fd_cache_rm(fd_cache_e);
2431                                 fd_cache_e = 0;
2432                                 tcp_safe_close(fd);
2433                         }else
2434 #endif /* TCP_FD_CACHE */
2435                                 if (do_close_fd) tcp_safe_close(fd);
2436                 } else if (response[1] == CONN_QUEUED_WRITE) {
                             /* successful send with queued data: add the fd to the
                                cache (if enabled and not already taken from it),
                                else close it if we own it */
2437 #ifdef TCP_FD_CACHE
2438                         if (unlikely((fd_cache_e==0) && use_fd_cache)){
2439                                 tcp_fd_cache_add(c, fd);
2440                         }else
2441 #endif /* TCP_FD_CACHE */
2442                                 if (do_close_fd) tcp_safe_close(fd);
2443                 } else {
2444                         BUG("unexpected tcpconn_do_send() return & response: %d, %ld\n",
2445                                         n, response[1]);
2446                 }
2447                 return n; /* no tcpconn_put */
2448         }
             /* end: is reached on the CONN_NOP path and from the failed fd check
                after receive_fd() (which arrives with use_fd_cache == 0) */
2449 end:
2450 #ifdef TCP_FD_CACHE
2451         if (unlikely((fd_cache_e==0) && use_fd_cache)){
2452                 tcp_fd_cache_add(c, fd);
2453         }else
2454 #endif /* TCP_FD_CACHE */
2455         if (do_close_fd) {
2456                 if (unlikely(tcp_safe_close(fd) < 0))
2457                         LOG(L_ERR, "closing temporary send fd for %p: %s: "
2458                                         "close(%d) failed (flags 0x%x): %s (%d)\n", c,
2459                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2460                                         fd, c->flags, strerror(errno), errno);
2461         }
2462         /* here we can have only commands that _do_ _not_ dec refcnt.
2463            (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE are all treated above) */
2464 release_c:
2465         tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
2466         return n;
2467 }
2468
2469
2470
2471 /* unsafe send on a known tcp connection.
2472  * Directly send on a known tcp connection with a given fd.
2473  * It is assumed that the connection locks are already held.
2474  * Side effects: if needed it will send state update commands to
2475  *  tcp_main (e.g. CON_EOF, CON_ERROR, CON_QUEUED_WRITE).
2476  * @param fd - fd used for sending.
2477  * @param c - existing tcp connection pointer (state and flags might be
2478  *            changed).
2479  * @param buf - data to be sent.
2480  * @param len - data length.
2481  * @param send_flags
2482  * @return <0 on error, number of bytes sent on success.
2483  */
2484 int tcpconn_send_unsafe(int fd, struct tcp_connection *c,
2485                                                 const char* buf, unsigned len, snd_flags_t send_flags)
2486 {
2487         int n;
2488         long response[2];
2489         
2490         n = tcpconn_do_send(fd, c, buf, len, send_flags, &response[1], 1);
2491         if (unlikely(response[1] != CONN_NOP)) {
2492                 /* all commands != CONN_NOP returned by tcpconn_do_send()
2493                    (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec refcnt
2494                    => increment it (we don't want the connection to be destroyed
2495                    from under us)
2496                  */
2497                 atomic_inc(&c->refcnt);
2498                 response[0]=(long)c;
2499                 if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
2500                         BUG("connection %p command %ld sending failed (write):%s (%d)\n",
2501                                         c, response[1], strerror(errno), errno);
2502                         /* send failed => deref. it back by hand */
2503                         tcpconn_chld_put(c); 
2504                         n=-1;
2505                 }
2506                 /* here refcnt for c is already decremented => c contents can no
2507                    longer be used and refcnt _must_ _not_ be decremented again
2508                    on exit */
2509                 return n;
2510         }
2511         return n;
2512 }
2513
2514
2515
2516 /** lower level send (connection and fd should be known).
2517  * It takes care of possible write-queueing, blacklisting a.s.o.
2518  * It expects a valid tcp connection. It doesn't touch the ref. cnts.
2519  * It will also set the connection flags from send_flags (it's better
2520  * to do it here, because it's guaranteed to be under lock).
2521  * @param fd - fd used for sending.
2522  * @param c - existing tcp connection pointer (state and flags might be
2523  *            changed).
2524  * @param buf - data to be sent.
2525  * @param len - data length.
2526  * @param send_flags
2527  * @param resp - filled with a cmd. for tcp_main:
2528  *                      CONN_NOP - nothing needs to be done (do not send
2529  *                                 anything to tcp_main).
2530  *                      CONN_ERROR - error, connection should be closed.
2531  *                      CONN_EOF - no error, but connection should be closed.
2532  *                      CONN_QUEUED_WRITE - new write queue (connection
2533  *                                 should be watched for write and the wr.
2534  *                                 queue flushed).
2535  * @param locked - if set assume the connection is already locked (call from
2536  *                  tls) and do not lock/unlock the connection.
2537  * @return >=0 on success, < 0 on error && *resp == CON_ERROR.
2538  *
2539  */
2540 static int tcpconn_do_send(int fd, struct tcp_connection* c,
2541                                                         const char* buf, unsigned len,
2542                                                         snd_flags_t send_flags, long* resp,
2543                                                         int locked)
2544 {
2545         int  n;
2546 #ifdef TCP_ASYNC
2547         int enable_write_watch;
2548 #endif /* TCP_ASYNC */
2549
2550         DBG("tcp_send: sending...\n");
2551         *resp = CONN_NOP;
2552         if (likely(!locked)) lock_get(&c->write_lock);
2553         /* update connection send flags with the current ones */
2554         tcpconn_set_send_flags(c, send_flags);
2555 #ifdef TCP_ASYNC
2556         if (likely(cfg_get(tcp, tcp_cfg, async))){
2557                 if (_wbufq_non_empty(c)
2558 #ifdef TCP_CONNECT_WAIT
2559                         || (c->flags&F_CONN_PENDING) 
2560 #endif /* TCP_CONNECT_WAIT */
2561                         ){
2562                         if (unlikely(_wbufq_add(c, buf, len)<0)){
2563                                 if (likely(!locked)) lock_release(&c->write_lock);
2564                                 n=-1;
2565                                 goto error;
2566                         }
2567                         if (likely(!locked)) lock_release(&c->write_lock);
2568                         n=len;
2569                         goto end;
2570                 }
2571                 n=_tcpconn_write_nb(fd, c, buf, len);
2572         }else{
2573 #endif /* TCP_ASYNC */
2574                 /* n=tcp_blocking_write(c, fd, buf, len); */
2575                 n=tsend_stream(fd, buf, len,
2576                                                 TICKS_TO_S(cfg_get(tcp, tcp_cfg, send_timeout)) *
2577                                                 1000);
2578 #ifdef TCP_ASYNC
2579         }
2580 #else /* ! TCP_ASYNC */
2581         if (likely(!locked)) lock_release(&c->write_lock);
2582 #endif /* TCP_ASYNC */
2583         
2584         DBG("tcp_send: after real write: c= %p n=%d fd=%d\n",c, n, fd);
2585         DBG("tcp_send: buf=\n%.*s\n", (int)len, buf);
2586         if (unlikely(n<(int)len)){
2587 #ifdef TCP_ASYNC
2588                 if (cfg_get(tcp, tcp_cfg, async) &&
2589                                 ((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK)){
2590                         enable_write_watch=_wbufq_empty(c);
2591                         if (n<0) n=0;
2592                         else if (unlikely(c->state==S_CONN_CONNECT ||
2593                                                 c->state==S_CONN_ACCEPT)){
2594                                 TCP_STATS_ESTABLISHED(c->state);
2595                                 c->state=S_CONN_OK; /* something was written */
2596                         }
2597                         if (unlikely(_wbufq_add(c, buf+n, len-n)<0)){
2598                                 if (likely(!locked)) lock_release(&c->write_lock);
2599                                 n=-1;
2600                                 goto error;
2601                         }
2602                         if (likely(!locked)) lock_release(&c->write_lock);
2603                         n=len;
2604                         if (likely(enable_write_watch))
2605                                 *resp=CONN_QUEUED_WRITE;
2606                         goto end;
2607                 }else{
2608                         if (likely(!locked)) lock_release(&c->write_lock);
2609                 }
2610 #endif /* TCP_ASYNC */
2611                 if (unlikely(c->state==S_CONN_CONNECT)){
2612                         switch(errno){
2613                                 case ENETUNREACH:
2614                                 case EHOSTUNREACH: /* not posix for send() */
2615 #ifdef USE_DST_BLACKLIST
2616                                         dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
2617                                                                                 &c->rcv.src_su, &c->send_flags, 0);
2618 #endif /* USE_DST_BLACKLIST */
2619                                         TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
2620                                                                         TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2621                                         break;
2622                                 case ECONNREFUSED:
2623                                 case ECONNRESET:
2624 #ifdef USE_DST_BLACKLIST
2625                                         dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
2626                                                                                 &c->rcv.src_su, &c->send_flags, 0);
2627 #endif /* USE_DST_BLACKLIST */
2628                                         TCP_EV_CONNECT_RST(errno, TCP_LADDR(c), TCP_LPORT(c),
2629                                                                                 TCP_PSU(c), TCP_PROTO(c));
2630                                         break;
2631                                 default:
2632                                         TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c), TCP_LPORT(c),
2633                                                                                 TCP_PSU(c), TCP_PROTO(c));
2634                                 }
2635                         TCP_STATS_CONNECT_FAILED();
2636                 }else{
2637                         switch(errno){
2638                                 case ECONNREFUSED:
2639                                 case ECONNRESET:
2640                                         TCP_STATS_CON_RESET();
2641                                         /* no break */
2642                                 case ENETUNREACH:
2643                                 /*case EHOSTUNREACH: -- not posix */
2644 #ifdef USE_DST_BLACKLIST
2645                                         dst_blacklist_su(BLST_ERR_SEND, c->rcv.proto,
2646                                                                                 &c->rcv.src_su, &c->send_flags, 0);
2647 #endif /* USE_DST_BLACKLIST */
2648                                         break;
2649                         }
2650                 }
2651                 LOG(L_ERR, "ERROR: tcp_send: failed to send on %p (%s:%d->%s): %s (%d)"
2652                                         "\n", c, ip_addr2a(&c->rcv.dst_ip), c->rcv.dst_port,
2653                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2654                                         strerror(errno), errno);
2655                 n = -1;
2656 #ifdef TCP_ASYNC
2657 error:
2658 #endif /* TCP_ASYNC */
2659                 /* error on the connection , mark it as bad and set 0 timeout */
2660                 c->state=S_CONN_BAD;
2661                 c->timeout=get_ticks_raw();
2662                 /* tell "main" it should drop this (optional it will t/o anyway?)*/
2663                 *resp=CONN_ERROR;
2664                 return n; /* error return, no tcpconn_put */
2665         }
2666         
2667 #ifdef TCP_ASYNC
2668         if (likely(!locked)) lock_release(&c->write_lock);
2669 #endif /* TCP_ASYNC */
2670         /* in non-async mode here we're either in S_CONN_OK or S_CONN_ACCEPT*/
2671         if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
2672                         TCP_STATS_ESTABLISHED(c->state);
2673                         c->state=S_CONN_OK;
2674         }
2675         if (unlikely(send_flags.f & SND_F_CON_CLOSE)){
2676                 /* close after write => send EOF request to tcp_main */
2677                 c->state=S_CONN_BAD;
2678                 c->timeout=get_ticks_raw();
2679                 /* tell "main" it should drop this*/
2680                 *resp=CONN_EOF;
2681                 return n;
2682         }
2683 end:
2684         return n;
2685 }
2686
2687
2688
2689 /** low level 1st send on a new connection.
2690  * It takes care of possible write-queueing, blacklisting a.s.o.
2691  * It expects a valid just-opened tcp connection. It doesn't touch the 
2692  * ref. counters. It's used only in the async first send case.
2693  * @param fd - fd used for sending.
2694  * @param c - existing tcp connection pointer (state and flags might be
2695  *            changed). The connection must be new (no previous send on it).
2696  * @param buf - data to be sent.
2697  * @param len - data length.
2698  * @param send_flags
2699  * @param resp - filled with a fd sending cmd. for tcp_main on success. It
2700  *                      _must_ be one of the commands listed below:
2701  *                      CONN_NEW_PENDING_WRITE - new connection, first write
2702  *                                 was partially successful (or EAGAIN) and
2703  *                                 was queued (connection should be watched
2704  *                                 for write and the write queue flushed).
2705  *                                 The fd should be sent to tcp_main.
2706  *                      CONN_NEW_COMPLETE - new connection, first write
2707  *                                 completed successfully and no data is
2708  *                                 queued. The fd should be sent to tcp_main.
2709  *                      CONN_EOF - no error, but the connection should be
2710  *                                  closed (e.g. SND_F_CON_CLOSE send flag).
2711  *                      CONN_ERROR - error, _must_ return < 0.
2712  * @param locked - if set assume the connection is already locked (call from
2713  *                  tls) and do not lock/unlock the connection.
2714  * @return >=0 on success, < 0 on error (on error *resp is undefined).
2715  *
2716  */
2717 static int tcpconn_1st_send(int fd, struct tcp_connection* c,
2718                                                         const char* buf, unsigned len,
2719                                                         snd_flags_t send_flags, long* resp,
2720                                                         int locked)
2721 {
2722         int n;
2723         
2724         n=_tcpconn_write_nb(fd, c, buf, len);
2725         if (unlikely(n<(int)len)){
2726                 /* on EAGAIN or ENOTCONN return success.
2727                    ENOTCONN appears on newer FreeBSD versions (non-blocking socket,
2728                    connect() & send immediately) */
2729                 if ((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK || errno==ENOTCONN){
2730                         DBG("pending write on new connection %p "
2731                                 " (%d/%d bytes written)\n", c, n, len);
2732                         if (unlikely(n<0)) n=0;
2733                         else{
2734                                 if (likely(c->state == S_CONN_CONNECT))
2735                                         TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
2736                                 c->state=S_CONN_OK; /* partial write => connect()
2737                                                                                                 ended */
2738                         }
2739                         /* add to the write queue */
2740                         if (likely(!locked)) lock_get(&c->write_lock);
2741                                 if (unlikely(_wbufq_insert(c, buf+n, len-n)<0)){
2742                                         if (likely(!locked)) lock_release(&c->write_lock);
2743                                         n=-1;
2744                                         LOG(L_ERR, "%s: EAGAIN and"
2745                                                         " write queue full or failed for %p\n",
2746                                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)), c);
2747                                         goto error;
2748                                 }
2749                         if (likely(!locked)) lock_release(&c->write_lock);
2750                         /* send to tcp_main */
2751                         *resp=CONN_NEW_PENDING_WRITE;
2752                         n=len;
2753                         goto end;
2754                 }
2755                 /* n < 0 and not EAGAIN => write error */
2756                 /* if first write failed it's most likely a
2757                    connect error */
2758                 switch(errno){
2759                         case ENETUNREACH:
2760                         case EHOSTUNREACH:  /* not posix for send() */
2761 #ifdef USE_DST_BLACKLIST
2762                                 dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
2763                                                                         &c->rcv.src_su, &c->send_flags, 0);
2764 #endif /* USE_DST_BLACKLIST */
2765                                 TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
2766                                                                 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2767                                 break;
2768                         case ECONNREFUSED:
2769                         case ECONNRESET:
2770 #ifdef USE_DST_BLACKLIST
2771                                 dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
2772                                                                         &c->rcv.src_su, &c->send_flags, 0);
2773 #endif /* USE_DST_BLACKLIST */
2774                                 TCP_EV_CONNECT_RST(errno, TCP_LADDR(c),
2775                                                                 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2776                                 break;
2777                         default:
2778                                 TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
2779                                                                 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2780                 }
2781                 /* error: destroy it directly */
2782                 TCP_STATS_CONNECT_FAILED();
2783                 LOG(L_ERR, "%s: connect & send  for %p failed:" " %s (%d)\n",
2784                                         su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2785                                         c, strerror(errno), errno);
2786                 goto error;
2787         }
2788         LOG(L_INFO, "quick connect for %p\n", c);