4 * Copyright (C) 2001-2003 FhG Fokus
6 * This file is part of ser, a free SIP server.
8 * ser is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version
13 * For a license to use the ser software under conditions
14 * other than those described here, or to purchase support for this
15 * software, please contact iptel.org by e-mail at the following addresses:
18 * ser is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 * 2002-11-29 created by andrei
31 * 2002-12-11 added tcp_send (andrei)
32 * 2003-01-20 locking fixes, hashtables (andrei)
33 * 2003-02-20 s/lock_t/gen_lock_t/ to avoid a conflict on solaris (andrei)
34 * 2003-02-25 Nagle is disabled if -DDISABLE_NAGLE (andrei)
35 * 2003-03-29 SO_REUSEADDR before calling bind to allow
36 * server restart, Nagle set on the (hopefully)
37 * correct socket (jiri)
38 * 2003-03-31 always try to find the corresponding tcp listen socket for
39 * a temp. socket and store it in *->bind_address: added
40 * find_tcp_si, modified tcpconn_connect (andrei)
41 * 2003-04-14 set sockopts to TOS low delay (andrei)
42 * 2003-06-30 moved tcp new connect checking & handling to
43 * handle_new_connect (andrei)
44 * 2003-07-09 tls_close called before closing the tcp connection (andrei)
45 * 2003-10-24 converted to the new socket_info lists (andrei)
46 * 2003-10-27 tcp port aliases support added (andrei)
47 * 2003-11-04 always lock before manipulating refcnt; sendchild
48 * does not inc refcnt by itself anymore (andrei)
49 * 2003-11-07 different unix sockets are used for fd passing
50 * to/from readers/writers (andrei)
51 * 2003-11-17 handle_new_connect & tcp_connect will close the
52 * new socket if tcpconn_new return 0 (e.g. out of mem) (andrei)
53 * 2003-11-28 tcp_blocking_write & tcp_blocking_connect added (andrei)
54 * 2004-11-08 dropped find_tcp_si and replaced with find_si (andrei)
55 * 2005-06-07 new tcp optimized code, supports epoll (LT), sigio + real time
56 * signals, poll & select (andrei)
57 * 2005-06-26 *bsd kqueue support (andrei)
58 * 2005-07-04 solaris /dev/poll support (andrei)
59 * 2005-07-08 tcp_max_connections, tcp_connection_lifetime, don't accept
60 * more connections if tcp_max_connections is exceeded (andrei)
61 * 2005-10-21 cleanup all the open connections on exit
62 * decrement the no. of open connections on timeout too (andrei)
* 2006-01-30 queue send_fd request and execute them at the end of the
63 * poll loop (#ifdef) (andrei)
64 * process all children requests, before attempting to send
65 * them new stuff (fixes some deadlocks) (andrei)
66 * 2006-02-03 timers are run only once per s (andrei)
67 * tcp children fds can be non-blocking; send fds are queued on
68 * EAGAIN; lots of bug fixes (andrei)
69 * 2006-02-06 better tcp_max_connections checks, tcp_connections_no moved to
71 * 2006-04-12 tcp_send() changed to use struct dest_info (andrei)
72 * 2006-11-02 switched to atomic ops for refcnt, locking improvements
74 * 2006-11-04 switched to raw ticks (to fix conversion errors which could
75 * result in inf. lifetime) (andrei)
76 * 2007-07-25 tcpconn_connect can now bind the socket on a specified
77 * source addr/port (andrei)
78 * 2007-07-26 tcp_send() and tcpconn_get() can now use a specified source
80 * 2007-08-23 getsockname() for INADDR_ANY(SI_IS_ANY) sockets (andrei)
81 * 2007-08-27 split init_sock_opt into a lightweight init_sock_opt_accept()
82 * used when accepting connections and init_sock_opt used for
83 * connect/ new sockets (andrei)
84 * 2007-11-22 always add the connection & clear the corresponding flags before
85 * io_watch_add-ing its fd - it's safer this way (andrei)
86 * 2007-11-26 improved tcp timers: switched to local_timer (andrei)
87 * 2007-11-27 added send fd cache and reader fd reuse (andrei)
88 * 2007-11-28 added support for TCP_DEFER_ACCEPT, KEEPALIVE, KEEPINTVL,
89 * KEEPCNT, QUICKACK, SYNCNT, LINGER2 (andrei)
90 * 2007-12-04 support for queueing write requests (andrei)
91 * 2007-12-12 destroy connection asap on wbuf. timeout (andrei)
92 * 2007-12-13 changed the refcnt and destroy scheme, now refcnt is 1 if
93 * linked into the hash tables (was 0) (andrei)
94 * 2007-12-21 support for pending connects (connections are added to the
95 * hash immediately and writes on them are buffered) (andrei)
96 * 2008-02-05 handle POLLRDHUP (if supported), POLLERR and
98 * on write error check if there's still data in the socket
99 * read buffer and process it first (andrei)
100 * 2009-02-26 direct blacklist support (andrei)
101 * 2009-03-20 s/wq_timeout/send_timeout ; send_timeout is now in ticks
103 * 2009-04-09 tcp ev and tcp stats macros added (andrei)
104 * 2009-09-15 support for force connection reuse and close after send
105 * send flags (andrei)
106 * 2010-03-23 tcp_send() split in 3 smaller functions (andrei)
109 /** tcp main/dispatcher and tcp send functions.
120 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
123 #define HANDLE_IO_INLINE
124 #include "io_wait.h" /* include first to make sure the needed features are
125 turned on (e.g. _GNU_SOURCE for POLLRDHUP) */
127 #include <sys/time.h>
128 #include <sys/types.h>
129 #include <sys/select.h>
130 #include <sys/socket.h>
132 #include <sys/filio.h> /* needed on solaris 2.x for FIONREAD */
133 #elif defined __OS_solaris
134 #define BSD_COMP /* needed on older solaris for FIONREAD */
135 #endif /* HAVE_FILIO_H / __OS_solaris */
136 #include <sys/ioctl.h> /* ioctl() used on write error */
137 #include <netinet/in.h>
138 #include <netinet/in_systm.h>
139 #include <netinet/ip.h>
140 #include <netinet/tcp.h>
141 #include <sys/uio.h> /* writev*/
143 #include <stdlib.h> /*exit() */
151 #include <sys/select.h>
153 #include <sys/poll.h>
158 #include "tcp_conn.h"
163 #include "mem/shm_mem.h"
165 #include "sr_module.h"
166 #include "tcp_server.h"
167 #include "tcp_init.h"
168 #include "tcp_int_send.h"
169 #include "tcp_stats.h"
172 #include "timer_ticks.h"
173 #include "local_timer.h"
175 #include "tls/tls_server.h"
176 #define tls_loaded() 1
178 #include "tls_hooks_init.h"
179 #include "tls_hooks.h"
181 #ifdef USE_DST_BLACKLIST
182 #include "dst_blacklist.h"
183 #endif /* USE_DST_BLACKLIST */
185 #include "tcp_info.h"
186 #include "tcp_options.h"
188 #include "cfg/cfg_struct.h"
190 #define local_malloc pkg_malloc
191 #define local_free pkg_free
193 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
196 #ifdef NO_MSG_DONTWAIT
198 /* should work inside tcp_main */
199 #define MSG_DONTWAIT 0
201 #endif /*NO_MSG_DONTWAIT */
204 #define TCP_PASS_NEW_CONNECTION_ON_DATA /* don't pass a new connection
205 immediately to a child, wait for
206 some data on it first */
207 #define TCP_LISTEN_BACKLOG 1024
208 #define SEND_FD_QUEUE /* queue send fd requests on EAGAIN, instead of sending
210 #define TCP_CHILD_NON_BLOCKING
212 #ifndef TCP_CHILD_NON_BLOCKING
213 #define TCP_CHILD_NON_BLOCKING
215 #define MAX_SEND_FD_QUEUE_SIZE tcp_main_max_fd_no
216 #define SEND_FD_QUEUE_SIZE 128 /* initial size */
217 #define SEND_FD_QUEUE_TIMEOUT MS_TO_TICKS(2000) /* 2 s */
220 /* minimum interval local_timer_run() is allowed to run, in ticks */
221 #define TCPCONN_TIMEOUT_MIN_RUN 1 /* once per tick */
222 #define TCPCONN_WAIT_TIMEOUT 1 /* 1 tick */
225 static unsigned int* tcp_total_wq=0;
229 enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
230 F_TCPCONN, F_TCPCHILD, F_PROC };
235 #define TCP_FD_CACHE_SIZE 8
237 struct fd_cache_entry{
238 struct tcp_connection* con;
244 static struct fd_cache_entry fd_cache[TCP_FD_CACHE_SIZE];
245 #endif /* TCP_FD_CACHE */
247 static int is_tcp_main=0;
250 enum poll_types tcp_poll_method=0; /* by default choose the best method */
251 int tcp_main_max_fd_no=0;
252 int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
253 int tls_max_connections=DEFAULT_TLS_MAX_CONNECTIONS;
255 static union sockaddr_union tcp_source_ipv4_addr; /* saved bind/srv v4 addr. */
256 static union sockaddr_union* tcp_source_ipv4=0;
258 static union sockaddr_union tcp_source_ipv6_addr; /* saved bind/src v6 addr. */
259 static union sockaddr_union* tcp_source_ipv6=0;
262 static int* tcp_connections_no=0; /* current tcp (+tls) open connections */
263 static int* tls_connections_no=0; /* current tls open connections */
265 /* connection hash table (after ip&port) , includes also aliases */
266 struct tcp_conn_alias** tcpconn_aliases_hash=0;
267 /* connection hash table (after connection id) */
268 struct tcp_connection** tcpconn_id_hash=0;
269 gen_lock_t* tcpconn_lock=0;
271 struct tcp_child* tcp_children=0;
272 static int* connection_id=0; /* unique for each connection, used for
273 quickly finding the corresponding connection
277 static int tcp_proto_no=-1; /* tcp protocol number as returned by
280 static io_wait_h io_h;
282 static struct local_timer tcp_main_ltimer;
283 static ticks_t tcp_main_prev_ticks;
285 /* tell if there are tcp workers that should handle only specific socket
286 * - used to optimize the search of least loaded worker for a tcp socket
287 * - 0 - no workers per tcp sockets have been set
288 * - 1 + generic_workers - when there are workers per tcp sockets
290 static int tcp_sockets_gworkers = 0;
292 static ticks_t tcpconn_main_timeout(ticks_t , struct timer_ln* , void* );
294 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
295 struct ip_addr* l_ip, int l_port,
300 /* sets source address used when opening new sockets and no source is specified
301 * (by default the address is chosen by the kernel)
302 * Should be used only on init.
303 * returns -1 on error */
304 int tcp_set_src_addr(struct ip_addr* ip)
/* v4 source: convert ip into the static tcp_source_ipv4_addr and point
 * tcp_source_ipv4 at it (the pointer is initialised to 0 and is only set
 * here). The last ip_addr2su() argument is 0 - presumably the port, i.e.
 * no fixed source port; confirm against ip_addr2su(). */
308 ip_addr2su(&tcp_source_ipv4_addr, ip, 0);
309 tcp_source_ipv4=&tcp_source_ipv4_addr;
/* v6 source: same as above, using the v6 storage and pointer */
313 ip_addr2su(&tcp_source_ipv6_addr, ip, 0);
314 tcp_source_ipv6=&tcp_source_ipv6_addr;
/* applies the keepalive settings from the tcp config group to socket s:
 * SO_KEEPALIVE is switched on when the keepalive option is non-zero and
 * TCP_KEEPINTVL, TCP_KEEPIDLE and TCP_KEEPCNT are set from keepintvl,
 * keepidle and keepcnt when those are non-zero (a zero value means that
 * setsockopt() is not called for that option at all).
 * Each option is compiled in only if the matching HAVE_... macro is
 * defined; a setsockopt() failure is only logged as a warning. */
325 static inline int init_sock_keepalive(int s)
329 #ifdef HAVE_SO_KEEPALIVE
/* enable keepalive probes only if requested in the config */
330 if (cfg_get(tcp, tcp_cfg, keepalive)){
332 if (setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &optval,
334 LOG(L_WARN, "WARNING: init_sock_keepalive: failed to enable"
335 " SO_KEEPALIVE: %s\n", strerror(errno));
340 #ifdef HAVE_TCP_KEEPINTVL
/* interval between keepalive probes; the config value is loaded into
 * optval and passed as is to setsockopt() */
341 if ((optval=cfg_get(tcp, tcp_cfg, keepintvl))){
342 if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &optval,
344 LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
345 " keepalive probes interval: %s\n", strerror(errno));
349 #ifdef HAVE_TCP_KEEPIDLE
/* idle time before the first keepalive probe */
350 if ((optval=cfg_get(tcp, tcp_cfg, keepidle))){
351 if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &optval,
353 LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
354 " keepalive idle interval: %s\n", strerror(errno));
358 #ifdef HAVE_TCP_KEEPCNT
/* maximum number of keepalive probes */
359 if ((optval=cfg_get(tcp, tcp_cfg, keepcnt))){
360 if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &optval,
362 LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
363 " maximum keepalive count: %s\n", strerror(errno));
372 /* set all socket/fd options for new sockets (e.g. before connect):
373 * disable nagle, tos lowdelay, reuseaddr, non-blocking
375 * return -1 on error */
376 static int init_sock_opt(int s)
/* Nagle is disabled only if the tcp protocol number is known
 * (tcp_proto_no != -1); a failure is just logged */
383 if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
384 &flags, sizeof(flags))<0) ){
385 LOG(L_WARN, "WARNING: init_sock_opt: could not disable Nagle: %s\n",
/* set the IP TOS byte from optval */
391 if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,sizeof(optval)) ==-1){
392 LOG(L_WARN, "WARNING: init_sock_opt: setsockopt tos: %s\n",
394 /* continue since this is not critical */
396 #if !defined(TCP_DONT_REUSEADDR)
/* SO_REUSEADDR: logged at error level on failure, but still not fatal */
398 if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
399 (void*)&optval, sizeof(optval))==-1){
400 LOG(L_ERR, "ERROR: setsockopt SO_REUSEADDR %s\n",
402 /* continue, not critical */
404 #endif /* !TCP_DONT_REUSEADDR */
405 #ifdef HAVE_TCP_SYNCNT
/* the options below are applied only when their config value is non-zero */
406 if ((optval=cfg_get(tcp, tcp_cfg, syncnt))){
407 if (setsockopt(s, IPPROTO_TCP, TCP_SYNCNT, &optval,
409 LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
410 " maximum SYN retr. count: %s\n", strerror(errno));
414 #ifdef HAVE_TCP_LINGER2
415 if ((optval=cfg_get(tcp, tcp_cfg, linger2))){
416 if (setsockopt(s, IPPROTO_TCP, TCP_LINGER2, &optval,
418 LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
419 " maximum LINGER2 timeout: %s\n", strerror(errno));
423 #ifdef HAVE_TCP_QUICKACK
424 if (cfg_get(tcp, tcp_cfg, delayed_ack)){
425 optval=0; /* reset quick ack => delayed ack */
426 if (setsockopt(s, IPPROTO_TCP, TCP_QUICKACK, &optval,
428 LOG(L_WARN, "WARNING: init_sock_opt: failed to reset"
429 " TCP_QUICKACK: %s\n", strerror(errno));
432 #endif /* HAVE_TCP_QUICKACK */
/* keepalive options; the return value of init_sock_keepalive() is ignored */
433 init_sock_keepalive(s);
/* switch the socket to non-blocking mode: read the current file status
 * flags and write them back with O_NONBLOCK added.
 * NOTE(review): the first log message below spells the call fnctl instead
 * of fcntl. */
436 flags=fcntl(s, F_GETFL);
438 LOG(L_ERR, "ERROR: init_sock_opt: fnctl failed: (%d) %s\n",
439 errno, strerror(errno));
442 if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
443 LOG(L_ERR, "ERROR: init_sock_opt: fcntl: set non-blocking failed:"
444 " (%d) %s\n", errno, strerror(errno));
454 /* set all socket/fd options for "accepted" sockets
455 * only nonblocking is set since the rest is inherited from the
456 * "parent" (listening) socket
457 * Note: setting O_NONBLOCK is required on linux but it's not needed on
458 * BSD and possibly solaris (where the flag is inherited from the
459 * parent socket). However since there is no standard document
460 * requiring a specific behaviour in this case it's safer to always set
461 * it (at least for now) --andrei
462 * TODO: check on which OSes O_NONBLOCK is inherited and make this
465 * return -1 on error */
466 static int init_sock_opt_accept(int s)
/* same get-flags / set-flags sequence as at the end of init_sock_opt():
 * read the file status flags, then write them back with O_NONBLOCK added.
 * NOTE(review): the first log message below spells the call fnctl instead
 * of fcntl. */
471 flags=fcntl(s, F_GETFL);
473 LOG(L_ERR, "ERROR: init_sock_opt_accept: fnctl failed: (%d) %s\n",
474 errno, strerror(errno));
477 if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
478 LOG(L_ERR, "ERROR: init_sock_opt_accept: "
479 "fcntl: set non-blocking failed: (%d) %s\n",
480 errno, strerror(errno));
490 /** close a socket, handling errno.
491 * On EINTR, repeat the close().
492 * Filter expected errors (return success if close() failed because
493 * EPIPE, ECONNRESET a.s.o). Note that this happens on *BSDs (on linux close()
494 * does not fail for socket level errors).
495 * @param s - open valid socket.
496 * @return - 0 on success, < 0 on error (whatever close() returns). On error
499 static int tcp_safe_close(int s)
503 if (unlikely((ret = close(s)) < 0 )) {
513 /* on *BSD we really get these errors at close() time
526 /* blocking connect on a non-blocking fd; it will timeout after
527 * tcp_connect_timeout
528 * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
529 * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
531 static int tcp_blocking_connect(int fd, int type, snd_flags_t* send_flags,
532 const struct sockaddr *servaddr,
536 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
539 struct timeval timeout;
547 unsigned int err_len;
551 to=cfg_get(tcp, tcp_cfg, connect_timeout_s);
554 n=connect(fd, servaddr, addrlen);
557 elapsed=(get_ticks()-ticks)*TIMER_TICK;
558 if (elapsed<to) goto again;
559 else goto error_timeout;
561 if (errno!=EINPROGRESS && errno!=EALREADY){
566 /* poll/select loop */
567 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
569 FD_SET(fd, &orig_set);
/* elapsed is recomputed from the tick counter on each pass, so the wait
 * below always uses the time that is still left (to - elapsed, with to
 * taken from the connect_timeout_s config value) */
575 elapsed=(get_ticks()-ticks)*TIMER_TICK;
578 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
580 timeout.tv_sec=to-elapsed;
582 n=select(fd+1, 0, &sel_set, 0, &timeout);
/* poll() takes milliseconds, hence the *1000 */
584 n=poll(&pf, 1, (to-elapsed)*1000);
/* an interrupted wait is simply retried; any other failure is logged */
587 if (errno==EINTR) continue;
588 LOG(L_ERR, "ERROR: tcp_blocking_connect %s: poll/select failed:"
590 su2a((union sockaddr_union*)servaddr, addrlen),
591 errno, strerror(errno));
593 }else if (n==0) /* timeout */ continue;
594 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
595 if (FD_ISSET(fd, &sel_set))
597 if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){
598 LOG(L_ERR, "ERROR: tcp_blocking_connect %s: poll error: "
600 su2a((union sockaddr_union*)servaddr, addrlen),
/* fetch the pending socket error to find out how the connect ended:
 * err==0 with no poll error means success, EINPROGRESS/EALREADY means
 * keep waiting, anything else is a failure.
 * NOTE(review): the getsockopt() return value is not checked and err_len
 * is declared unsigned int while getsockopt() expects a socklen_t
 * pointer - confirm the two types match on all supported platforms. */
607 getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
608 if ((err==0) && (poll_err==0)) goto end;
609 if (err!=EINPROGRESS && err!=EALREADY){
610 LOG(L_ERR, "ERROR: tcp_blocking_connect %s: SO_ERROR (%d) "
612 su2a((union sockaddr_union*)servaddr, addrlen),
/* error reporting: depending on the error the destination is blacklisted
 * (only if USE_DST_BLACKLIST is defined) and the matching connect event
 * (unreachable, timeout, reset, no more ports or generic error) is
 * generated */
623 #ifdef USE_DST_BLACKLIST
624 dst_blacklist_su(BLST_ERR_CONNECT, type,
625 (union sockaddr_union*)servaddr, send_flags, 0);
626 #endif /* USE_DST_BLACKLIST */
627 TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0,
628 (union sockaddr_union*)servaddr, type);
631 #ifdef USE_DST_BLACKLIST
632 dst_blacklist_su(BLST_ERR_CONNECT, type,
633 (union sockaddr_union*)servaddr, send_flags, 0);
634 #endif /* USE_DST_BLACKLIST */
635 TCP_EV_CONNECT_TIMEOUT(errno, 0, 0,
636 (union sockaddr_union*)servaddr, type);
640 #ifdef USE_DST_BLACKLIST
641 dst_blacklist_su(BLST_ERR_CONNECT, type,
642 (union sockaddr_union*)servaddr, send_flags, 0);
643 #endif /* USE_DST_BLACKLIST */
644 TCP_EV_CONNECT_RST(errno, 0, 0,
645 (union sockaddr_union*)servaddr, type);
647 case EAGAIN: /* not posix, but supported on linux and bsd */
648 TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0,
649 (union sockaddr_union*)servaddr, type);
652 TCP_EV_CONNECT_ERR(errno, 0, 0,
653 (union sockaddr_union*)servaddr, type);
655 LOG(L_ERR, "ERROR: tcp_blocking_connect %s: (%d) %s\n",
656 su2a((union sockaddr_union*)servaddr, addrlen),
657 errno, strerror(errno));
/* local timeout (the configured connect timeout elapsed): no errno is
 * available here, so 0 is passed to the timeout event */
661 #ifdef USE_DST_BLACKLIST
662 dst_blacklist_su(BLST_ERR_CONNECT, type,
663 (union sockaddr_union*)servaddr, send_flags, 0);
664 #endif /* USE_DST_BLACKLIST */
665 TCP_EV_CONNECT_TIMEOUT(0, 0, 0, (union sockaddr_union*)servaddr, type);
666 LOG(L_ERR, "ERROR: tcp_blocking_connect %s: timeout %d s elapsed "
667 "from %d s\n", su2a((union sockaddr_union*)servaddr, addrlen),
668 elapsed, cfg_get(tcp, tcp_cfg, connect_timeout_s));
670 TCP_STATS_CONNECT_FAILED();
/* true if the write buffer queue of connection con has no blocks
 * (wbuf_q.first is null); the field is read without any locking here */
682 #define _wbufq_empty(con) ((con)->wbuf_q.first==0)
/* logical negation of _wbufq_empty(): at least one block is queued */
684 #define _wbufq_non_empty(con) ((con)->wbuf_q.first!=0)
687 /* unsafe version, call while holding the connection write lock */
/* appends data to the end of the write queue of connection c; used by
 * _wbufq_insert() when the queue is empty. tcp_total_wq is increased by
 * the number of bytes copied. */
688 inline static int _wbufq_add(struct tcp_connection* c, const char* data,
691 struct tcp_wbuffer_queue* q;
692 struct tcp_wbuffer* wb;
693 unsigned int last_free;
694 unsigned int wb_size;
695 unsigned int crt_size;
/* refuse the request if the per connection limit (tcpconn_wq_max) or the
 * global limit (tcp_wq_max) would be exceeded, or if the write timeout of
 * the queue is already in the past */
700 if (unlikely( ((q->queued+size)>cfg_get(tcp, tcp_cfg, tcpconn_wq_max)) ||
701 ((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max)) ||
703 TICKS_LT(q->wr_timeout, t)) )){
704 LOG(L_ERR, "ERROR: wbufq_add(%d bytes): write queue full or timeout "
705 " (%d, total %d, last write %d s ago)\n",
706 size, q->queued, *tcp_total_wq,
707 TICKS_TO_S(t-(q->wr_timeout-
708 cfg_get(tcp, tcp_cfg, send_timeout))));
/* timeout on a non-empty queue: reported as a connect timeout while the
 * connection is still in S_CONN_CONNECT, as a send timeout otherwise */
709 if (q->first && TICKS_LT(q->wr_timeout, t)){
710 if (unlikely(c->state==S_CONN_CONNECT)){
711 #ifdef USE_DST_BLACKLIST
712 dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
713 &c->rcv.src_su, &c->send_flags, 0);
714 #endif /* USE_DST_BLACKLIST */
715 TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(c), TCP_LPORT(c),
716 TCP_PSU(c), TCP_PROTO(c));
717 TCP_STATS_CONNECT_FAILED();
719 #ifdef USE_DST_BLACKLIST
720 dst_blacklist_su( BLST_ERR_SEND, c->rcv.proto,
721 &c->rcv.src_su, &c->send_flags, 0);
722 #endif /* USE_DST_BLACKLIST */
723 TCP_EV_SEND_TIMEOUT(0, &c->rcv);
724 TCP_STATS_SEND_TIMEOUT();
727 /* if it's not a timeout => queue full */
728 TCP_EV_SENDQ_FULL(0, &c->rcv);
729 TCP_STATS_SENDQ_FULL();
/* empty queue: allocate the first block, large enough for size but never
 * smaller than the configured wq_blk_size. The -1 in the allocation size
 * presumably accounts for a one byte buf member at the end of
 * struct tcp_wbuffer - confirm against its declaration. */
734 if (unlikely(q->last==0)){
735 wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
736 wb=shm_malloc(sizeof(*wb)+wb_size-1);
/* start the write timeout: connect timeout (converted to ticks) while
 * connecting, send_timeout (used as is, already in ticks according to the
 * file changelog) otherwise */
745 q->wr_timeout=get_ticks_raw()+
746 ((c->state==S_CONN_CONNECT)?
747 S_TO_TICKS(cfg_get(tcp, tcp_cfg, connect_timeout_s)):
748 cfg_get(tcp, tcp_cfg, send_timeout));
/* free space left in the last block of the queue */
754 last_free=wb->b_size-q->last_used;
756 wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
757 wb=shm_malloc(sizeof(*wb)+wb_size-1);
765 last_free=wb->b_size;
/* copy as much as fits into the current block and account for it */
767 crt_size=MIN_unsigned(last_free, size);
768 memcpy(wb->buf+q->last_used, data, crt_size);
769 q->last_used+=crt_size;
773 atomic_add_int((int*)tcp_total_wq, crt_size);
782 /* unsafe version, call while holding the connection write lock
783 * inserts data at the beginning, it ignores the per-connection max queue
 * size check and the timeout (only the global tcp_wq_max limit is checked)
784 * (use sparingly)
785 * Note: it should never be called on a write buffer after wbufq_run() */
786 inline static int _wbufq_insert(struct tcp_connection* c, const char* data,
789 struct tcp_wbuffer_queue* q;
790 struct tcp_wbuffer* wb;
793 if (likely(q->first==0)) /* if empty, use wbufq_add */
794 return _wbufq_add(c, data, size);
/* NOTE(review): the elapsed time printed below is computed as
 * now - wr_timeout - send_timeout, while _wbufq_add() prints
 * now - (wr_timeout - send_timeout); one of the two looks wrong. */
796 if (unlikely((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max))){
797 LOG(L_ERR, "ERROR: wbufq_insert(%d bytes): write queue full"
798 " (%d, total %d, last write %d s ago)\n",
799 size, q->queued, *tcp_total_wq,
800 TICKS_TO_S(get_ticks_raw()-q->wr_timeout-
801 cfg_get(tcp, tcp_cfg, send_timeout)));
/* a non-zero offset means part of the first block was already consumed,
 * so nothing can be put in front of it any more.
 * NOTE(review): the two string pieces below are concatenated without a
 * space between should and never. */
804 if (unlikely(q->offset)){
805 LOG(L_CRIT, "BUG: wbufq_insert: non-null offset %d (bad call, should"
806 "never be called after the wbufq_run())\n", q->offset);
809 if ((q->first==q->last) && ((q->last->b_size-q->last_used)>=size)){
810 /* one block with enough space in it for size bytes */
/* NOTE(review): the block currently holds q->last_used bytes, but the
 * memmove() below shifts only size bytes. If last_used > size the tail of
 * the queued data is not moved and gets overwritten or lost; if
 * last_used < size the move can write up to buf+2*size, which the check
 * above (b_size-last_used >= size) does not bound. The length argument
 * should most likely be q->last_used. */
811 memmove(q->first->buf+size, q->first->buf, size);
812 memcpy(q->first->buf, data, size);
815 /* create a size bytes block directly */
816 wb=shm_malloc(sizeof(*wb)+size-1);
823 memcpy(wb->buf, data, size);
/* account for the inserted bytes in the global write queue counter */
827 atomic_add_int((int*)tcp_total_wq, size);
835 /* unsafe version, call while holding the connection write lock */
/* drops everything queued in q: the queue structure is reset to all
 * zeroes and the number of bytes that were still queued is subtracted
 * from the global tcp_total_wq counter */
836 inline static void _wbufq_destroy( struct tcp_wbuffer_queue* q)
838 struct tcp_wbuffer* wb;
839 struct tcp_wbuffer* next_wb;
843 if (likely(q->first)){
/* only the last block can be partially filled (last_used bytes); every
 * other block counts with its full size */
847 unqueued+=(wb==q->last)?q->last_used:wb->b_size;
854 memset(q, 0, sizeof(*q));
855 atomic_add_int((int*)tcp_total_wq, -unqueued);
860 /* tries to empty the queue (safe version, c->write_lock must not be held)
861 * returns -1 on error, bytes written on success (>=0)
862 * if the whole queue is emptied => sets *empty */
863 inline static int wbufq_run(int fd, struct tcp_connection* c, int* empty)
865 struct tcp_wbuffer_queue* q;
866 struct tcp_wbuffer* wb;
/* the write lock is taken here, which is why the caller must not hold it */
874 lock_get(&c->write_lock);
/* bytes available in the first block: the last block is filled only up to
 * last_used, any other block is full; writing starts at q->offset */
877 block_size=((q->first==q->last)?q->last_used:q->first->b_size)-
879 buf=q->first->buf+q->offset;
880 n=_tcpconn_write_nb(fd, c, buf, block_size);
/* whole block written: unlink it and update the per connection and the
 * global byte counters */
883 if (likely(n==block_size)){
885 q->first=q->first->next;
888 q->queued-=block_size;
889 atomic_add_int((int*)tcp_total_wq, -block_size);
/* partial write: only n bytes leave the global counter */
893 atomic_add_int((int*)tcp_total_wq, -n);
898 /* EINTR is handled inside _tcpconn_write_nb */
/* EAGAIN/EWOULDBLOCK is not an error for a non-blocking write; everything
 * else is reported as a connect failure while the connection is still in
 * S_CONN_CONNECT and as a send failure otherwise */
899 if (!(errno==EAGAIN || errno==EWOULDBLOCK)){
900 if (unlikely(c->state==S_CONN_CONNECT)){
903 case EHOSTUNREACH: /* not posix for send() */
904 #ifdef USE_DST_BLACKLIST
905 dst_blacklist_su(BLST_ERR_CONNECT,
909 #endif /* USE_DST_BLACKLIST */
910 TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
911 TCP_LPORT(c), TCP_PSU(c),
916 #ifdef USE_DST_BLACKLIST
917 dst_blacklist_su(BLST_ERR_CONNECT,
921 #endif /* USE_DST_BLACKLIST */
/* NOTE(review): 0 is passed as the error here while the neighbouring
 * connect events in this function pass errno - confirm this is intended */
922 TCP_EV_CONNECT_RST(0, TCP_LADDR(c),
923 TCP_LPORT(c), TCP_PSU(c),
927 TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
928 TCP_LPORT(c), TCP_PSU(c),
931 TCP_STATS_CONNECT_FAILED();
936 TCP_STATS_CON_RESET();
939 case EHOSTUNREACH: /* not posix for send() */
940 #ifdef USE_DST_BLACKLIST
941 dst_blacklist_su(BLST_ERR_SEND,
945 #endif /* USE_DST_BLACKLIST */
/* NOTE(review): the log prefix below reads wbuf_runq, the function is
 * named wbufq_run */
950 LOG(L_ERR, "ERROR: wbuf_runq: %s [%d]\n",
951 strerror(errno), errno);
957 if (likely(q->first==0)){
963 lock_release(&c->write_lock);
/* restart the send timeout.
 * NOTE(review): q->wr_timeout is written after the write lock was
 * released above - confirm that no other writer can touch the queue
 * at this point. */
965 q->wr_timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, send_timeout);
/* first successful write on a connection that was still connecting or
 * just accepted: count it as established */
966 if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
967 TCP_STATS_ESTABLISHED(c->state);
974 #endif /* TCP_ASYNC */
979 /* blocking write even on non-blocking sockets
980 * if the send timeout (tcp_send_timeout) is exceeded it will return with
 * error */
981 static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
986 struct timeval timeout;
994 #ifdef HAVE_MSG_NOSIGNAL
/* an interrupted send is retried; EAGAIN/EWOULDBLOCK falls through to the
 * select() based wait further down.
 * NOTE(review): any other send error is reported through the
 * SEND_TIMEOUT event and statistic although it is not a timeout. */
1001 if (errno==EINTR) goto again;
1002 else if (errno!=EAGAIN && errno!=EWOULDBLOCK){
1003 LOG(L_ERR, "tcp_blocking_write: failed to send: (%d) %s\n",
1004 errno, strerror(errno));
1005 TCP_EV_SEND_TIMEOUT(errno, &c->rcv);
1006 TCP_STATS_SEND_TIMEOUT();
1014 /* success: full write */
/* wait until fd becomes writable again, for at most tcp_send_timeout
 * seconds per select() call; fd must fit into an fd_set for this to be
 * safe */
1019 FD_SET(fd, &sel_set);
1020 timeout.tv_sec=tcp_send_timeout;
1023 n=select(fd+1, 0, &sel_set, 0, &timeout);
1025 if (errno==EINTR) continue; /* signal, ignore */
1026 LOG(L_ERR, "ERROR: tcp_blocking_write: select failed: "
1027 " (%d) %s\n", errno, strerror(errno));
/* overall deadline, measured with the tick counter since the start */
1031 if (get_ticks()-ticks>=tcp_send_timeout){
1032 LOG(L_ERR, "ERROR: tcp_blocking_write: send timeout (%d)\n",
1038 if (FD_ISSET(fd, &sel_set)){
1039 /* we can write again */
/* allocates and initializes a new tcp_connection for socket sock.
 * su         - peer address, copied into rcv.src_ip / rcv.src_port
 * local_addr - local address of the connection; when it is null the
 *              address and port of ba are used instead (ba is
 *              dereferenced in that case)
 * ba         - socket_info stored in rcv.bind_address
 * type       - PROTO_TLS triggers tls_tcpconn_init(), see below
 * The structure and its read buffer (rd_buf_size bytes, taken from the tcp
 * config) are allocated as a single shm block; the new connection starts
 * with refcnt 0 and fd -1. */
1052 struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
1053 union sockaddr_union* local_addr,
1054 struct socket_info* ba, int type,
1057 struct tcp_connection *c;
/* one allocation: connection structure followed by the read buffer */
1060 rd_b_size=cfg_get(tcp, tcp_cfg, rd_buf_size);
1061 c=shm_malloc(sizeof(struct tcp_connection) + rd_b_size);
1063 LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
1066 memset(c, 0, sizeof(struct tcp_connection)); /* zero init (skip rd buf)*/
1068 c->fd=-1; /* not initialized */
/* lock_init() signals failure by returning 0 */
1069 if (lock_init(&c->write_lock)==0){
1070 LOG(L_ERR, "ERROR: tcpconn_new: init lock failed\n");
1076 atomic_set(&c->refcnt, 0);
/* per connection timer; tcpconn_main_timeout gets the connection itself
 * as its data argument */
1077 local_timer_init(&c->timer, tcpconn_main_timeout, c, 0);
1078 su2ip_addr(&c->rcv.src_ip, su);
1079 c->rcv.src_port=su_getport(su);
1080 c->rcv.bind_address=ba;
1081 if (likely(local_addr)){
1082 su2ip_addr(&c->rcv.dst_ip, local_addr);
1083 c->rcv.dst_port=su_getport(local_addr);
1085 c->rcv.dst_ip=ba->address;
1086 c->rcv.dst_port=ba->port_no;
1088 print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
1089 DBG( "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
/* the read buffer lives right behind the connection structure */
1090 init_tcp_req(&c->req, (char*)c+sizeof(struct tcp_connection), rd_b_size);
/* take the next connection id; plain post-increment of the counter that
 * connection_id points to (no atomic op or lock visible at this point -
 * presumably callers serialize, confirm) */
1091 c->id=(*connection_id)++;
1092 c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
1093 c->rcv.proto_reserved2=0;
1097 if (type==PROTO_TLS){
1098 if (tls_tcpconn_init(c, sock)==-1) goto error;
1103 c->rcv.proto=PROTO_TCP;
/* absolute expiry time in raw ticks: now + configured connection lifetime */
1104 c->timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, con_lifetime);
1116 /* do the actual connect, set sock. options a.s.o
1117 * returns socket on success, -1 on error
1118 * sets also *res_local_addr, res_si and state (S_CONN_CONNECT for an
1119 * unfinished connect and S_CONN_OK for a finished one)*/
1120 inline static int tcp_do_connect( union sockaddr_union* server,
1121 union sockaddr_union* from,
1123 snd_flags_t* send_flags,
1124 union sockaddr_union* res_local_addr,
1125 struct socket_info** res_si,
1126 enum tcp_conn_states *state
1130 union sockaddr_union my_name;
1131 socklen_t my_name_len;
1135 #endif /* TCP_ASYNC */
/* new stream socket in the protocol family of the destination */
1137 s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
1138 if (unlikely(s==-1)){
1139 LOG(L_ERR, "ERROR: tcp_do_connect %s: socket: (%d) %s\n",
1140 su2a(server, sizeof(*server)), errno, strerror(errno));
/* socket options and non-blocking mode, see init_sock_opt() */
1143 if (init_sock_opt(s)<0){
1144 LOG(L_ERR, "ERROR: tcp_do_connect %s: init_sock_opt failed\n",
1145 su2a(server, sizeof(*server)));
/* optional bind to the requested source address; a failure is logged as
 * a warning */
1149 if (unlikely(from && bind(s, &from->s, sockaddru_len(*from)) != 0)){
1150 LOG(L_WARN, "WARNING: tcp_do_connect: binding to source address"
1151 " %s failed: %s [%d]\n", su2a(from, sizeof(*from)),
1152 strerror(errno), errno);
/* async mode: start a non-blocking connect. EINPROGRESS is the expected
 * result and leaves the connection in S_CONN_CONNECT, EINTR restarts the
 * connect, any error other than EALREADY is reported below */
1156 if (likely(cfg_get(tcp, tcp_cfg, async))){
1158 n=connect(s, &server->s, sockaddru_len(*server));
1159 if (likely(n==-1)){ /*non-blocking => most probable EINPROGRESS*/
1160 if (likely(errno==EINPROGRESS))
1161 *state=S_CONN_CONNECT;
1162 else if (errno==EINTR) goto again;
1163 else if (errno!=EALREADY){
1167 #ifdef USE_DST_BLACKLIST
1168 dst_blacklist_su(BLST_ERR_CONNECT, type, server,
1170 #endif /* USE_DST_BLACKLIST */
1171 TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0, server, type);
1174 #ifdef USE_DST_BLACKLIST
1175 dst_blacklist_su(BLST_ERR_CONNECT, type, server,
1177 #endif /* USE_DST_BLACKLIST */
1178 TCP_EV_CONNECT_TIMEOUT(errno, 0, 0, server, type);
1182 #ifdef USE_DST_BLACKLIST
1183 dst_blacklist_su(BLST_ERR_CONNECT, type, server,
1185 #endif /* USE_DST_BLACKLIST */
1186 TCP_EV_CONNECT_RST(errno, 0, 0, server, type);
1188 case EAGAIN:/* not posix, but supported on linux and bsd */
1189 TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0, server,type);
1192 TCP_EV_CONNECT_ERR(errno, 0, 0, server, type);
1194 TCP_STATS_CONNECT_FAILED();
1195 LOG(L_ERR, "ERROR: tcp_do_connect: connect %s: (%d) %s\n",
1196 su2a(server, sizeof(*server)),
1197 errno, strerror(errno));
1202 #endif /* TCP_ASYNC */
/* blocking variant: wait for the connect to finish (or time out) inside
 * tcp_blocking_connect(), which does its own error reporting */
1203 if (tcp_blocking_connect(s, type, send_flags, &server->s,
1204 sockaddru_len(*server))<0){
1205 LOG(L_ERR, "ERROR: tcp_do_connect: tcp_blocking_connect %s"
1206 " failed\n", su2a(server, sizeof(*server)));
1211 #endif /* TCP_ASYNC */
/* find out the local address of the socket: use from if it already holds
 * a specific (not any) ip, otherwise ask the kernel via getsockname() */
1213 su2ip_addr(&ip, from);
1214 if (!ip_addr_any(&ip))
1215 /* we already know the source ip, skip the sys. call */
1218 my_name_len=sizeof(my_name);
1219 if (unlikely(getsockname(s, &my_name.s, &my_name_len)!=0)){
1220 LOG(L_ERR, "ERROR: tcp_do_connect: getsockname failed: %s(%d)\n",
1221 strerror(errno), errno);
1225 from=&my_name; /* update from with the real "from" address */
1226 su2ip_addr(&ip, &my_name);
/* look up the listening socket that matches the local ip (port 0 is
 * passed to find_si()); if none is found fall back to the default send
 * socket for the address family of the destination */
1229 if (unlikely(type==PROTO_TLS))
1230 *res_si=find_si(&ip, 0, PROTO_TLS);
1233 *res_si=find_si(&ip, 0, PROTO_TCP);
1235 if (unlikely(*res_si==0)){
1236 LOG(L_WARN, "WARNING: tcp_do_connect %s: could not find corresponding"
1237 " listening socket for %s, using default...\n",
1238 su2a(server, sizeof(*server)), ip_addr2a(&ip));
1239 if (server->s.sa_family==AF_INET) *res_si=sendipv4_tcp;
1241 else *res_si=sendipv6_tcp;
/* hand the local address back to the caller */
1244 *res_local_addr=*from;
/* error path: do not leak the socket */
1247 if (s!=-1) tcp_safe_close(s);
/* opens a new outgoing connection to server and wraps it in a
 * tcp_connection.
 * server     - destination address
 * from       - optional source address, passed on to tcp_do_connect()
 * type       - protocol (PROTO_TLS gets an additional connection limit
 *              check)
 * send_flags - passed to tcp_do_connect() and stored in the new connection
 * The connect is refused up front if the number of open connections has
 * reached the configured max_connections (or max_tls_connections for
 * tls). */
1253 struct tcp_connection* tcpconn_connect( union sockaddr_union* server,
1254 union sockaddr_union* from,
1255 int type, snd_flags_t* send_flags)
1258 struct socket_info* si;
1259 union sockaddr_union my_name;
1260 struct tcp_connection* con;
1261 enum tcp_conn_states state;
/* global limit: tcp_connections_no counts tcp and tls connections */
1265 if (*tcp_connections_no >= cfg_get(tcp, tcp_cfg, max_connections)){
1266 LOG(L_ERR, "ERROR: tcpconn_connect: maximum number of connections"
1267 " exceeded (%d/%d)\n",
1268 *tcp_connections_no,
1269 cfg_get(tcp, tcp_cfg, max_connections));
/* separate limit for tls connections */
1272 if (unlikely(type==PROTO_TLS)) {
1273 if (*tls_connections_no >= cfg_get(tcp, tcp_cfg, max_tls_connections)){
1274 LM_ERR("ERROR: maximum number of tls connections"
1275 " exceeded (%d/%d)\n",
1276 *tls_connections_no,
1277 cfg_get(tcp, tcp_cfg, max_tls_connections));
/* socket creation and connect; fills in the local address, the matching
 * socket_info and the resulting connection state */
1282 s=tcp_do_connect(server, from, type, send_flags, &my_name, &si, &state);
1284 LOG(L_ERR, "ERROR: tcp_do_connect %s: failed (%d) %s\n",
1285 su2a(server, sizeof(*server)), errno, strerror(errno));
1288 con=tcpconn_new(s, server, &my_name, si, type, state);
1290 LOG(L_ERR, "ERROR: tcp_connect %s: tcpconn_new failed, closing the "
1291 " socket\n", su2a(server, sizeof(*server)));
/* NOTE(review): send_flags is dereferenced unconditionally here, so the
 * caller must always pass a valid pointer */
1294 tcpconn_set_send_flags(con, *send_flags);
1297 if (s!=-1) tcp_safe_close(s); /* close the opened socket */
1303 #ifdef TCP_CONNECT_WAIT
1304 int tcpconn_finish_connect( struct tcp_connection* c,
1305 union sockaddr_union* from)
1309 union sockaddr_union local_addr;
1310 struct socket_info* si;
1311 enum tcp_conn_states state;
1312 struct tcp_conn_alias* a;
1313 int new_conn_alias_flags;
1315 s=tcp_do_connect(&c->rcv.src_su, from, c->type, &c->send_flags,
1316 &local_addr, &si, &state);
1317 if (unlikely(s==-1)){
1318 LOG(L_ERR, "ERROR: tcpconn_finish_connect %s: tcp_do_connect for %p"
1319 " failed\n", su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
1323 c->rcv.bind_address=si;
1324 su2ip_addr(&c->rcv.dst_ip, &local_addr);
1325 c->rcv.dst_port=su_getport(&local_addr);
1326 /* update aliases if needed */
1327 if (likely(from==0)){
1328 new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
1331 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
1332 new_conn_alias_flags);
1333 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1334 c->rcv.dst_port, new_conn_alias_flags);
1336 }else if (su_cmp(from, &local_addr)!=1){
1337 new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
1339 /* remove all the aliases except the first one and re-add them
1340 * (there shouldn't be more than the 3 default aliases at this
1342 for (r=1; r<c->aliases; r++){
1343 a=&c->con_aliases[r];
1344 tcpconn_listrm(tcpconn_aliases_hash[a->hash], a, next, prev);
1347 /* add the local_ip:0 and local_ip:local_port aliases */
1348 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1349 0, new_conn_alias_flags);
1350 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1351 c->rcv.dst_port, new_conn_alias_flags);
1357 #endif /* TCP_CONNECT_WAIT */
1361 /* adds a tcp connection to the tcpconn hashes
1362 * Note: it's called _only_ from the tcp_main process
 * NOTE(review): tcp_send() also calls tcpconn_add() on its
 * TCP_CONNECT_WAIT && TCP_ASYNC path, so the "_only_" above looks stale -
 * confirm.
 * Steps shown here: compute c->id_hash from c->id, set F_CONN_HASHED, link
 * c into its id-hash bucket, then register the default aliases, all keyed
 * on c->rcv.src_port.
 * NOTE(review): the return statements are not shown here; tcp_send()
 * treats a 0 result as failure - confirm. */
1363 inline static struct tcp_connection* tcpconn_add(struct tcp_connection *c)
1365 struct ip_addr zero_ip;
1366 int new_conn_alias_flags;
/* zero_ip is the "any" address of the peer's address family; it is used as
 * the local-ip key of the first alias */
1369 ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
1370 c->id_hash=tcp_id_hash(c->id);
1372 new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
1374 c->flags|=F_CONN_HASHED;
1375 /* add it at the beginning of the list */
1376 tcpconn_listadd(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1377 /* set the aliases */
1378 /* first alias is for (peer_ip, peer_port, 0 ,0) -- for finding
1379 * any connection to peer_ip, peer_port
1380 * the second alias is for (peer_ip, peer_port, local_addr, 0) -- for
1381 * finding any connection to peer_ip, peer_port from local_addr
1382 * the third alias is for (peer_ip, peer_port, local_addr, local_port)
1383 * -- for finding if a fully specified connection exists */
1384 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &zero_ip, 0,
1385 new_conn_alias_flags);
/* the two local-address aliases are added only when the local ip is known,
 * i.e. its address family is set and it is not the "any" address */
1386 if (likely(c->rcv.dst_ip.af && ! ip_addr_any(&c->rcv.dst_ip))){
1387 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
1388 new_conn_alias_flags);
1389 _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
1390 c->rcv.dst_port, new_conn_alias_flags);
1392 /* ignore add_alias errors, there are some valid cases when one
1393 * of the add_alias would fail (e.g. first add_alias for 2 connections
1394 * with the same destination but different src. ip) */
1396 DBG("tcpconn_add: hashes: %d:%d:%d, %d\n",
1397 c->con_aliases[0].hash,
1398 c->con_aliases[1].hash,
1399 c->con_aliases[2].hash,
1403 LOG(L_CRIT, "tcpconn_add: BUG: null connection pointer\n");
/* removes c from the tcpconn hashes: unlinks it from its id-hash bucket
 * (selected by c->id_hash) and unlinks each of the first c->aliases entries
 * of c->con_aliases[] from the alias-hash bucket recorded in that entry */
1409 static inline void _tcpconn_detach(struct tcp_connection *c)
1412 tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1413 /* remove all the aliases */
1414 for (r=0; r<c->aliases; r++)
1415 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash],
1416 &c->con_aliases[r], next, prev);
/* releases what is attached to c, as far as shown here: destroys the write
 * buffer queue (only when it is non-empty), destroys the write lock and,
 * for TLS/WSS connections, calls tls_tcpconn_clean(c).
 * NOTE(review): tcpconn_rm() additionally requires c->extra_data to be set
 * before calling tls_tcpconn_clean(); this function does not - confirm that
 * the difference is intended. */
1422 static inline void _tcpconn_free(struct tcp_connection* c)
1425 if (unlikely(_wbufq_non_empty(c)))
1426 _wbufq_destroy(&c->wbuf_q);
1428 lock_destroy(&c->write_lock);
1430 if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) tls_tcpconn_clean(c);
1437 /* unsafe tcpconn_rm version (nolocks) */
1438 void _tcpconn_rm(struct tcp_connection* c)
/* removes c from the id hash and from the alias hash, then destroys its
 * write lock and, for TLS/WSS connections that have extra_data set, calls
 * tls_tcpconn_clean(c).
 * NOTE(review): presumably the locking counterpart of _tcpconn_rm() (which
 * is described as the "nolocks" version); the locking itself is not shown
 * here - confirm. */
1446 void tcpconn_rm(struct tcp_connection* c)
1450 tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
1451 /* remove all the aliases */
1452 for (r=0; r<c->aliases; r++)
1453 tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash],
1454 &c->con_aliases[r], next, prev);
1457 lock_destroy(&c->write_lock);
1459 if ((c->type==PROTO_TLS || c->type==PROTO_WSS)&&(c->extra_data)) tls_tcpconn_clean(c);
1465 /* finds a connection, if id=0 uses the ip addr, port, local_ip and local port
1466 * (host byte order) and tries to find the connection that matches all of
1467 * them. Wild cards can be used for local_ip and local_port (a 0 filled
1468 * ip address and/or a 0 local port).
 * The id lookup and the address lookup are alternative branches: a lookup
 * by id does not fall back to the address.
 * Connections in state S_CONN_BAD are never returned.
 * l_ip is passed to tcp_addr_hash() and ip_addr_any() on the address path,
 * so it must be a valid pointer there; tcpconn_add_alias() passes 0 for it
 * together with an id.
 * NOTE(review): the not-found return is not shown here; callers compare the
 * result with 0 - confirm.
1469 * WARNING: unprotected (locks) use tcpconn_get unless you really
1470 * know what you are doing */
1471 struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port,
1472 struct ip_addr* l_ip, int l_port)
1475 struct tcp_connection *c;
1476 struct tcp_conn_alias* a;
1478 int is_local_ip_any;
1481 DBG("tcpconn_find: %d port %d\n",id, port);
1482 if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
/* lookup by id: walk the id-hash bucket and return the first connection
 * with this id that is not marked bad */
1485 hash=tcp_id_hash(id);
1486 for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
1488 DBG("c=%p, c->id=%d, port=%d\n",c, c->id, c->rcv.src_port);
1489 print_ip("ip=", &c->rcv.src_ip, "\n");
1491 if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
1493 }else if (likely(ip)){
/* lookup by address: walk the alias-hash bucket; the peer port is compared
 * with the alias port, the addresses and the local port with the values
 * stored in the alias' parent connection */
1494 hash=tcp_addr_hash(ip, port, l_ip, l_port);
1495 is_local_ip_any=ip_addr_any(l_ip);
1496 for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
1498 DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
1499 a->parent->id, a->port, a->parent->rcv.src_port);
1500 print_ip("ip=",&a->parent->rcv.src_ip,"\n");
1502 if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
1503 ((l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
1504 (ip_addr_cmp(ip, &a->parent->rcv.src_ip)) &&
1506 ip_addr_cmp(l_ip, &a->parent->rcv.dst_ip))
1516 /* _tcpconn_find with locks and timeout
1517 * local_addr contains the desired local ip:port. If null any local address
1518 * will be used. IN*ADDR_ANY or 0 port are wild cards.
1520 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
1521 union sockaddr_union* local_addr,
1524 struct tcp_connection* c;
1525 struct ip_addr local_ip;
1530 if (unlikely(local_addr)){
1531 su2ip_addr(&local_ip, local_addr);
1532 local_port=su_getport(local_addr);
1534 ip_addr_mk_any(ip->af, &local_ip);
1539 c=_tcpconn_find(id, ip, port, &local_ip, local_port);
1541 atomic_inc(&c->refcnt);
1542 /* update the timeout only if the connection is not handled
1543 * by a tcp reader _and_ the timeout is non-zero (the tcp
1544 * reader process uses c->timeout for its own internal
1545 * timeout and c->timeout will be overwritten anyway on
1546 * return to tcp_main) */
1547 if (likely(c->reader_pid==0 && timeout != 0))
1548 c->timeout=get_ticks_raw()+timeout;
1556 /* add c->dst:port, local_addr as an alias for the "id" connection,
1557 * flags: TCP_ALIAS_FORCE_ADD - add an alias even if a previous one exists
1558 * TCP_ALIAS_REPLACE - if a prev. alias exists, replace it with the
 * new one
1560 * returns 0 on success, <0 on failure ( -1 - null c, -2 too many aliases,
1561 * -3 alias already present and pointing to another connection)
 * The alias is keyed on (c->rcv.src_ip, port, l_ip, l_port); an existing
 * alias matches when its parent is not in state S_CONN_BAD and port, peer
 * ip, local ip and local port agree (an "any" l_ip or a 0 l_port match
 * everything).
1562 * WARNING: must be called with TCPCONN_LOCK held */
1563 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
1564 struct ip_addr* l_ip, int l_port,
1568 struct tcp_conn_alias* a;
1569 struct tcp_conn_alias* nxt;
1570 struct tcp_connection* p;
1571 int is_local_ip_any;
/* an "any" local ip acts as a wildcard in the comparison below */
1576 is_local_ip_any=ip_addr_any(l_ip);
1578 hash=tcp_addr_hash(&c->rcv.src_ip, port, l_ip, l_port);
1579 /* search the aliases for an already existing one */
1580 for (a=tcpconn_aliases_hash[hash], nxt=0; a; a=nxt){
1582 if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
1583 ( (l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
1584 (ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) &&
1585 ( is_local_ip_any ||
1586 ip_addr_cmp(&a->parent->rcv.dst_ip, l_ip))
1589 if (unlikely(a->parent!=c)){
1590 if (flags & TCP_ALIAS_FORCE_ADD)
1591 /* still have to walk the whole list to check if
1592 * the alias was not already added */
1594 else if (flags & TCP_ALIAS_REPLACE){
1595 /* remove the alias =>
1596 * remove the current alias and all the following
1597 * ones from the corresponding connection, shift the
1598 * connection aliases array and re-add the other
1599 * aliases (!= current one) */
/* find the index of the matching alias inside p->con_aliases[]; not
 * finding it there is reported as a BUG */
1601 for (i=0; (i<p->aliases) && (&(p->con_aliases[i])!=a);
1603 if (unlikely(i==p->aliases)){
1604 LOG(L_CRIT, "BUG: _tcpconn_add_alias_unsafe: "
1605 " alias %p not found in con %p (id %d)\n",
1607 goto error_not_found;
1609 for (r=i; r<p->aliases; r++){
1611 tcpconn_aliases_hash[p->con_aliases[r].hash],
1612 &p->con_aliases[r], next, prev);
1614 if (likely((i+1)<p->aliases)){
1615 memmove(&p->con_aliases[i], &p->con_aliases[i+1],
1617 sizeof(p->con_aliases[0]));
1620 /* re-add the remaining aliases */
1621 for (r=i; r<p->aliases; r++){
1623 tcpconn_aliases_hash[p->con_aliases[r].hash],
1624 &p->con_aliases[r], next, prev);
/* store the new alias in the next free slot of c->con_aliases[] (bounded by
 * TCP_CON_MAX_ALIASES) and link that slot into the alias-hash bucket */
1631 if (unlikely(c->aliases>=TCP_CON_MAX_ALIASES)) goto error_aliases;
1632 c->con_aliases[c->aliases].parent=c;
1633 c->con_aliases[c->aliases].port=port;
1634 c->con_aliases[c->aliases].hash=hash;
1635 tcpconn_listadd(tcpconn_aliases_hash[hash],
1636 &c->con_aliases[c->aliases], next, prev);
1638 }else goto error_not_found;
1641 if (a) DBG("_tcpconn_add_alias_unsafe: alias already present\n");
1642 else DBG("_tcpconn_add_alias_unsafe: alias port %d for hash %d, id %d\n",
1647 /* too many aliases */
1650 /* null connection */
1653 /* alias already present and pointing to a different connection
1654 * (hijack attempt?) */
1660 /* add port as an alias for the "id" connection,
1661 * returns 0 on success,-1 on failure
 * A port of 0 is replaced by the protocol default: SIPS_PORT for PROTO_TLS,
 * SIP_PORT for everything else.
 * Three aliases are requested for the connection's peer ip and this port:
 * with no local address, with the local ip only, and with the local ip and
 * local port. A -3 result (alias already present for another connection)
 * is tolerated for the first two, but not for the fully specified one. */
1662 int tcpconn_add_alias(int id, int port, int proto)
1664 struct tcp_connection* c;
1666 struct ip_addr zero_ip;
1671 port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
1673 /* look up the connection by id only (no address is passed) */
1674 c=_tcpconn_find(id, 0, 0, 0, 0);
1676 ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
1677 alias_flags=cfg_get(tcp, tcp_cfg, alias_flags);
1678 /* alias src_ip:port, 0, 0 */
1679 ret=_tcpconn_add_alias_unsafe(c, port, &zero_ip, 0,
1681 if (ret<0 && ret!=-3) goto error;
1682 /* alias src_ip:port, local_ip, 0 */
1683 ret=_tcpconn_add_alias_unsafe(c, port, &c->rcv.dst_ip, 0,
1685 if (ret<0 && ret!=-3) goto error;
1686 /* alias src_ip:port, local_ip, local_port */
1687 ret=_tcpconn_add_alias_unsafe(c, port, &c->rcv.dst_ip, c->rcv.dst_port,
1689 if (unlikely(ret<0)) goto error;
1690 }else goto error_not_found;
1695 LOG(L_ERR, "ERROR: tcpconn_add_alias: no connection found for id %d\n",id);
/* error reporting: the messages below cover the too-many-aliases case
 * (including a dump of the existing aliases), the alias-owned-by-another-
 * connection case and any other negative result */
1701 LOG(L_ERR, "ERROR: tcpconn_add_alias: too many aliases (%d)"
1702 " for connection %p (id %d) %s:%d <- %d\n",
1703 c->aliases, c, c->id, ip_addr2a(&c->rcv.src_ip),
1704 c->rcv.src_port, port);
1705 for (r=0; r<c->aliases; r++){
1706 LOG(L_ERR, "ERROR: tcpconn_add_alias: alias %d: for %p (%d)"
1707 " %s:%d <-%d hash %x\n", r, c, c->id,
1708 ip_addr2a(&c->rcv.src_ip), c->rcv.src_port,
1709 c->con_aliases[r].port, c->con_aliases[r].hash);
1713 LOG(L_ERR, "ERROR: tcpconn_add_alias: possible port"
1714 " hijack attempt\n");
1715 LOG(L_ERR, "ERROR: tcpconn_add_alias: alias for %d port %d already"
1716 " present and points to another connection \n",
1720 LOG(L_ERR, "ERROR: tcpconn_add_alias: unknown error %d\n", ret);
1729 static void tcp_fd_cache_init(void)
1732 for (r=0; r<TCP_FD_CACHE_SIZE; r++)
/* looks c up in the fd cache: the slot is selected by
 * c->id % TCP_FD_CACHE_SIZE and is returned only if it holds an fd > 0
 * that was stored for exactly this connection (same id and same pointer) */
1737 inline static struct fd_cache_entry* tcp_fd_cache_get(struct tcp_connection *c)
1741 h=c->id%TCP_FD_CACHE_SIZE;
1742 if ((fd_cache[h].fd>0) && (fd_cache[h].id==c->id) && (fd_cache[h].con==c))
1743 return &fd_cache[h];
1748 inline static void tcp_fd_cache_rm(struct fd_cache_entry* e)
/* caches fd for c in slot c->id % TCP_FD_CACHE_SIZE; an fd already held by
 * that slot (fd > 0) is closed first, so connections mapping to the same
 * slot evict each other */
1754 inline static void tcp_fd_cache_add(struct tcp_connection *c, int fd)
1758 h=c->id%TCP_FD_CACHE_SIZE;
1759 if (likely(fd_cache[h].fd>0))
1760 tcp_safe_close(fd_cache[h].fd);
1762 fd_cache[h].id=c->id;
1766 #endif /* TCP_FD_CACHE */
1770 inline static int tcpconn_chld_put(struct tcp_connection* tcpconn);
1772 static int tcpconn_send_put(struct tcp_connection* c, const char* buf,
1773 unsigned len, snd_flags_t send_flags);
1774 static int tcpconn_do_send(int fd, struct tcp_connection* c,
1775 const char* buf, unsigned len,
1776 snd_flags_t send_flags, long* resp, int locked);
1778 static int tcpconn_1st_send(int fd, struct tcp_connection* c,
1779 const char* buf, unsigned len,
1780 snd_flags_t send_flags, long* resp, int locked);
1782 /* finds a tcpconn & sends on it
1783 * uses the dst members to, proto (TCP|TLS) and id and tries to send
1784 * from the "from" address (if non null and id==0)
1785 * returns: number of bytes written (>=0) on success
1787 int tcp_send(struct dest_info* dst, union sockaddr_union* from,
1788 const char* buf, unsigned len)
1790 struct tcp_connection *c;
1796 ticks_t con_lifetime;
1798 const char* rest_buf;
1800 unsigned rest_len, t_len;
1802 snd_flags_t t_send_flags;
1803 #endif /* USE_TLS */
1805 port=su_getport(&dst->to);
1806 con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
1808 su2ip_addr(&ip, &dst->to);
1809 c=tcpconn_get(dst->id, &ip, port, from, con_lifetime);
1810 }else if (likely(dst->id)){
1811 c=tcpconn_get(dst->id, 0, 0, 0, con_lifetime);
1813 LOG(L_CRIT, "BUG: tcp_send called with null id & to\n");
1817 if (likely(dst->id)){
1818 if (unlikely(c==0)) {
1820 /* try again w/o id */
1821 c=tcpconn_get(0, &ip, port, from, con_lifetime);
1823 LOG(L_ERR, "ERROR: tcp_send: id %d not found, dropping\n",
1829 /* connection not found or unusable => open a new one and send on it */
1830 if (unlikely((c==0) || tcpconn_close_after_send(c))){
1832 /* can't use c if it's marked as close-after-send =>
1833 release it and try opening new one */
1834 tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
1837 /* check if connect() is disabled */
1838 if (unlikely((dst->send_flags.f & SND_F_FORCE_CON_REUSE) ||
1839 cfg_get(tcp, tcp_cfg, no_connect)))
1841 DBG("tcp_send: no open tcp connection found, opening new one\n");
1842 /* create tcp connection */
1843 if (likely(from==0)){
1844 /* check to see if we have to use a specific source addr. */
1845 switch (dst->to.s.sa_family) {
1847 from = tcp_source_ipv4;
1851 from = tcp_source_ipv6;
1855 /* error, bad af, ignore ... */
1859 #if defined(TCP_CONNECT_WAIT) && defined(TCP_ASYNC)
1860 if (likely(cfg_get(tcp, tcp_cfg, tcp_connect_wait) &&
1861 cfg_get(tcp, tcp_cfg, async) )){
1862 if (unlikely(*tcp_connections_no >=
1863 cfg_get(tcp, tcp_cfg, max_connections))){
1864 LOG(L_ERR, "ERROR: tcp_send %s: maximum number of"
1865 " connections exceeded (%d/%d)\n",
1866 su2a(&dst->to, sizeof(dst->to)),
1867 *tcp_connections_no,
1868 cfg_get(tcp, tcp_cfg, max_connections));
1871 if (unlikely(dst->proto==PROTO_TLS)) {
1872 if (unlikely(*tls_connections_no >=
1873 cfg_get(tcp, tcp_cfg, max_tls_connections))){
1874 LM_ERR("tcp_send %s: maximum number of"
1875 " tls connections exceeded (%d/%d)\n",
1876 su2a(&dst->to, sizeof(dst->to)),
1877 *tls_connections_no,
1878 cfg_get(tcp, tcp_cfg, max_tls_connections));
1882 c=tcpconn_new(-1, &dst->to, from, 0, dst->proto,
1884 if (unlikely(c==0)){
1885 LOG(L_ERR, "ERROR: tcp_send %s: could not create new"
1887 su2a(&dst->to, sizeof(dst->to)));
1890 c->flags|=F_CONN_PENDING|F_CONN_FD_CLOSED;
1891 tcpconn_set_send_flags(c, dst->send_flags);
1892 atomic_set(&c->refcnt, 2); /* ref from here and from main hash
1894 /* add it to id hash and aliases */
1895 if (unlikely(tcpconn_add(c)==0)){
1896 LOG(L_ERR, "ERROR: tcp_send %s: could not add "
1898 su2a(&dst->to, sizeof(dst->to)),
1904 /* do connect and if src ip or port changed, update the
1906 if (unlikely((fd=tcpconn_finish_connect(c, from))<0)){
1907 /* tcpconn_finish_connect will automatically blacklist
1908 on error => no need to do it here */
1909 LOG(L_ERR, "ERROR: tcp_send %s: tcpconn_finish_connect(%p)"
1910 " failed\n", su2a(&dst->to, sizeof(dst->to)),
1912 goto conn_wait_error;
1914 /* ? TODO: it might be faster just to queue the write directly
1915 * and send to main CONN_NEW_PENDING_WRITE */
1916 /* delay sending the fd to main after the send */
1918 /* NOTE: no lock here, because the connection is marked as
1919 * pending and nobody else will try to write on it. However
1920 * this might produce out-of-order writes. If this is not
1921 * desired either lock before the write or use
1922 * _wbufq_insert(...)
1923 * NOTE2: _wbufq_insert() is used now (no out-of-order).
1926 if (unlikely(c->type==PROTO_TLS)) {
1927 /* for TLS the TLS processing and the send must happen
1928 atomically w/ respect to other sends on the same connection
1929 (otherwise reordering might occur which would break TLS) =>
1930 lock. However in this case this send will always be the first.
1931 We can have the send() outside the lock only if this is the
1932 first and only send (tls_encode is not called again), or
1933 this is the last send for a tls_encode() loop and all the
1934 previous ones did return CONN_NEW_COMPLETE or CONN_EOF.
1936 response[1] = CONN_NOP;
1939 lock_get(&c->write_lock);
1941 t_send_flags = dst->send_flags;
1942 n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
1944 /* There are 4 cases:
1945 1. entire buffer consumed from the first try
1946 (rest_len == rest_buf == 0)
1947 2. rest_buf & first call
1948 3. rest_buf & not first call
1949 3a. CONN_NEW_COMPLETE or CONN_EOF
1950 3b. CONN_NEW_PENDING_WRITE
1951 4. entire buffer consumed, but not first call
1952 4a. CONN_NEW_COMPLETE or CONN_EOF
1953 4b. CONN_NEW_PENDING_WRITE
1954 We misuse response[1] == CONN_NOP to test for the
1957 if (unlikely(n < 0)) {
1958 lock_release(&c->write_lock);
1959 goto conn_wait_error;
1961 if (likely(rest_len == 0)) {
1962 /* 1 or 4*: CONN_NEW_COMPLETE, CONN_EOF, CONN_NOP
1963 or CONN_NEW_PENDING_WRITE (*rest_len == 0) */
1964 if (likely(response[1] != CONN_NEW_PENDING_WRITE)) {
1965 /* 1 or 4a => it's safe to do the send outside the
1966 lock (it will either send directly or
1969 lock_release(&c->write_lock);
1970 if (likely(t_len != 0)) {
1971 n=tcpconn_1st_send(fd, c, t_buf, t_len,
1974 } else { /* t_len == 0 */
1975 if (response[1] == CONN_NOP) {
1976 /* nothing to send (e.g parallel send
1977 tls_encode queues some data and then
1978 WANT_READ => this tls_encode will queue
1979 the cleartext too and will have nothing
1980 to send right now) and initial send =>
1981 behave as if the send was successful
1982 (but never return EOF here) */
1983 response[1] = CONN_NEW_COMPLETE;
1988 /* CONN_NEW_PENDING_WRITE: 4b: it was a
1989 repeated tls_encode() (or otherwise we would
1990 have here CONN_NOP) => add to the queue */
1991 if (unlikely(t_len &&
1992 _wbufq_add(c, t_buf, t_len) < 0)) {
1993 response[1] = CONN_ERROR;
1996 lock_release(&c->write_lock);
1997 /* exit (no send) */
1999 } else { /* rest_len != 0 */
2000 /* 2 or 3*: if tls_encode hasn't finished, we have to
2001 call tcpconn_1st_send() under lock (otherwise if it
2002 returns CONN_NEW_PENDING_WRITE, there is no way
2003 to find the right place to add the new queued
2004 data from the 2nd tls_encode()) */
2005 if (likely((response[1] == CONN_NOP /*2*/ ||
2006 response[1] == CONN_NEW_COMPLETE /*3a*/ ||
2007 response[1] == CONN_EOF /*3a*/) && t_len))
2008 n = tcpconn_1st_send(fd, c, t_buf, t_len,
2011 else if (unlikely(t_len &&
2012 _wbufq_add(c, t_buf, t_len) < 0)) {
2013 /*3b: CONN_NEW_PENDING_WRITE*/
2014 response[1] = CONN_ERROR;
2017 if (likely(n >= 0)) {
2018 /* if t_len == 0 => nothing was sent => previous
2019 response will be kept */
2022 goto redo_tls_encode;
2024 lock_release(&c->write_lock);
2029 #endif /* USE_TLS */
2030 n=tcpconn_1st_send(fd, c, buf, len, dst->send_flags,
2032 if (unlikely(n<0)) /* this will catch CONN_ERROR too */
2033 goto conn_wait_error;
2034 if (unlikely(response[1]==CONN_EOF)){
2035 /* if close-after-send requested, don't bother
2036 sending the fd back to tcp_main, try closing it
2037 immediately (no other tcp_send should use it,
2038 because it is marked as close-after-send before
2039 being added to the hash) */
2040 goto conn_wait_close;
2042 /* send to tcp_main */
2043 response[0]=(long)c;
2044 if (unlikely(send_fd(unix_tcp_sock, response,
2045 sizeof(response), fd) <= 0)){
2046 LOG(L_ERR, "BUG: tcp_send %s: %ld for %p"
2047 " failed:" " %s (%d)\n",
2048 su2a(&dst->to, sizeof(dst->to)),
2049 response[1], c, strerror(errno), errno);
2050 goto conn_wait_error;
2052 goto conn_wait_success;
2054 #endif /* TCP_CONNECT_WAIT && TCP_ASYNC */
2055 if (unlikely((c=tcpconn_connect(&dst->to, from, dst->proto,
2056 &dst->send_flags))==0)){
2057 LOG(L_ERR, "ERROR: tcp_send %s: connect failed\n",
2058 su2a(&dst->to, sizeof(dst->to)));
2061 tcpconn_set_send_flags(c, dst->send_flags);
2062 if (likely(c->state==S_CONN_OK))
2063 TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
2064 atomic_set(&c->refcnt, 2); /* ref. from here and it will also
2065 be added in the tcp_main hash */
2067 c->flags|=F_CONN_FD_CLOSED; /* not yet opened in main */
2068 /* ? TODO: it might be faster just to queue the write and
2069 * send to main a CONN_NEW_PENDING_WRITE */
2071 /* send the new tcpconn to "tcp main" */
2072 response[0]=(long)c;
2073 response[1]=CONN_NEW;
2074 n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
2075 if (unlikely(n<=0)){
2076 LOG(L_ERR, "BUG: tcp_send %s: failed send_fd: %s (%d)\n",
2077 su2a(&dst->to, sizeof(dst->to)),
2078 strerror(errno), errno);
2079 /* we can safely delete it, it's not referenced by anybody */
2084 /* new connection => send on it directly */
2086 if (unlikely(c->type==PROTO_TLS)) {
2087 /* for TLS the TLS processing and the send must happen
2088 atomically w/ respect to other sends on the same connection
2089 (otherwise reordering might occur which would break TLS) =>
2092 response[1] = CONN_NOP;
2095 lock_get(&c->write_lock);
2097 t_send_flags = dst->send_flags;
2098 n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
2100 if (likely(n > 0)) {
2101 n = tcpconn_do_send(fd, c, t_buf, t_len, t_send_flags,
2103 if (likely(response[1] != CONN_QUEUED_WRITE ||
2104 resp == CONN_ERROR))
2105 /* don't overwrite a previous CONN_QUEUED_WRITE
2108 } else if (unlikely(n < 0)) {
2109 response[1] = CONN_ERROR;
2112 /* else do nothing for n (t_len) == 0, keep
2116 } while(unlikely(rest_len && n > 0));
2117 lock_release(&c->write_lock);
2119 #endif /* USE_TLS */
2120 n = tcpconn_do_send(fd, c, buf, len, dst->send_flags,
2122 if (unlikely(response[1] != CONN_NOP)) {
2123 response[0]=(long)c;
2124 if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
2125 BUG("tcp_main command %ld sending failed (write):"
2126 "%s (%d)\n", response[1], strerror(errno), errno);
2127 /* all commands != CONN_NOP returned by tcpconn_do_send()
2128 (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec
2129 refcnt => if sending the command fails we have to
2130 dec. refcnt by hand */
2131 tcpconn_chld_put(c); /* deref. it manually */
2134 /* here refcnt for c is already decremented => c contents can
2135 no longer be used and refcnt _must_ _not_ be decremented
2137 if (unlikely(n < 0 || response[1] == CONN_EOF)) {
2138 /* on error or eof, close fd */
2140 } else if (response[1] == CONN_QUEUED_WRITE) {
2142 if (cfg_get(tcp, tcp_cfg, fd_cache)) {
2143 tcp_fd_cache_add(c, fd);
2145 #endif /* TCP_FD_CACHE */
2148 BUG("unexpected tcpconn_do_send() return & response:"
2149 " %d, %ld\n", n, response[1]);
2154 if (cfg_get(tcp, tcp_cfg, fd_cache)) {
2155 tcp_fd_cache_add(c, fd);
2157 #endif /* TCP_FD_CACHE */
2159 /* here we can have only commands that _do_ _not_ dec refcnt.
2160 (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE are all treated above) */
2162 } /* if (c==0 or unusable) new connection */
2163 /* existing connection, send on it */
2164 n = tcpconn_send_put(c, buf, len, dst->send_flags);
2165 /* no deref needed (automatically done inside tcpconn_send_put()) */
2167 #ifdef TCP_CONNECT_WAIT
2170 if (cfg_get(tcp, tcp_cfg, fd_cache)) {
2171 tcp_fd_cache_add(c, fd);
2173 #endif /* TCP_FD_CACHE */
2174 if (unlikely (tcp_safe_close(fd) < 0))
2175 LOG(L_ERR, "closing temporary send fd for %p: %s: "
2176 "close(%d) failed (flags 0x%x): %s (%d)\n", c,
2177 su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2178 fd, c->flags, strerror(errno), errno);
2179 tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
2184 /* connect or send failed or immediate close-after-send was requested on
2185 * newly created connection which was not yet sent to tcp_main (but was
2186 * already hashed) => don't send to main, unhash and destroy directly
2187 * (if refcnt>2 it will be destroyed when the last sender releases the
2188 * connection (tcpconn_chld_put(c))) or when tcp_main receives a
2190 c->state=S_CONN_BAD;
2191 /* we are here only if we opened a new fd (and not reused a cached or
2192 a reader one) => if the connect was successful close the fd */
2194 if (unlikely(tcp_safe_close(fd) < 0 ))
2195 LOG(L_ERR, "closing temporary send fd for %p: %s: "
2196 "close(%d) failed (flags 0x%x): %s (%d)\n", c,
2197 su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2198 fd, c->flags, strerror(errno), errno);
2200 /* here the connection is for sure in the hash (tcp_main will not
2201 remove it because it's marked as PENDing) and the refcnt is at least
2206 c->flags&=~F_CONN_HASHED;
2209 /* dec refcnt -> mark it for destruction */
2210 tcpconn_chld_put(c);
2212 #endif /* TCP_CONNECT_WAIT */
2214 tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
2222 /** sends on an existing tcpconn and auto-dec. con. ref counter.
2223 * As opposed to tcp_send(), this function requires an existing
2225 * WARNING: the tcp_connection will be de-referenced.
2226 * @param c - existing tcp connection pointer.
2227 * @param buf - data to be sent.
2228 * @param len - data length.
2229 * @return >=0 on success, -1 on error.
2231 static int tcpconn_send_put(struct tcp_connection* c, const char* buf,
2232 unsigned len, snd_flags_t send_flags)
2234 struct tcp_connection *tmp;
2240 const char* rest_buf;
2242 unsigned rest_len, t_len;
2244 snd_flags_t t_send_flags;
2245 #endif /* USE_TLS */
2247 struct fd_cache_entry* fd_cache_e;
2250 use_fd_cache=cfg_get(tcp, tcp_cfg, fd_cache);
2252 #endif /* TCP_FD_CACHE */
2253 do_close_fd=1; /* close the fd on exit */
2254 response[1] = CONN_NOP;
2256 /* if data is already queued, we don't need the fd */
2257 #ifdef TCP_CONNECT_WAIT
2258 if (unlikely(cfg_get(tcp, tcp_cfg, async) &&
2259 (_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)) ))
2260 #else /* ! TCP_CONNECT_WAIT */
2261 if (unlikely(cfg_get(tcp, tcp_cfg, async) && (_wbufq_non_empty(c)) ))
2262 #endif /* TCP_CONNECT_WAIT */
2264 lock_get(&c->write_lock);
2265 #ifdef TCP_CONNECT_WAIT
2266 if (likely(_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)))
2267 #else /* ! TCP_CONNECT_WAIT */
2268 if (likely(_wbufq_non_empty(c)))
2269 #endif /* TCP_CONNECT_WAIT */
2273 if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) {
2277 t_send_flags = send_flags;
2278 n = tls_encode(c, &t_buf, &t_len,
2279 &rest_buf, &rest_len,
2281 if (unlikely((n < 0) || (t_len &&
2282 (_wbufq_add(c, t_buf, t_len) < 0)))) {
2283 lock_release(&c->write_lock);
2285 response[1] = CONN_ERROR;
2286 c->state=S_CONN_BAD;
2287 c->timeout=get_ticks_raw(); /* force timeout */
2292 } while(unlikely(rest_len && n > 0));
2294 #endif /* USE_TLS */
2295 if (unlikely(len && (_wbufq_add(c, buf, len)<0))){
2296 lock_release(&c->write_lock);
2298 response[1] = CONN_ERROR;
2299 c->state=S_CONN_BAD;
2300 c->timeout=get_ticks_raw(); /* force timeout */
2304 lock_release(&c->write_lock);
2307 lock_release(&c->write_lock);
2309 #endif /* TCP_ASYNC */
2310 /* check if this is not the same reader process holding
2311 * c and if so send directly on c->fd */
2312 if (c->reader_pid==my_pid()){
2313 DBG("tcp_send: send from reader (%d (%d)), reusing fd\n",
2314 my_pid(), process_no);
2316 do_close_fd=0; /* don't close the fd on exit, it's in use */
2318 use_fd_cache=0; /* don't cache: problems would arise due to the
2319 close() on cache eviction (if the fd is still
2320 used). If it has to be cached then dup() _must_
2322 }else if (likely(use_fd_cache &&
2323 ((fd_cache_e=tcp_fd_cache_get(c))!=0))){
2326 DBG("tcp_send: found fd in cache ( %d, %p, %d)\n",
2327 fd, c, fd_cache_e->id);
2328 #endif /* TCP_FD_CACHE */
2330 DBG("tcp_send: tcp connection found (%p), acquiring fd\n", c);
2332 response[0]=(long)c;
2333 response[1]=CONN_GET_FD;
2334 n=send_all(unix_tcp_sock, response, sizeof(response));
2335 if (unlikely(n<=0)){
2336 LOG(L_ERR, "BUG: tcp_send: failed to get fd(write):%s (%d)\n",
2337 strerror(errno), errno);
2341 DBG("tcp_send, c= %p, n=%d\n", c, n);
2342 n=receive_fd(unix_tcp_sock, &tmp, sizeof(tmp), &fd, MSG_WAITALL);
2343 if (unlikely(n<=0)){
2344 LOG(L_ERR, "BUG: tcp_send: failed to get fd(receive_fd):"
2345 " %s (%d)\n", strerror(errno), errno);
2350 /* handle fd closed or bad connection/error
2351 (it's possible that this happened in the time between
2352 we found the initial connection and the time when we get
2355 if (unlikely(c!=tmp || fd==-1 || c->state==S_CONN_BAD)){
2356 if (unlikely(c!=tmp && tmp!=0))
2357 BUG("tcp_send: get_fd: got different connection:"
2358 " %p (id= %d, refcnt=%d state=%d) != "
2360 c, c->id, atomic_get(&c->refcnt), c->state,
2364 /* don't cache fd & close it */
2365 do_close_fd = (fd==-1)?0:1;
2368 #endif /* TCP_FD_CACHE */
2371 DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
2375 if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) {
2376 /* for TLS the TLS processing and the send must happen
2377 atomically w/ respect to other sends on the same connection
2378 (otherwise reordering might occur which would break TLS) =>
2381 response[1] = CONN_NOP;
2384 lock_get(&c->write_lock);
2386 t_send_flags = send_flags;
2387 n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
2389 if (likely(n > 0)) {
2390 n = tcpconn_do_send(fd, c, t_buf, t_len, t_send_flags,
2392 if (likely(response[1] != CONN_QUEUED_WRITE ||
2393 resp == CONN_ERROR))
2394 /* don't overwrite a previous CONN_QUEUED_WRITE
2397 } else if (unlikely(n < 0)) {
2398 response[1] = CONN_ERROR;
2401 /* else do nothing for n (t_len) == 0, keep
2405 } while(unlikely(rest_len && n > 0));
2406 lock_release(&c->write_lock);
2409 n = tcpconn_do_send(fd, c, buf, len, send_flags, &response[1], 0);
2410 if (unlikely(response[1] != CONN_NOP)) {
2412 response[0]=(long)c;
2413 if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
2414 BUG("tcp_main command %ld sending failed (write):%s (%d)\n",
2415 response[1], strerror(errno), errno);
2416 /* all commands != CONN_NOP returned by tcpconn_do_send()
2417 (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec refcnt
2418 => if sending the command fails we have to dec. refcnt by hand
2420 tcpconn_chld_put(c); /* deref. it manually */
2423 /* here refcnt for c is already decremented => c contents can no
2424 longer be used and refcnt _must_ _not_ be decremented again
2426 if (unlikely(n < 0 || response[1] == CONN_EOF)) {
2427 /* on error or eof, remove from cache or close fd */
2429 if (unlikely(fd_cache_e)){
2430 tcp_fd_cache_rm(fd_cache_e);
2434 #endif /* TCP_FD_CACHE */
2435 if (do_close_fd) tcp_safe_close(fd);
2436 } else if (response[1] == CONN_QUEUED_WRITE) {
2438 if (unlikely((fd_cache_e==0) && use_fd_cache)){
2439 tcp_fd_cache_add(c, fd);
2441 #endif /* TCP_FD_CACHE */
2442 if (do_close_fd) tcp_safe_close(fd);
2444 BUG("unexpected tcpconn_do_send() return & response: %d, %ld\n",
2447 return n; /* no tcpconn_put */
2451 if (unlikely((fd_cache_e==0) && use_fd_cache)){
2452 tcp_fd_cache_add(c, fd);
2454 #endif /* TCP_FD_CACHE */
2456 if (unlikely(tcp_safe_close(fd) < 0))
2457 LOG(L_ERR, "closing temporary send fd for %p: %s: "
2458 "close(%d) failed (flags 0x%x): %s (%d)\n", c,
2459 su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2460 fd, c->flags, strerror(errno), errno);
2462 /* here we can have only commands that _do_ _not_ dec refcnt.
2463 (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE are all treated above) */
2465 tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
2471 /* unsafe send on a known tcp connection.
2472 * Directly send on a known tcp connection with a given fd.
2473 * It is assumed that the connection locks are already held.
2474 * Side effects: if needed it will send state update commands to
2475 * tcp_main (e.g. CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE).
2476 * @param fd - fd used for sending.
2477 * @param c - existing tcp connection pointer (state and flags might be
2479 * @param buf - data to be sent.
2480 * @param len - data length.
2482 * @return <0 on error, number of bytes sent on success.
2484 int tcpconn_send_unsafe(int fd, struct tcp_connection *c,
2485 const char* buf, unsigned len, snd_flags_t send_flags)
2490 n = tcpconn_do_send(fd, c, buf, len, send_flags, &response[1], 1);
2491 if (unlikely(response[1] != CONN_NOP)) {
2492 /* all commands != CONN_NOP returned by tcpconn_do_send()
2493 (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec refcnt
2494 => increment it (we don't want the connection to be destroyed
2497 atomic_inc(&c->refcnt);
2498 response[0]=(long)c;
2499 if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
2500 BUG("connection %p command %ld sending failed (write):%s (%d)\n",
2501 c, response[1], strerror(errno), errno);
2502 /* send failed => deref. it back by hand */
2503 tcpconn_chld_put(c);
2506 /* here refcnt for c is already decremented => c contents can no
2507 longer be used and refcnt _must_ _not_ be decremented again
2516 /** lower level send (connection and fd should be known).
2517 * It takes care of possible write-queueing, blacklisting a.s.o.
2518 * It expects a valid tcp connection. It doesn't touch the ref. cnts.
2519 * It will also set the connection flags from send_flags (it's better
2520 * to do it here, because it's guaranteed to be under lock).
2521 * @param fd - fd used for sending.
2522 * @param c - existing tcp connection pointer (state and flags might be
 * changed).
2524 * @param buf - data to be sent.
2525 * @param len - data length.
 * @param send_flags - send flags; they are stored on the connection with
 * tcpconn_set_send_flags() and SND_F_CON_CLOSE asks for the connection
 * to be closed after the write.
2527 * @param resp - filled with a cmd. for tcp_main:
2528 * CONN_NOP - nothing needs to be done (do not send
2529 * anything to tcp_main).
2530 * CONN_ERROR - error, connection should be closed.
2531 * CONN_EOF - no error, but connection should be closed.
2532 * CONN_QUEUED_WRITE - new write queue (connection
2533 * should be watched for write and the wr.
2535 * @param locked - if set assume the connection is already locked (call from
2536 * tls) and do not lock/unlock the connection.
2537 * @return >=0 on success, < 0 on error && *resp == CONN_ERROR.
2540 static int tcpconn_do_send(int fd, struct tcp_connection* c,
2541 const char* buf, unsigned len,
2542 snd_flags_t send_flags, long* resp,
2547 int enable_write_watch;
2548 #endif /* TCP_ASYNC */
2550 DBG("tcp_send: sending...\n");
2552 if (likely(!locked)) lock_get(&c->write_lock);
2553 /* update connection send flags with the current ones */
2554 tcpconn_set_send_flags(c, send_flags);
/* async mode: when the write queue is non-empty (or, with
   TCP_CONNECT_WAIT, the connection is still F_CONN_PENDING) the new
   data is added to the write queue with _wbufq_add() */
2556 if (likely(cfg_get(tcp, tcp_cfg, async))){
2557 if (_wbufq_non_empty(c)
2558 #ifdef TCP_CONNECT_WAIT
2559 || (c->flags&F_CONN_PENDING)
2560 #endif /* TCP_CONNECT_WAIT */
2562 if (unlikely(_wbufq_add(c, buf, len)<0)){
2563 if (likely(!locked)) lock_release(&c->write_lock);
2567 if (likely(!locked)) lock_release(&c->write_lock);
2571 n=_tcpconn_write_nb(fd, c, buf, len);
2573 #endif /* TCP_ASYNC */
2574 /* n=tcp_blocking_write(c, fd, buf, len); */
2575 n=tsend_stream(fd, buf, len,
2576 TICKS_TO_S(cfg_get(tcp, tcp_cfg, send_timeout)) *
2580 #else /* ! TCP_ASYNC */
2581 if (likely(!locked)) lock_release(&c->write_lock);
2582 #endif /* TCP_ASYNC */
2584 DBG("tcp_send: after real write: c= %p n=%d fd=%d\n",c, n, fd);
2585 DBG("tcp_send: buf=\n%.*s\n", (int)len, buf);
/* fewer than len bytes were written: either a partial write or an error */
2586 if (unlikely(n<(int)len)){
2588 if (cfg_get(tcp, tcp_cfg, async) &&
2589 ((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK)){
2590 enable_write_watch=_wbufq_empty(c);
2592 else if (unlikely(c->state==S_CONN_CONNECT ||
2593 c->state==S_CONN_ACCEPT)){
2594 TCP_STATS_ESTABLISHED(c->state);
2595 c->state=S_CONN_OK; /* something was written */
2597 if (unlikely(_wbufq_add(c, buf+n, len-n)<0)){
2598 if (likely(!locked)) lock_release(&c->write_lock);
2602 if (likely(!locked)) lock_release(&c->write_lock);
/* enable_write_watch was sampled with _wbufq_empty() before the unsent
   remainder was queued, so CONN_QUEUED_WRITE is reported only when this
   call created the queue */
2604 if (likely(enable_write_watch))
2605 *resp=CONN_QUEUED_WRITE;
2608 if (likely(!locked)) lock_release(&c->write_lock);
2610 #endif /* TCP_ASYNC */
/* an error while still in S_CONN_CONNECT is handled as a failed connect
   (BLST_ERR_CONNECT blacklisting, TCP_EV_CONNECT_* events,
   TCP_STATS_CONNECT_FAILED()); the BLST_ERR_SEND and
   TCP_STATS_CON_RESET() lines further down presumably belong to the
   already-connected case.
   NOTE(review): the branch boundaries are not visible here, confirm. */
2611 if (unlikely(c->state==S_CONN_CONNECT)){
2614 case EHOSTUNREACH: /* not posix for send() */
2615 #ifdef USE_DST_BLACKLIST
2616 dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
2617 &c->rcv.src_su, &c->send_flags, 0);
2618 #endif /* USE_DST_BLACKLIST */
2619 TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
2620 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2624 #ifdef USE_DST_BLACKLIST
2625 dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
2626 &c->rcv.src_su, &c->send_flags, 0);
2627 #endif /* USE_DST_BLACKLIST */
2628 TCP_EV_CONNECT_RST(errno, TCP_LADDR(c), TCP_LPORT(c),
2629 TCP_PSU(c), TCP_PROTO(c));
2632 TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c), TCP_LPORT(c),
2633 TCP_PSU(c), TCP_PROTO(c));
2635 TCP_STATS_CONNECT_FAILED();
2640 TCP_STATS_CON_RESET();
2643 /*case EHOSTUNREACH: -- not posix */
2644 #ifdef USE_DST_BLACKLIST
2645 dst_blacklist_su(BLST_ERR_SEND, c->rcv.proto,
2646 &c->rcv.src_su, &c->send_flags, 0);
2647 #endif /* USE_DST_BLACKLIST */
2651 LOG(L_ERR, "ERROR: tcp_send: failed to send on %p (%s:%d->%s): %s (%d)"
2652 "\n", c, ip_addr2a(&c->rcv.dst_ip), c->rcv.dst_port,
2653 su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2654 strerror(errno), errno);
2658 #endif /* TCP_ASYNC */
2659 /* error on the connection , mark it as bad and set 0 timeout */
2660 c->state=S_CONN_BAD;
2661 c->timeout=get_ticks_raw();
2662 /* tell "main" it should drop this (optional it will t/o anyway?)*/
2664 return n; /* error return, no tcpconn_put */
2668 if (likely(!locked)) lock_release(&c->write_lock);
2669 #endif /* TCP_ASYNC */
2670 /* in non-async mode here we're either in S_CONN_OK or S_CONN_ACCEPT*/
2671 if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
2672 TCP_STATS_ESTABLISHED(c->state);
2675 if (unlikely(send_flags.f & SND_F_CON_CLOSE)){
2676 /* close after write => send EOF request to tcp_main */
2677 c->state=S_CONN_BAD;
2678 c->timeout=get_ticks_raw();
2679 /* tell "main" it should drop this*/
2689 /** low level 1st send on a new connection.
2690 * It takes care of possible write-queueing, blacklisting a.s.o.
2691 * It expects a valid just-opened tcp connection. It doesn't touch the
2692 * ref. counters. It's used only in the async first send case.
2693 * @param fd - fd used for sending.
2694 * @param c - existing tcp connection pointer (state and flags might be
2695 * changed). The connection must be new (no previous send on it).
2696 * @param buf - data to be sent.
2697 * @param len - data length.
2699 * @param resp - filled with a fd sending cmd. for tcp_main on success. It
2700 * _must_ be one of the commands listed below:
2701 * CONN_NEW_PENDING_WRITE - new connection, first write
2702 * was partially successful (or EAGAIN) and
2703 * was queued (connection should be watched
2704 * for write and the write queue flushed).
2705 * The fd should be sent to tcp_main.
2706 * CONN_NEW_COMPLETE - new connection, first write
2707 * completed successfully and no data is
2708 * queued. The fd should be sent to tcp_main.
2709 * CONN_EOF - no error, but the connection should be
2710 * closed (e.g. SND_F_CON_CLOSE send flag).
2711 * CONN_ERROR - error, _must_ return < 0.
2712 * @param locked - if set assume the connection is already locked (call from
2713 * tls) and do not lock/unlock the connection.
2714 * @return >=0 on success, < 0 on error (on error *resp is undefined).
2717 static int tcpconn_1st_send(int fd, struct tcp_connection* c,
2718 const char* buf, unsigned len,
2719 snd_flags_t send_flags, long* resp,
2724 n=_tcpconn_write_nb(fd, c, buf, len);
2725 if (unlikely(n<(int)len)){
2726 /* on EAGAIN or ENOTCONN return success.
2727 ENOTCONN appears on newer FreeBSD versions (non-blocking socket,
2728 connect() & send immediately) */
2729 if ((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK || errno==ENOTCONN){
2730 DBG("pending write on new connection %p "
2731 " (%d/%d bytes written)\n", c, n, len);
2732 if (unlikely(n<0)) n=0;
2734 if (likely(c->state == S_CONN_CONNECT))
2735 TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
2736 c->state=S_CONN_OK; /* partial write => connect()
2739 /* add to the write queue */
2740 if (likely(!locked)) lock_get(&c->write_lock);
2741 if (unlikely(_wbufq_insert(c, buf+n, len-n)<0)){
2742 if (likely(!locked)) lock_release(&c->write_lock);
2744 LOG(L_ERR, "%s: EAGAIN and"
2745 " write queue full or failed for %p\n",
2746 su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)), c);
2749 if (likely(!locked)) lock_release(&c->write_lock);
2750 /* send to tcp_main */
2751 *resp=CONN_NEW_PENDING_WRITE;
2755 /* n < 0 and not EAGAIN => write error */
2756 /* if first write failed it's most likely a
2760 case EHOSTUNREACH: /* not posix for send() */
2761 #ifdef USE_DST_BLACKLIST
2762 dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
2763 &c->rcv.src_su, &c->send_flags, 0);
2764 #endif /* USE_DST_BLACKLIST */
2765 TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
2766 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2770 #ifdef USE_DST_BLACKLIST
2771 dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
2772 &c->rcv.src_su, &c->send_flags, 0);
2773 #endif /* USE_DST_BLACKLIST */
2774 TCP_EV_CONNECT_RST(errno, TCP_LADDR(c),
2775 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2778 TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
2779 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
2781 /* error: destroy it directly */
2782 TCP_STATS_CONNECT_FAILED();
2783 LOG(L_ERR, "%s: connect & send for %p failed:" " %s (%d)\n",
2784 su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
2785 c, strerror(errno), errno);
2788 LOG(L_INFO, "quick connect for %p\n", c);