4 * Copyright (C) 2001-2003 FhG Fokus
6 * This file is part of ser, a free SIP server.
8 * ser is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version
13 * For a license to use the ser software under conditions
14 * other than those described here, or to purchase support for this
15 * software, please contact iptel.org by e-mail at the following addresses:
18 * ser is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 * 2002-12-?? created by andrei.
31 * 2003-02-10 zero term before calling receive_msg & undo afterward (andrei)
32 * 2003-05-13 l: (short form of Content-Length) is now recognized (andrei)
33 * 2003-07-01 tcp_read & friends take now a single tcp_connection
34 * parameter & they set c->state to S_CONN_EOF on eof (andrei)
35 * 2003-07-04 fixed tcp EOF handling (possible infinite loop) (andrei)
36 * 2005-07-05 migrated to the new io_wait code (andrei)
37 * 2006-02-03 use tsend_stream instead of send_all (andrei)
38 * 2006-10-13 added STUN support - state machine for TCP (vlada)
39 * 2007-02-20 fixed timeout calc. bug (andrei)
40 * 2007-11-26 improved tcp timers: switched to local_timer (andrei)
41 * 2008-02-04 optimizations: handle POLLRDHUP (if supported), detect short
42 * reads (sock. buffer empty) (andrei)
43 * 2009-02-26 direct blacklist support (andrei)
44 * 2009-04-09 tcp ev and tcp stats macros added (andrei)
45 * 2010-05-14 split tcp_read() into tcp_read() and tcp_read_data() (andrei)
46 * 2010-05-17 new RD_CONN_REPEAT_READ flag, used by the tls hooks (andrei)
49 /** tcp readers processes, tcp read and pre-parse msg. functions.
63 #include <sys/types.h>
64 #include <sys/select.h>
65 #include <sys/socket.h>
68 #include <stdlib.h> /* for abort() */
74 #include "tcp_stats.h"
80 #include "local_timer.h"
83 #include "cfg/cfg_struct.h"
85 #include "tls/tls_server.h"
87 #include "tls_hooks.h"
89 #ifdef USE_DST_BLACKLIST
90 #include "dst_blacklist.h"
91 #endif /* USE_DST_BLACKLIST */
93 #define HANDLE_IO_INLINE
95 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
100 #include "ser_stun.h"
102 int is_msg_complete(struct tcp_req* r);
104 #endif /* USE_STUN */
/* Interim "100 Continue" reply sent back on the connection when an HTTP/1.1
 * request carries "Expect: 100-continue". It announces an explicitly empty
 * body, so the header name must be spelled exactly "Content-Length". */
#define HTTP11CONTINUE	"HTTP/1.1 100 Continue\r\nContent-Length: 0\r\n\r\n"
/* length of the reply, not counting the terminating '\0' */
#define HTTP11CONTINUE_LEN	(sizeof(HTTP11CONTINUE)-1)
111 #define TCPCONN_TIMEOUT_MIN_RUN 1 /* run the timers each new tick */
113 /* types used in io_wait* */
114 enum fd_types { F_NONE, F_TCPMAIN, F_TCPCONN };
116 /* list of tcp connections handled by this process */
117 static struct tcp_connection* tcp_conn_lst=0;
118 static io_wait_h io_w; /* io_wait handler*/
119 static int tcpmain_sock=-1;
121 static struct local_timer tcp_reader_ltimer;
122 static ticks_t tcp_reader_prev_ticks;
125 static inline char *strfindcasestrz(str *haystack, char *needlez)
131 needle.len = strlen(needlez);
132 for(i=0;i<haystack->len-needle.len;i++) {
133 for(j=0;j<needle.len;j++) {
134 if ( !((haystack->s[i+j]==needle.s[j]) ||
135 ( isalpha((int)haystack->s[i+j])
136 && ((haystack->s[i+j])^(needle.s[j]))==0x20 )) )
140 return haystack->s+i;
145 int tcp_http11_continue(struct tcp_connection *c)
147 struct dest_info dst;
149 struct msg_start fline;
155 msg.s = c->req.start;
156 msg.len = c->req.pos - c->req.start;
157 p = parse_first_line(msg.s, msg.len, &fline);
161 if(fline.type!=SIP_REQUEST)
164 /* check if http request */
165 if(fline.u.request.version.len < HTTP_VERSION_LEN
166 || strncasecmp(fline.u.request.version.s,
167 HTTP_VERSION, HTTP_VERSION_LEN))
170 /* check for Expect header */
171 if(strfindcasestrz(&msg, "Expect: 100-continue")!=NULL)
173 init_dst_from_rcv(&dst, &c->rcv);
174 if (tcp_send(&dst, 0, HTTP11CONTINUE, HTTP11CONTINUE_LEN) < 0) {
175 LOG(L_ERR, "HTTP/1.1 continue failed\n");
178 /* check for Transfer-Encoding header */
179 if(strfindcasestrz(&msg, "Transfer-Encoding: chunked")!=NULL)
181 c->req.flags |= F_TCP_REQ_BCHUNKED;
189 /** reads data from an existing tcp connection.
190 * Side-effects: blacklisting, sets connection state to S_CONN_OK, tcp stats.
191 * @param fd - connection file descriptor
192 * @param c - tcp connection structure. c->state might be changed and
193 * receive info might be used for blacklisting.
194 * @param buf - buffer where the received data will be stored.
195 * @param b_size - buffer size.
196 * @param flags - value/result - used to signal a seen or "forced" EOF on the
197 * connection (when it is known that no more data will come after the
198 * current socket buffer is emptied )=> return/signal EOF on the first
199 * short read (=> don't use it on POLLPRI, as OOB data will cause short
200 * reads even if there are still remaining bytes in the socket buffer)
201 * input: RD_CONN_FORCE_EOF - force EOF after the first successful read
203 * output: RD_CONN_SHORT_READ - if the read exhausted all the bytes
204 * in the socket read buffer.
205 * RD_CONN_EOF - if EOF detected (0 bytes read) or forced via
207 * RD_CONN_REPEAT_READ - the read should be repeated immediately
208 * (used only by the tls code for now).
209 * Note: RD_CONN_SHORT_READ & RD_CONN_EOF _are_ not cleared internally,
210 * so one should clear them before calling this function.
211 * @return number of bytes read, 0 on EOF or -1 on error,
212 * on EOF it also sets c->state to S_CONN_EOF.
213 * (to distinguish from reads that would block which could return 0)
214 * RD_CONN_SHORT_READ is also set in *flags for short reads.
215 * EOF checking should be done by checking the RD_CONN_EOF flag.
217 int tcp_read_data(int fd, struct tcp_connection *c,
218 char* buf, int b_size, int* flags)
223 bytes_read=read(fd, buf, b_size);
225 if (likely(bytes_read!=b_size)){
226 if(unlikely(bytes_read==-1)){
227 if (errno == EWOULDBLOCK || errno == EAGAIN){
228 bytes_read=0; /* nothing has been read */
229 }else if (errno == EINTR) goto again;
231 if (unlikely(c->state==S_CONN_CONNECT)){
234 #ifdef USE_DST_BLACKLIST
235 dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
238 #endif /* USE_DST_BLACKLIST */
239 TCP_EV_CONNECT_RST(errno, TCP_LADDR(c),
240 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
243 #ifdef USE_DST_BLACKLIST
244 dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
247 #endif /* USE_DST_BLACKLIST */
248 TCP_EV_CONNECT_TIMEOUT(errno, TCP_LADDR(c),
249 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
252 TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
253 TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
255 TCP_STATS_CONNECT_FAILED();
259 TCP_STATS_CON_RESET();
261 #ifdef USE_DST_BLACKLIST
262 dst_blacklist_su(BLST_ERR_SEND, c->rcv.proto,
265 #endif /* USE_DST_BLACKLIST */
269 LOG(cfg_get(core, core_cfg, corelog),
270 "error reading: %s (%d)\n", strerror(errno), errno);
273 }else if (unlikely((bytes_read==0) ||
274 (*flags & RD_CONN_FORCE_EOF))){
277 DBG("EOF on %p, FD %d\n", c, fd);
279 if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
280 TCP_STATS_ESTABLISHED(c->state);
285 *flags|=RD_CONN_SHORT_READ;
286 }else{ /* else normal full read */
287 if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
288 TCP_STATS_ESTABLISHED(c->state);
297 /* reads next available bytes
298 * c- tcp connection used for reading, tcp_read changes also c->state on
299 * EOF and c->req.error on read error
300 * * flags - value/result - used to signal a seen or "forced" EOF on the
301 * connection (when it is known that no more data will come after the
302 * current socket buffer is emptied )=> return/signal EOF on the first
303 * short read (=> don't use it on POLLPRI, as OOB data will cause short
304 * reads even if there are still remaining bytes in the socket buffer)
305 * return number of bytes read, 0 on EOF or -1 on error,
306 * on EOF it also sets c->state to S_CONN_EOF.
307 * (to distinguish from reads that would block which could return 0)
308 * RD_CONN_SHORT_READ is also set in *flags for short reads.
309 * sets also r->error */
310 int tcp_read(struct tcp_connection *c, int* flags)
312 int bytes_free, bytes_read;
318 bytes_free=r->b_size- (int)(r->pos - r->buf);
320 if (unlikely(bytes_free==0)){
321 LOG(L_ERR, "ERROR: tcp_read: buffer overrun, dropping\n");
322 r->error=TCP_REQ_OVERRUN;
325 bytes_read = tcp_read_data(fd, c, r->pos, bytes_free, flags);
326 if (unlikely(bytes_read < 0)){
327 r->error=TCP_READ_ERROR;
331 DBG("tcp_read: read %d bytes:\n%.*s\n", bytes_read, bytes_read, r->pos);
339 /* reads all headers (until double crlf), & parses the content-length header
340 * (WARNING: inefficient, tries to reuse receive_msg but will go through
341 * the headers twice [once here looking for Content-Length and for the end
342 * of the headers and once in receive_msg]; a more speed efficient version will
343 * result in either major code duplication or major changes to the receive code)
344 * returns number of bytes read & sets r->state & r->body
345 * when either r->body!=0 or r->state==H_BODY =>
346 * all headers have been read. It should be called in a while loop.
347 * returns < 0 if error or 0 if EOF */
348 int tcp_read_headers(struct tcp_connection *c, int* read_flags)
350 int bytes, remaining;
355 unsigned int mc; /* magic cookie */
356 unsigned short body_len;
359 #define crlf_default_skip_case \
366 #define content_len_beg_case \
369 if (!TCP_REQ_HAS_CLEN(r)) r->state=H_STARTWS; \
370 else r->state=H_SKIP; \
371 /* not interested if we already found one */ \
375 if(!TCP_REQ_HAS_CLEN(r)) r->state=H_CONT_LEN1; \
376 else r->state=H_SKIP; \
380 /* short form for Content-Length */ \
381 if (!TCP_REQ_HAS_CLEN(r)) r->state=H_L_COLON; \
382 else r->state=H_SKIP; \
385 #define change_state(upper, lower, newstate)\
389 r->state=(newstate); break; \
390 crlf_default_skip_case; \
393 #define change_state_case(state0, upper, lower, newstate)\
395 change_state(upper, lower, newstate); \
401 /* if we still have some unparsed part, parse it first, don't do the read*/
402 if (unlikely(r->parsed<r->pos)){
406 if (unlikely(c->type==PROTO_TLS))
407 bytes=tls_read(c, read_flags);
410 bytes=tcp_read(c, read_flags);
411 if (bytes<=0) return bytes;
415 while(p<r->pos && r->error==TCP_REQ_OK){
416 switch((unsigned char)r->state){
417 case H_BODY: /* read the body*/
419 if (remaining>r->bytes_to_go) remaining=r->bytes_to_go;
420 r->bytes_to_go-=remaining;
422 if (r->bytes_to_go==0){
423 r->flags|=F_TCP_REQ_COMPLETE;
429 /* find lf, we are in this state if we are not interested
430 * in anything till end of line*/
431 p=q_memchr(p, '\n', r->pos-p);
441 /* terminate on LF CR LF or LF LF */
449 if (TCP_REQ_HAS_CLEN(r)){
451 r->bytes_to_go=r->content_len;
452 if (r->bytes_to_go==0){
453 r->flags|=F_TCP_REQ_COMPLETE;
458 DBG("tcp_read_headers: ERROR: no clen, p=%X\n",
460 r->error=TCP_REQ_BAD_LEN;
463 content_len_beg_case;
474 if (cfg_get(tcp, tcp_cfg, accept_no_cl)!=0)
475 tcp_http11_continue(c);
477 if (TCP_REQ_HAS_CLEN(r)){
479 r->bytes_to_go=r->content_len;
480 if (r->bytes_to_go==0){
481 r->flags|=F_TCP_REQ_COMPLETE;
486 if (cfg_get(tcp, tcp_cfg, accept_no_cl)!=0) {
488 if(TCP_REQ_BCHUNKED(r)) {
490 /* at least 3 bytes: 0\r\n */
494 r->state=H_HTTP11_CHUNK_START;
500 r->flags|=F_TCP_REQ_COMPLETE;
504 DBG("tcp_read_headers: ERROR: no clen, p=%X\n",
506 r->error=TCP_REQ_BAD_LEN;
509 }else r->state=H_SKIP;
515 content_len_beg_case;
516 crlf_default_skip_case;
525 if (cfg_get(tcp, tcp_cfg, crlf_ping)) {
526 r->state=H_SKIP_EMPTY_CR_FOUND;
532 /* skip empty lines */
536 r->state=H_CONT_LEN1;
541 /* short form for Content-Length */
547 /* STUN support can be switched off even if it's compiled */
549 if (stun_allow_stun && (unsigned char)*p == 0x00) {
551 /* body will be used as pointer to the last used byte */
554 DBG("stun msg detected\n");
563 case H_SKIP_EMPTY_CR_FOUND:
565 r->state=H_SKIP_EMPTY_CRLF_FOUND;
568 r->state=H_SKIP_EMPTY;
572 case H_SKIP_EMPTY_CRLF_FOUND:
574 r->state = H_SKIP_EMPTY_CRLFCR_FOUND;
577 r->state = H_SKIP_EMPTY;
581 case H_SKIP_EMPTY_CRLFCR_FOUND:
583 r->state = H_PING_CRLF;
584 r->flags |= F_TCP_REQ_HAS_CLEN |
585 F_TCP_REQ_COMPLETE; /* hack to avoid error check */
589 r->state = H_SKIP_EMPTY;
594 if ((r->pos - r->body) >= sizeof(struct stun_hdr)) {
595 /* copy second short from buffer where should be body
598 memcpy(&body_len, &r->start[sizeof(unsigned short)],
599 sizeof(unsigned short));
601 body_len = ntohs(body_len);
603 /* check if there is valid magic cookie */
604 memcpy(&mc, &r->start[sizeof(unsigned int)],
605 sizeof(unsigned int));
607 /* using has_content_len as a flag if there should be
610 r->flags |= (mc == MAGIC_COOKIE) ? F_TCP_REQ_HAS_CLEN : 0;
612 r->body += sizeof(struct stun_hdr);
616 r->state = H_STUN_READ_BODY;
619 if (is_msg_complete(r) != 0) {
623 /* set content_len to length of fingerprint */
624 body_len = sizeof(struct stun_attr) +
628 r->content_len=body_len;
635 case H_STUN_READ_BODY:
636 /* check if the whole body was read */
637 body_len=r->content_len;
638 if ((r->pos - r->body) >= body_len) {
641 if (is_msg_complete(r) != 0) {
646 /* set content_len to length of fingerprint */
647 body_len = sizeof(struct stun_attr)+SHA_DIGEST_LENGTH;
648 r->content_len=body_len;
657 /* content_len contains length of fingerprint in this place! */
658 body_len=r->content_len;
659 if ((r->pos - r->body) >= body_len) {
662 r->state = H_STUN_END;
663 r->flags |= F_TCP_REQ_COMPLETE |
664 F_TCP_REQ_HAS_CLEN; /* hack to avoid error check */
672 #endif /* USE_STUN */
673 change_state_case(H_CONT_LEN1, 'O', 'o', H_CONT_LEN2);
674 change_state_case(H_CONT_LEN2, 'N', 'n', H_CONT_LEN3);
675 change_state_case(H_CONT_LEN3, 'T', 't', H_CONT_LEN4);
676 change_state_case(H_CONT_LEN4, 'E', 'e', H_CONT_LEN5);
677 change_state_case(H_CONT_LEN5, 'N', 'n', H_CONT_LEN6);
678 change_state_case(H_CONT_LEN6, 'T', 't', H_CONT_LEN7);
679 change_state_case(H_CONT_LEN7, '-', '_', H_CONT_LEN8);
680 change_state_case(H_CONT_LEN8, 'L', 'l', H_CONT_LEN9);
681 change_state_case(H_CONT_LEN9, 'E', 'e', H_CONT_LEN10);
682 change_state_case(H_CONT_LEN10, 'N', 'n', H_CONT_LEN11);
683 change_state_case(H_CONT_LEN11, 'G', 'g', H_CONT_LEN12);
684 change_state_case(H_CONT_LEN12, 'T', 't', H_CONT_LEN13);
685 change_state_case(H_CONT_LEN13, 'H', 'h', H_L_COLON);
691 break; /* skip space */
693 r->state=H_CONT_LEN_BODY;
695 crlf_default_skip_case;
700 case H_CONT_LEN_BODY:
704 break; /* eat space */
715 r->state=H_CONT_LEN_BODY_PARSE;
716 r->content_len=(*p-'0');
718 /*FIXME: content length on different lines ! */
719 crlf_default_skip_case;
724 case H_CONT_LEN_BODY_PARSE:
736 r->content_len=r->content_len*10+(*p-'0');
740 case '\t': /* FIXME: check if line contains only WS */
742 r->flags|=F_TCP_REQ_HAS_CLEN;
745 /* end of line, parse successful */
747 r->flags|=F_TCP_REQ_HAS_CLEN;
750 LOG(L_ERR, "ERROR: tcp_read_headers: bad "
751 "Content-Length header value, unexpected "
752 "char %c in state %d\n", *p, r->state);
753 r->state=H_SKIP; /* try to find another?*/
759 case H_HTTP11_CHUNK_START: /* start a new body chunk: SIZE\r\nBODY\r\n */
761 r->state = H_HTTP11_CHUNK_SIZE;
763 case H_HTTP11_CHUNK_BODY: /* content of chunnk */
765 if (remaining>r->bytes_to_go) remaining=r->bytes_to_go;
766 r->bytes_to_go-=remaining;
768 if (r->bytes_to_go==0){
769 r->state = H_HTTP11_CHUNK_END;
770 /* shift back body content */
771 if(r->chunk_size>0 && p-r->chunk_size>r->body) {
772 memmove(r->body + r->content_len, p - r->chunk_size,
774 r->content_len += r->chunk_size;
780 case H_HTTP11_CHUNK_END:
784 case '\t': /* skip */
787 r->state = H_HTTP11_CHUNK_START;
790 LM_ERR("bad chunk, unexpected "
791 "char %c in state %d\n", *p, r->state);
792 r->state=H_SKIP; /* try to find another?*/
797 case H_HTTP11_CHUNK_SIZE:
799 case '0': case '1': case '2': case '3':
800 case '4': case '5': case '6': case '7':
803 r->chunk_size += *p - '0';
805 case 'a': case 'b': case 'c': case 'd':
808 r->chunk_size += *p - 'a' + 10;
810 case 'A': case 'B': case 'C': case 'D':
813 r->chunk_size += *p - 'A' + 10;
817 case '\t': /* skip */
820 /* end of line, parse successful */
821 r->state=H_HTTP11_CHUNK_BODY;
822 r->bytes_to_go = r->chunk_size;
823 if (r->bytes_to_go==0){
824 r->state=H_HTTP11_CHUNK_FINISH;
825 r->flags|=F_TCP_REQ_COMPLETE;
831 LM_ERR("bad chunk size value, unexpected "
832 "char %c in state %d\n", *p, r->state);
833 r->state=H_SKIP; /* try to find another?*/
840 LOG(L_CRIT, "BUG: tcp_read_headers: unexpected state %d\n",
852 int tcp_read_req(struct tcp_connection* con, int* bytes_read, int* read_flags)
859 struct dest_info dst;
869 if (likely(req->error==TCP_REQ_OK)){
870 bytes=tcp_read_headers(con, read_flags);
872 /* if timeout state=0; goto end__req; */
873 DBG("read= %d bytes, parsed=%d, state=%d, error=%d\n",
874 bytes, (int)(req->parsed-req->start), req->state,
876 DBG("tcp_read_req: last char=0x%02X, parsed msg=\n%.*s\n",
877 *(req->parsed-1), (int)(req->parsed-req->start),
880 if (unlikely(bytes==-1)){
881 LOG(cfg_get(core, core_cfg, corelog),
882 "ERROR: tcp_read_req: error reading \n");
888 * is EOF if eof on fd and req. not complete yet,
889 * if req. is complete we might have a second unparsed
890 * request after it, so postpone release_with_eof
892 if (unlikely((con->state==S_CONN_EOF) &&
893 (! TCP_REQ_COMPLETE(req)))) {
894 DBG( "tcp_read_req: EOF\n");
900 if (unlikely(req->error!=TCP_REQ_OK)){
901 LOG(L_ERR,"ERROR: tcp_read_req: bad request, state=%d, error=%d "
902 "buf:\n%.*s\nparsed:\n%.*s\n", req->state, req->error,
903 (int)(req->pos-req->buf), req->buf,
904 (int)(req->parsed-req->start), req->start);
905 DBG("- received from: port %d\n", con->rcv.src_port);
906 print_ip("- received from: ip ",&con->rcv.src_ip, "\n");
910 if (likely(TCP_REQ_COMPLETE(req))){
912 DBG("tcp_read_req: end of header part\n");
913 DBG("- received from: port %d\n", con->rcv.src_port);
914 print_ip("- received from: ip ", &con->rcv.src_ip, "\n");
915 DBG("tcp_read_req: headers:\n%.*s.\n",
916 (int)(req->body-req->start), req->start);
918 if (likely(TCP_REQ_HAS_CLEN(req))){
919 DBG("tcp_read_req: content-length= %d\n", req->content_len);
921 DBG("tcp_read_req: body:\n%.*s\n", req->content_len,req->body);
924 if (cfg_get(tcp, tcp_cfg, accept_no_cl)==0) {
925 req->error=TCP_REQ_BAD_LEN;
926 LOG(L_ERR, "ERROR: tcp_read_req: content length not present or"
932 /* if we are here everything is nice and ok*/
935 DBG("calling receive_msg(%p, %d, )\n",
936 req->start, (int)(req->parsed-req->start));
938 /* rcv.bind_address should always be !=0 */
939 bind_address=con->rcv.bind_address;
940 /* just for debugging use sendipv4 as receiving socket FIXME*/
942 if (con->rcv.dst_ip.af==AF_INET6){
943 bind_address=sendipv6_tcp;
945 bind_address=sendipv4_tcp;
948 con->rcv.proto_reserved1=con->id; /* copy the id */
949 c=*req->parsed; /* ugly hack: zero term the msg & save the
950 previous char, req->parsed should be ok
951 because we always alloc BUF_SIZE+1 */
954 if (req->state==H_PING_CRLF) {
955 init_dst_from_rcv(&dst, &con->rcv);
957 if (tcp_send(&dst, 0, CRLF, CRLF_LEN) < 0) {
958 LOG(L_ERR, "CRLF ping: tcp_send() failed\n");
963 if (unlikely(req->state==H_STUN_END)){
965 ret = stun_process_msg(req->start, req->parsed-req->start,
970 if (unlikely(req->state==H_HTTP11_CHUNK_FINISH)){
971 /* http chunked request */
972 req->body[req->content_len] = 0;
973 ret = receive_msg(req->start,
974 req->body + req->content_len - req->start,
978 ret = receive_msg(req->start, req->parsed-req->start,
981 if (unlikely(ret < 0)) {
988 /* prepare for next request */
989 size=req->pos-req->parsed;
992 req->error=TCP_REQ_OK;
993 req->state=H_SKIP_EMPTY;
997 req->pos=req->buf+size;
1000 memmove(req->buf, req->parsed, size);
1001 req->parsed=req->buf; /* fix req->parsed after using it */
1003 DBG("tcp_read_req: preparing for new request, kept %ld"
1006 /*if we still have some unparsed bytes, try to parse them too*/
1008 } else if (unlikely(con->state==S_CONN_EOF)){
1009 DBG( "tcp_read_req: EOF after reading complete request\n");
1012 req->parsed=req->buf; /* fix req->parsed */
1017 if (likely(bytes_read)) *bytes_read=total_bytes;
1023 void release_tcpconn(struct tcp_connection* c, long state, int unix_sock)
1027 DBG( "releasing con %p, state %ld, fd=%d, id=%d\n",
1028 c, state, c->fd, c->id);
1029 DBG(" extra_data %p\n", c->extra_data);
1030 /* release req & signal the parent */
1031 c->reader_pid=0; /* reset it */
1036 /* errno==EINTR, EWOULDBLOCK a.s.o todo */
1037 response[0]=(long)c;
1040 if (tsend_stream(unix_sock, (char*)response, sizeof(response), -1)<=0)
1041 LOG(L_ERR, "ERROR: release_tcpconn: tsend_stream failed\n");
1046 static ticks_t tcpconn_read_timeout(ticks_t t, struct timer_ln* tl, void* data)
1048 struct tcp_connection *c;
1050 c=(struct tcp_connection*)data;
1051 /* or (struct tcp...*)(tl-offset(c->timer)) */
1053 if (likely(!(c->state<0) && TICKS_LT(t, c->timeout))){
1054 /* timeout extended, exit */
1055 return (ticks_t)(c->timeout - t);
1057 /* if conn->state is ERROR or BAD => force timeout too */
1058 if (unlikely(io_watch_del(&io_w, c->fd, -1, IO_FD_CLOSING)<0)){
1059 LOG(L_ERR, "ERROR: tcpconn_read_timeout: io_watch_del failed for %p"
1060 " id %d fd %d, state %d, flags %x, main fd %d\n",
1061 c, c->id, c->fd, c->state, c->flags, c->s);
1063 tcpconn_listrm(tcp_conn_lst, c, c_next, c_prev);
1064 release_tcpconn(c, (c->state<0)?CONN_ERROR:CONN_RELEASE, tcpmain_sock);
1071 /* handle io routine, based on the fd_map type
1072 * (it will be called from io_wait_loop* )
1073 * params: fm - pointer to a fd hash entry
1074 * idx - index in the fd_array (or -1 if not known)
1075 * return: -1 on error, or when we are not interested any more on reads
1076 * from this fd (e.g.: we are closing it )
1077 * 0 on EAGAIN or when by some other way it is known that no more
1078 * io events are queued on the fd (the receive buffer is empty).
1079 * Useful to detect when there are no more io events queued for
1080 * sigio_rt, epoll_et, kqueue.
1081 * >0 on successful read from the fd (when there might be more io
1082 * queued -- the receive buffer might still be non-empty)
1084 inline static int handle_io(struct fd_map* fm, short events, int idx)
1089 struct tcp_connection* con;
1094 /* update the local config */
1100 ret=n=receive_fd(fm->fd, &con, sizeof(con), &s, 0);
1101 DBG("received n=%d con=%p, fd=%d\n", n, con, s);
1103 if (errno == EWOULDBLOCK || errno == EAGAIN){
1106 }else if (errno == EINTR) goto again;
1108 LOG(L_CRIT,"BUG: tcp_receive: handle_io: read_fd: %s \n",
1110 abort(); /* big error*/
1113 if (unlikely(n==0)){
1114 LOG(L_ERR, "WARNING: tcp_receive: handle_io: 0 bytes read\n");
1117 if (unlikely(con==0)){
1118 LOG(L_CRIT, "BUG: tcp_receive: handle_io null pointer\n");
1122 if (unlikely(s==-1)) {
1123 LOG(L_ERR, "ERROR: tcp_receive: handle_io: read_fd:"
1127 con->reader_pid=my_pid();
1128 if (unlikely(con==tcp_conn_lst)){
1129 LOG(L_CRIT, "BUG: tcp_receive: handle_io: duplicate"
1130 " connection received: %p, id %d, fd %d, refcnt %d"
1131 " state %d (n=%d)\n", con, con->id, con->fd,
1132 atomic_get(&con->refcnt), con->state, n);
1134 break; /* try to recover */
1136 if (unlikely(con->state==S_CONN_BAD)){
1137 LOG(L_WARN, "WARNING: tcp_receive: handle_io: received an"
1138 " already bad connection: %p id %d refcnt %d\n",
1139 con, con->id, atomic_get(&con->refcnt));
1142 /* if we received the fd there is most likely data waiting to
1143 * be read => process it first to avoid extra sys calls */
1144 read_flags=((con->flags & (F_CONN_EOF_SEEN|F_CONN_FORCE_EOF)) &&
1145 !(con->flags & F_CONN_OOB_DATA))? RD_CONN_FORCE_EOF
1149 #endif /* USE_TLS */
1150 resp=tcp_read_req(con, &n, &read_flags);
1151 if (unlikely(resp<0)){
1152 /* some error occurred, but on the new fd, not on the tcp
1153 * main fd, so keep the ret value */
1154 if (unlikely(resp!=CONN_EOF))
1155 con->state=S_CONN_BAD;
1156 release_tcpconn(con, resp, tcpmain_sock);
1160 /* repeat read if requested (for now only tls might do this) */
1161 if (unlikely(read_flags & RD_CONN_REPEAT_READ))
1162 goto repeat_1st_read;
1163 #endif /* USE_TLS */
1165 /* must be before io_watch_add, io_watch_add might catch some
1166 * already existing events => might call handle_io and
1167 * handle_io might decide to del. the new connection =>
1168 * must be in the list */
1169 tcpconn_listadd(tcp_conn_lst, con, c_next, c_prev);
1171 con->timeout=t+S_TO_TICKS(TCP_CHILD_TIMEOUT);
1172 /* re-activate the timer */
1173 con->timer.f=tcpconn_read_timeout;
1174 local_timer_reinit(&con->timer);
1175 local_timer_add(&tcp_reader_ltimer, &con->timer,
1176 S_TO_TICKS(TCP_CHILD_TIMEOUT), t);
1177 if (unlikely(io_watch_add(&io_w, s, POLLIN, F_TCPCONN, con)<0)){
1178 LOG(L_CRIT, "ERROR: tcpconn_receive: handle_io: io_watch_add "
1179 "failed for %p id %d fd %d, state %d, flags %x,"
1180 " main fd %d, refcnt %d\n",
1181 con, con->id, con->fd, con->state, con->flags,
1182 con->s, atomic_get(&con->refcnt));
1183 tcpconn_listrm(tcp_conn_lst, con, c_next, c_prev);
1184 local_timer_del(&tcp_reader_ltimer, &con->timer);
1189 con=(struct tcp_connection*)fm->data;
1190 if (unlikely(con->state==S_CONN_BAD)){
1192 if (!(con->send_flags.f & SND_F_CON_CLOSE))
1193 LOG(L_WARN, "WARNING: tcp_receive: handle_io: F_TCPCONN"
1194 " connection marked as bad: %p id %d refcnt %d\n",
1195 con, con->id, atomic_get(&con->refcnt));
1200 (events & POLLRDHUP) |
1201 #endif /* POLLRDHUP */
1202 (events & (POLLHUP|POLLERR)) |
1203 (con->flags & (F_CONN_EOF_SEEN|F_CONN_FORCE_EOF)))
1204 && !(events & POLLPRI))? RD_CONN_FORCE_EOF: 0;
1207 #endif /* USE_TLS */
1208 resp=tcp_read_req(con, &ret, &read_flags);
1209 if (unlikely(resp<0)){
1211 ret=-1; /* some error occured */
1212 if (unlikely(io_watch_del(&io_w, con->fd, idx,
1213 IO_FD_CLOSING) < 0)){
1214 LOG(L_CRIT, "ERROR: tcpconn_receive: handle_io: "
1215 "io_watch_del failed for %p id %d fd %d,"
1216 " state %d, flags %x, main fd %d, refcnt %d\n",
1217 con, con->id, con->fd, con->state,
1218 con->flags, con->s, atomic_get(&con->refcnt));
1220 tcpconn_listrm(tcp_conn_lst, con, c_next, c_prev);
1221 local_timer_del(&tcp_reader_ltimer, &con->timer);
1222 if (unlikely(resp!=CONN_EOF))
1223 con->state=S_CONN_BAD;
1224 release_tcpconn(con, resp, tcpmain_sock);
1227 if (unlikely(read_flags & RD_CONN_REPEAT_READ))
1229 #endif /* USE_TLS */
1230 /* update timeout */
1231 con->timeout=get_ticks_raw()+S_TO_TICKS(TCP_CHILD_TIMEOUT);
1232 /* ret= 0 (read the whole socket buffer) if short read &
1233 * !POLLPRI, bytes read otherwise */
1234 ret&=(((read_flags & RD_CONN_SHORT_READ) &&
1235 !(events & POLLPRI)) - 1);
1239 LOG(L_CRIT, "BUG: handle_io: empty fd map %p (%d): "
1240 "{%d, %d, %p}\n", fm, (int)(fm-io_w.fd_hash),
1241 fm->fd, fm->type, fm->data);
1244 LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type);
1250 con->state=S_CONN_BAD;
1251 release_tcpconn(con, CONN_ERROR, tcpmain_sock);
1259 inline static void tcp_reader_timer_run()
1263 ticks=get_ticks_raw();
1264 if (unlikely((ticks-tcp_reader_prev_ticks)<TCPCONN_TIMEOUT_MIN_RUN))
1266 tcp_reader_prev_ticks=ticks;
1267 local_timer_run(&tcp_reader_ltimer, ticks);
1272 void tcp_receive_loop(int unix_sock)
1276 tcpmain_sock=unix_sock; /* init com. socket */
1277 if (init_io_wait(&io_w, get_max_open_fds(), tcp_poll_method)<0)
1279 tcp_reader_prev_ticks=get_ticks_raw();
1280 if (init_local_timer(&tcp_reader_ltimer, get_ticks_raw())!=0)
1282 /* add the unix socket */
1283 if (io_watch_add(&io_w, tcpmain_sock, POLLIN, F_TCPMAIN, 0)<0){
1284 LOG(L_CRIT, "ERROR: tcp_receive_loop: init: failed to add socket "
1285 " to the fd list\n");
1289 /* initialize the config framework */
1290 if (cfg_child_init()) goto error;
1293 switch(io_w.poll_method){
1296 io_wait_loop_poll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1297 tcp_reader_timer_run();
1303 io_wait_loop_select(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1304 tcp_reader_timer_run();
1308 #ifdef HAVE_SIGIO_RT
1311 io_wait_loop_sigio_rt(&io_w, TCP_CHILD_SELECT_TIMEOUT);
1312 tcp_reader_timer_run();
1319 io_wait_loop_epoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1320 tcp_reader_timer_run();
1325 io_wait_loop_epoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 1);
1326 tcp_reader_timer_run();
1333 io_wait_loop_kqueue(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1334 tcp_reader_timer_run();
1341 io_wait_loop_devpoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1342 tcp_reader_timer_run();
1347 LOG(L_CRIT, "BUG: tcp_receive_loop: no support for poll method "
1349 poll_method_name(io_w.poll_method), io_w.poll_method);
1353 destroy_io_wait(&io_w);
1354 LOG(L_CRIT, "ERROR: tcp_receive_loop: exiting...");
1361 int is_msg_complete(struct tcp_req* r)
1363 if (TCP_REQ_HAS_CLEN(r)) {
1364 r->state = H_STUN_FP;
1368 /* STUN message is complete */
1369 r->state = H_STUN_END;
1370 r->flags |= F_TCP_REQ_COMPLETE |
1371 F_TCP_REQ_HAS_CLEN; /* hack to avoid error check */
1377 #endif /* USE_TCP */