tcp: minor optimization
[sip-router] / tcp_read.c
1 /*
2  * $Id$
3  *
4  * Copyright (C) 2001-2003 FhG Fokus
5  *
6  * This file is part of ser, a free SIP server.
7  *
8  * ser is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version
12  *
13  * For a license to use the ser software under conditions
14  * other than those described here, or to purchase support for this
15  * software, please contact iptel.org by e-mail at the following addresses:
16  *    info@iptel.org
17  *
18  * ser is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26  */
27 /*
28  * History:
29  * --------
30  * 2002-12-??  created by andrei.
31  * 2003-02-10  zero term before calling receive_msg & undo afterward (andrei)
32  * 2003-05-13  l: (short form of Content-Length) is now recognized (andrei)
33  * 2003-07-01  tcp_read & friends now take a single tcp_connection 
34  *              parameter & they set c->state to S_CONN_EOF on eof (andrei)
35  * 2003-07-04  fixed tcp EOF handling (possible infinite loop) (andrei)
36  * 2005-07-05  migrated to the new io_wait code (andrei)
37  * 2006-02-03  use tsend_stream instead of send_all (andrei)
38  * 2006-10-13  added STUN support - state machine for TCP (vlada)
39  * 2007-02-20  fixed timeout calc. bug (andrei)
40  * 2007-11-26  improved tcp timers: switched to local_timer (andrei)
41  * 2008-02-04  optimizations: handle POLLRDHUP (if supported), detect short
42  *              reads (sock. buffer empty) (andrei)
43  * 2009-02-26  direct blacklist support (andrei)
44  */
45
46 #ifdef USE_TCP
47
48 #include <stdio.h>
49 #include <errno.h>
50 #include <string.h>
51
52
53 #include <sys/time.h>
54 #include <sys/types.h>
55 #include <sys/select.h>
56 #include <sys/socket.h>
57
58 #include <unistd.h>
59 #include <stdlib.h> /* for abort() */
60
61
62 #include "dprint.h"
63 #include "tcp_conn.h"
64 #include "pass_fd.h"
65 #include "globals.h"
66 #include "receive.h"
67 #include "timer.h"
68 #include "local_timer.h"
69 #include "ut.h"
70 #include "pt.h"
71 #include "cfg/cfg_struct.h"
72 #ifdef CORE_TLS
73 #include "tls/tls_server.h"
74 #else
75 #include "tls_hooks.h"
76 #endif /* CORE_TLS */
77 #ifdef USE_DST_BLACKLIST
78 #include "dst_blacklist.h"
79 #endif /* USE_DST_BLACKLIST */
80
81 #define HANDLE_IO_INLINE
82 #include "io_wait.h"
83 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
84 #include "tsend.h"
85 #include "forward.h"
86
87 #ifdef USE_STUN
88 #include "ser_stun.h"
89
90 int is_msg_complete(struct tcp_req* r);
91
92 #endif /* USE_STUN */
93
94 #define TCPCONN_TIMEOUT_MIN_RUN  1 /* run the timers each new tick */
95
96 #define RD_CONN_SHORT_READ      1 /* set in *flags by tcp_read() on a short read */
97 #define RD_CONN_EOF             2 /* set in *flags by tcp_read() when EOF is seen or forced */
98 #define RD_CONN_FORCE_EOF       65536 /* input flag: tcp_read() treats the first short read as EOF */
99
100 /* fd types used in io_wait* (values registered along with each watched fd) */
101 enum fd_types { F_NONE, F_TCPMAIN, F_TCPCONN };
102
103 /* list of tcp connections handled by this process */
104 static struct tcp_connection* tcp_conn_lst=0;
105 static io_wait_h io_w; /* io_wait handler */
106 static int tcpmain_sock=-1; /* -1 until set; presumably the socket to tcp main - TODO confirm */
107
108 static struct local_timer tcp_reader_ltimer; /* presumably the per-process connection timer list - TODO confirm */
109 static ticks_t tcp_reader_prev_ticks; /* presumably the tick value at the previous timer run - TODO confirm */
110
111
112 /* reads next available bytes
113  *   c- tcp connection used for reading, tcp_read changes also c->state on
114  *      EOF and c->req.error on read error
115  *   * flags - value/result - used to signal a seen or "forced" EOF on the 
116  *     connection (when it is known that no more data will come after the 
117  *     current socket buffer is emptied )=> return/signal EOF on the first 
118  *     short read (=> don't use it on POLLPRI, as OOB data will cause short
119  *      reads even if there are still remaining bytes in the socket buffer)
120  * return number of bytes read, 0 on EOF or -1 on error,
121  * on EOF it also sets c->state to S_CONN_EOF.
122  * (to distinguish from reads that would block which could return 0)
123  * RD_CONN_SHORT_READ is also set in *flags for short reads.
124  * sets also r->error */
125 int tcp_read(struct tcp_connection *c, int* flags)
126 {
127         int bytes_free, bytes_read;
128         struct tcp_req *r;
129         int fd;
130
131         r=&c->req;
132         fd=c->fd;
133         bytes_free=r->b_size- (int)(r->pos - r->buf);
134         
135         if (bytes_free==0){
136                 LOG(L_ERR, "ERROR: tcp_read: buffer overrun, dropping\n");
137                 r->error=TCP_REQ_OVERRUN;
138                 return -1;
139         }
140 again:
141         bytes_read=read(fd, r->pos, bytes_free);
142
143         if (likely(bytes_read!=bytes_free)){
144                 if(unlikely(bytes_read==-1)){
145                         if (errno == EWOULDBLOCK || errno == EAGAIN){
146                                 bytes_read=0; /* nothing has been read */
147                         }else if (errno == EINTR) goto again;
148                         else{
149 #ifdef USE_DST_BLACKLIST
150                                 if (cfg_get(core, core_cfg, use_dst_blacklist))
151                                         switch(errno){
152                                                 case ECONNRESET:
153                                                 case ETIMEDOUT:
154                                                         dst_blacklist_su((c->state==S_CONN_CONNECT)?
155                                                                                                         BLST_ERR_CONNECT:
156                                                                                                         BLST_ERR_SEND,
157                                                                                                         c->rcv.proto,
158                                                                                                         &c->rcv.src_su, 0);
159                                                         break;
160                                         }
161 #endif /* USE_DST_BLACKLIST */
162                                 LOG(L_ERR, "ERROR: tcp_read: error reading: %s (%d)\n",
163                                                         strerror(errno), errno);
164                                 r->error=TCP_READ_ERROR;
165                                 return -1;
166                         }
167                 }else if (unlikely((bytes_read==0) || 
168                                         (*flags & RD_CONN_FORCE_EOF))){
169                         c->state=S_CONN_EOF;
170                         *flags|=RD_CONN_EOF;
171                         DBG("tcp_read: EOF on %p, FD %d\n", c, fd);
172                 }else{
173                         if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT))
174                                 c->state=S_CONN_OK;
175                 }
176                 /* short read */
177                 *flags|=RD_CONN_SHORT_READ;
178         }else{ /* else normal full read */
179                 if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT))
180                         c->state=S_CONN_OK;
181         }
182 #ifdef EXTRA_DEBUG
183         DBG("tcp_read: read %d bytes:\n%.*s\n", bytes_read, bytes_read, r->pos);
184 #endif
185         r->pos+=bytes_read;
186         return bytes_read;
187 }
188
189
190
191 /* reads all headers (until double crlf), & parses the content-length header
192  * (WARNING: inefficient, tries to reuse receive_msg but will go through
193  * the headers twice [once here looking for Content-Length and for the end
194  * of the headers and once in receive_msg]; a more speed efficient version will
195  * result in either major code duplication or major changes to the receive code)
196  * returns number of bytes read & sets r->state & r->body
197  * when either r->body!=0 or r->state==H_BODY =>
198  * all headers have been read. It should be called in a while loop.
199  * returns < 0 if error or 0 if EOF */
200 int tcp_read_headers(struct tcp_connection *c, int* read_flags)
201 {
202         int bytes, remaining;
203         char *p;
204         struct tcp_req* r;
205         
206 #ifdef USE_STUN
207         unsigned int mc;   /* magic cookie */
208         unsigned short body_len;
209 #endif
210         
211         #define crlf_default_skip_case \
212                                         case '\n': \
213                                                 r->state=H_LF; \
214                                                 break; \
215                                         default: \
216                                                 r->state=H_SKIP
217         
218         #define content_len_beg_case \
219                                         case ' ': \
220                                         case '\t': \
221                                                 if (!TCP_REQ_HAS_CLEN(r)) r->state=H_STARTWS; \
222                                                 else r->state=H_SKIP; \
223                                                         /* not interested if we already found one */ \
224                                                 break; \
225                                         case 'C': \
226                                         case 'c': \
227                                                 if(!TCP_REQ_HAS_CLEN(r)) r->state=H_CONT_LEN1; \
228                                                 else r->state=H_SKIP; \
229                                                 break; \
230                                         case 'l': \
231                                         case 'L': \
232                                                 /* short form for Content-Length */ \
233                                                 if (!TCP_REQ_HAS_CLEN(r)) r->state=H_L_COLON; \
234                                                 else r->state=H_SKIP; \
235                                                 break
236                                                 
237         #define change_state(upper, lower, newstate)\
238                                         switch(*p){ \
239                                                 case upper: \
240                                                 case lower: \
241                                                         r->state=(newstate); break; \
242                                                 crlf_default_skip_case; \
243                                         }
244         
245         #define change_state_case(state0, upper, lower, newstate)\
246                                         case state0: \
247                                                           change_state(upper, lower, newstate); \
248                                                           p++; \
249                                                           break
250
251
252         r=&c->req;
253         /* if we still have some unparsed part, parse it first, don't do the read*/
254         if (unlikely(r->parsed<r->pos)){
255                 bytes=0;
256         }else{
257 #ifdef USE_TLS
258                 if (unlikely(c->type==PROTO_TLS))
259                         bytes=tls_read(c); /* FIXME: read_flags support */
260                 else
261 #endif
262                         bytes=tcp_read(c, read_flags);
263                 if (bytes<=0) return bytes;
264         }
265         p=r->parsed;
266         
267         while(p<r->pos && r->error==TCP_REQ_OK){
268                 switch((unsigned char)r->state){
269                         case H_BODY: /* read the body*/
270                                 remaining=r->pos-p;
271                                 if (remaining>r->bytes_to_go) remaining=r->bytes_to_go;
272                                 r->bytes_to_go-=remaining;
273                                 p+=remaining;
274                                 if (r->bytes_to_go==0){
275                                         r->flags|=F_TCP_REQ_COMPLETE;
276                                         goto skip;
277                                 }
278                                 break;
279                                 
280                         case H_SKIP:
281                                 /* find lf, we are in this state if we are not interested
282                                  * in anything till end of line*/
283                                 p=q_memchr(p, '\n', r->pos-p);
284                                 if (p){
285                                         p++;
286                                         r->state=H_LF;
287                                 }else{
288                                         p=r->pos;
289                                 }
290                                 break;
291                                 
292                         case H_LF:
293                                 /* terminate on LF CR LF or LF LF */
294                                 switch (*p){
295                                         case '\r':
296                                                 r->state=H_LFCR;
297                                                 break;
298                                         case '\n':
299                                                 /* found LF LF */
300                                                 r->state=H_BODY;
301                                                 if (TCP_REQ_HAS_CLEN(r)){
302                                                         r->body=p+1;
303                                                         r->bytes_to_go=r->content_len;
304                                                         if (r->bytes_to_go==0){
305                                                                 r->flags|=F_TCP_REQ_COMPLETE;
306                                                                 p++;
307                                                                 goto skip;
308                                                         }
309                                                 }else{
310                                                         DBG("tcp_read_headers: ERROR: no clen, p=%X\n",
311                                                                         *p);
312                                                         r->error=TCP_REQ_BAD_LEN;
313                                                 }
314                                                 break;
315                                         content_len_beg_case;
316                                         default: 
317                                                 r->state=H_SKIP;
318                                 }
319                                 p++;
320                                 break;
321                         case H_LFCR:
322                                 if (*p=='\n'){
323                                         /* found LF CR LF */
324                                         r->state=H_BODY;
325                                         if (TCP_REQ_HAS_CLEN(r)){
326                                                 r->body=p+1;
327                                                 r->bytes_to_go=r->content_len;
328                                                 if (r->bytes_to_go==0){
329                                                         r->flags|=F_TCP_REQ_COMPLETE;
330                                                         p++;
331                                                         goto skip;
332                                                 }
333                                         }else{
334                                                 DBG("tcp_read_headers: ERROR: no clen, p=%X\n",
335                                                                         *p);
336                                                 r->error=TCP_REQ_BAD_LEN;
337                                         }
338                                 }else r->state=H_SKIP;
339                                 p++;
340                                 break;
341                                 
342                         case H_STARTWS:
343                                 switch (*p){
344                                         content_len_beg_case;
345                                         crlf_default_skip_case;
346                                 }
347                                 p++;
348                                 break;
349                         case H_SKIP_EMPTY:
350                                 switch (*p){
351                                         case '\n':
352                                                 break;
353                                         case '\r':
354                                                 if (cfg_get(tcp, tcp_cfg, crlf_ping)) {
355                                                         r->state=H_SKIP_EMPTY_CR_FOUND;
356                                                         r->start=p;
357                                                 }
358                                                 break;
359                                         case ' ':
360                                         case '\t':
361                                                 /* skip empty lines */
362                                                 break;
363                                         case 'C': 
364                                         case 'c': 
365                                                 r->state=H_CONT_LEN1; 
366                                                 r->start=p;
367                                                 break;
368                                         case 'l':
369                                         case 'L':
370                                                 /* short form for Content-Length */
371                                                 r->state=H_L_COLON;
372                                                 r->start=p;
373                                                 break;
374                                         default:
375 #ifdef USE_STUN
376                                                 /* STUN support can be switched off even if it's compiled */
377                                                 /* stun test */                                         
378                                                 if (stun_allow_stun && (unsigned char)*p == 0x00) {
379                                                         r->state=H_STUN_MSG;
380                                                 /* body will used as pointer to the last used byte */
381                                                         r->body=p;
382                                                         body_len = 0;
383                                                         DBG("stun msg detected\n");
384                                                 }else
385 #endif
386                                                 r->state=H_SKIP;
387                                                 r->start=p;
388                                 };
389                                 p++;
390                                 break;
391
392                         case H_SKIP_EMPTY_CR_FOUND:
393                                 if (*p=='\n'){
394                                         r->state=H_SKIP_EMPTY_CRLF_FOUND;
395                                         p++;
396                                 }else{
397                                         r->state=H_SKIP_EMPTY;
398                                 }
399                                 break;
400
401                         case H_SKIP_EMPTY_CRLF_FOUND:
402                                 if (*p=='\r'){
403                                         r->state = H_SKIP_EMPTY_CRLFCR_FOUND;
404                                         p++;
405                                 }else{
406                                         r->state = H_SKIP_EMPTY;
407                                 }
408                                 break;
409
410                         case H_SKIP_EMPTY_CRLFCR_FOUND:
411                                 if (*p=='\n'){
412                                         r->state = H_PING_CRLF;
413                                         r->flags |= F_TCP_REQ_HAS_CLEN |
414                                                         F_TCP_REQ_COMPLETE; /* hack to avoid error check */
415                                         p++;
416                                         goto skip;
417                                 }else{
418                                         r->state = H_SKIP_EMPTY;
419                                 }
420                                 break;
421 #ifdef USE_STUN
422                         case H_STUN_MSG:
423                                 if ((r->pos - r->body) >= sizeof(struct stun_hdr)) {
424                                         r->content_len = 0;
425                                         /* copy second short from buffer where should be body 
426                                          * length 
427                                          */
428                                         memcpy(&body_len, &r->start[sizeof(unsigned short)], 
429                                                 sizeof(unsigned short));
430                                         
431                                         body_len = ntohs(r->content_len);
432                                         
433                                         /* check if there is valid magic cookie */
434                                         memcpy(&mc, &r->start[sizeof(unsigned int)], 
435                                                 sizeof(unsigned int));
436                                         mc = ntohl(mc);
437                                         /* using has_content_len as a flag if there should be
438                                          * fingerprint or no
439                                          */
440                                         r->flags |= (mc == MAGIC_COOKIE) ? F_TCP_REQ_HAS_CLEN : 0;
441                                         
442                                         r->body += sizeof(struct stun_hdr);
443                                         p = r->body; 
444                                         
445                                         if (body_len > 0) {
446                                                 r->state = H_STUN_READ_BODY;
447                                         }
448                                         else {
449                                                 if (is_msg_complete(r) != 0) {
450                                                         goto skip;
451                                                 }
452                                                 else {
453                                                         /* set content_len to length of fingerprint */
454                                                         body_len = sizeof(struct stun_attr) + 
455                                                                            SHA_DIGEST_LENGTH;
456                                                 }
457                                         }
458                                 }
459                                 else {
460                                         p = r->pos; 
461                                 }
462                                 break;
463                                 
464                         case H_STUN_READ_BODY:
465                                 /* check if the whole body was read */
466                                 if ((r->pos - r->body) >= body_len) {
467                                         r->body += body_len;
468                                         p = r->body;
469                                         if (is_msg_complete(r) != 0) {
470                                                 goto skip;
471                                         }
472                                         else {
473                                                 /* set content_len to length of fingerprint */
474                                                 body_len = sizeof(struct stun_attr)+SHA_DIGEST_LENGTH;
475                                         }
476                                 }
477                                 else {
478                                         p = r->pos;
479                                 }
480                                 break;
481                                 
482                         case H_STUN_FP:
483                                 /* content_len contains length of fingerprint in this place! */
484                                 if ((r->pos - r->body) >= body_len) {
485                                         r->body += body_len;
486                                         p = r->body;
487                                         r->state = H_STUN_END;
488                                         r->flags |= F_TCP_REQ_COMPLETE |
489                                                 F_TCP_REQ_HAS_CLEN; /* hack to avoid error check */
490                                         goto skip;
491                                 }
492                                 else {
493                                         p = r->pos;
494                                 }
495                                 break;
496 #endif /* USE_STUN */
497                         change_state_case(H_CONT_LEN1,  'O', 'o', H_CONT_LEN2);
498                         change_state_case(H_CONT_LEN2,  'N', 'n', H_CONT_LEN3);
499                         change_state_case(H_CONT_LEN3,  'T', 't', H_CONT_LEN4);
500                         change_state_case(H_CONT_LEN4,  'E', 'e', H_CONT_LEN5);
501                         change_state_case(H_CONT_LEN5,  'N', 'n', H_CONT_LEN6);
502                         change_state_case(H_CONT_LEN6,  'T', 't', H_CONT_LEN7);
503                         change_state_case(H_CONT_LEN7,  '-', '_', H_CONT_LEN8);
504                         change_state_case(H_CONT_LEN8,  'L', 'l', H_CONT_LEN9);
505                         change_state_case(H_CONT_LEN9,  'E', 'e', H_CONT_LEN10);
506                         change_state_case(H_CONT_LEN10, 'N', 'n', H_CONT_LEN11);
507                         change_state_case(H_CONT_LEN11, 'G', 'g', H_CONT_LEN12);
508                         change_state_case(H_CONT_LEN12, 'T', 't', H_CONT_LEN13);
509                         change_state_case(H_CONT_LEN13, 'H', 'h', H_L_COLON);
510                         
511                         case H_L_COLON:
512                                 switch(*p){
513                                         case ' ':
514                                         case '\t':
515                                                 break; /* skip space */
516                                         case ':':
517                                                 r->state=H_CONT_LEN_BODY;
518                                                 break;
519                                         crlf_default_skip_case;
520                                 };
521                                 p++;
522                                 break;
523                         
524                         case  H_CONT_LEN_BODY:
525                                 switch(*p){
526                                         case ' ':
527                                         case '\t':
528                                                 break; /* eat space */
529                                         case '0':
530                                         case '1':
531                                         case '2':
532                                         case '3':
533                                         case '4':
534                                         case '5':
535                                         case '6':
536                                         case '7':
537                                         case '8':
538                                         case '9':
539                                                 r->state=H_CONT_LEN_BODY_PARSE;
540                                                 r->content_len=(*p-'0');
541                                                 break;
542                                         /*FIXME: content length on different lines ! */
543                                         crlf_default_skip_case;
544                                 }
545                                 p++;
546                                 break;
547                                 
548                         case H_CONT_LEN_BODY_PARSE:
549                                 switch(*p){
550                                         case '0':
551                                         case '1':
552                                         case '2':
553                                         case '3':
554                                         case '4':
555                                         case '5':
556                                         case '6':
557                                         case '7':
558                                         case '8':
559                                         case '9':
560                                                 r->content_len=r->content_len*10+(*p-'0');
561                                                 break;
562                                         case '\r':
563                                         case ' ':
564                                         case '\t': /* FIXME: check if line contains only WS */
565                                                 r->state=H_SKIP;
566                                                 r->flags|=F_TCP_REQ_HAS_CLEN;
567                                                 break;
568                                         case '\n':
569                                                 /* end of line, parse successful */
570                                                 r->state=H_LF;
571                                                 r->flags|=F_TCP_REQ_HAS_CLEN;
572                                                 break;
573                                         default:
574                                                 LOG(L_ERR, "ERROR: tcp_read_headers: bad "
575                                                                 "Content-Length header value, unexpected "
576                                                                 "char %c in state %d\n", *p, r->state);
577                                                 r->state=H_SKIP; /* try to find another?*/
578                                 }
579                                 p++;
580                                 break;
581                         
582                         default:
583                                 LOG(L_CRIT, "BUG: tcp_read_headers: unexpected state %d\n",
584                                                 r->state);
585                                 abort();
586                 }
587         }
588 skip:
589         r->parsed=p;
590         return bytes;
591 }
592
593
594
595 int tcp_read_req(struct tcp_connection* con, int* bytes_read, int* read_flags)
596 {
597         int bytes;
598         int total_bytes;
599         int resp;
600         long size;
601         struct tcp_req* req;
602         struct dest_info dst;
603         int s;
604         char c;
605         int ret;
606                 
607                 bytes=-1;
608                 total_bytes=0;
609                 resp=CONN_RELEASE;
610                 s=con->fd;
611                 req=&con->req;
612 #ifdef USE_TLS
613                 if (con->type==PROTO_TLS){
614                         if (tls_fix_read_conn(con)!=0){
615                                 resp=CONN_ERROR;
616                                 goto end_req;
617                         }
618                         if (unlikely(con->state!=S_CONN_OK && con->state!=S_CONN_ACCEPT))
619                                 goto end_req; /* not enough data */
620                 }
621 #endif
622
623 again:
624                 if (likely(req->error==TCP_REQ_OK)){
625                         bytes=tcp_read_headers(con, read_flags);
626 #ifdef EXTRA_DEBUG
627                                                 /* if timeout state=0; goto end__req; */
628                         DBG("read= %d bytes, parsed=%d, state=%d, error=%d\n",
629                                         bytes, (int)(req->parsed-req->start), req->state,
630                                         req->error );
631                         DBG("tcp_read_req: last char=0x%02X, parsed msg=\n%.*s\n",
632                                         *(req->parsed-1), (int)(req->parsed-req->start),
633                                         req->start);
634 #endif
635                         if (unlikely(bytes==-1)){
636                                 LOG(L_ERR, "ERROR: tcp_read_req: error reading \n");
637                                 resp=CONN_ERROR;
638                                 goto end_req;
639                         }
640                         total_bytes+=bytes;
641                         /* eof check:
642                          * is EOF if eof on fd and req.  not complete yet,
643                          * if req. is complete we might have a second unparsed
644                          * request after it, so postpone release_with_eof
645                          */
646                         if (unlikely((con->state==S_CONN_EOF) && 
647                                                 (! TCP_REQ_COMPLETE(req)))) {
648                                 DBG( "tcp_read_req: EOF\n");
649                                 resp=CONN_EOF;
650                                 goto end_req;
651                         }
652                 
653                 }
654                 if (unlikely(req->error!=TCP_REQ_OK)){
655                         LOG(L_ERR,"ERROR: tcp_read_req: bad request, state=%d, error=%d "
656                                           "buf:\n%.*s\nparsed:\n%.*s\n", req->state, req->error,
657                                           (int)(req->pos-req->buf), req->buf,
658                                           (int)(req->parsed-req->start), req->start);
659                         DBG("- received from: port %d\n", con->rcv.src_port);
660                         print_ip("- received from: ip ",&con->rcv.src_ip, "\n");
661                         resp=CONN_ERROR;
662                         goto end_req;
663                 }
664                 if (likely(TCP_REQ_COMPLETE(req))){
665 #ifdef EXTRA_DEBUG
666                         DBG("tcp_read_req: end of header part\n");
667                         DBG("- received from: port %d\n", con->rcv.src_port);
668                         print_ip("- received from: ip ", &con->rcv.src_ip, "\n");
669                         DBG("tcp_read_req: headers:\n%.*s.\n",
670                                         (int)(req->body-req->start), req->start);
671 #endif
672                         if (likely(TCP_REQ_HAS_CLEN(req))){
673                                 DBG("tcp_read_req: content-length= %d\n", req->content_len);
674 #ifdef EXTRA_DEBUG
675                                 DBG("tcp_read_req: body:\n%.*s\n", req->content_len,req->body);
676 #endif
677                         }else{
678                                 req->error=TCP_REQ_BAD_LEN;
679                                 LOG(L_ERR, "ERROR: tcp_read_req: content length not present or"
680                                                 " unparsable\n");
681                                 resp=CONN_ERROR;
682                                 goto end_req;
683                         }
684                         /* if we are here everything is nice and ok*/
685                         resp=CONN_RELEASE;
686 #ifdef EXTRA_DEBUG
687                         DBG("calling receive_msg(%p, %d, )\n",
688                                         req->start, (int)(req->parsed-req->start));
689 #endif
690                         /* rcv.bind_address should always be !=0 */
691                         bind_address=con->rcv.bind_address;
692                         /* just for debugging use sendipv4 as receiving socket  FIXME*/
693                         /*
694                         if (con->rcv.dst_ip.af==AF_INET6){
695                                 bind_address=sendipv6_tcp;
696                         }else{
697                                 bind_address=sendipv4_tcp;
698                         }
699                         */
700                         con->rcv.proto_reserved1=con->id; /* copy the id */
701                         c=*req->parsed; /* ugly hack: zero term the msg & save the
702                                                            previous char, req->parsed should be ok
703                                                            because we always alloc BUF_SIZE+1 */
704                         *req->parsed=0;
705
706                         if (req->state==H_PING_CRLF) {
707                                 init_dst_from_rcv(&dst, &con->rcv);
708
709                                 if (tcp_send(&dst, 0, CRLF, CRLF_LEN) < 0) {
710                                         LOG(L_ERR, "CRLF ping: tcp_send() failed\n");
711                                 }
712                                 ret = 0;
713                         }else
714 #ifdef USE_STUN
715                         if (unlikely(req->state==H_STUN_END)){
716                                 /* stun request */
717                                 ret = stun_process_msg(req->start, req->parsed-req->start,
718                                                                          &con->rcv);
719                         }else
720 #endif
721                                 ret = receive_msg(req->start, req->parsed-req->start,
722                                                                         &con->rcv);
723                                 
724                         if (unlikely(ret < 0)) {
725                                 *req->parsed=c;
726                                 resp=CONN_ERROR;
727                                 goto end_req;
728                         }
729                         *req->parsed=c;
730                         
731                         /* prepare for next request */
732                         size=req->pos-req->parsed;
733                         req->start=req->buf;
734                         req->body=0;
735                         req->error=TCP_REQ_OK;
736                         req->state=H_SKIP_EMPTY;
737                         req->flags=0;
738                         req->content_len=0;
739                         req->bytes_to_go=0;
740                         req->pos=req->buf+size;
741                         
742                         if (unlikely(size)){ 
743                                 memmove(req->buf, req->parsed, size);
744                                 req->parsed=req->buf; /* fix req->parsed after using it */
745 #ifdef EXTRA_DEBUG
746                                 DBG("tcp_read_req: preparing for new request, kept %ld"
747                                                 " bytes\n", size);
748 #endif
749                                 /*if we still have some unparsed bytes, try to parse them too*/
750                                 goto again;
751                         } else if (unlikely(con->state==S_CONN_EOF)){
752                                 DBG( "tcp_read_req: EOF after reading complete request\n");
753                                 resp=CONN_EOF;
754                         }
755                         req->parsed=req->buf; /* fix req->parsed */
756                 }
757                 
758                 
759         end_req:
760                 if (likely(bytes_read)) *bytes_read=total_bytes;
761                 return resp;
762 }
763
764
765
766 void release_tcpconn(struct tcp_connection* c, long state, int unix_sock)
767 {
768         long response[2];
769         
770                 DBG( "releasing con %p, state %ld, fd=%d, id=%d\n",
771                                 c, state, c->fd, c->id);
772                 DBG(" extra_data %p\n", c->extra_data);
773                 /* release req & signal the parent */
774                 c->reader_pid=0; /* reset it */
775                 if (c->fd!=-1){
776                         close(c->fd);
777                         c->fd=-1;
778                 }
779                 /* errno==EINTR, EWOULDBLOCK a.s.o todo */
780                 response[0]=(long)c;
781                 response[1]=state;
782                 
783                 if (tsend_stream(unix_sock, (char*)response, sizeof(response), -1)<=0)
784                         LOG(L_ERR, "ERROR: release_tcpconn: tsend_stream failed\n");
785 }
786
787
788
789 static ticks_t tcpconn_read_timeout(ticks_t t, struct timer_ln* tl, void* data)
790 {
791         struct tcp_connection *c;
792         
793         c=(struct tcp_connection*)data; 
794         /* or (struct tcp...*)(tl-offset(c->timer)) */
795         
796         if (likely(!(c->state<0) && TICKS_LT(t, c->timeout))){
797                 /* timeout extended, exit */
798                 return (ticks_t)(c->timeout - t);
799         }
800         /* if conn->state is ERROR or BAD => force timeout too */
801         if (unlikely(io_watch_del(&io_w, c->fd, -1, IO_FD_CLOSING)<0)){
802                 LOG(L_ERR, "ERROR: tcpconn_read_timeout: io_watch_del failed for %p"
803                                         " id %d fd %d, state %d, flags %x, main fd %d\n",
804                                         c, c->id, c->fd, c->state, c->flags, c->s);
805         }
806         tcpconn_listrm(tcp_conn_lst, c, c_next, c_prev);
807         release_tcpconn(c, (c->state<0)?CONN_ERROR:CONN_RELEASE, tcpmain_sock);
808         
809         return 0;
810 }
811
812
813
814 /* handle io routine, based on the fd_map type
815  * (it will be called from io_wait_loop* )
816  * params:  fm  - pointer to a fd hash entry
817  *          idx - index in the fd_array (or -1 if not known)
818  * return: -1 on error, or when we are not interested any more on reads
819  *            from this fd (e.g.: we are closing it )
820  *          0 on EAGAIN or when by some other way it is known that no more 
821  *            io events are queued on the fd (the receive buffer is empty).
822  *            Usefull to detect when there are no more io events queued for
823  *            sigio_rt, epoll_et, kqueue.
824  *         >0 on successfull read from the fd (when there might be more io
825  *            queued -- the receive buffer might still be non-empty)
826  */
827 inline static int handle_io(struct fd_map* fm, short events, int idx)
828 {       
829         int ret;
830         int n;
831         int read_flags;
832         struct tcp_connection* con;
833         int s;
834         long resp;
835         ticks_t t;
836         
837         /* update the local config */
838         cfg_update();
839         
840         switch(fm->type){
841                 case F_TCPMAIN:
842 again:
843                         ret=n=receive_fd(fm->fd, &con, sizeof(con), &s, 0);
844                         DBG("received n=%d con=%p, fd=%d\n", n, con, s);
845                         if (unlikely(n<0)){
846                                 if (errno == EWOULDBLOCK || errno == EAGAIN){
847                                         ret=0;
848                                         break;
849                                 }else if (errno == EINTR) goto again;
850                                 else{
851                                         LOG(L_CRIT,"BUG: tcp_receive: handle_io: read_fd: %s \n",
852                                                         strerror(errno));
853                                                 abort(); /* big error*/
854                                 }
855                         }
856                         if (unlikely(n==0)){
857                                 LOG(L_ERR, "WARNING: tcp_receive: handle_io: 0 bytes read\n");
858                                 goto error;
859                         }
860                         if (unlikely(con==0)){
861                                         LOG(L_CRIT, "BUG: tcp_receive: handle_io null pointer\n");
862                                         goto error;
863                         }
864                         con->fd=s;
865                         if (unlikely(s==-1)) {
866                                 LOG(L_ERR, "ERROR: tcp_receive: handle_io: read_fd:"
867                                                                         "no fd read\n");
868                                 goto con_error;
869                         }
870                         con->reader_pid=my_pid();
871                         if (unlikely(con==tcp_conn_lst)){
872                                 LOG(L_CRIT, "BUG: tcp_receive: handle_io: duplicate"
873                                                         " connection received: %p, id %d, fd %d, refcnt %d"
874                                                         " state %d (n=%d)\n", con, con->id, con->fd,
875                                                         atomic_get(&con->refcnt), con->state, n);
876                                 goto con_error;
877                                 break; /* try to recover */
878                         }
879                         if (unlikely(con->state==S_CONN_BAD)){
880                                 LOG(L_WARN, "WARNING: tcp_receive: handle_io: received an"
881                                                         " already bad connection: %p id %d refcnt %d\n",
882                                                         con, con->id, atomic_get(&con->refcnt));
883                                 goto con_error;
884                         }
885                         /* if we received the fd there is most likely data waiting to
886                          * be read => process it first to avoid extra sys calls */
887                         read_flags=((con->flags & (F_CONN_EOF_SEEN|F_CONN_FORCE_EOF)) && 
888                                                 !(con->flags & F_CONN_OOB_DATA))? RD_CONN_FORCE_EOF
889                                                 :0;
890                         resp=tcp_read_req(con, &n, &read_flags);
891                         if (unlikely(resp<0)){
892                                 /* some error occured, but on the new fd, not on the tcp
893                                  * main fd, so keep the ret value */
894                                 if (unlikely(resp!=CONN_EOF))
895                                         con->state=S_CONN_BAD;
896                                 release_tcpconn(con, resp, tcpmain_sock);
897                                 break;
898                         }
899                         
900                         /* must be before io_watch_add, io_watch_add might catch some
901                          * already existing events => might call handle_io and
902                          * handle_io might decide to del. the new connection =>
903                          * must be in the list */
904                         tcpconn_listadd(tcp_conn_lst, con, c_next, c_prev);
905                         t=get_ticks_raw();
906                         con->timeout=t+S_TO_TICKS(TCP_CHILD_TIMEOUT);
907                         /* re-activate the timer */
908                         con->timer.f=tcpconn_read_timeout;
909                         local_timer_reinit(&con->timer);
910                         local_timer_add(&tcp_reader_ltimer, &con->timer,
911                                                                 S_TO_TICKS(TCP_CHILD_TIMEOUT), t);
912                         if (unlikely(io_watch_add(&io_w, s, POLLIN, F_TCPCONN, con)<0)){
913                                 LOG(L_CRIT, "ERROR: tcpconn_receive: handle_io: io_watch_add "
914                                                         "failed for %p id %d fd %d, state %d, flags %x,"
915                                                         " main fd %d, refcnt %d\n",
916                                                         con, con->id, con->fd, con->state, con->flags,
917                                                         con->s, atomic_get(&con->refcnt));
918                                 tcpconn_listrm(tcp_conn_lst, con, c_next, c_prev);
919                                 local_timer_del(&tcp_reader_ltimer, &con->timer);
920                                 goto con_error;
921                         }
922                         break;
923                 case F_TCPCONN:
924                         con=(struct tcp_connection*)fm->data;
925                         if (unlikely(con->state==S_CONN_BAD)){
926                                 resp=CONN_ERROR;
927                                 LOG(L_WARN, "WARNING: tcp_receive: handle_io: F_TCPCONN"
928                                                         " connection marked as bad: %p id %d refcnt %d\n",
929                                                         con, con->id, atomic_get(&con->refcnt));
930                                 goto read_error;
931                         }
932 #ifdef POLLRDHUP
933                         read_flags=(((events & POLLRDHUP) | 
934                                                         (con->flags & (F_CONN_EOF_SEEN|F_CONN_FORCE_EOF)))
935                                                 && !(events & POLLPRI))? RD_CONN_FORCE_EOF: 0;
936 #else /* POLLRDHUP */
937                         read_flags=0;
938 #endif /* POLLRDHUP */
939                         resp=tcp_read_req(con, &ret, &read_flags);
940                         if (unlikely(resp<0)){
941 read_error:
942                                 ret=-1; /* some error occured */
943                                 if (unlikely(io_watch_del(&io_w, con->fd, idx,
944                                                                                         IO_FD_CLOSING) < 0)){
945                                         LOG(L_CRIT, "ERROR: tcpconn_receive: handle_io: "
946                                                         "io_watch_del failed for %p id %d fd %d,"
947                                                         " state %d, flags %x, main fd %d, refcnt %d\n",
948                                                         con, con->id, con->fd, con->state,
949                                                         con->flags, con->s, atomic_get(&con->refcnt));
950                                 }
951                                 tcpconn_listrm(tcp_conn_lst, con, c_next, c_prev);
952                                 local_timer_del(&tcp_reader_ltimer, &con->timer);
953                                 if (unlikely(resp!=CONN_EOF))
954                                         con->state=S_CONN_BAD;
955                                 release_tcpconn(con, resp, tcpmain_sock);
956                         }else{
957                                 /* update timeout */
958                                 con->timeout=get_ticks_raw()+S_TO_TICKS(TCP_CHILD_TIMEOUT);
959                                 /* ret= 0 (read the whole socket buffer) if short read & 
960                                  *  !POLLPRI,  bytes read otherwise */
961                                 ret&=(((read_flags & RD_CONN_SHORT_READ) && 
962                                                 !(events & POLLPRI)) - 1);
963                         }
964                         break;
965                 case F_NONE:
966                         LOG(L_CRIT, "BUG: handle_io: empty fd map %p (%d): "
967                                                 "{%d, %d, %p}\n", fm, (int)(fm-io_w.fd_hash),
968                                                 fm->fd, fm->type, fm->data);
969                         goto error;
970                 default:
971                         LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type); 
972                         goto error;
973         }
974         
975         return ret;
976 con_error:
977         con->state=S_CONN_BAD;
978         release_tcpconn(con, CONN_ERROR, tcpmain_sock);
979         return ret;
980 error:
981         return -1;
982 }
983
984
985
986 inline static void tcp_reader_timer_run()
987 {
988         ticks_t ticks;
989         
990         ticks=get_ticks_raw();
991         if (unlikely((ticks-tcp_reader_prev_ticks)<TCPCONN_TIMEOUT_MIN_RUN))
992                 return;
993         tcp_reader_prev_ticks=ticks;
994         local_timer_run(&tcp_reader_ltimer, ticks);
995 }
996
997
998
999 void tcp_receive_loop(int unix_sock)
1000 {
1001         
1002         /* init */
1003         tcpmain_sock=unix_sock; /* init com. socket */
1004         if (init_io_wait(&io_w, get_max_open_fds(), tcp_poll_method)<0)
1005                 goto error;
1006         tcp_reader_prev_ticks=get_ticks_raw();
1007         if (init_local_timer(&tcp_reader_ltimer, get_ticks_raw())!=0)
1008                 goto error;
1009         /* add the unix socket */
1010         if (io_watch_add(&io_w, tcpmain_sock, POLLIN,  F_TCPMAIN, 0)<0){
1011                 LOG(L_CRIT, "ERROR: tcp_receive_loop: init: failed to add socket "
1012                                                         " to the fd list\n");
1013                 goto error;
1014         }
1015
1016         /* initialize the config framework */
1017         if (cfg_child_init()) goto error;
1018
1019         /* main loop */
1020         switch(io_w.poll_method){
1021                 case POLL_POLL:
1022                                 while(1){
1023                                         io_wait_loop_poll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1024                                         tcp_reader_timer_run();
1025                                 }
1026                                 break;
1027 #ifdef HAVE_SELECT
1028                 case POLL_SELECT:
1029                         while(1){
1030                                 io_wait_loop_select(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1031                                 tcp_reader_timer_run();
1032                         }
1033                         break;
1034 #endif
1035 #ifdef HAVE_SIGIO_RT
1036                 case POLL_SIGIO_RT:
1037                         while(1){
1038                                 io_wait_loop_sigio_rt(&io_w, TCP_CHILD_SELECT_TIMEOUT);
1039                                 tcp_reader_timer_run();
1040                         }
1041                         break;
1042 #endif
1043 #ifdef HAVE_EPOLL
1044                 case POLL_EPOLL_LT:
1045                         while(1){
1046                                 io_wait_loop_epoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1047                                 tcp_reader_timer_run();
1048                         }
1049                         break;
1050                 case POLL_EPOLL_ET:
1051                         while(1){
1052                                 io_wait_loop_epoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 1);
1053                                 tcp_reader_timer_run();
1054                         }
1055                         break;
1056 #endif
1057 #ifdef HAVE_KQUEUE
1058                 case POLL_KQUEUE:
1059                         while(1){
1060                                 io_wait_loop_kqueue(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1061                                 tcp_reader_timer_run();
1062                         }
1063                         break;
1064 #endif
1065 #ifdef HAVE_DEVPOLL
1066                 case POLL_DEVPOLL:
1067                         while(1){
1068                                 io_wait_loop_devpoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1069                                 tcp_reader_timer_run();
1070                         }
1071                         break;
1072 #endif
1073                 default:
1074                         LOG(L_CRIT, "BUG: tcp_receive_loop: no support for poll method "
1075                                         " %s (%d)\n", 
1076                                         poll_method_name(io_w.poll_method), io_w.poll_method);
1077                         goto error;
1078         }
1079 error:
1080         destroy_io_wait(&io_w);
1081         LOG(L_CRIT, "ERROR: tcp_receive_loop: exiting...");
1082         exit(-1);
1083 }
1084
1085
1086
1087 #ifdef USE_STUN
1088 int is_msg_complete(struct tcp_req* r)
1089 {
1090         if (TCP_REQ_HAS_CLEN(r)) {
1091                 r->state = H_STUN_FP;
1092                 return 0;
1093         }
1094         else {
1095                 /* STUN message is complete */
1096                 r->state = H_STUN_END;
1097                 r->flags |= F_TCP_REQ_COMPLETE |
1098                                         F_TCP_REQ_HAS_CLEN; /* hack to avoid error check */
1099                 return 1;
1100         }
1101 }
1102 #endif
1103
1104 #endif /* USE_TCP */