eeafb6fcd907aaa5c388d6bdc7d809449df01148
[sip-router] / tcp_read.c
1 /*
2  * $Id$
3  *
4  * Copyright (C) 2001-2003 FhG Fokus
5  *
6  * This file is part of ser, a free SIP server.
7  *
8  * ser is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version
12  *
13  * For a license to use the ser software under conditions
14  * other than those described here, or to purchase support for this
15  * software, please contact iptel.org by e-mail at the following addresses:
16  *    info@iptel.org
17  *
18  * ser is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26  */
27 /*
28  * History:
29  * --------
30  * 2002-12-??  created by andrei.
31  * 2003-02-10  zero term before calling receive_msg & undo afterward (andrei)
32  * 2003-05-13  l: (short form of Content-Length) is now recognized (andrei)
33  * 2003-07-01  tcp_read & friends now take a single tcp_connection 
34  *              parameter & they set c->state to S_CONN_EOF on eof (andrei)
35  * 2003-07-04  fixed tcp EOF handling (possible infinite loop) (andrei)
36  * 2005-07-05  migrated to the new io_wait code (andrei)
37  * 2006-02-03  use tsend_stream instead of send_all (andrei)
38  * 2006-10-13  added STUN support - state machine for TCP (vlada)
39  * 2007-02-20  fixed timeout calc. bug (andrei)
40  * 2007-11-26  improved tcp timers: switched to local_timer (andrei)
41  * 2008-02-04  optimizations: handle POLLRDHUP (if supported), detect short
42  *              reads (sock. buffer empty) (andrei)
43  * 2009-02-26  direct blacklist support (andrei)
44  */
45
46 #ifdef USE_TCP
47
48 #include <stdio.h>
49 #include <errno.h>
50 #include <string.h>
51
52
53 #include <sys/time.h>
54 #include <sys/types.h>
55 #include <sys/select.h>
56 #include <sys/socket.h>
57
58 #include <unistd.h>
59 #include <stdlib.h> /* for abort() */
60
61
62 #include "dprint.h"
63 #include "tcp_conn.h"
64 #include "pass_fd.h"
65 #include "globals.h"
66 #include "receive.h"
67 #include "timer.h"
68 #include "local_timer.h"
69 #include "ut.h"
70 #include "pt.h"
71 #include "cfg/cfg_struct.h"
72 #ifdef CORE_TLS
73 #include "tls/tls_server.h"
74 #else
75 #include "tls_hooks.h"
76 #endif /* CORE_TLS */
77 #ifdef USE_DST_BLACKLIST
78 #include "dst_blacklist.h"
79 #endif /* USE_DST_BLACKLIST */
80
81 #define HANDLE_IO_INLINE
82 #include "io_wait.h"
83 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
84 #include "tsend.h"
85 #include "forward.h"
86
87 #ifdef USE_STUN
88 #include "ser_stun.h"
89
90 int is_msg_complete(struct tcp_req* r);
91
92 #endif /* USE_STUN */
93
#define TCPCONN_TIMEOUT_MIN_RUN  1 /* run the timers each new tick */

/* bits used in the value/result "flags" argument of tcp_read():
 *  RD_CONN_SHORT_READ - set by tcp_read() when read() returned fewer bytes
 *                       than the free space in the request buffer
 *  RD_CONN_EOF        - set by tcp_read() when it marks the connection as EOF
 *  RD_CONN_FORCE_EOF  - set by the caller: tcp_read() treats the first short
 *                       read as EOF */
#define RD_CONN_SHORT_READ      1
#define RD_CONN_EOF             2
#define RD_CONN_FORCE_EOF       65536

/* types used in io_wait* */
enum fd_types { F_NONE, F_TCPMAIN, F_TCPCONN };

/* list of tcp connections handled by this process */
static struct tcp_connection* tcp_conn_lst=0;
static io_wait_h io_w; /* io_wait handler*/
/* NOTE(review): presumably the socket used to talk to the tcp main process
 * (-1 while not set) - confirm against the code that assigns it */
static int tcpmain_sock=-1;

/* NOTE(review): local timer and last seen tick value, presumably used to run
 * the per-connection timeouts of this reader - confirm against their users */
static struct local_timer tcp_reader_ltimer;
static ticks_t tcp_reader_prev_ticks;


110
111
112 /* reads next available bytes
113  *   c- tcp connection used for reading, tcp_read changes also c->state on
114  *      EOF and c->req.error on read error
115  *   * flags - value/result - used to signal a seen or "forced" EOF on the 
116  *     connection (when it is known that no more data will come after the 
117  *     current socket buffer is emptied )=> return/signal EOF on the first 
118  *     short read (=> don't use it on POLLPRI, as OOB data will cause short
119  *      reads even if there are still remaining bytes in the socket buffer)
120  * return number of bytes read, 0 on EOF or -1 on error,
121  * on EOF it also sets c->state to S_CONN_EOF.
122  * (to distinguish from reads that would block which could return 0)
123  * RD_CONN_SHORT_READ is also set in *flags for short reads.
124  * sets also r->error */
125 int tcp_read(struct tcp_connection *c, int* flags)
126 {
127         int bytes_free, bytes_read;
128         struct tcp_req *r;
129         int fd;
130
131         r=&c->req;
132         fd=c->fd;
133         bytes_free=r->b_size- (int)(r->pos - r->buf);
134         
135         if (bytes_free==0){
136                 LOG(L_ERR, "ERROR: tcp_read: buffer overrun, dropping\n");
137                 r->error=TCP_REQ_OVERRUN;
138                 return -1;
139         }
140 again:
141         bytes_read=read(fd, r->pos, bytes_free);
142
143         if (likely(bytes_read!=bytes_free)){
144                 if(unlikely(bytes_read==-1)){
145                         if (errno == EWOULDBLOCK || errno == EAGAIN){
146                                 bytes_read=0; /* nothing has been read */
147                         }else if (errno == EINTR) goto again;
148                         else{
149 #ifdef USE_DST_BLACKLIST
150                                 if (cfg_get(core, core_cfg, use_dst_blacklist))
151                                         switch(errno){
152                                                 case ECONNRESET:
153                                                 case ETIMEDOUT:
154                                                         dst_blacklist_su((c->state==S_CONN_CONNECT)?
155                                                                                                         BLST_ERR_CONNECT:
156                                                                                                         BLST_ERR_SEND,
157                                                                                                         c->rcv.proto,
158                                                                                                         &c->rcv.src_su, 0);
159                                                         break;
160                                         }
161 #endif /* USE_DST_BLACKLIST */
162                                 LOG(L_ERR, "ERROR: tcp_read: error reading: %s (%d)\n",
163                                                         strerror(errno), errno);
164                                 r->error=TCP_READ_ERROR;
165                                 return -1;
166                         }
167                 }else if (unlikely((bytes_read==0) || 
168                                         (*flags & RD_CONN_FORCE_EOF))){
169                         c->state=S_CONN_EOF;
170                         *flags|=RD_CONN_EOF;
171                         DBG("tcp_read: EOF on %p, FD %d\n", c, fd);
172                 }else{
173                         if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT))
174                                 c->state=S_CONN_OK;
175                 }
176                 /* short read */
177                 *flags|=RD_CONN_SHORT_READ;
178         }else{ /* else normal full read */
179                 if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT))
180                         c->state=S_CONN_OK;
181         }
182 #ifdef EXTRA_DEBUG
183         DBG("tcp_read: read %d bytes:\n%.*s\n", bytes_read, bytes_read, r->pos);
184 #endif
185         r->pos+=bytes_read;
186         return bytes_read;
187 }
188
189
190
191 /* reads all headers (until double crlf), & parses the content-length header
192  * (WARNING: inefficient, tries to reuse receive_msg but will go through
193  * the headers twice [once here looking for Content-Length and for the end
194  * of the headers and once in receive_msg]; a more speed efficient version will
195  * result in either major code duplication or major changes to the receive code)
196  * returns number of bytes read & sets r->state & r->body
197  * when either r->body!=0 or r->state==H_BODY =>
198  * all headers have been read. It should be called in a while loop.
199  * returns < 0 if error or 0 if EOF */
200 int tcp_read_headers(struct tcp_connection *c, int* read_flags)
201 {
202         int bytes, remaining;
203         char *p;
204         struct tcp_req* r;
205         
206 #ifdef USE_STUN
207         unsigned int mc;   /* magic cookie */
208         unsigned short body_len;
209 #endif
210         
211         #define crlf_default_skip_case \
212                                         case '\n': \
213                                                 r->state=H_LF; \
214                                                 break; \
215                                         default: \
216                                                 r->state=H_SKIP
217         
218         #define content_len_beg_case \
219                                         case ' ': \
220                                         case '\t': \
221                                                 if (!r->has_content_len) r->state=H_STARTWS; \
222                                                 else r->state=H_SKIP; \
223                                                         /* not interested if we already found one */ \
224                                                 break; \
225                                         case 'C': \
226                                         case 'c': \
227                                                 if(!r->has_content_len) r->state=H_CONT_LEN1; \
228                                                 else r->state=H_SKIP; \
229                                                 break; \
230                                         case 'l': \
231                                         case 'L': \
232                                                 /* short form for Content-Length */ \
233                                                 if (!r->has_content_len) r->state=H_L_COLON; \
234                                                 else r->state=H_SKIP; \
235                                                 break
236                                                 
237         #define change_state(upper, lower, newstate)\
238                                         switch(*p){ \
239                                                 case upper: \
240                                                 case lower: \
241                                                         r->state=(newstate); break; \
242                                                 crlf_default_skip_case; \
243                                         }
244         
245         #define change_state_case(state0, upper, lower, newstate)\
246                                         case state0: \
247                                                           change_state(upper, lower, newstate); \
248                                                           p++; \
249                                                           break
250
251
252         r=&c->req;
253         /* if we still have some unparsed part, parse it first, don't do the read*/
254         if (unlikely(r->parsed<r->pos)){
255                 bytes=0;
256         }else{
257 #ifdef USE_TLS
258                 if (unlikely(c->type==PROTO_TLS))
259                         bytes=tls_read(c); /* FIXME: read_flags support */
260                 else
261 #endif
262                         bytes=tcp_read(c, read_flags);
263                 if (bytes<=0) return bytes;
264         }
265         p=r->parsed;
266         
267         while(p<r->pos && r->error==TCP_REQ_OK){
268                 switch((unsigned char)r->state){
269                         case H_BODY: /* read the body*/
270                                 remaining=r->pos-p;
271                                 if (remaining>r->bytes_to_go) remaining=r->bytes_to_go;
272                                 r->bytes_to_go-=remaining;
273                                 p+=remaining;
274                                 if (r->bytes_to_go==0){
275                                         r->complete=1;
276                                         goto skip;
277                                 }
278                                 break;
279                                 
280                         case H_SKIP:
281                                 /* find lf, we are in this state if we are not interested
282                                  * in anything till end of line*/
283                                 p=q_memchr(p, '\n', r->pos-p);
284                                 if (p){
285                                         p++;
286                                         r->state=H_LF;
287                                 }else{
288                                         p=r->pos;
289                                 }
290                                 break;
291                                 
292                         case H_LF:
293                                 /* terminate on LF CR LF or LF LF */
294                                 switch (*p){
295                                         case '\r':
296                                                 r->state=H_LFCR;
297                                                 break;
298                                         case '\n':
299                                                 /* found LF LF */
300                                                 r->state=H_BODY;
301                                                 if (r->has_content_len){
302                                                         r->body=p+1;
303                                                         r->bytes_to_go=r->content_len;
304                                                         if (r->bytes_to_go==0){
305                                                                 r->complete=1;
306                                                                 p++;
307                                                                 goto skip;
308                                                         }
309                                                 }else{
310                                                         DBG("tcp_read_headers: ERROR: no clen, p=%X\n",
311                                                                         *p);
312                                                         r->error=TCP_REQ_BAD_LEN;
313                                                 }
314                                                 break;
315                                         content_len_beg_case;
316                                         default: 
317                                                 r->state=H_SKIP;
318                                 }
319                                 p++;
320                                 break;
321                         case H_LFCR:
322                                 if (*p=='\n'){
323                                         /* found LF CR LF */
324                                         r->state=H_BODY;
325                                         if (r->has_content_len){
326                                                 r->body=p+1;
327                                                 r->bytes_to_go=r->content_len;
328                                                 if (r->bytes_to_go==0){
329                                                         r->complete=1;
330                                                         p++;
331                                                         goto skip;
332                                                 }
333                                         }else{
334                                                 DBG("tcp_read_headers: ERROR: no clen, p=%X\n",
335                                                                         *p);
336                                                 r->error=TCP_REQ_BAD_LEN;
337                                         }
338                                 }else r->state=H_SKIP;
339                                 p++;
340                                 break;
341                                 
342                         case H_STARTWS:
343                                 switch (*p){
344                                         content_len_beg_case;
345                                         crlf_default_skip_case;
346                                 }
347                                 p++;
348                                 break;
349                         case H_SKIP_EMPTY:
350                                 switch (*p){
351                                         case '\n':
352                                                 break;
353                                         case '\r':
354                                                 if (cfg_get(tcp, tcp_cfg, crlf_ping)) {
355                                                         r->state=H_SKIP_EMPTY_CR_FOUND;
356                                                         r->start=p;
357                                                 }
358                                                 break;
359                                         case ' ':
360                                         case '\t':
361                                                 /* skip empty lines */
362                                                 break;
363                                         case 'C': 
364                                         case 'c': 
365                                                 r->state=H_CONT_LEN1; 
366                                                 r->start=p;
367                                                 break;
368                                         case 'l':
369                                         case 'L':
370                                                 /* short form for Content-Length */
371                                                 r->state=H_L_COLON;
372                                                 r->start=p;
373                                                 break;
374                                         default:
375 #ifdef USE_STUN
376                                                 /* STUN support can be switched off even if it's compiled */
377                                                 /* stun test */                                         
378                                                 if (stun_allow_stun && (unsigned char)*p == 0x00) {
379                                                         r->state=H_STUN_MSG;
380                                                 /* body will used as pointer to the last used byte */
381                                                         r->body=p;
382                                                         body_len = 0;
383                                                         DBG("stun msg detected\n");
384                                                 }else
385 #endif
386                                                 r->state=H_SKIP;
387                                                 r->start=p;
388                                 };
389                                 p++;
390                                 break;
391
392                         case H_SKIP_EMPTY_CR_FOUND:
393                                 if (*p=='\n'){
394                                         r->state=H_SKIP_EMPTY_CRLF_FOUND;
395                                         p++;
396                                 }else{
397                                         r->state=H_SKIP_EMPTY;
398                                 }
399                                 break;
400
401                         case H_SKIP_EMPTY_CRLF_FOUND:
402                                 if (*p=='\r'){
403                                         r->state = H_SKIP_EMPTY_CRLFCR_FOUND;
404                                         p++;
405                                 }else{
406                                         r->state = H_SKIP_EMPTY;
407                                 }
408                                 break;
409
410                         case H_SKIP_EMPTY_CRLFCR_FOUND:
411                                 if (*p=='\n'){
412                                         r->state = H_PING_CRLF;
413                                         r->complete = 1;
414                                         r->has_content_len = 1; /* hack to avoid error check */
415                                         p++;
416                                         goto skip;
417                                 }else{
418                                         r->state = H_SKIP_EMPTY;
419                                 }
420                                 break;
421 #ifdef USE_STUN
422                         case H_STUN_MSG:
423                                 if ((r->pos - r->body) >= sizeof(struct stun_hdr)) {
424                                         r->content_len = 0;
425                                         /* copy second short from buffer where should be body 
426                                          * length 
427                                          */
428                                         memcpy(&body_len, &r->start[sizeof(unsigned short)], 
429                                                 sizeof(unsigned short));
430                                         
431                                         body_len = ntohs(r->content_len);
432                                         
433                                         /* check if there is valid magic cookie */
434                                         memcpy(&mc, &r->start[sizeof(unsigned int)], 
435                                                 sizeof(unsigned int));
436                                         mc = ntohl(mc);
437                                         /* using has_content_len as a flag if there should be
438                                          * fingerprint or no
439                                          */
440                                         r->has_content_len = (mc == MAGIC_COOKIE) ? 1 : 0;
441                                         
442                                         r->body += sizeof(struct stun_hdr);
443                                         p = r->body; 
444                                         
445                                         if (body_len > 0) {
446                                                 r->state = H_STUN_READ_BODY;
447                                         }
448                                         else {
449                                                 if (is_msg_complete(r) != 0) {
450                                                         goto skip;
451                                                 }
452                                                 else {
453                                                         /* set content_len to length of fingerprint */
454                                                         body_len = sizeof(struct stun_attr) + 
455                                                                            SHA_DIGEST_LENGTH;
456                                                 }
457                                         }
458                                 }
459                                 else {
460                                         p = r->pos; 
461                                 }
462                                 break;
463                                 
464                         case H_STUN_READ_BODY:
465                                 /* check if the whole body was read */
466                                 if ((r->pos - r->body) >= body_len) {
467                                         r->body += body_len;
468                                         p = r->body;
469                                         if (is_msg_complete(r) != 0) {
470                                                 goto skip;
471                                         }
472                                         else {
473                                                 /* set content_len to length of fingerprint */
474                                                 body_len = sizeof(struct stun_attr)+SHA_DIGEST_LENGTH;
475                                         }
476                                 }
477                                 else {
478                                         p = r->pos;
479                                 }
480                                 break;
481                                 
482                         case H_STUN_FP:
483                                 /* content_len contains length of fingerprint in this place! */
484                                 if ((r->pos - r->body) >= body_len) {
485                                         r->body += body_len;
486                                         p = r->body;
487                                         r->state = H_STUN_END;
488                                         r->complete = 1;
489                                         r->has_content_len = 1; /* hack to avoid error check */
490                                         goto skip;
491                                 }
492                                 else {
493                                         p = r->pos;
494                                 }
495                                 break;
496 #endif /* USE_STUN */
497                         change_state_case(H_CONT_LEN1,  'O', 'o', H_CONT_LEN2);
498                         change_state_case(H_CONT_LEN2,  'N', 'n', H_CONT_LEN3);
499                         change_state_case(H_CONT_LEN3,  'T', 't', H_CONT_LEN4);
500                         change_state_case(H_CONT_LEN4,  'E', 'e', H_CONT_LEN5);
501                         change_state_case(H_CONT_LEN5,  'N', 'n', H_CONT_LEN6);
502                         change_state_case(H_CONT_LEN6,  'T', 't', H_CONT_LEN7);
503                         change_state_case(H_CONT_LEN7,  '-', '_', H_CONT_LEN8);
504                         change_state_case(H_CONT_LEN8,  'L', 'l', H_CONT_LEN9);
505                         change_state_case(H_CONT_LEN9,  'E', 'e', H_CONT_LEN10);
506                         change_state_case(H_CONT_LEN10, 'N', 'n', H_CONT_LEN11);
507                         change_state_case(H_CONT_LEN11, 'G', 'g', H_CONT_LEN12);
508                         change_state_case(H_CONT_LEN12, 'T', 't', H_CONT_LEN13);
509                         change_state_case(H_CONT_LEN13, 'H', 'h', H_L_COLON);
510                         
511                         case H_L_COLON:
512                                 switch(*p){
513                                         case ' ':
514                                         case '\t':
515                                                 break; /* skip space */
516                                         case ':':
517                                                 r->state=H_CONT_LEN_BODY;
518                                                 break;
519                                         crlf_default_skip_case;
520                                 };
521                                 p++;
522                                 break;
523                         
524                         case  H_CONT_LEN_BODY:
525                                 switch(*p){
526                                         case ' ':
527                                         case '\t':
528                                                 break; /* eat space */
529                                         case '0':
530                                         case '1':
531                                         case '2':
532                                         case '3':
533                                         case '4':
534                                         case '5':
535                                         case '6':
536                                         case '7':
537                                         case '8':
538                                         case '9':
539                                                 r->state=H_CONT_LEN_BODY_PARSE;
540                                                 r->content_len=(*p-'0');
541                                                 break;
542                                         /*FIXME: content length on different lines ! */
543                                         crlf_default_skip_case;
544                                 }
545                                 p++;
546                                 break;
547                                 
548                         case H_CONT_LEN_BODY_PARSE:
549                                 switch(*p){
550                                         case '0':
551                                         case '1':
552                                         case '2':
553                                         case '3':
554                                         case '4':
555                                         case '5':
556                                         case '6':
557                                         case '7':
558                                         case '8':
559                                         case '9':
560                                                 r->content_len=r->content_len*10+(*p-'0');
561                                                 break;
562                                         case '\r':
563                                         case ' ':
564                                         case '\t': /* FIXME: check if line contains only WS */
565                                                 r->state=H_SKIP;
566                                                 r->has_content_len=1;
567                                                 break;
568                                         case '\n':
569                                                 /* end of line, parse successful */
570                                                 r->state=H_LF;
571                                                 r->has_content_len=1;
572                                                 break;
573                                         default:
574                                                 LOG(L_ERR, "ERROR: tcp_read_headers: bad "
575                                                                 "Content-Length header value, unexpected "
576                                                                 "char %c in state %d\n", *p, r->state);
577                                                 r->state=H_SKIP; /* try to find another?*/
578                                 }
579                                 p++;
580                                 break;
581                         
582                         default:
583                                 LOG(L_CRIT, "BUG: tcp_read_headers: unexpected state %d\n",
584                                                 r->state);
585                                 abort();
586                 }
587         }
588 skip:
589         r->parsed=p;
590         return bytes;
591 }
592
593
594
595 int tcp_read_req(struct tcp_connection* con, int* bytes_read, int* read_flags)
596 {
597         int bytes;
598         int total_bytes;
599         int resp;
600         long size;
601         struct tcp_req* req;
602         struct dest_info dst;
603         int s;
604         char c;
605         int ret;
606                 
607                 bytes=-1;
608                 total_bytes=0;
609                 resp=CONN_RELEASE;
610                 s=con->fd;
611                 req=&con->req;
612 #ifdef USE_TLS
613                 if (con->type==PROTO_TLS){
614                         if (tls_fix_read_conn(con)!=0){
615                                 resp=CONN_ERROR;
616                                 goto end_req;
617                         }
618                         if (unlikely(con->state!=S_CONN_OK && con->state!=S_CONN_ACCEPT))
619                                 goto end_req; /* not enough data */
620                 }
621 #endif
622
623 again:
624                 if (likely(req->error==TCP_REQ_OK)){
625                         bytes=tcp_read_headers(con, read_flags);
626 #ifdef EXTRA_DEBUG
627                                                 /* if timeout state=0; goto end__req; */
628                         DBG("read= %d bytes, parsed=%d, state=%d, error=%d\n",
629                                         bytes, (int)(req->parsed-req->start), req->state,
630                                         req->error );
631                         DBG("tcp_read_req: last char=0x%02X, parsed msg=\n%.*s\n",
632                                         *(req->parsed-1), (int)(req->parsed-req->start),
633                                         req->start);
634 #endif
635                         if (unlikely(bytes==-1)){
636                                 LOG(L_ERR, "ERROR: tcp_read_req: error reading \n");
637                                 resp=CONN_ERROR;
638                                 goto end_req;
639                         }
640                         total_bytes+=bytes;
641                         /* eof check:
642                          * is EOF if eof on fd and req.  not complete yet,
643                          * if req. is complete we might have a second unparsed
644                          * request after it, so postpone release_with_eof
645                          */
646                         if (unlikely((con->state==S_CONN_EOF) && (req->complete==0))) {
647                                 DBG( "tcp_read_req: EOF\n");
648                                 resp=CONN_EOF;
649                                 goto end_req;
650                         }
651                 
652                 }
653                 if (unlikely(req->error!=TCP_REQ_OK)){
654                         LOG(L_ERR,"ERROR: tcp_read_req: bad request, state=%d, error=%d "
655                                           "buf:\n%.*s\nparsed:\n%.*s\n", req->state, req->error,
656                                           (int)(req->pos-req->buf), req->buf,
657                                           (int)(req->parsed-req->start), req->start);
658                         DBG("- received from: port %d\n", con->rcv.src_port);
659                         print_ip("- received from: ip ",&con->rcv.src_ip, "\n");
660                         resp=CONN_ERROR;
661                         goto end_req;
662                 }
663                 if (likely(req->complete)){
664 #ifdef EXTRA_DEBUG
665                         DBG("tcp_read_req: end of header part\n");
666                         DBG("- received from: port %d\n", con->rcv.src_port);
667                         print_ip("- received from: ip ", &con->rcv.src_ip, "\n");
668                         DBG("tcp_read_req: headers:\n%.*s.\n",
669                                         (int)(req->body-req->start), req->start);
670 #endif
671                         if (likely(req->has_content_len)){
672                                 DBG("tcp_read_req: content-length= %d\n", req->content_len);
673 #ifdef EXTRA_DEBUG
674                                 DBG("tcp_read_req: body:\n%.*s\n", req->content_len,req->body);
675 #endif
676                         }else{
677                                 req->error=TCP_REQ_BAD_LEN;
678                                 LOG(L_ERR, "ERROR: tcp_read_req: content length not present or"
679                                                 " unparsable\n");
680                                 resp=CONN_ERROR;
681                                 goto end_req;
682                         }
683                         /* if we are here everything is nice and ok*/
684                         resp=CONN_RELEASE;
685 #ifdef EXTRA_DEBUG
686                         DBG("calling receive_msg(%p, %d, )\n",
687                                         req->start, (int)(req->parsed-req->start));
688 #endif
689                         /* rcv.bind_address should always be !=0 */
690                         bind_address=con->rcv.bind_address;
691                         /* just for debugging use sendipv4 as receiving socket  FIXME*/
692                         /*
693                         if (con->rcv.dst_ip.af==AF_INET6){
694                                 bind_address=sendipv6_tcp;
695                         }else{
696                                 bind_address=sendipv4_tcp;
697                         }
698                         */
699                         con->rcv.proto_reserved1=con->id; /* copy the id */
700                         c=*req->parsed; /* ugly hack: zero term the msg & save the
701                                                            previous char, req->parsed should be ok
702                                                            because we always alloc BUF_SIZE+1 */
703                         *req->parsed=0;
704
705                         if (req->state==H_PING_CRLF) {
706                                 init_dst_from_rcv(&dst, &con->rcv);
707
708                                 if (tcp_send(&dst, 0, CRLF, CRLF_LEN) < 0) {
709                                         LOG(L_ERR, "CRLF ping: tcp_send() failed\n");
710                                 }
711                                 ret = 0;
712                         }else
713 #ifdef USE_STUN
714                         if (unlikely(req->state==H_STUN_END)){
715                                 /* stun request */
716                                 ret = stun_process_msg(req->start, req->parsed-req->start,
717                                                                          &con->rcv);
718                         }else
719 #endif
720                                 ret = receive_msg(req->start, req->parsed-req->start,
721                                                                         &con->rcv);
722                                 
723                         if (unlikely(ret < 0)) {
724                                 *req->parsed=c;
725                                 resp=CONN_ERROR;
726                                 goto end_req;
727                         }
728                         *req->parsed=c;
729                         
730                         /* prepare for next request */
731                         size=req->pos-req->parsed;
732                         req->start=req->buf;
733                         req->body=0;
734                         req->error=TCP_REQ_OK;
735                         req->state=H_SKIP_EMPTY;
736                         req->complete=req->content_len=req->has_content_len=0;
737                         req->bytes_to_go=0;
738                         req->pos=req->buf+size;
739                         
740                         if (unlikely(size)){ 
741                                 memmove(req->buf, req->parsed, size);
742                                 req->parsed=req->buf; /* fix req->parsed after using it */
743 #ifdef EXTRA_DEBUG
744                                 DBG("tcp_read_req: preparing for new request, kept %ld"
745                                                 " bytes\n", size);
746 #endif
747                                 /*if we still have some unparsed bytes, try to parse them too*/
748                                 goto again;
749                         } else if (unlikely(con->state==S_CONN_EOF)){
750                                 DBG( "tcp_read_req: EOF after reading complete request\n");
751                                 resp=CONN_EOF;
752                         }
753                         req->parsed=req->buf; /* fix req->parsed */
754                 }
755                 
756                 
757         end_req:
758                 if (likely(bytes_read)) *bytes_read=total_bytes;
759                 return resp;
760 }
761
762
763
764 void release_tcpconn(struct tcp_connection* c, long state, int unix_sock)
765 {
766         long response[2];
767         
768                 DBG( "releasing con %p, state %ld, fd=%d, id=%d\n",
769                                 c, state, c->fd, c->id);
770                 DBG(" extra_data %p\n", c->extra_data);
771                 /* release req & signal the parent */
772                 c->reader_pid=0; /* reset it */
773                 if (c->fd!=-1){
774                         close(c->fd);
775                         c->fd=-1;
776                 }
777                 /* errno==EINTR, EWOULDBLOCK a.s.o todo */
778                 response[0]=(long)c;
779                 response[1]=state;
780                 
781                 if (tsend_stream(unix_sock, (char*)response, sizeof(response), -1)<=0)
782                         LOG(L_ERR, "ERROR: release_tcpconn: tsend_stream failed\n");
783 }
784
785
786
787 static ticks_t tcpconn_read_timeout(ticks_t t, struct timer_ln* tl, void* data)
788 {
789         struct tcp_connection *c;
790         
791         c=(struct tcp_connection*)data; 
792         /* or (struct tcp...*)(tl-offset(c->timer)) */
793         
794         if (likely(!(c->state<0) && TICKS_LT(t, c->timeout))){
795                 /* timeout extended, exit */
796                 return (ticks_t)(c->timeout - t);
797         }
798         /* if conn->state is ERROR or BAD => force timeout too */
799         if (unlikely(io_watch_del(&io_w, c->fd, -1, IO_FD_CLOSING)<0)){
800                 LOG(L_ERR, "ERROR: tcpconn_read_timeout: io_watch_del failed for %p"
801                                         " id %d fd %d, state %d, flags %x, main fd %d\n",
802                                         c, c->id, c->fd, c->state, c->flags, c->s);
803         }
804         tcpconn_listrm(tcp_conn_lst, c, c_next, c_prev);
805         release_tcpconn(c, (c->state<0)?CONN_ERROR:CONN_RELEASE, tcpmain_sock);
806         
807         return 0;
808 }
809
810
811
812 /* handle io routine, based on the fd_map type
813  * (it will be called from io_wait_loop* )
814  * params:  fm  - pointer to a fd hash entry
815  *          idx - index in the fd_array (or -1 if not known)
816  * return: -1 on error, or when we are not interested any more on reads
817  *            from this fd (e.g.: we are closing it )
818  *          0 on EAGAIN or when by some other way it is known that no more 
819  *            io events are queued on the fd (the receive buffer is empty).
820  *            Usefull to detect when there are no more io events queued for
821  *            sigio_rt, epoll_et, kqueue.
822  *         >0 on successfull read from the fd (when there might be more io
823  *            queued -- the receive buffer might still be non-empty)
824  */
825 inline static int handle_io(struct fd_map* fm, short events, int idx)
826 {       
827         int ret;
828         int n;
829         int read_flags;
830         struct tcp_connection* con;
831         int s;
832         long resp;
833         ticks_t t;
834         
835         /* update the local config */
836         cfg_update();
837         
838         switch(fm->type){
839                 case F_TCPMAIN:
840 again:
841                         ret=n=receive_fd(fm->fd, &con, sizeof(con), &s, 0);
842                         DBG("received n=%d con=%p, fd=%d\n", n, con, s);
843                         if (unlikely(n<0)){
844                                 if (errno == EWOULDBLOCK || errno == EAGAIN){
845                                         ret=0;
846                                         break;
847                                 }else if (errno == EINTR) goto again;
848                                 else{
849                                         LOG(L_CRIT,"BUG: tcp_receive: handle_io: read_fd: %s \n",
850                                                         strerror(errno));
851                                                 abort(); /* big error*/
852                                 }
853                         }
854                         if (unlikely(n==0)){
855                                 LOG(L_ERR, "WARNING: tcp_receive: handle_io: 0 bytes read\n");
856                                 goto error;
857                         }
858                         if (unlikely(con==0)){
859                                         LOG(L_CRIT, "BUG: tcp_receive: handle_io null pointer\n");
860                                         goto error;
861                         }
862                         con->fd=s;
863                         if (unlikely(s==-1)) {
864                                 LOG(L_ERR, "ERROR: tcp_receive: handle_io: read_fd:"
865                                                                         "no fd read\n");
866                                 goto con_error;
867                         }
868                         con->reader_pid=my_pid();
869                         if (unlikely(con==tcp_conn_lst)){
870                                 LOG(L_CRIT, "BUG: tcp_receive: handle_io: duplicate"
871                                                         " connection received: %p, id %d, fd %d, refcnt %d"
872                                                         " state %d (n=%d)\n", con, con->id, con->fd,
873                                                         atomic_get(&con->refcnt), con->state, n);
874                                 goto con_error;
875                                 break; /* try to recover */
876                         }
877                         if (unlikely(con->state==S_CONN_BAD)){
878                                 LOG(L_WARN, "WARNING: tcp_receive: handle_io: received an"
879                                                         " already bad connection: %p id %d refcnt %d\n",
880                                                         con, con->id, atomic_get(&con->refcnt));
881                                 goto con_error;
882                         }
883                         /* if we received the fd there is most likely data waiting to
884                          * be read => process it first to avoid extra sys calls */
885                         read_flags=((con->flags & (F_CONN_EOF_SEEN|F_CONN_FORCE_EOF)) && 
886                                                 !(con->flags & F_CONN_OOB_DATA))? RD_CONN_FORCE_EOF
887                                                 :0;
888                         resp=tcp_read_req(con, &n, &read_flags);
889                         if (unlikely(resp<0)){
890                                 /* some error occured, but on the new fd, not on the tcp
891                                  * main fd, so keep the ret value */
892                                 if (unlikely(resp!=CONN_EOF))
893                                         con->state=S_CONN_BAD;
894                                 release_tcpconn(con, resp, tcpmain_sock);
895                                 break;
896                         }
897                         
898                         /* must be before io_watch_add, io_watch_add might catch some
899                          * already existing events => might call handle_io and
900                          * handle_io might decide to del. the new connection =>
901                          * must be in the list */
902                         tcpconn_listadd(tcp_conn_lst, con, c_next, c_prev);
903                         t=get_ticks_raw();
904                         con->timeout=t+S_TO_TICKS(TCP_CHILD_TIMEOUT);
905                         /* re-activate the timer */
906                         con->timer.f=tcpconn_read_timeout;
907                         local_timer_reinit(&con->timer);
908                         local_timer_add(&tcp_reader_ltimer, &con->timer,
909                                                                 S_TO_TICKS(TCP_CHILD_TIMEOUT), t);
910                         if (unlikely(io_watch_add(&io_w, s, POLLIN, F_TCPCONN, con)<0)){
911                                 LOG(L_CRIT, "ERROR: tcpconn_receive: handle_io: io_watch_add "
912                                                         "failed for %p id %d fd %d, state %d, flags %x,"
913                                                         " main fd %d, refcnt %d\n",
914                                                         con, con->id, con->fd, con->state, con->flags,
915                                                         con->s, atomic_get(&con->refcnt));
916                                 tcpconn_listrm(tcp_conn_lst, con, c_next, c_prev);
917                                 local_timer_del(&tcp_reader_ltimer, &con->timer);
918                                 goto con_error;
919                         }
920                         break;
921                 case F_TCPCONN:
922                         con=(struct tcp_connection*)fm->data;
923                         if (unlikely(con->state==S_CONN_BAD)){
924                                 resp=CONN_ERROR;
925                                 LOG(L_WARN, "WARNING: tcp_receive: handle_io: F_TCPCONN"
926                                                         " connection marked as bad: %p id %d refcnt %d\n",
927                                                         con, con->id, atomic_get(&con->refcnt));
928                                 goto read_error;
929                         }
930 #ifdef POLLRDHUP
931                         read_flags=(((events & POLLRDHUP) | 
932                                                         (con->flags & (F_CONN_EOF_SEEN|F_CONN_FORCE_EOF)))
933                                                 && !(events & POLLPRI))? RD_CONN_FORCE_EOF: 0;
934 #else /* POLLRDHUP */
935                         read_flags=0;
936 #endif /* POLLRDHUP */
937                         resp=tcp_read_req(con, &ret, &read_flags);
938                         if (unlikely(resp<0)){
939 read_error:
940                                 ret=-1; /* some error occured */
941                                 if (unlikely(io_watch_del(&io_w, con->fd, idx,
942                                                                                         IO_FD_CLOSING) < 0)){
943                                         LOG(L_CRIT, "ERROR: tcpconn_receive: handle_io: "
944                                                         "io_watch_del failed for %p id %d fd %d,"
945                                                         " state %d, flags %x, main fd %d, refcnt %d\n",
946                                                         con, con->id, con->fd, con->state,
947                                                         con->flags, con->s, atomic_get(&con->refcnt));
948                                 }
949                                 tcpconn_listrm(tcp_conn_lst, con, c_next, c_prev);
950                                 local_timer_del(&tcp_reader_ltimer, &con->timer);
951                                 if (unlikely(resp!=CONN_EOF))
952                                         con->state=S_CONN_BAD;
953                                 release_tcpconn(con, resp, tcpmain_sock);
954                         }else{
955                                 /* update timeout */
956                                 con->timeout=get_ticks_raw()+S_TO_TICKS(TCP_CHILD_TIMEOUT);
957                                 /* ret= 0 (read the whole socket buffer) if short read & 
958                                  *  !POLLPRI,  bytes read otherwise */
959                                 ret&=(((read_flags & RD_CONN_SHORT_READ) && 
960                                                 !(events & POLLPRI)) - 1);
961                         }
962                         break;
963                 case F_NONE:
964                         LOG(L_CRIT, "BUG: handle_io: empty fd map %p (%d): "
965                                                 "{%d, %d, %p}\n", fm, (int)(fm-io_w.fd_hash),
966                                                 fm->fd, fm->type, fm->data);
967                         goto error;
968                 default:
969                         LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type); 
970                         goto error;
971         }
972         
973         return ret;
974 con_error:
975         con->state=S_CONN_BAD;
976         release_tcpconn(con, CONN_ERROR, tcpmain_sock);
977         return ret;
978 error:
979         return -1;
980 }
981
982
983
984 inline static void tcp_reader_timer_run()
985 {
986         ticks_t ticks;
987         
988         ticks=get_ticks_raw();
989         if (unlikely((ticks-tcp_reader_prev_ticks)<TCPCONN_TIMEOUT_MIN_RUN))
990                 return;
991         tcp_reader_prev_ticks=ticks;
992         local_timer_run(&tcp_reader_ltimer, ticks);
993 }
994
995
996
997 void tcp_receive_loop(int unix_sock)
998 {
999         
1000         /* init */
1001         tcpmain_sock=unix_sock; /* init com. socket */
1002         if (init_io_wait(&io_w, get_max_open_fds(), tcp_poll_method)<0)
1003                 goto error;
1004         tcp_reader_prev_ticks=get_ticks_raw();
1005         if (init_local_timer(&tcp_reader_ltimer, get_ticks_raw())!=0)
1006                 goto error;
1007         /* add the unix socket */
1008         if (io_watch_add(&io_w, tcpmain_sock, POLLIN,  F_TCPMAIN, 0)<0){
1009                 LOG(L_CRIT, "ERROR: tcp_receive_loop: init: failed to add socket "
1010                                                         " to the fd list\n");
1011                 goto error;
1012         }
1013
1014         /* initialize the config framework */
1015         if (cfg_child_init()) goto error;
1016
1017         /* main loop */
1018         switch(io_w.poll_method){
1019                 case POLL_POLL:
1020                                 while(1){
1021                                         io_wait_loop_poll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1022                                         tcp_reader_timer_run();
1023                                 }
1024                                 break;
1025 #ifdef HAVE_SELECT
1026                 case POLL_SELECT:
1027                         while(1){
1028                                 io_wait_loop_select(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1029                                 tcp_reader_timer_run();
1030                         }
1031                         break;
1032 #endif
1033 #ifdef HAVE_SIGIO_RT
1034                 case POLL_SIGIO_RT:
1035                         while(1){
1036                                 io_wait_loop_sigio_rt(&io_w, TCP_CHILD_SELECT_TIMEOUT);
1037                                 tcp_reader_timer_run();
1038                         }
1039                         break;
1040 #endif
1041 #ifdef HAVE_EPOLL
1042                 case POLL_EPOLL_LT:
1043                         while(1){
1044                                 io_wait_loop_epoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1045                                 tcp_reader_timer_run();
1046                         }
1047                         break;
1048                 case POLL_EPOLL_ET:
1049                         while(1){
1050                                 io_wait_loop_epoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 1);
1051                                 tcp_reader_timer_run();
1052                         }
1053                         break;
1054 #endif
1055 #ifdef HAVE_KQUEUE
1056                 case POLL_KQUEUE:
1057                         while(1){
1058                                 io_wait_loop_kqueue(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1059                                 tcp_reader_timer_run();
1060                         }
1061                         break;
1062 #endif
1063 #ifdef HAVE_DEVPOLL
1064                 case POLL_DEVPOLL:
1065                         while(1){
1066                                 io_wait_loop_devpoll(&io_w, TCP_CHILD_SELECT_TIMEOUT, 0);
1067                                 tcp_reader_timer_run();
1068                         }
1069                         break;
1070 #endif
1071                 default:
1072                         LOG(L_CRIT, "BUG: tcp_receive_loop: no support for poll method "
1073                                         " %s (%d)\n", 
1074                                         poll_method_name(io_w.poll_method), io_w.poll_method);
1075                         goto error;
1076         }
1077 error:
1078         destroy_io_wait(&io_w);
1079         LOG(L_CRIT, "ERROR: tcp_receive_loop: exiting...");
1080         exit(-1);
1081 }
1082
1083
1084
1085 #ifdef USE_STUN
1086 int is_msg_complete(struct tcp_req* r)
1087 {
1088         if (r->has_content_len == 1) {
1089                 r->state = H_STUN_FP;
1090                 return 0;
1091         }
1092         else {
1093                 /* STUN message is complete */
1094                 r->state = H_STUN_END;
1095                 r->complete = 1;
1096                 r->has_content_len = 1; /* hack to avoid error check */
1097                 return 1;
1098         }
1099 }
1100 #endif
1101
1102 #endif /* USE_TCP */