io_wait.h: logging: convert LOG to LM_*
[sip-router] / io_wait.h
1 /*
2  * $Id$
3  *
4  * Copyright (C) 2005 iptelorg GmbH
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 /*
19  * tcp io wait common stuff used by tcp_main.c & tcp_read.c
20  * All the functions are inline because of speed reasons and because they are
21  * used only from 2 places.
22  * You also have to define:
23  *     int handle_io(struct fd_map* fm, short events, int idx) (see below)
24  *     (this could be trivially replaced by a callback pointer entry attached
25  *      to the io_wait handler if more flexibility rather than performance
26  *      is needed)
27  *      fd_type - define to some enum of your choice and define also
28  *                FD_TYPE_DEFINED (if you don't do it fd_type will be defined
29  *                to int). 0 has a special not set/not init. meaning
30  *                (a lot of sanity checks and the sigio_rt code are based on
31  *                 this assumption)
32  *     local_malloc (defaults to pkg_malloc)
33  *     local_free   (defaults to pkg_free)
34  *
35  */
36 /*
37  * History:
38  * --------
39  *  2005-06-13  created by andrei
40  *  2005-06-26  added kqueue (andrei)
41  *  2005-07-01  added /dev/poll (andrei)
42  *  2006-05-30  sigio 64 bit workaround enabled for kernels < 2.6.5 (andrei)
43  *  2007-11-22  when handle_io() is called in a loop check & stop if the fd was
44  *               removed inside handle_io() (andrei)
45  *  2007-11-29  support for write (POLLOUT); added io_watch_chg() (andrei)
46  *  2008-02-04  POLLRDHUP & EPOLLRDHUP support (automatically enabled if POLLIN
47  *               is set) (andrei)
48  *  2010-06-17  re-enabled & enhanced the EV_ERROR for kqueue (andrei)
49  */
50
51
52
53 #ifndef _io_wait_h
54 #define _io_wait_h
55
56 #include <errno.h>
57 #include <string.h>
58 #ifdef HAVE_SIGIO_RT
59 #define __USE_GNU /* or else F_SETSIG won't be included */
60 #include <sys/types.h> /* recv */
61 #include <sys/socket.h> /* recv */
62 #include <signal.h> /* sigprocmask, sigwait a.s.o */
63 #endif
64
65 #define _GNU_SOURCE  /* for POLLRDHUP on linux */
66 #include <poll.h>
67 #include <fcntl.h>
68
69 #ifdef HAVE_EPOLL
70 #include <sys/epoll.h>
71 #endif
72 #ifdef HAVE_KQUEUE
73 #include <sys/types.h> /* needed on freebsd */
74 #include <sys/event.h>
75 #include <sys/time.h>
76 #endif
77 #ifdef HAVE_DEVPOLL
78 #include <sys/devpoll.h>
79 #endif
80 #ifdef HAVE_SELECT
81 /* needed on openbsd for select*/
82 #include <sys/time.h>
83 #include <sys/types.h>
84 #include <unistd.h>
85 /* needed according to POSIX for select*/
86 #include <sys/select.h>
87 #endif
88
89 #include "dprint.h"
90
91 #include "poll_types.h" /* poll_types*/
92 #ifdef HAVE_SIGIO_RT
93 #include "pt.h" /* mypid() */
94 #endif
95
96 #include "compiler_opt.h"
97
98
99 #ifdef HAVE_EPOLL
100 /* fix defines for EPOLL */
101 #if defined POLLRDHUP && ! defined EPOLLRDHUP
102 #define EPOLLRDHUP POLLRDHUP  /* should work on all linuxes */
103 #endif /* POLLRDHUP && EPOLLRDHUP */
104 #endif /* HAVE_EPOLL */
105
106
107 extern int _os_ver; /* os version number, needed to select bugs workarrounds */
108
109
110 #if 0
111 enum fd_types; /* this should be defined from the including file,
112                                   see tcp_main.c for an example,
113                                   0 has a special meaning: not used/empty*/
114 #endif
115
116 #ifndef FD_TYPE_DEFINED
117 typedef int fd_type;
118 #define FD_TYPE_DEFINED
119 #endif
120
121 /* maps a fd to some other structure; used in almost all cases
122  * except epoll and maybe kqueue or /dev/poll */
123 struct fd_map{
124         int fd;               /* fd no */
125         fd_type type;         /* "data" type */
126         void* data;           /* pointer to the corresponding structure */
127         short events;         /* events we are interested int */
128 };
129
130
131 #ifdef HAVE_KQUEUE
132 #ifndef KQ_CHANGES_ARRAY_SIZE
133 #define KQ_CHANGES_ARRAY_SIZE 256
134
135 #ifdef __OS_netbsd
136 #define KEV_UDATA_CAST (intptr_t)
137 #else
138 #define KEV_UDATA_CAST
139 #endif
140
141 #endif
142 #endif
143
144
145 /* handler structure */
146 struct io_wait_handler{
147         enum poll_types poll_method;
148         int flags;
149         struct fd_map* fd_hash;
150         int fd_no; /*  current index used in fd_array and the passed size for
151                                    ep_array (for kq_array at least
152                                     max(twice the size, kq_changes_size) should be
153                                    be passed). */
154         int max_fd_no; /* maximum fd no, is also the size of fd_array,
155                                                        fd_hash  and ep_array*/
156         /* common stuff for POLL, SIGIO_RT and SELECT
157          * since poll support is always compiled => this will always be compiled */
158         struct pollfd* fd_array; /* used also by devpoll as devpoll array */
159         int crt_fd_array_idx; /*  crt idx for which handle_io is called
160                                                          (updated also by del -> internal optimization) */
161         /* end of common stuff */
162 #ifdef HAVE_EPOLL
163         int epfd; /* epoll ctrl fd */
164         struct epoll_event* ep_array;
165 #endif
166 #ifdef HAVE_SIGIO_RT
167         sigset_t sset; /* signal mask for sigio & sigrtmin */
168         int signo;     /* real time signal used */
169 #endif
170 #ifdef HAVE_KQUEUE
171         int kq_fd;
172         struct kevent* kq_array;   /* used for the eventlist*/
173         struct kevent* kq_changes; /* used for the changelist */
174         size_t kq_nchanges;
175         size_t kq_array_size;   /* array size */
176         size_t kq_changes_size; /* size of the changes array */
177 #endif
178 #ifdef HAVE_DEVPOLL
179         int dpoll_fd;
180 #endif
181 #ifdef HAVE_SELECT
182         fd_set master_rset; /* read set */
183         fd_set master_wset; /* write set */
184         int max_fd_select; /* maximum select used fd */
185 #endif
186 };
187
188 typedef struct io_wait_handler io_wait_h;
189
190
/* Address of the fd_map slot that belongs to fd inside h->fd_hash.
 * No range check is done on fd. */
#define get_fd_map(h, fd) \
	(&(h)->fd_hash[(fd)])

/* Release a slot previously obtained from get_fd_map() or hash_fd_map():
 * the type goes back to 0 (F_NONE, i.e. unused) and the fd to -1. */
#define unhash_fd_map(pfm) \
	do { \
		(pfm)->type = 0; /* F_NONE */ \
		(pfm)->fd = -1; \
	} while (0)
200
201 /* add a fd_map structure to the fd hash */
202 static inline struct fd_map* hash_fd_map(       io_wait_h* h,
203                                                                                         int fd,
204                                                                                         short events,
205                                                                                         fd_type type,
206                                                                                         void* data)
207 {
208         h->fd_hash[fd].fd=fd;
209         h->fd_hash[fd].events=events;
210         h->fd_hash[fd].type=type;
211         h->fd_hash[fd].data=data;
212         return &h->fd_hash[fd];
213 }
214
215
216
217 #ifdef HANDLE_IO_INLINE
218 /* generic handle io routine, this must be defined in the including file
219  * (faster than registering a callback pointer)
220  *
221  * params:  fm     - pointer to a fd hash entry
222  *          events - combinations of POLLIN, POLLOUT, POLLERR & POLLHUP
223  *          idx    - index in the fd_array (or -1 if not known)
224  * return: -1 on error
225  *          0 on EAGAIN or when by some other way it is known that no more
226  *            io events are queued on the fd (the receive buffer is empty).
227  *            Useful to detect when there are no more io events queued for
228  *            sigio_rt, epoll_et, kqueue.
229  *         >0 on successful read from the fd (when there might be more io
230  *            queued -- the receive buffer might still be non-empty)
231  */
232 inline static int handle_io(struct fd_map* fm, short events, int idx);
233 #else
234 int handle_io(struct fd_map* fm, short events, int idx);
235 #endif
236
237
238
239 #ifdef HAVE_KQUEUE
240 /*
241  * kqueue specific function: register a change
242  * (adds a change to the kevent change array, and if full flushes it first)
243  *
244  * TODO: check if the event already exists in the change list or if it's
245  *       complementary to an event in the list (e.g. EVFILT_WRITE, EV_DELETE
246  *       and EVFILT_WRITE, EV_ADD for the same fd).
247  * returns: -1 on error, 0 on success
248  */
249 static inline int kq_ev_change(io_wait_h* h, int fd, int filter, int flag,
250                                                                 void* data)
251 {
252         int n;
253         int r;
254         struct timespec tspec;
255
256         if (h->kq_nchanges>=h->kq_changes_size){
257                 /* changes array full ! */
258                 LM_WARN("kqueue changes array full trying to flush...\n");
259                 tspec.tv_sec=0;
260                 tspec.tv_nsec=0;
261 again:
262                 n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges, 0, 0, &tspec);
263                 if (unlikely(n == -1)){
264                         if (unlikely(errno == EINTR)) goto again;
265                         else {
266                                 /* for a detailed explanation of what follows see
267                                    io_wait_loop_kqueue EV_ERROR case */
268                                 if (unlikely(!(errno == EBADF || errno == ENOENT)))
269                                         BUG("kq_ev_change: kevent flush changes failed"
270                                                         " (unexpected error): %s [%d]\n",
271                                                         strerror(errno), errno);
272                                         /* ignore error even if it's not a EBADF/ENOENT */
273                                 /* one of the file descriptors is bad, probably already
274                                    closed => try to apply changes one-by-one */
275                                 for (r = 0; r < h->kq_nchanges; r++) {
276 retry2:
277                                         n = kevent(h->kq_fd, &h->kq_changes[r], 1, 0, 0, &tspec);
278                                         if (n==-1) {
279                                                 if (unlikely(errno == EINTR))
280                                                         goto retry2;
281                                         /* for a detailed explanation of what follows see
282                                                 io_wait_loop_kqueue EV_ERROR case */
283                                                 if (unlikely(!(errno == EBADF || errno == ENOENT)))
284                                                         BUG("kq_ev_change: kevent flush changes failed:"
285                                                                         " (unexpected error) %s [%d] (%d/%lu)\n",
286                                                                                 strerror(errno), errno,
287                                                                                 r, (unsigned long)h->kq_nchanges);
288                                                 continue; /* skip over it */
289                                         }
290                                 }
291                         }
292                 }
293                 h->kq_nchanges=0; /* changes array is empty */
294         }
295         EV_SET(&h->kq_changes[h->kq_nchanges], fd, filter, flag, 0, 0,
296                         KEV_UDATA_CAST data);
297         h->kq_nchanges++;
298         return 0;
299 }
300 #endif
301
302
303
304 /* generic io_watch_add function
305  * Params:
306  *     h      - pointer to initialized io_wait handle
307  *     fd     - fd to watch
308  *     events - bitmap with the fd events for which the fd should be watched
309  *              (combination of POLLIN and POLLOUT)
310  *     type   - fd type (non 0 value, returned in the call to handle_io)
311  *     data   - pointer/private data returned in the handle_io call
312  * returns 0 on success, -1 on error
313  *
314  * WARNING: handle_io() can be called immediately (from io_watch_add()) so
315  *  make sure that any dependent init. (e.g. data stuff) is made before
316  *  calling io_watch_add
317  *
318  * this version should be faster than pointers to poll_method specific
319  * functions (it avoids functions calls, the overhead being only an extra
320  *  switch())*/
321 inline static int io_watch_add( io_wait_h* h,
322                                                                 int fd,
323                                                                 short events,
324                                                                 fd_type type,
325                                                                 void* data)
326 {
327
328         /* helper macros */
329 #define fd_array_setup(ev) \
330         do{ \
331                 h->fd_array[h->fd_no].fd=fd; \
332                 h->fd_array[h->fd_no].events=(ev); /* useless for select */ \
333                 h->fd_array[h->fd_no].revents=0;     /* useless for select */ \
334         }while(0)
335         
336 #define set_fd_flags(f) \
337         do{ \
338                         flags=fcntl(fd, F_GETFL); \
339                         if (flags==-1){ \
340                                 LM_ERR("fnctl: GETFL failed: %s [%d]\n", \
341                                         strerror(errno), errno); \
342                                 goto error; \
343                         } \
344                         if (fcntl(fd, F_SETFL, flags|(f))==-1){ \
345                                 LM_ERR("fnctl: SETFL failed: %s [%d]\n", \
346                                         strerror(errno), errno); \
347                                 goto error; \
348                         } \
349         }while(0)
350         
351         
352         struct fd_map* e;
353         int flags;
354 #ifdef HAVE_EPOLL
355         struct epoll_event ep_event;
356 #endif
357 #ifdef HAVE_DEVPOLL
358         struct pollfd pfd;
359 #endif
360 #if defined(HAVE_SIGIO_RT) || defined (HAVE_EPOLL)
361         int n;
362 #endif
363 #if defined(HAVE_SIGIO_RT)
364         int idx;
365         int check_io;
366         struct pollfd pf;
367         
368         check_io=0; /* set to 1 if we need to check for pre-existing queued
369                                    io/data on the fd */
370         idx=-1;
371 #endif
372         e=0;
373         /* sanity checks */
374         if (unlikely(fd==-1)){
375                 LM_CRIT("fd is -1!\n");
376                 goto error;
377         }
378         if (unlikely((events&(POLLIN|POLLOUT))==0)){
379                 LM_CRIT("invalid events: 0x%0x\n", events);
380                 goto error;
381         }
382         /* check if not too big */
383         if (unlikely(h->fd_no>=h->max_fd_no)){
384                 LM_CRIT("maximum fd number exceeded: %d/%d\n", h->fd_no, h->max_fd_no);
385                 goto error;
386         }
387         DBG("DBG: io_watch_add(%p, %d, %d, %p), fd_no=%d\n",
388                         h, fd, type, data, h->fd_no);
389         /*  hash sanity check */
390         e=get_fd_map(h, fd);
391         if (unlikely(e && (e->type!=0 /*F_NONE*/))){
392                 LM_ERR("trying to overwrite entry %d"
393                         " watched for %x in the hash(%d, %d, %p) with (%d, %d, %p)\n",
394                         fd, events, e->fd, e->type, e->data, fd, type, data);
395                 e=0;
396                 goto error;
397         }
398         
399         if (unlikely((e=hash_fd_map(h, fd, events, type, data))==0)){
400                 LM_ERR("failed to hash the fd %d\n", fd);
401                 goto error;
402         }
403         switch(h->poll_method){ /* faster then pointer to functions */
404                 case POLL_POLL:
405 #ifdef POLLRDHUP
406                         /* listen to POLLRDHUP by default (if POLLIN) */
407                         events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
408 #endif /* POLLRDHUP */
409                         fd_array_setup(events);
410                         set_fd_flags(O_NONBLOCK);
411                         break;
412 #ifdef HAVE_SELECT
413                 case POLL_SELECT:
414                         fd_array_setup(events);
415                         if (likely(events & POLLIN))
416                                 FD_SET(fd, &h->master_rset);
417                         if (unlikely(events & POLLOUT))
418                                 FD_SET(fd, &h->master_wset);
419                         if (h->max_fd_select<fd) h->max_fd_select=fd;
420                         break;
421 #endif
422 #ifdef HAVE_SIGIO_RT
423                 case POLL_SIGIO_RT:
424                         fd_array_setup(events);
425                         /* re-set O_ASYNC might be needed, if not done from
426                          * io_watch_del (or if somebody wants to add a fd which has
427                          * already O_ASYNC/F_SETSIG set on a duplicate)
428                          */
429                         /* set async & signal */
430                         if (fcntl(fd, F_SETOWN, my_pid())==-1){
431                                 LM_ERR("fnctl: SETOWN failed: %s [%d]\n",
432                                         strerror(errno), errno);
433                                 goto error;
434                         }
435                         if (fcntl(fd, F_SETSIG, h->signo)==-1){
436                                 LM_ERR("fnctl: SETSIG failed: %s [%d]\n",
437                                         strerror(errno), errno);
438                                 goto error;
439                         }
440                         /* set both non-blocking and async */
441                         set_fd_flags(O_ASYNC| O_NONBLOCK);
442 #ifdef EXTRA_DEBUG
443                         DBG("io_watch_add: sigio_rt on f %d, signal %d to pid %d\n",
444                                         fd,  h->signo, my_pid());
445 #endif
446                         /* empty socket receive buffer, if buffer is already full
447                          * no more space to put packets
448                          * => no more signals are ever generated
449                          * also when moving fds, the freshly moved fd might have
450                          *  already some bytes queued, we want to get them now
451                          *  and not later -- andrei */
452                         idx=h->fd_no;
453                         check_io=1;
454                         break;
455 #endif
456 #ifdef HAVE_EPOLL
457                 case POLL_EPOLL_LT:
458                         ep_event.events=
459 #ifdef POLLRDHUP
460                                                 /* listen for EPOLLRDHUP too */
461                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
462 #else /* POLLRDHUP */
463                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
464 #endif /* POLLRDHUP */
465                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
466                         ep_event.data.ptr=e;
467 again1:
468                         n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
469                         if (unlikely(n==-1)){
470                                 if (errno==EAGAIN) goto again1;
471                                 LM_ERR("epoll_ctl failed: %s [%d]\n", strerror(errno), errno);
472                                 goto error;
473                         }
474                         break;
475                 case POLL_EPOLL_ET:
476                         set_fd_flags(O_NONBLOCK);
477                         ep_event.events=
478 #ifdef POLLRDHUP
479                                                 /* listen for EPOLLRDHUP too */
480                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
481 #else /* POLLRDHUP */
482                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
483 #endif /* POLLRDHUP */
484                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
485                                                 EPOLLET;
486                         ep_event.data.ptr=e;
487 again2:
488                         n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
489                         if (unlikely(n==-1)){
490                                 if (errno==EAGAIN) goto again2;
491                                 LM_ERR("epoll_ctl failed: %s [%d]\n", strerror(errno), errno);
492                                 goto error;
493                         }
494                         break;
495 #endif
496 #ifdef HAVE_KQUEUE
497                 case POLL_KQUEUE:
498                         if (likely( events & POLLIN)){
499                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e)==-1))
500                                         goto error;
501                         }
502                         if (unlikely( events & POLLOUT)){
503                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
504                                 {
505                                         if (likely(events & POLLIN)){
506                                                 kq_ev_change(h, fd, EVFILT_READ, EV_DELETE, 0);
507                                         }
508                                         goto error;
509                                 }
510                         }
511                         break;
512 #endif
513 #ifdef HAVE_DEVPOLL
514                 case POLL_DEVPOLL:
515                         pfd.fd=fd;
516                         pfd.events=events;
517                         pfd.revents=0;
518 again_devpoll:
519                         if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
520                                 if (errno==EAGAIN) goto again_devpoll;
521                                 LM_ERR("/dev/poll write failed: %s [%d]\n",
522                                         strerror(errno), errno);
523                                 goto error;
524                         }
525                         break;
526 #endif
527                         
528                 default:
529                         LM_CRIT("no support for poll method  %s (%d)\n",
530                                 poll_method_str[h->poll_method], h->poll_method);
531                         goto error;
532         }
533         
534         h->fd_no++; /* "activate" changes, for epoll/kqueue/devpoll it
535                                    has only informative value */
536 #if defined(HAVE_SIGIO_RT)
537         if (check_io){
538                 /* handle possible pre-existing events */
539                 pf.fd=fd;
540                 pf.events=events;
541 check_io_again:
542                 n=0;
543                 while(e->type && ((n=poll(&pf, 1, 0))>0) &&
544                                 (handle_io(e, pf.revents, idx)>0) &&
545                                 (pf.revents & (e->events|POLLERR|POLLHUP)));
546                 if (unlikely(e->type && (n==-1))){
547                         if (errno==EINTR) goto check_io_again;
548                         LM_ERR("check_io poll: %s [%d]\n", strerror(errno), errno);
549                 }
550         }
551 #endif
552         return 0;
553 error:
554         if (e) unhash_fd_map(e);
555         return -1;
556 #undef fd_array_setup
557 #undef set_fd_flags
558 }
559
560
561
562 #define IO_FD_CLOSING 16
563 /* parameters:    h - handler
564  *               fd - file descriptor
565  *            index - index in the fd_array if known, -1 if not
566  *                    (if index==-1 fd_array will be searched for the
567  *                     corresponding fd* entry -- slower but unavoidable in
568  *                     some cases). index is not used (no fd_array) for epoll,
569  *                     /dev/poll and kqueue
570  *            flags - optimization flags, e.g. IO_FD_CLOSING, the fd was
571  *                    or will shortly be closed, in some cases we can avoid
572  *                    extra remove operations (e.g.: epoll, kqueue, sigio)
573  * returns 0 if ok, -1 on error */
574 inline static int io_watch_del(io_wait_h* h, int fd, int idx, int flags)
575 {
576         
577 #define fix_fd_array \
578         do{\
579                         if (unlikely(idx==-1)){ \
580                                 /* fix idx if -1 and needed */ \
581                                 for (idx=0; (idx<h->fd_no) && \
582                                                         (h->fd_array[idx].fd!=fd); idx++); \
583                         } \
584                         if (likely(idx<h->fd_no)){ \
585                                 memmove(&h->fd_array[idx], &h->fd_array[idx+1], \
586                                         (h->fd_no-(idx+1))*sizeof(*(h->fd_array))); \
587                                 if ((idx<=h->crt_fd_array_idx) && (h->crt_fd_array_idx>=0)) \
588                                         h->crt_fd_array_idx--; \
589                         } \
590         }while(0)
591         
592         struct fd_map* e;
593         int events;
594 #ifdef HAVE_EPOLL
595         int n;
596         struct epoll_event ep_event;
597 #endif
598 #ifdef HAVE_DEVPOLL
599         struct pollfd pfd;
600 #endif
601 #ifdef HAVE_SIGIO_RT
602         int fd_flags;
603 #endif
604         
605         if (unlikely((fd<0) || (fd>=h->max_fd_no))){
606                 LM_CRIT("invalid fd %d, not in [0, %d) \n", fd, h->fd_no);
607                 goto error;
608         }
609         DBG("DBG: io_watch_del (%p, %d, %d, 0x%x) fd_no=%d called\n",
610                         h, fd, idx, flags, h->fd_no);
611         e=get_fd_map(h, fd);
612         /* more sanity checks */
613         if (unlikely(e==0)){
614                 LM_CRIT("no corresponding hash entry for %d\n", fd);
615                 goto error;
616         }
617         if (unlikely(e->type==0 /*F_NONE*/)){
618                 LM_ERR("trying to delete already erased"
619                         " entry %d in the hash(%d, %d, %p) flags %x)\n",
620                         fd, e->fd, e->type, e->data, flags);
621                 goto error;
622         }
623         events=e->events;
624         
625         switch(h->poll_method){
626                 case POLL_POLL:
627                         fix_fd_array;
628                         break;
629 #ifdef HAVE_SELECT
630                 case POLL_SELECT:
631                         if (likely(events & POLLIN))
632                                 FD_CLR(fd, &h->master_rset);
633                         if (unlikely(events & POLLOUT))
634                                 FD_CLR(fd, &h->master_wset);
635                         if (unlikely(h->max_fd_select && (h->max_fd_select==fd)))
636                                 /* we don't know the prev. max, so we just decrement it */
637                                 h->max_fd_select--;
638                         fix_fd_array;
639                         break;
640 #endif
641 #ifdef HAVE_SIGIO_RT
642                 case POLL_SIGIO_RT:
643                         /* the O_ASYNC flag must be reset all the time, the fd
644                          *  can be changed only if  O_ASYNC is reset (if not and
645                          *  the fd is a duplicate, you will get signals from the dup. fd
646                          *  and not from the original, even if the dup. fd was closed
647                          *  and the signals re-set on the original) -- andrei
648                          */
649                         /*if (!(flags & IO_FD_CLOSING)){*/
650                                 /* reset ASYNC */
651                                 fd_flags=fcntl(fd, F_GETFL);
652                                 if (unlikely(fd_flags==-1)){
653                                         LM_ERR("fnctl: GETFL failed: %s [%d]\n",
654                                                 strerror(errno), errno);
655                                         goto error;
656                                 }
657                                 if (unlikely(fcntl(fd, F_SETFL, fd_flags&(~O_ASYNC))==-1)){
658                                         LM_ERR("fnctl: SETFL failed: %s [%d]\n",
659                                                 strerror(errno), errno);
660                                         goto error;
661                                 }
662                         fix_fd_array; /* only on success */
663                         break;
664 #endif
665 #ifdef HAVE_EPOLL
666                 case POLL_EPOLL_LT:
667                 case POLL_EPOLL_ET:
668                         /* epoll doesn't seem to automatically remove sockets,
669                          * if the socket is a duplicate/moved and the original
670                          * is still open. The fd is removed from the epoll set
671                          * only when the original (and all the  copies?) is/are
672                          * closed. This is probably a bug in epoll. --andrei */
673 #ifdef EPOLL_NO_CLOSE_BUG
674                         if (!(flags & IO_FD_CLOSING)){
675 #endif
676 again_epoll:
677                                 n=epoll_ctl(h->epfd, EPOLL_CTL_DEL, fd, &ep_event);
678                                 if (unlikely(n==-1)){
679                                         if (errno==EAGAIN) goto again_epoll;
680                                         LM_ERR("removing fd from epoll list failed: %s [%d]\n",
681                                                 strerror(errno), errno);
682                                         goto error;
683                                 }
684 #ifdef EPOLL_NO_CLOSE_BUG
685                         }
686 #endif
687                         break;
688 #endif
689 #ifdef HAVE_KQUEUE
690                 case POLL_KQUEUE:
691                         if (!(flags & IO_FD_CLOSING)){
692                                 if (likely(events & POLLIN)){
693                                         if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
694                                                                                                         EV_DELETE, 0) ==-1)){
695                                                 /* try to delete the write filter anyway */
696                                                 if (events & POLLOUT){
697                                                         kq_ev_change(h, fd, EVFILT_WRITE, EV_DELETE, 0);
698                                                 }
699                                                 goto error;
700                                         }
701                                 }
702                                 if (unlikely(events & POLLOUT)){
703                                         if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
704                                                                                                         EV_DELETE, 0) ==-1))
705                                                 goto error;
706                                 }
707                         }
708                         break;
709 #endif
710 #ifdef HAVE_DEVPOLL
711                 case POLL_DEVPOLL:
712                                 /* for /dev/poll the closed fds _must_ be removed
713                                    (they are not removed automatically on close()) */
714                                 pfd.fd=fd;
715                                 pfd.events=POLLREMOVE;
716                                 pfd.revents=0;
717 again_devpoll:
718                                 if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
719                                         if (errno==EINTR) goto again_devpoll;
720                                         LM_ERR("removing fd from /dev/poll failed: %s [%d]\n",
721                                                 strerror(errno), errno);
722                                         goto error;
723                                 }
724                                 break;
725 #endif
726                 default:
727                         LM_CRIT("no support for poll method  %s (%d)\n",
728                                 poll_method_str[h->poll_method], h->poll_method);
729                         goto error;
730         }
731         unhash_fd_map(e); /* only on success */
732         h->fd_no--;
733         return 0;
734 error:
735         return -1;
736 #undef fix_fd_array
737 }
738
739
740
741 /* parameters:    h - handler
742  *               fd - file descriptor
743  *           events - new events to watch for
744  *              idx - index in the fd_array if known, -1 if not
745  *                    (if index==-1 fd_array will be searched for the
746  *                     corresponding fd* entry -- slower but unavoidable in
747  *                     some cases). index is not used (no fd_array) for epoll,
748  *                     /dev/poll and kqueue
749  * returns 0 if ok, -1 on error */
750 inline static int io_watch_chg(io_wait_h* h, int fd, short events, int idx )
751 {
752         
753 #define fd_array_chg(ev) \
754         do{\
755                         if (unlikely(idx==-1)){ \
756                                 /* fix idx if -1 and needed */ \
757                                 for (idx=0; (idx<h->fd_no) && \
758                                                         (h->fd_array[idx].fd!=fd); idx++); \
759                         } \
760                         if (likely(idx<h->fd_no)){ \
761                                 h->fd_array[idx].events=(ev); \
762                         } \
763         }while(0)
764         
765         struct fd_map* e;
766         int add_events;
767         int del_events;
768 #ifdef HAVE_DEVPOLL
769         struct pollfd pfd;
770 #endif
771 #ifdef HAVE_EPOLL
772         int n;
773         struct epoll_event ep_event;
774 #endif
775         
776         if (unlikely((fd<0) || (fd>=h->max_fd_no))){
777                 LM_CRIT("invalid fd %d, not in [0, %d) \n", fd, h->fd_no);
778                 goto error;
779         }
780         if (unlikely((events&(POLLIN|POLLOUT))==0)){
781                 LM_CRIT("invalid events: 0x%0x\n", events);
782                 goto error;
783         }
784         DBG("DBG: io_watch_chg (%p, %d, 0x%x, 0x%x) fd_no=%d called\n",
785                         h, fd, events, idx, h->fd_no);
786         e=get_fd_map(h, fd);
787         /* more sanity checks */
788         if (unlikely(e==0)){
789                 LM_CRIT("no corresponding hash entry for %d\n", fd);
790                 goto error;
791         }
792         if (unlikely(e->type==0 /*F_NONE*/)){
793                 LM_ERR("trying to change an already erased"
794                         " entry %d in the hash(%d, %d, %p) )\n",
795                         fd, e->fd, e->type, e->data);
796                 goto error;
797         }
798         
799         add_events=events & ~e->events;
800         del_events=e->events & ~events;
801         switch(h->poll_method){
802                 case POLL_POLL:
803 #ifdef POLLRDHUP
804                         fd_array_chg(events |
805                                                         /* listen to POLLRDHUP by default (if POLLIN) */
806                                                         (((int)!(events & POLLIN) - 1) & POLLRDHUP)
807                                                 );
808 #else /* POLLRDHUP */
809                         fd_array_chg(events);
810 #endif /* POLLRDHUP */
811                         break;
812 #ifdef HAVE_SELECT
813                 case POLL_SELECT:
814                         fd_array_chg(events);
815                         if (unlikely(del_events & POLLIN))
816                                 FD_CLR(fd, &h->master_rset);
817                         else if (unlikely(add_events & POLLIN))
818                                 FD_SET(fd, &h->master_rset);
819                         if (likely(del_events & POLLOUT))
820                                 FD_CLR(fd, &h->master_wset);
821                         else if (likely(add_events & POLLOUT))
822                                 FD_SET(fd, &h->master_wset);
823                         break;
824 #endif
825 #ifdef HAVE_SIGIO_RT
826                 case POLL_SIGIO_RT:
827                         fd_array_chg(events);
828                         /* no need for check_io, since SIGIO_RT listens by default for all
829                          * the events */
830                         break;
831 #endif
832 #ifdef HAVE_EPOLL
833                 case POLL_EPOLL_LT:
834                                 ep_event.events=
835 #ifdef POLLRDHUP
836                                                 /* listen for EPOLLRDHUP too */
837                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
838 #else /* POLLRDHUP */
839                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
840 #endif /* POLLRDHUP */
841                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
842                                 ep_event.data.ptr=e;
843 again_epoll_lt:
844                                 n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
845                                 if (unlikely(n==-1)){
846                                         if (errno==EAGAIN) goto again_epoll_lt;
847                                         LM_ERR("modifying epoll events failed: %s [%d]\n",
848                                                 strerror(errno), errno);
849                                         goto error;
850                                 }
851                         break;
852                 case POLL_EPOLL_ET:
853                                 ep_event.events=
854 #ifdef POLLRDHUP
855                                                 /* listen for EPOLLRDHUP too */
856                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
857 #else /* POLLRDHUP */
858                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
859 #endif /* POLLRDHUP */
860                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
861                                                 EPOLLET;
862                                 ep_event.data.ptr=e;
863 again_epoll_et:
864                                 n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
865                                 if (unlikely(n==-1)){
866                                         if (errno==EAGAIN) goto again_epoll_et;
867                                         LM_ERR("modifying epoll events failed: %s [%d]\n",
868                                                 strerror(errno), errno);
869                                         goto error;
870                                 }
871                         break;
872 #endif
873 #ifdef HAVE_KQUEUE
874                 case POLL_KQUEUE:
875                         if (unlikely(del_events & POLLIN)){
876                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
877                                                                                                                 EV_DELETE, 0) ==-1))
878                                                 goto error;
879                         }else if (unlikely(add_events & POLLIN)){
880                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e) ==-1))
881                                         goto error;
882                         }
883                         if (likely(del_events & POLLOUT)){
884                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
885                                                                                                                 EV_DELETE, 0) ==-1))
886                                                 goto error;
887                         }else if (likely(add_events & POLLOUT)){
888                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
889                                         goto error;
890                         }
891                         break;
892 #endif
893 #ifdef HAVE_DEVPOLL
894                 case POLL_DEVPOLL:
895                                 /* for /dev/poll the closed fds _must_ be removed
896                                    (they are not removed automatically on close()) */
897                                 pfd.fd=fd;
898                                 pfd.events=POLLREMOVE;
899                                 pfd.revents=0;
900 again_devpoll1:
901                                 if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
902                                         if (errno==EINTR) goto again_devpoll1;
903                                         LM_ERR("removing fd from /dev/poll failed: %s [%d]\n",
904                                                                 strerror(errno), errno);
905                                         goto error;
906                                 }
907 again_devpoll2:
908                                 pfd.events=events;
909                                 pfd.revents=0;
910                                 if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
911                                         if (errno==EINTR) goto again_devpoll2;
912                                         LM_ERR("re-adding fd to /dev/poll failed: %s [%d]\n",
913                                                                 strerror(errno), errno);
914                                         /* error re-adding the fd => mark it as removed/unhash */
915                                         unhash_fd_map(e);
916                                         goto error;
917                                 }
918                                 break;
919 #endif
920                 default:
921                         LM_CRIT("no support for poll method %s (%d)\n",
922                                 poll_method_str[h->poll_method], h->poll_method);
923                         goto error;
924         }
925         e->events=events; /* only on success */
926         return 0;
927 error:
928         return -1;
929 #undef fix_fd_array
930 }
931
932
933
934 /* io_wait_loop_x style function.
935  * wait for io using poll()
936  * params: h      - io_wait handle
937  *         t      - timeout in s
938  *         repeat - if !=0 handle_io will be called until it returns <=0
939  * returns: number of IO events handled on success (can be 0), -1 on error
940  */
941 inline static int io_wait_loop_poll(io_wait_h* h, int t, int repeat)
942 {
943         int n, r;
944         int ret;
945         struct fd_map* fm;
946         
947 again:
948                 ret=n=poll(h->fd_array, h->fd_no, t*1000);
949                 if (n==-1){
950                         if (errno==EINTR) goto again; /* signal, ignore it */
951                         else{
952                                 LM_ERR("poll: %s [%d]\n", strerror(errno), errno);
953                                 goto error;
954                         }
955                 }
956                 for (r=0; (r<h->fd_no) && n; r++){
957                         fm=get_fd_map(h, h->fd_array[r].fd);
958                         if (h->fd_array[r].revents & (fm->events|POLLERR|POLLHUP)){
959                                 n--;
960                                 /* sanity checks */
961                                 if (unlikely((h->fd_array[r].fd >= h->max_fd_no)||
962                                                                 (h->fd_array[r].fd < 0))){
963                                         LM_CRIT("bad fd %d (no in the 0 - %d range)\n",
964                                                         h->fd_array[r].fd, h->max_fd_no);
965                                         /* try to continue anyway */
966                                         h->fd_array[r].events=0; /* clear the events */
967                                         continue;
968                                 }
969                                 h->crt_fd_array_idx=r;
970                                 /* repeat handle_io if repeat, fd still watched (not deleted
971                                  *  inside handle_io), handle_io returns that there's still
972                                  *  IO and the fd is still watched for the triggering event */
973                                 while(fm->type &&
974                                                 (handle_io(fm, h->fd_array[r].revents, r) > 0) &&
975                                                 repeat && ((fm->events|POLLERR|POLLHUP) &
976                                                                                                         h->fd_array[r].revents));
977                                 r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd)
978                                                                                   array shifting */
979                         }
980                 }
981 error:
982         return ret;
983 }
984
985
986
#ifdef HAVE_SELECT
/* io_wait_loop_x style function: waits for io using select().
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io is called again for the same fd as long
 *                  as it returns >0 and the fd is still watched for one of
 *                  the reported events
 * returns: the value returned by select() (negative if select() failed with
 *          something else than EINTR)
 */
inline static int io_wait_loop_select(io_wait_h* h, int t, int repeat)
{
	struct timeval tv;
	struct fd_map* entry;
	fd_set rd_ready;
	fd_set wr_ready;
	int ret;
	int pending; /* ready fds not yet processed */
	int fd;
	int revents;
	int i;
	
	/* select() modifies the sets and may modify the timeout, so they are
	 * set up again before each attempt; retry if interrupted by a signal */
	do{
		rd_ready=h->master_rset;
		wr_ready=h->master_wset;
		tv.tv_sec=t;
		tv.tv_usec=0;
		ret=select(h->max_fd_select+1, &rd_ready, &wr_ready, 0, &tv);
	}while((ret<0) && (errno==EINTR));
	pending=ret;
	if (ret<0){
		LM_ERR("select: %s [%d]\n", strerror(errno), errno);
		/* nothing to process, the loop below is skipped */
		pending=0;
	}
	/* the watched fds are taken from the poll fd array */
	for (i=0; (i<h->fd_no) && pending; i++){
		fd=h->fd_array[i].fd;
		revents=0;
		if (likely(FD_ISSET(fd, &rd_ready)))
			revents|=POLLIN;
		if (unlikely(FD_ISSET(fd, &wr_ready)))
			revents|=POLLOUT;
		if (revents==0)
			continue;
		h->crt_fd_array_idx=i;
		entry=get_fd_map(h, fd);
		for(;;){
			if (!entry->type)
				break; /* not watched anymore */
			if (!(entry->events & revents))
				break; /* not watched for these events */
			if (handle_io(entry, revents, i)<=0)
				break; /* no more io or error */
			if (!repeat)
				break;
		}
		/* the index can change due to io_watch_del(fd) array shifting */
		i=h->crt_fd_array_idx;
		pending--;
	}
	return ret;
}
#endif
1031
1032
1033
#ifdef HAVE_EPOLL
/* io_wait_loop_x style function: waits for io using epoll_wait().
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io is called again for the same event as
 *                  long as it returns >0 and the fd is still watched for it
 * returns: the value returned by epoll_wait() (number of events, can be 0),
 *          -1 on error
 */
inline static int io_wait_loop_epoll(io_wait_h* h, int t, int repeat)
{
	struct epoll_event* ev;
	struct fd_map* entry;
	int n;
	int i;
	int revents;
	
	/* retry if interrupted by a signal */
	do{
		n=epoll_wait(h->epfd, h->ep_array, h->fd_no, t*1000);
	}while(unlikely((n==-1) && (errno==EINTR)));
	if (unlikely(n==-1)){
		LM_ERR("epoll_wait(%d, %p, %d, %d): %s [%d]\n",
				h->epfd, h->ep_array, h->fd_no, t*1000,
				strerror(errno), errno);
		return n;
	}
#if 0
	if (n>1){
		for(i=0; i<n; i++){
			LM_ERR("ep_array[%d]= %x, %p\n",
				i, h->ep_array[i].events, h->ep_array[i].data.ptr);
		}
	}
#endif
	for (i=0; i<n; i++){
		ev=&h->ep_array[i];
		/* translate the epoll event flags into poll style flags */
		revents=0;
		if (ev->events & (EPOLLIN|EPOLLPRI))
			revents|=POLLIN;
		if (ev->events & EPOLLOUT)
			revents|=POLLOUT;
		if (ev->events & EPOLLERR)
			revents|=POLLERR;
		if (ev->events & EPOLLHUP)
			revents|=POLLHUP;
#ifdef POLLRDHUP
		if (ev->events & EPOLLRDHUP)
			revents|=POLLRDHUP;
#endif
		if (unlikely(revents==0)){
			LM_ERR("unexpected event %x on %d/%d, data=%p\n",
				ev->events, i+1, n, ev->data.ptr);
			continue;
		}
		entry=(struct fd_map*)ev->data.ptr;
		for(;;){
			if (!entry->type)
				break; /* not watched anymore */
			if (!((entry->events|POLLERR|POLLHUP) & revents))
				break; /* not watched for these events */
			if (handle_io(entry, revents, -1)<=0)
				break; /* no more io or error */
			if (!repeat)
				break;
		}
	}
	return n;
}
#endif
1083
1084
1085
1086 #ifdef HAVE_KQUEUE
     /* io_wait_loop_x style function: waits for io using kevent().
      * The changes queued in h->kq_changes (h->kq_nchanges entries) are
      * submitted with the same kevent() call(s) that wait for events; each
      * returned event is translated to poll style flags and passed to
      * handle_io().
      * params: h      - io_wait handle
      *         t      - timeout in s
      *         repeat - if !=0 handle_io is called again for the same event as
      *                  long as it returns >0 and the fd is still watched for it
      * returns: the number of events returned by the last kevent() call
      *          (failed kevent() calls are always retried)
      */
1087 inline static int io_wait_loop_kqueue(io_wait_h* h, int t, int repeat)
1088 {
1089         int n, r;
1090         struct timespec tspec;
1091         struct fd_map* fm;
1092         int orig_changes;
1093         int apply_changes;
1094         int revents;
1095         
1096         tspec.tv_sec=t;
1097         tspec.tv_nsec=0;
             /* orig_changes  - queued changes that still have to be applied
              * apply_changes - how many of them are submitted by the next
              *                 kevent() call */
1098         orig_changes=h->kq_nchanges;
1099         apply_changes=orig_changes;
1100         do {
1101 again:
1102                 n=kevent(h->kq_fd, h->kq_changes, apply_changes,  h->kq_array,
1103                                         h->kq_array_size, &tspec);
1104                 if (unlikely(n==-1)){
1105                         if (unlikely(errno==EINTR)) goto again; /* signal, ignore it */
1106                         else {
1107                                 /* for a detailed explanation of what follows see below
1108                                    the EV_ERROR case */
1109                                 if (unlikely(!(errno==EBADF || errno==ENOENT)))
1110                                         BUG("io_wait_loop_kqueue: kevent: unexpected error"
1111                                                 " %s [%d]\n", strerror(errno), errno);
1112                                 /* some of the FDs in kq_changes are bad (already closed)
1113                                    and there is not enough space in kq_array to return all
1114                                    of them back => retry with only kq_array_size changes */
1115                                 apply_changes = h->kq_array_size;
1116                                 goto again;
1117                         }
1118                 }
1119                 /* remove applied changes */
1120                 h->kq_nchanges -= apply_changes;
1121                 if (unlikely(apply_changes < orig_changes)) {
                             /* only a part of the changes was submitted: move the
                                remaining ones to the start of kq_changes and prepare
                                the next batch (at most kq_array_size changes) */
1122                         orig_changes -= apply_changes;
1123                         memmove(&h->kq_changes[0], &h->kq_changes[apply_changes],
1124                                                                         sizeof(h->kq_changes[0])*h->kq_nchanges);
1125                         apply_changes = (orig_changes < h->kq_array_size) ? orig_changes :
1126                                                                 h->kq_array_size;
1127                 } else {
                             /* everything was submitted => the do-while loop will end */
1128                         orig_changes = 0;
1129                         apply_changes = 0;
1130                 }
1131                 for (r=0; r<n; r++){
1132 #ifdef EXTRA_DEBUG
1133                         DBG("DBG: kqueue: event %d/%d: fd=%d, udata=%lx, flags=0x%x\n",
1134                                         r, n, h->kq_array[r].ident, (long)h->kq_array[r].udata,
1135                                         h->kq_array[r].flags);
1136 #endif
1137                         if (unlikely((h->kq_array[r].flags & EV_ERROR) ||
1138                                                          h->kq_array[r].udata == 0)){
1139                                 /* error in changes: we ignore it if it has to do with a
1140                                    bad fd or udata==0. It can be caused by trying to remove an
1141                                    already closed fd: race between adding something to the
1142                                    changes array, close() and applying the changes (EBADF).
1143                                    E.g. for ser tcp: tcp_main sends a fd to child for reading
1144                                     => deletes it from the watched fds => the changes array
1145                                         will contain an EV_DELETE for it. Before the changes
1146                                         are applied (they are at the end of the main io_wait loop,
1147                                         after all the fd events were processed), a CON_ERR sent
1148                                         to tcp_main by a sender (send fail) is processed and causes
1149                                         the fd to be closed. When the changes are applied =>
1150                                         error for the EV_DELETE attempt of a closed fd.
1151                                         Something similar can happen when a fd is scheduled
1152                                         for removal, is close()'ed before being removed and
1153                                         re-opened (a new socket gets the same fd). When the
1154                                         watched fd changes will be applied the fd will be valid
1155                                         (so no EBADF), but it's not watched anymore => ENOENT.
1156                                         We report a BUG for the other errors (there's nothing
1157                                         constructive we can do if we get an error we don't know
1158                                         how to handle), but apart from that we ignore it in the
1159                                         idea that it is better to apply the rest of the changes,
1160                                         rather than dropping all of them.
1161                                 */
1162                                 /*
1163                                         example EV_ERROR for trying to delete a read watched fd,
1164                                         that was already closed:
1165                                         {
1166                                                 ident = 63,  [fd]
1167                                                 filter = -1, [EVFILT_READ]
1168                                                 flags = 16384, [EV_ERROR]
1169                                                 fflags = 0,
1170                                                 data = 9, [errno = EBADF]
1171                                                 udata = 0x0
1172                                         }
1173                                 */
1174                                 if (h->kq_array[r].data != EBADF &&
1175                                                 h->kq_array[r].data != ENOENT)
1176                                         BUG("io_wait_loop_kqueue: kevent unexpected error on "
1177                                                         "fd %ld udata %lx: %s [%ld]\n",
1178                                                         (long)h->kq_array[r].ident,
1179                                                         (long)h->kq_array[r].udata,
1180                                                         strerror(h->kq_array[r].data),
1181                                                         (long)h->kq_array[r].data);
1182                         }else{
                                     /* udata holds the fd_map entry of the fd.
                                        In the revents expressions below ((int)!(x)-1) is an
                                        all-ones mask if x is true and 0 otherwise, so
                                        POLLHUP is set on EV_EOF and POLLERR on EV_EOF with
                                        fflags!=0 */
1183                                 fm=(struct fd_map*)h->kq_array[r].udata;
1184                                 if (likely(h->kq_array[r].filter==EVFILT_READ)){
1185                                         revents=POLLIN |
1186                                                 (((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
1187                                                 (((int)!((h->kq_array[r].flags & EV_EOF) &&
1188                                                                         h->kq_array[r].fflags != 0) - 1)&POLLERR);
                                             /* call handle_io while the fd is still watched for
                                                the event, there is more io and repeat is set */
1189                                         while(fm->type && (fm->events & revents) &&
1190                                                         (handle_io(fm, revents, -1)>0) && repeat);
1191                                 }else if (h->kq_array[r].filter==EVFILT_WRITE){
1192                                         revents=POLLOUT |
1193                                                 (((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
1194                                                 (((int)!((h->kq_array[r].flags & EV_EOF) &&
1195                                                                         h->kq_array[r].fflags != 0) - 1)&POLLERR);
1196                                         while(fm->type && (fm->events & revents) &&
1197                                                         (handle_io(fm, revents, -1)>0) && repeat);
1198                                 }else{
1199                                         BUG("io_wait_loop_kqueue: unknown filter: kqueue: event "
1200                                                         "%d/%d: fd=%d, filter=%d, flags=0x%x, fflags=0x%x,"
1201                                                         " data=%lx, udata=%lx\n",
1202                                         r, n, (int)h->kq_array[r].ident, (int)h->kq_array[r].filter,
1203                                         h->kq_array[r].flags, h->kq_array[r].fflags,
1204                                         (unsigned long)h->kq_array[r].data,
1205                                         (unsigned long)h->kq_array[r].udata);
1206                                 }
1207                         }
1208                 }
             /* loop again if there are still queued changes to submit */
1209         } while(unlikely(orig_changes));
1210         return n;
1211 }
1212 #endif
1213
1214
1215
1216 #ifdef HAVE_SIGIO_RT
1217 /* sigio rt version has no repeat (it doesn't make sense)*/
1218 inline static int io_wait_loop_sigio_rt(io_wait_h* h, int t)
1219 {
1220         int n;
1221         int ret;
1222         struct timespec ts;
1223         siginfo_t siginfo;
1224         int sigio_band;
1225         int sigio_fd;
1226         struct fd_map* fm;
1227         int revents;
1228 #ifdef SIGINFO64_WORKARROUND
1229         int* pi;
1230 #endif
1231         
1232         
1233         ret=1; /* 1 event per call normally */
1234         ts.tv_sec=t;
1235         ts.tv_nsec=0;
1236         if (unlikely(!sigismember(&h->sset, h->signo) ||
1237                                         !sigismember(&h->sset, SIGIO))) {
1238                 LM_CRIT("the signal mask is not properly set!\n");
1239                 goto error;
1240         }
1241 again:
1242         n=sigtimedwait(&h->sset, &siginfo, &ts);
1243         if (unlikely(n==-1)){
1244                 if (errno==EINTR) goto again; /* some other signal, ignore it */
1245                 else if (errno==EAGAIN){ /* timeout */
1246                         ret=0;
1247                         goto end;
1248                 }else{
1249                         LM_ERR("sigtimed_wait %s [%d]\n", strerror(errno), errno);
1250                         goto error;
1251                 }
1252         }
1253         if (likely(n!=SIGIO)){
1254 #ifdef SIGINFO64_WORKARROUND
1255                 /* on linux siginfo.si_band is defined as long in userspace
1256                  * and as int in kernel (< 2.6.5) => on 64 bits things will break!
1257                  * (si_band will include si_fd, and si_fd will contain
1258                  *  garbage).
1259                  *  see /usr/src/linux/include/asm-generic/siginfo.h and
1260                  *      /usr/include/bits/siginfo.h
1261                  *  On newer kernels this is fixed (si_band is long in the kernel too).
1262                  * -- andrei */
1263                 if  ((_os_ver<0x020605) && (sizeof(siginfo.si_band)>sizeof(int))){
1264                         pi=(int*)(void*)&siginfo.si_band; /* avoid type punning warnings */
1265                         sigio_band=*pi;
1266                         sigio_fd=*(pi+1);
1267                 }else
1268 #endif
1269                 {
1270                         sigio_band=siginfo.si_band;
1271                         sigio_fd=siginfo.si_fd;
1272                 }
1273                 if (unlikely(siginfo.si_code==SI_SIGIO)){
1274                         /* old style, we don't know the event (linux 2.2.?) */
1275                         LM_WARN("old style sigio interface\n");
1276                         fm=get_fd_map(h, sigio_fd);
1277                         /* we can have queued signals generated by fds not watched
1278                          * any more, or by fds in transition, to a child => ignore them*/
1279                         if (fm->type)
1280                                 handle_io(fm, POLLIN|POLLOUT, -1);
1281                 }else{
1282                         /* si_code contains the SIGPOLL reason: POLL_IN, POLL_OUT,
1283                          *  POLL_MSG, POLL_ERR, POLL_PRI or POLL_HUP
1284                          * and si_band the translated poll event bitmap:
1285                          *  POLLIN|POLLRDNORM  (=POLL_IN),
1286                          *  POLLOUT|POLLWRNORM|POLLWRBAND (=POLL_OUT),
1287                          *  POLLIN|POLLRDNORM|POLLMSG (=POLL_MSG),
1288                          *  POLLERR (=POLL_ERR),
1289                          *  POLLPRI|POLLRDBAND (=POLL_PRI),
1290                          *  POLLHUP|POLLERR (=POLL_HUP)
1291                          *  [linux 2.6.22 fs/fcntl.c:447]
1292                          */
1293 #ifdef EXTRA_DEBUG
1294                         DBG("io_wait_loop_sigio_rt: siginfo: signal=%d (%d),"
1295                                         " si_code=%d, si_band=0x%x,"
1296                                         " si_fd=%d\n",
1297                                         siginfo.si_signo, n, siginfo.si_code,
1298                                         (unsigned)sigio_band,
1299                                         sigio_fd);
1300 #endif
1301                         /* on some errors (e.g. when receving TCP RST), sigio_band will
1302                          * be set to 0x08 (POLLERR) or 0x18 (POLLERR|POLLHUP - on stream
1303                          *  unix socket close) , so better catch all events --andrei */
1304                         if (likely(sigio_band)){
1305                                 fm=get_fd_map(h, sigio_fd);
1306                                 revents=sigio_band;
1307                                 /* fix revents==POLLPRI case */
1308                                 revents |= (!(revents & POLLPRI)-1) & POLLIN;
1309                                 /* we can have queued signals generated by fds not watched
1310                                  * any more, or by fds in transition, to a child
1311                                  * => ignore them */
1312                                 if (fm->type && ((fm->events|POLLERR|POLLHUP) & revents))
1313                                         handle_io(fm, revents, -1);
1314                                 else
1315                                         DBG("WARNING: io_wait_loop_sigio_rt: ignoring event"
1316                                                         " %x on fd %d, watching for %x, si_code=%x "
1317                                                         "(fm->type=%d, fm->fd=%d, fm->data=%p)\n",
1318                                                         sigio_band, sigio_fd, fm->events, siginfo.si_code,
1319                                                         fm->type, fm->fd, fm->data);
1320                         }else{
1321                                 LM_ERR("unexpected event on fd %d: %x\n", sigio_fd, sigio_band);
1322                         }
1323                 }
1324         }else{
1325                 /* signal queue overflow
1326                  * TODO: increase signal queue size: 2.4x /proc/.., 2.6x -rlimits */
1327                 LM_WARN("signal queue overflowed - falling back to poll\n");
1328                 /* clear real-time signal queue
1329                  * both SIG_IGN and SIG_DFL are needed , it doesn't work
1330                  * only with SIG_DFL  */
1331                 if (signal(h->signo, SIG_IGN)==SIG_ERR){
1332                         LM_CRIT("do_poll: couldn't reset signal to IGN\n");
1333                 }
1334                 
1335                 if (signal(h->signo, SIG_DFL)==SIG_ERR){
1336                         LM_CRIT("do_poll: couldn't reset signal to DFL\n");
1337                 }
1338                 /* falling back to normal poll */
1339                 ret=io_wait_loop_poll(h, -1, 1);
1340         }
1341 end:
1342         return ret;
1343 error:
1344         return -1;
1345 }
1346 #endif
1347
1348
1349
1350 #ifdef HAVE_DEVPOLL
1351 inline static int io_wait_loop_devpoll(io_wait_h* h, int t, int repeat)
1352 {
1353         int n, r;
1354         int ret;
1355         struct dvpoll dpoll;
1356         struct fd_map* fm;
1357
1358                 dpoll.dp_timeout=t*1000;
1359                 dpoll.dp_nfds=h->fd_no;
1360                 dpoll.dp_fds=h->fd_array;
1361 again:
1362                 ret=n=ioctl(h->dpoll_fd, DP_POLL, &dpoll);
1363                 if (unlikely(n==-1)){
1364                         if (errno==EINTR) goto again; /* signal, ignore it */
1365                         else{
1366                                 LM_ERR("ioctl: %s [%d]\n", strerror(errno), errno);
1367                                 goto error;
1368                         }
1369                 }
1370                 for (r=0; r< n; r++){
1371                         if (h->fd_array[r].revents & (POLLNVAL|POLLERR)){
1372                                 LM_ERR("pollinval returned for fd %d, revents=%x\n",
1373                                         h->fd_array[r].fd, h->fd_array[r].revents);
1374                         }
1375                         /* POLLIN|POLLHUP just go through */
1376                         fm=get_fd_map(h, h->fd_array[r].fd);
1377                         while(fm->type && (fm->events & h->fd_array[r].revents) &&
1378                                 (handle_io(fm, h->fd_array[r].revents, r) > 0) && repeat);
1379                 }
1380 error:
1381         return ret;
1382 }
1383 #endif
1384
1385
1386
1387 /* init */
1388
1389
/* initializes the static vars/arrays
 * params:           h - pointer to the io_wait_h that will be initialized
 *              max_fd - maximum allowed fd number
 *         poll_method - poll method (0 for automatic best fit)
 */
1395 int init_io_wait(io_wait_h* h, int max_fd, enum poll_types poll_method);
1396
1397 /* destroys everything init_io_wait allocated */
1398 void destroy_io_wait(io_wait_h* h);
1399
1400
1401 #endif