io_wait: fix: check for EV_ERROR for kqueue()
[sip-router] / io_wait.h
1 /* 
2  * $Id$
3  * 
4  * Copyright (C) 2005 iptelorg GmbH
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 /*
19  * tcp io wait common stuff used by tcp_main.c & tcp_read.c
20  * All the functions are inline because of speed reasons and because they are
21  * used only from 2 places.
22  * You also have to define:
23  *     int handle_io(struct fd_map* fm, short events, int idx) (see below)
24  *     (this could be trivially replaced by a callback pointer entry attached
25  *      to the io_wait handler if more flexibility rather than performance
26  *      is needed)
27  *      fd_type - define to some enum of your choice and define also
28  *                FD_TYPE_DEFINED (if you don't do it fd_type will be defined
29  *                to int). 0 has a special not set/not init. meaning
30  *                (a lot of sanity checks and the sigio_rt code are based on
31  *                 this assumption)
32  *     local_malloc (defaults to pkg_malloc)
33  *     local_free   (defaults to pkg_free)
34  *  
35  */
36 /* 
37  * History:
38  * --------
39  *  2005-06-13  created by andrei
40  *  2005-06-26  added kqueue (andrei)
41  *  2005-07-01  added /dev/poll (andrei)
42  *  2006-05-30  sigio 64 bit workaround enabled for kernels < 2.6.5 (andrei)
43  *  2007-11-22  when handle_io() is called in a loop check & stop if the fd was
44  *               removed inside handle_io() (andrei)
45  *  2007-11-29  support for write (POLLOUT); added io_watch_chg() (andrei)
46  *  2008-02-04  POLLRDHUP & EPOLLRDHUP support (automatically enabled if POLLIN
47  *               is set) (andrei)
48  *  2010-06-17  re-enabled & enhanced the EV_ERROR for kqueue (andrei)
49  */
50
51
52
53 #ifndef _io_wait_h
54 #define _io_wait_h
55
56 #include <errno.h>
57 #include <string.h>
58 #ifdef HAVE_SIGIO_RT
59 #define __USE_GNU /* or else F_SETSIG won't be included */
60 #include <sys/types.h> /* recv */
61 #include <sys/socket.h> /* recv */
62 #include <signal.h> /* sigprocmask, sigwait a.s.o */
63 #endif
64
65 #define _GNU_SOURCE  /* for POLLRDHUP on linux */
66 #include <sys/poll.h>
67 #include <fcntl.h>
68
69 #ifdef HAVE_EPOLL
70 #include <sys/epoll.h>
71 #endif
72 #ifdef HAVE_KQUEUE
73 #include <sys/types.h> /* needed on freebsd */
74 #include <sys/event.h>
75 #include <sys/time.h>
76 #endif
77 #ifdef HAVE_DEVPOLL
78 #include <sys/devpoll.h>
79 #endif
80 #ifdef HAVE_SELECT
81 /* needed on openbsd for select*/
82 #include <sys/time.h> 
83 #include <sys/types.h> 
84 #include <unistd.h>
85 /* needed according to POSIX for select*/
86 #include <sys/select.h>
87 #endif
88
89 #include "dprint.h"
90
91 #include "poll_types.h" /* poll_types*/
92 #ifdef HAVE_SIGIO_RT
93 #include "pt.h" /* mypid() */
94 #endif
95
96 #include "compiler_opt.h"
97
98
99 #ifdef HAVE_EPOLL
100 /* fix defines for EPOLL */
101 #if defined POLLRDHUP && ! defined EPOLLRDHUP
102 #define EPOLLRDHUP POLLRDHUP  /* should work on all linuxes */
103 #endif /* POLLRDHUP && EPOLLRDHUP */
104 #endif /* HAVE_EPOLL */
105
106
107 extern int _os_ver; /* os version number, needed to select bugs workarrounds */
108
109
110 #if 0
111 enum fd_types; /* this should be defined from the including file,
112                                   see tcp_main.c for an example, 
113                                   0 has a special meaning: not used/empty*/
114 #endif
115
116 #ifndef FD_TYPE_DEFINED
117 typedef int fd_type;
118 #define FD_TYPE_DEFINED
119 #endif
120
121 /* maps a fd to some other structure; used in almost all cases
122  * except epoll and maybe kqueue or /dev/poll */
123 struct fd_map{
124         int fd;               /* fd no */
125         fd_type type;         /* "data" type */
126         void* data;           /* pointer to the corresponding structure */
127         short events;         /* events we are interested int */
128 };
129
130
#ifdef HAVE_KQUEUE
/* Size of the kevent changes array; define KQ_CHANGES_ARRAY_SIZE before
 * this point to override the default. */
#ifndef KQ_CHANGES_ARRAY_SIZE
#define KQ_CHANGES_ARRAY_SIZE 256
#endif

/* Cast applied to the udata argument passed to EV_SET(): (intptr_t) when
 * __OS_netbsd is defined, nothing otherwise.
 * It is defined independently of KQ_CHANGES_ARRAY_SIZE, so overriding the
 * array size does not leave the cast macro undefined. */
#ifndef KEV_UDATA_CAST
#ifdef __OS_netbsd
#define KEV_UDATA_CAST (intptr_t)
#else
#define KEV_UDATA_CAST
#endif
#endif

#endif
143
144
145 /* handler structure */
146 struct io_wait_handler{
147         enum poll_types poll_method;
148         int flags;
149         struct fd_map* fd_hash;
150         int fd_no; /*  current index used in fd_array and the passed size for 
151                                    ep_array & kq_array*/
152         int max_fd_no; /* maximum fd no, is also the size of fd_array,
153                                                        fd_hash  and ep_array*/
154         /* common stuff for POLL, SIGIO_RT and SELECT
155          * since poll support is always compiled => this will always be compiled */
156         struct pollfd* fd_array; /* used also by devpoll as devpoll array */
157         int crt_fd_array_idx; /*  crt idx for which handle_io is called
158                                                          (updated also by del -> internal optimization) */
159         /* end of common stuff */
160 #ifdef HAVE_EPOLL
161         int epfd; /* epoll ctrl fd */
162         struct epoll_event* ep_array;
163 #endif
164 #ifdef HAVE_SIGIO_RT
165         sigset_t sset; /* signal mask for sigio & sigrtmin */
166         int signo;     /* real time signal used */
167 #endif
168 #ifdef HAVE_KQUEUE
169         int kq_fd;
170         struct kevent* kq_array;   /* used for the eventlist*/
171         struct kevent* kq_changes; /* used for the changelist */
172         size_t kq_nchanges;
173         size_t kq_changes_size; /* size of the changes array */
174 #endif
175 #ifdef HAVE_DEVPOLL
176         int dpoll_fd;
177 #endif
178 #ifdef HAVE_SELECT
179         fd_set master_rset; /* read set */
180         fd_set master_wset; /* write set */
181         int max_fd_select; /* maximum select used fd */
182 #endif
183 };
184
185 typedef struct io_wait_handler io_wait_h;
186
187
/* returns a pointer to the fd_map slot that belongs to fd
 * (fd_hash is indexed directly by the fd value) */
#define get_fd_map(h, fd)	((h)->fd_hash+(fd))
/* marks a fd_map slot as unused (type 0, fd -1); pfm must be a pointer
 * obtained from get_fd_map or hash_fd_map */
#define unhash_fd_map(pfm) \
	do{ \
		(pfm)->fd=-1; \
		(pfm)->type=0; /* F_NONE */ \
	}while(0)
197
198 /* add a fd_map structure to the fd hash */
199 static inline struct fd_map* hash_fd_map(       io_wait_h* h,
200                                                                                         int fd,
201                                                                                         short events,
202                                                                                         fd_type type,
203                                                                                         void* data)
204 {
205         h->fd_hash[fd].fd=fd;
206         h->fd_hash[fd].events=events;
207         h->fd_hash[fd].type=type;
208         h->fd_hash[fd].data=data;
209         return &h->fd_hash[fd];
210 }
211
212
213
/* generic io handling routine; it must be provided by the including file
 * (faster than registering a callback pointer). With HANDLE_IO_INLINE it
 * is declared static inline, otherwise as a normal external function.
 *
 * params:  fm     - pointer to a fd hash entry
 *          events - combination of POLLIN, POLLOUT, POLLERR & POLLHUP
 *          idx    - index in the fd_array (or -1 if not known)
 * return: -1 on error
 *          0 on EAGAIN or when it is known by some other means that no
 *            more io events are queued on the fd (the receive buffer is
 *            empty). Useful to detect when there are no more io events
 *            queued for sigio_rt, epoll_et, kqueue.
 *         >0 on successful read from the fd (when more io might be
 *            queued -- the receive buffer might still be non-empty)
 */
#ifdef HANDLE_IO_INLINE
static inline int handle_io(struct fd_map* fm, short events, int idx);
#else
int handle_io(struct fd_map* fm, short events, int idx);
#endif
233
234
235
#ifdef HAVE_KQUEUE
/*
 * kqueue specific function: queue a change in the kevent changes array.
 * If the array is already full, the pending changes are first submitted
 * with a zero timeout kevent() call (restarted on EINTR) and the array is
 * emptied.
 *
 * TODO: check if the event already exists in the change list or if it's
 *       complementary to an event in the list (e.g. EVFILT_WRITE, EV_DELETE
 *       and EVFILT_WRITE, EV_ADD for the same fd).
 * returns: -1 on error (the flush failed, the new change is not queued),
 *           0 on success
 */
static inline int kq_ev_change(io_wait_h* h, int fd, int filter, int flag,
								void* data)
{
	struct timespec no_wait;
	int ret;

	if (h->kq_nchanges>=h->kq_changes_size){
		/* no free slot left => hand the queued changes over to kqueue */
		LOG(L_WARN, "WARNING: kq_ev_change: kqueue changes array full"
					" trying to flush...\n");
		no_wait.tv_sec=0;
		no_wait.tv_nsec=0;
		do{
			ret=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges, 0, 0,
						&no_wait);
		}while((ret==-1) && (errno==EINTR));
		if (ret==-1){
			LOG(L_ERR, "ERROR: io_watch_add: kevent flush changes "
						" failed: %s [%d]\n", strerror(errno), errno);
			return -1;
		}
		/* everything was submitted, the changes array is empty again */
		h->kq_nchanges=0;
	}
	EV_SET(&h->kq_changes[h->kq_nchanges], fd, filter, flag, 0, 0,
			KEV_UDATA_CAST data);
	h->kq_nchanges++;
	return 0;
}
#endif
274
275
276
277 /* generic io_watch_add function
278  * Params:
279  *     h      - pointer to initialized io_wait handle
280  *     fd     - fd to watch
281  *     events - bitmap with the fd events for which the fd should be watched
282  *              (combination of POLLIN and POLLOUT)
283  *     type   - fd type (non 0 value, returned in the call to handle_io)
284  *     data   - pointer/private data returned in the handle_io call
285  * returns 0 on success, -1 on error
286  *
287  * WARNING: handle_io() can be called immediately (from io_watch_add()) so
288  *  make sure that any dependent init. (e.g. data stuff) is made before
289  *  calling io_watch_add
290  *
291  * this version should be faster than pointers to poll_method specific
292  * functions (it avoids functions calls, the overhead being only an extra
293  *  switch())*/
294 inline static int io_watch_add( io_wait_h* h,
295                                                                 int fd,
296                                                                 short events,
297                                                                 fd_type type,
298                                                                 void* data)
299 {
300
301         /* helper macros */
302 #define fd_array_setup(ev) \
303         do{ \
304                 h->fd_array[h->fd_no].fd=fd; \
305                 h->fd_array[h->fd_no].events=(ev); /* useless for select */ \
306                 h->fd_array[h->fd_no].revents=0;     /* useless for select */ \
307         }while(0)
308         
309 #define set_fd_flags(f) \
310         do{ \
311                         flags=fcntl(fd, F_GETFL); \
312                         if (flags==-1){ \
313                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: GETFL failed:" \
314                                                 " %s [%d]\n", strerror(errno), errno); \
315                                 goto error; \
316                         } \
317                         if (fcntl(fd, F_SETFL, flags|(f))==-1){ \
318                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETFL" \
319                                                         " failed: %s [%d]\n", strerror(errno), errno); \
320                                 goto error; \
321                         } \
322         }while(0)
323         
324         
325         struct fd_map* e;
326         int flags;
327 #ifdef HAVE_EPOLL
328         struct epoll_event ep_event;
329 #endif
330 #ifdef HAVE_DEVPOLL
331         struct pollfd pfd;
332 #endif
333 #if defined(HAVE_SIGIO_RT) || defined (HAVE_EPOLL)
334         int n;
335 #endif
336 #if defined(HAVE_SIGIO_RT)
337         int idx;
338         int check_io;
339         struct pollfd pf;
340         
341         check_io=0; /* set to 1 if we need to check for pre-existing queued
342                                    io/data on the fd */
343         idx=-1;
344 #endif
345         e=0;
346         /* sanity checks */
347         if (unlikely(fd==-1)){
348                 LOG(L_CRIT, "BUG: io_watch_add: fd is -1!\n");
349                 goto error;
350         }
351         if (unlikely((events&(POLLIN|POLLOUT))==0)){
352                 LOG(L_CRIT, "BUG: io_watch_add: invalid events: 0x%0x\n", events);
353                 goto error;
354         }
355         /* check if not too big */
356         if (unlikely(h->fd_no>=h->max_fd_no)){
357                 LOG(L_CRIT, "ERROR: io_watch_add: maximum fd number exceeded:"
358                                 " %d/%d\n", h->fd_no, h->max_fd_no);
359                 goto error;
360         }
361         DBG("DBG: io_watch_add(%p, %d, %d, %p), fd_no=%d\n",
362                         h, fd, type, data, h->fd_no);
363         /*  hash sanity check */
364         e=get_fd_map(h, fd);
365         if (unlikely(e && (e->type!=0 /*F_NONE*/))){
366                 LOG(L_ERR, "ERROR: io_watch_add: trying to overwrite entry %d"
367                                 " watched for %x in the hash(%d, %d, %p) with (%d, %d, %p)\n",
368                                 fd, events, e->fd, e->type, e->data, fd, type, data);
369                 e=0;
370                 goto error;
371         }
372         
373         if (unlikely((e=hash_fd_map(h, fd, events, type, data))==0)){
374                 LOG(L_ERR, "ERROR: io_watch_add: failed to hash the fd %d\n", fd);
375                 goto error;
376         }
377         switch(h->poll_method){ /* faster then pointer to functions */
378                 case POLL_POLL:
379 #ifdef POLLRDHUP
380                         /* listen to POLLRDHUP by default (if POLLIN) */
381                         events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
382 #endif /* POLLRDHUP */
383                         fd_array_setup(events);
384                         set_fd_flags(O_NONBLOCK);
385                         break;
386 #ifdef HAVE_SELECT
387                 case POLL_SELECT:
388                         fd_array_setup(events);
389                         if (likely(events & POLLIN))
390                                 FD_SET(fd, &h->master_rset);
391                         if (unlikely(events & POLLOUT))
392                                 FD_SET(fd, &h->master_wset);
393                         if (h->max_fd_select<fd) h->max_fd_select=fd;
394                         break;
395 #endif
396 #ifdef HAVE_SIGIO_RT
397                 case POLL_SIGIO_RT:
398                         fd_array_setup(events);
399                         /* re-set O_ASYNC might be needed, if not done from 
400                          * io_watch_del (or if somebody wants to add a fd which has
401                          * already O_ASYNC/F_SETSIG set on a duplicate)
402                          */
403                         /* set async & signal */
404                         if (fcntl(fd, F_SETOWN, my_pid())==-1){
405                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETOWN"
406                                 " failed: %s [%d]\n", strerror(errno), errno);
407                                 goto error;
408                         }
409                         if (fcntl(fd, F_SETSIG, h->signo)==-1){
410                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETSIG"
411                                         " failed: %s [%d]\n", strerror(errno), errno);
412                                 goto error;
413                         }
414                         /* set both non-blocking and async */
415                         set_fd_flags(O_ASYNC| O_NONBLOCK);
416 #ifdef EXTRA_DEBUG
417                         DBG("io_watch_add: sigio_rt on f %d, signal %d to pid %d\n",
418                                         fd,  h->signo, my_pid());
419 #endif
420                         /* empty socket receive buffer, if buffer is already full
421                          * no more space to put packets
422                          * => no more signals are ever generated
423                          * also when moving fds, the freshly moved fd might have
424                          *  already some bytes queued, we want to get them now
425                          *  and not later -- andrei */
426                         idx=h->fd_no;
427                         check_io=1;
428                         break;
429 #endif
430 #ifdef HAVE_EPOLL
431                 case POLL_EPOLL_LT:
432                         ep_event.events=
433 #ifdef POLLRDHUP
434                                                 /* listen for EPOLLRDHUP too */
435                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
436 #else /* POLLRDHUP */
437                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
438 #endif /* POLLRDHUP */
439                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
440                         ep_event.data.ptr=e;
441 again1:
442                         n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
443                         if (unlikely(n==-1)){
444                                 if (errno==EAGAIN) goto again1;
445                                 LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
446                                         strerror(errno), errno);
447                                 goto error;
448                         }
449                         break;
450                 case POLL_EPOLL_ET:
451                         set_fd_flags(O_NONBLOCK);
452                         ep_event.events=
453 #ifdef POLLRDHUP
454                                                 /* listen for EPOLLRDHUP too */
455                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
456 #else /* POLLRDHUP */
457                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
458 #endif /* POLLRDHUP */
459                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
460                                                 EPOLLET;
461                         ep_event.data.ptr=e;
462 again2:
463                         n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
464                         if (unlikely(n==-1)){
465                                 if (errno==EAGAIN) goto again2;
466                                 LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
467                                         strerror(errno), errno);
468                                 goto error;
469                         }
470                         break;
471 #endif
472 #ifdef HAVE_KQUEUE
473                 case POLL_KQUEUE:
474                         if (likely( events & POLLIN)){
475                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e)==-1))
476                                 goto error;
477                         }
478                         if (unlikely( events & POLLOUT)){
479                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
480                                 {
481                                         if (likely(events & POLLIN)){
482                                                 kq_ev_change(h, fd, EVFILT_READ, EV_DELETE, 0);
483                                         }
484                                 }
485                                 goto error;
486                         }
487                         break;
488 #endif
489 #ifdef HAVE_DEVPOLL
490                 case POLL_DEVPOLL:
491                         pfd.fd=fd;
492                         pfd.events=events;
493                         pfd.revents=0;
494 again_devpoll:
495                         if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
496                                 if (errno==EAGAIN) goto again_devpoll;
497                                 LOG(L_ERR, "ERROR: io_watch_add: /dev/poll write failed:"
498                                                         "%s [%d]\n", strerror(errno), errno);
499                                 goto error;
500                         }
501                         break;
502 #endif
503                         
504                 default:
505                         LOG(L_CRIT, "BUG: io_watch_add: no support for poll method "
506                                         " %s (%d)\n", poll_method_str[h->poll_method],
507                                         h->poll_method);
508                         goto error;
509         }
510         
511         h->fd_no++; /* "activate" changes, for epoll/kqueue/devpoll it
512                                    has only informative value */
513 #if defined(HAVE_SIGIO_RT)
514         if (check_io){
515                 /* handle possible pre-existing events */
516                 pf.fd=fd;
517                 pf.events=events;
518 check_io_again:
519                 n=0;
520                 while(e->type && ((n=poll(&pf, 1, 0))>0) && 
521                                 (handle_io(e, pf.revents, idx)>0) &&
522                                 (pf.revents & (e->events|POLLERR|POLLHUP)));
523                 if (unlikely(e->type && (n==-1))){
524                         if (errno==EINTR) goto check_io_again;
525                         LOG(L_ERR, "ERROR: io_watch_add: check_io poll: %s [%d]\n",
526                                                 strerror(errno), errno);
527                 }
528         }
529 #endif
530         return 0;
531 error:
532         if (e) unhash_fd_map(e);
533         return -1;
534 #undef fd_array_setup
535 #undef set_fd_flags 
536 }
537
538
539
540 #define IO_FD_CLOSING 16
541 /* parameters:    h - handler 
542  *               fd - file descriptor
543  *            index - index in the fd_array if known, -1 if not
544  *                    (if index==-1 fd_array will be searched for the
545  *                     corresponding fd* entry -- slower but unavoidable in 
546  *                     some cases). index is not used (no fd_array) for epoll,
547  *                     /dev/poll and kqueue
548  *            flags - optimization flags, e.g. IO_FD_CLOSING, the fd was 
549  *                    or will shortly be closed, in some cases we can avoid
550  *                    extra remove operations (e.g.: epoll, kqueue, sigio)
551  * returns 0 if ok, -1 on error */
552 inline static int io_watch_del(io_wait_h* h, int fd, int idx, int flags)
553 {
554         
555 #define fix_fd_array \
556         do{\
557                         if (unlikely(idx==-1)){ \
558                                 /* fix idx if -1 and needed */ \
559                                 for (idx=0; (idx<h->fd_no) && \
560                                                         (h->fd_array[idx].fd!=fd); idx++); \
561                         } \
562                         if (likely(idx<h->fd_no)){ \
563                                 memmove(&h->fd_array[idx], &h->fd_array[idx+1], \
564                                         (h->fd_no-(idx+1))*sizeof(*(h->fd_array))); \
565                                 if ((idx<=h->crt_fd_array_idx) && (h->crt_fd_array_idx>=0)) \
566                                         h->crt_fd_array_idx--; \
567                         } \
568         }while(0)
569         
570         struct fd_map* e;
571         int events;
572 #ifdef HAVE_EPOLL
573         int n;
574         struct epoll_event ep_event;
575 #endif
576 #ifdef HAVE_DEVPOLL
577         struct pollfd pfd;
578 #endif
579 #ifdef HAVE_SIGIO_RT
580         int fd_flags;
581 #endif
582         
583         if (unlikely((fd<0) || (fd>=h->max_fd_no))){
584                 LOG(L_CRIT, "BUG: io_watch_del: invalid fd %d, not in [0, %d) \n",
585                                                 fd, h->fd_no);
586                 goto error;
587         }
588         DBG("DBG: io_watch_del (%p, %d, %d, 0x%x) fd_no=%d called\n",
589                         h, fd, idx, flags, h->fd_no);
590         e=get_fd_map(h, fd);
591         /* more sanity checks */
592         if (unlikely(e==0)){
593                 LOG(L_CRIT, "BUG: io_watch_del: no corresponding hash entry for %d\n",
594                                         fd);
595                 goto error;
596         }
597         if (unlikely(e->type==0 /*F_NONE*/)){
598                 LOG(L_ERR, "ERROR: io_watch_del: trying to delete already erased"
599                                 " entry %d in the hash(%d, %d, %p) flags %x)\n",
600                                 fd, e->fd, e->type, e->data, flags);
601                 goto error;
602         }
603         events=e->events;
604         unhash_fd_map(e);
605         
606         switch(h->poll_method){
607                 case POLL_POLL:
608                         fix_fd_array;
609                         break;
610 #ifdef HAVE_SELECT
611                 case POLL_SELECT:
612                         if (likely(events & POLLIN))
613                                 FD_CLR(fd, &h->master_rset);
614                         if (unlikely(events & POLLOUT))
615                                 FD_CLR(fd, &h->master_wset);
616                         if (unlikely(h->max_fd_select && (h->max_fd_select==fd)))
617                                 /* we don't know the prev. max, so we just decrement it */
618                                 h->max_fd_select--; 
619                         fix_fd_array;
620                         break;
621 #endif
622 #ifdef HAVE_SIGIO_RT
623                 case POLL_SIGIO_RT:
624                         fix_fd_array;
625                         /* the O_ASYNC flag must be reset all the time, the fd
626                          *  can be changed only if  O_ASYNC is reset (if not and
627                          *  the fd is a duplicate, you will get signals from the dup. fd
628                          *  and not from the original, even if the dup. fd was closed
629                          *  and the signals re-set on the original) -- andrei
630                          */
631                         /*if (!(flags & IO_FD_CLOSING)){*/
632                                 /* reset ASYNC */
633                                 fd_flags=fcntl(fd, F_GETFL); 
634                                 if (unlikely(fd_flags==-1)){ 
635                                         LOG(L_ERR, "ERROR: io_watch_del: fnctl: GETFL failed:" 
636                                                         " %s [%d]\n", strerror(errno), errno); 
637                                         goto error; 
638                                 } 
639                                 if (unlikely(fcntl(fd, F_SETFL, fd_flags&(~O_ASYNC))==-1)){ 
640                                         LOG(L_ERR, "ERROR: io_watch_del: fnctl: SETFL" 
641                                                                 " failed: %s [%d]\n", strerror(errno), errno); 
642                                         goto error; 
643                                 } 
644                         break;
645 #endif
646 #ifdef HAVE_EPOLL
647                 case POLL_EPOLL_LT:
648                 case POLL_EPOLL_ET:
649                         /* epoll doesn't seem to automatically remove sockets,
650                          * if the socket is a duplicate/moved and the original
651                          * is still open. The fd is removed from the epoll set
652                          * only when the original (and all the  copies?) is/are 
653                          * closed. This is probably a bug in epoll. --andrei */
654 #ifdef EPOLL_NO_CLOSE_BUG
655                         if (!(flags & IO_FD_CLOSING)){
656 #endif
657 again_epoll:
658                                 n=epoll_ctl(h->epfd, EPOLL_CTL_DEL, fd, &ep_event);
659                                 if (unlikely(n==-1)){
660                                         if (errno==EAGAIN) goto again_epoll;
661                                         LOG(L_ERR, "ERROR: io_watch_del: removing fd from epoll "
662                                                         "list failed: %s [%d]\n", strerror(errno), errno);
663                                         goto error;
664                                 }
665 #ifdef EPOLL_NO_CLOSE_BUG
666                         }
667 #endif
668                         break;
669 #endif
670 #ifdef HAVE_KQUEUE
671                 case POLL_KQUEUE:
672                         if (!(flags & IO_FD_CLOSING)){
673                                 if (likely(events & POLLIN)){
674                                         if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
675                                                                                                         EV_DELETE, 0) ==-1)){
676                                                 /* try to delete the write filter anyway */
677                                                 if (events & POLLOUT){
678                                                         kq_ev_change(h, fd, EVFILT_WRITE, EV_DELETE, 0);
679                                                 }
680                                                 goto error;
681                                         }
682                                 }
683                                 if (unlikely(events & POLLOUT)){
684                                         if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
685                                                                                                         EV_DELETE, 0) ==-1))
686                                                 goto error;
687                                 }
688                         }
689                         break;
690 #endif
691 #ifdef HAVE_DEVPOLL
692                 case POLL_DEVPOLL:
693                                 /* for /dev/poll the closed fds _must_ be removed
694                                    (they are not removed automatically on close()) */
695                                 pfd.fd=fd;
696                                 pfd.events=POLLREMOVE;
697                                 pfd.revents=0;
698 again_devpoll:
699                                 if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
700                                         if (errno==EINTR) goto again_devpoll;
701                                         LOG(L_ERR, "ERROR: io_watch_del: removing fd from "
702                                                                 "/dev/poll failed: %s [%d]\n", 
703                                                                 strerror(errno), errno);
704                                         goto error;
705                                 }
706                                 break;
707 #endif
708                 default:
709                         LOG(L_CRIT, "BUG: io_watch_del: no support for poll method "
710                                         " %s (%d)\n", poll_method_str[h->poll_method], 
711                                         h->poll_method);
712                         goto error;
713         }
714         h->fd_no--;
715         return 0;
716 error:
717         return -1;
718 #undef fix_fd_array
719 }
720
721
722
723 /* parameters:    h - handler 
724  *               fd - file descriptor
725  *           events - new events to watch for
726  *              idx - index in the fd_array if known, -1 if not
727  *                    (if index==-1 fd_array will be searched for the
728  *                     corresponding fd* entry -- slower but unavoidable in 
729  *                     some cases). index is not used (no fd_array) for epoll,
730  *                     /dev/poll and kqueue
731  * returns 0 if ok, -1 on error */
732 inline static int io_watch_chg(io_wait_h* h, int fd, short events, int idx )
733 {
734         
735 #define fd_array_chg(ev) \
736         do{\
737                         if (unlikely(idx==-1)){ \
738                                 /* fix idx if -1 and needed */ \
739                                 for (idx=0; (idx<h->fd_no) && \
740                                                         (h->fd_array[idx].fd!=fd); idx++); \
741                         } \
742                         if (likely(idx<h->fd_no)){ \
743                                 h->fd_array[idx].events=(ev); \
744                         } \
745         }while(0)
746         
747         struct fd_map* e;
748         int add_events;
749         int del_events;
750 #ifdef HAVE_DEVPOLL
751         struct pollfd pfd;
752 #endif
753 #ifdef HAVE_EPOLL
754         int n;
755         struct epoll_event ep_event;
756 #endif
757         
758         if (unlikely((fd<0) || (fd>=h->max_fd_no))){
759                 LOG(L_CRIT, "BUG: io_watch_chg: invalid fd %d, not in [0, %d) \n",
760                                                 fd, h->fd_no);
761                 goto error;
762         }
763         if (unlikely((events&(POLLIN|POLLOUT))==0)){
764                 LOG(L_CRIT, "BUG: io_watch_chg: invalid events: 0x%0x\n", events);
765                 goto error;
766         }
767         DBG("DBG: io_watch_chg (%p, %d, 0x%x, 0x%x) fd_no=%d called\n",
768                         h, fd, events, idx, h->fd_no);
769         e=get_fd_map(h, fd);
770         /* more sanity checks */
771         if (unlikely(e==0)){
772                 LOG(L_CRIT, "BUG: io_watch_chg: no corresponding hash entry for %d\n",
773                                         fd);
774                 goto error;
775         }
776         if (unlikely(e->type==0 /*F_NONE*/)){
777                 LOG(L_ERR, "ERROR: io_watch_chg: trying to change an already erased"
778                                 " entry %d in the hash(%d, %d, %p) )\n",
779                                 fd, e->fd, e->type, e->data);
780                 goto error;
781         }
782         
783         add_events=events & ~e->events;
784         del_events=e->events & ~events;
785         e->events=events;
786         switch(h->poll_method){
787                 case POLL_POLL:
788 #ifdef POLLRDHUP
789                         /* listen to POLLRDHUP by default (if POLLIN) */
790                         events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
791 #endif /* POLLRDHUP */
792                         fd_array_chg(events);
793                         break;
794 #ifdef HAVE_SELECT
795                 case POLL_SELECT:
796                         fd_array_chg(events);
797                         if (unlikely(del_events & POLLIN))
798                                 FD_CLR(fd, &h->master_rset);
799                         else if (unlikely(add_events & POLLIN))
800                                 FD_SET(fd, &h->master_rset);
801                         if (likely(del_events & POLLOUT))
802                                 FD_CLR(fd, &h->master_wset);
803                         else if (likely(add_events & POLLOUT))
804                                 FD_SET(fd, &h->master_wset);
805                         break;
806 #endif
807 #ifdef HAVE_SIGIO_RT
808                 case POLL_SIGIO_RT:
809                         fd_array_chg(events);
810                         /* no need for check_io, since SIGIO_RT listens by default for all
811                          * the events */
812                         break;
813 #endif
814 #ifdef HAVE_EPOLL
815                 case POLL_EPOLL_LT:
816                                 ep_event.events=
817 #ifdef POLLRDHUP
818                                                 /* listen for EPOLLRDHUP too */
819                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
820 #else /* POLLRDHUP */
821                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
822 #endif /* POLLRDHUP */
823                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
824                                 ep_event.data.ptr=e;
825 again_epoll_lt:
826                                 n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
827                                 if (unlikely(n==-1)){
828                                         if (errno==EAGAIN) goto again_epoll_lt;
829                                         LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
830                                                         " failed: %s [%d]\n", strerror(errno), errno);
831                                         goto error;
832                                 }
833                         break;
834                 case POLL_EPOLL_ET:
835                                 ep_event.events=
836 #ifdef POLLRDHUP
837                                                 /* listen for EPOLLRDHUP too */
838                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
839 #else /* POLLRDHUP */
840                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
841 #endif /* POLLRDHUP */
842                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
843                                                 EPOLLET;
844                                 ep_event.data.ptr=e;
845 again_epoll_et:
846                                 n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
847                                 if (unlikely(n==-1)){
848                                         if (errno==EAGAIN) goto again_epoll_et;
849                                         LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
850                                                         " failed: %s [%d]\n", strerror(errno), errno);
851                                         goto error;
852                                 }
853                         break;
854 #endif
855 #ifdef HAVE_KQUEUE
856                 case POLL_KQUEUE:
857                         if (unlikely(del_events & POLLIN)){
858                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
859                                                                                                                 EV_DELETE, 0) ==-1))
860                                                 goto error;
861                         }else if (unlikely(add_events & POLLIN)){
862                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e) ==-1))
863                                         goto error;
864                         }
865                         if (likely(del_events & POLLOUT)){
866                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
867                                                                                                                 EV_DELETE, 0) ==-1))
868                                                 goto error;
869                         }else if (likely(add_events & POLLOUT)){
870                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
871                                         goto error;
872                         }
873                         break;
874 #endif
875 #ifdef HAVE_DEVPOLL
876                 case POLL_DEVPOLL:
877                                 /* for /dev/poll the closed fds _must_ be removed
878                                    (they are not removed automatically on close()) */
879                                 pfd.fd=fd;
880                                 pfd.events=POLLREMOVE;
881                                 pfd.revents=0;
882 again_devpoll1:
883                                 if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
884                                         if (errno==EINTR) goto again_devpoll1;
885                                         LOG(L_ERR, "ERROR: io_watch_chg: removing fd from "
886                                                                 "/dev/poll failed: %s [%d]\n", 
887                                                                 strerror(errno), errno);
888                                         goto error;
889                                 }
890 again_devpoll2:
891                                 pfd.events=events;
892                                 pfd.revents=0;
893                                 if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
894                                         if (errno==EINTR) goto again_devpoll2;
895                                         LOG(L_ERR, "ERROR: io_watch_chg: re-adding fd to "
896                                                                 "/dev/poll failed: %s [%d]\n", 
897                                                                 strerror(errno), errno);
898                                         goto error;
899                                 }
900                                 break;
901 #endif
902                 default:
903                         LOG(L_CRIT, "BUG: io_watch_chg: no support for poll method "
904                                         " %s (%d)\n", poll_method_str[h->poll_method], 
905                                         h->poll_method);
906                         goto error;
907         }
908         return 0;
909 error:
910         return -1;
911 #undef fix_fd_array
912 }
913
914
915
916 /* io_wait_loop_x style function 
917  * wait for io using poll()
918  * params: h      - io_wait handle
919  *         t      - timeout in s
920  *         repeat - if !=0 handle_io will be called until it returns <=0
921  * returns: number of IO events handled on success (can be 0), -1 on error
922  */
923 inline static int io_wait_loop_poll(io_wait_h* h, int t, int repeat)
924 {
925         int n, r;
926         int ret;
927         struct fd_map* fm;
928         
929 again:
930                 ret=n=poll(h->fd_array, h->fd_no, t*1000);
931                 if (n==-1){
932                         if (errno==EINTR) goto again; /* signal, ignore it */
933                         else{
934                                 LOG(L_ERR, "ERROR:io_wait_loop_poll: poll: %s [%d]\n",
935                                                 strerror(errno), errno);
936                                 goto error;
937                         }
938                 }
939                 for (r=0; (r<h->fd_no) && n; r++){
940                         fm=get_fd_map(h, h->fd_array[r].fd);
941                         if (h->fd_array[r].revents & (fm->events|POLLERR|POLLHUP)){
942                                 n--;
943                                 /* sanity checks */
944                                 if (unlikely((h->fd_array[r].fd >= h->max_fd_no)||
945                                                                 (h->fd_array[r].fd < 0))){
946                                         LOG(L_CRIT, "BUG: io_wait_loop_poll: bad fd %d "
947                                                         "(no in the 0 - %d range)\n",
948                                                         h->fd_array[r].fd, h->max_fd_no);
949                                         /* try to continue anyway */
950                                         h->fd_array[r].events=0; /* clear the events */
951                                         continue;
952                                 }
953                                 h->crt_fd_array_idx=r;
954                                 /* repeat handle_io if repeat, fd still watched (not deleted
955                                  *  inside handle_io), handle_io returns that there's still
956                                  *  IO and the fd is still watched for the triggering event */
957                                 while(fm->type && 
958                                                 (handle_io(fm, h->fd_array[r].revents, r) > 0) &&
959                                                 repeat && ((fm->events|POLLERR|POLLHUP) &
960                                                                                                         h->fd_array[r].revents));
961                                 r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd) 
962                                                                                   array shifting */
963                         }
964                 }
965 error:
966         return ret;
967 }
968
969
970
#ifdef HAVE_SELECT
/* io_wait_loop_x style function
 * wait for io using select()
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io will be called until it returns <=0
 * returns: the value returned by select() (-1 if select failed with
 *          something else than EINTR)
 */
inline static int io_wait_loop_select(io_wait_h* h, int t, int repeat)
{
	fd_set rd_ready;
	fd_set wr_ready;
	struct timeval tv;
	struct fd_map* entry;
	int pending;
	int ret;
	int i;
	int fd;
	int ev;
	
	for(;;){
		/* select() overwrites its sets and may change the timeout
		 * => use fresh copies on each attempt */
		rd_ready=h->master_rset;
		wr_ready=h->master_wset;
		tv.tv_sec=t;
		tv.tv_usec=0;
		ret=pending=select(h->max_fd_select+1, &rd_ready, &wr_ready, 0, &tv);
		if (pending>=0)
			break;
		if (errno==EINTR)
			continue; /* just a signal */
		LOG(L_ERR, "ERROR: io_wait_loop_select: select: %s [%d]\n",
				strerror(errno), errno);
		pending=0; /* nothing to process, skip the dispatch loop */
		break;
	}
	/* the fds are taken from the poll fd array */
	for (i=0; (i<h->fd_no) && pending; i++){
		fd=h->fd_array[i].fd;
		ev=0;
		if (likely(FD_ISSET(fd, &rd_ready)))
			ev|=POLLIN;
		if (unlikely(FD_ISSET(fd, &wr_ready)))
			ev|=POLLOUT;
		if (ev==0)
			continue;
		h->crt_fd_array_idx=i;
		entry=get_fd_map(h, fd);
		/* call handle_io while the fd is still watched for the event,
		 * handle_io reports more io and repeat is set */
		while(entry->type && (entry->events & ev)){
			if (handle_io(entry, ev, i)<=0)
				break;
			if (!repeat)
				break;
		}
		/* the index can change due to io_watch_del(fd) array shifting */
		i=h->crt_fd_array_idx;
		pending--;
	}
	return ret;
}
#endif
1016
1017
1018
#ifdef HAVE_EPOLL
/* io_wait_loop_x style function
 * wait for io using epoll_wait()
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io will be called until it returns <=0
 * returns: the value returned by epoll_wait(): number of events (can be 0),
 *          -1 on error
 */
inline static int io_wait_loop_epoll(io_wait_h* h, int t, int repeat)
{
	struct fd_map* entry;
	int ev_no;
	int i;
	int revents;
	
	/* retry if interrupted by a signal */
	do{
		ev_no=epoll_wait(h->epfd, h->ep_array, h->fd_no, t*1000);
	}while(unlikely(ev_no==-1) && (errno==EINTR));
	if (unlikely(ev_no==-1)){
		LOG(L_ERR, "ERROR:io_wait_loop_epoll: "
				"epoll_wait(%d, %p, %d, %d): %s [%d]\n", 
				h->epfd, h->ep_array, h->fd_no, t*1000,
				strerror(errno), errno);
		return ev_no;
	}
#if 0
	if (ev_no>1){
		for(i=0; i<ev_no; i++){
			LOG(L_ERR, "WARNING: ep_array[%d]= %x, %p\n",
					i, h->ep_array[i].events, h->ep_array[i].data.ptr);
		}
	}
#endif
	for (i=0; i<ev_no; i++){
		/* translate the epoll event bits into the poll ones used by
		 * handle_io (EPOLLPRI is reported as POLLIN) */
		revents=0;
		if (h->ep_array[i].events & (EPOLLIN|EPOLLPRI))
			revents|=POLLIN;
		if (h->ep_array[i].events & EPOLLOUT)
			revents|=POLLOUT;
		if (h->ep_array[i].events & EPOLLERR)
			revents|=POLLERR;
		if (h->ep_array[i].events & EPOLLHUP)
			revents|=POLLHUP;
#ifdef POLLRDHUP
		if (h->ep_array[i].events & EPOLLRDHUP)
			revents|=POLLRDHUP;
#endif
		if (unlikely(revents==0)){
			LOG(L_ERR, "ERROR:io_wait_loop_epoll: unexpected event %x"
						" on %d/%d, data=%p\n", h->ep_array[i].events,
						i+1, ev_no, h->ep_array[i].data.ptr);
			continue;
		}
		/* the fd map entry was attached to the event as user data */
		entry=(struct fd_map*)h->ep_array[i].data.ptr;
		while(entry->type && ((entry->events|POLLERR|POLLHUP) & revents)){
			if (handle_io(entry, revents, -1)<=0)
				break;
			if (!repeat)
				break;
		}
	}
	return ev_no;
}
#endif
1070
1071
1072
#ifdef HAVE_KQUEUE
/* io_wait_loop_x style function
 * wait for io using kevent(); the changes queued in h->kq_changes are
 * submitted in the same call
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io will be called until it returns <=0
 * returns: the value returned by kevent(): number of returned events
 *          (can be 0), -1 on error
 */
inline static int io_wait_loop_kqueue(io_wait_h* h, int t, int repeat)
{
	int n, r;
	struct timespec tspec;
	struct fd_map* fm;
	int revents;
	
	tspec.tv_sec=t;
	tspec.tv_nsec=0;
again:
		n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges,  h->kq_array,
					h->fd_no, &tspec);
		if (unlikely(n==-1)){
			if (errno==EINTR) goto again; /* signal, ignore it */
			else{
				LOG(L_ERR, "ERROR: io_wait_loop_kqueue: kevent:"
						" %s [%d]\n", strerror(errno), errno);
				goto error;
			}
		}
		h->kq_nchanges=0; /* reset changes array */
		for (r=0; r<n; r++){
#ifdef EXTRA_DEBUG
			/* ident is not an int => cast it to match the %d conversion */
			DBG("DBG: kqueue: event %d/%d: fd=%d, udata=%lx, flags=0x%x\n",
					r, n, (int)h->kq_array[r].ident,
					(long)h->kq_array[r].udata,
					h->kq_array[r].flags);
#endif
			if (unlikely((h->kq_array[r].flags & EV_ERROR) &&
							(h->kq_array[r].data == EBADF ||
							 h->kq_array[r].udata == 0))){
				/* error in changes: we ignore it if it has to do with a
				   bad fd or udata==0. It can be caused by trying to remove an
				   already closed fd: race between adding something to the
				   changes array, close() and applying the changes.
				   E.g. for ser tcp: tcp_main sends a fd to child for reading
				    => deletes it from the watched fds => the changes array
					will contain an EV_DELETE for it. Before the changes
					are applied (they are at the end of the main io_wait loop,
					after all the fd events were processed), a CON_ERR sent
					to tcp_main by a sender (send fail) is processed and causes
					the fd to be closed. When the changes are applied =>
					error for the EV_DELETE attempt of a closed fd.
				*/
				/*
					example EV_ERROR for trying to delete a read watched fd,
					that was already closed:
					{
						ident = 63,  [fd]
						filter = -1, [EVFILT_READ]
						flags = 16384, [EV_ERROR]
						fflags = 0,
						data = 9, [errno = EBADF]
						udata = 0x0
					}
				*/
				/* EBADF is expected here (see above) => log only the rest */
				if (h->kq_array[r].data != EBADF)
					LOG(L_INFO, "INFO: io_wait_loop_kqueue: kevent error on "
							"fd %ld: %s [%ld]\n", (long)h->kq_array[r].ident,
							strerror((int)h->kq_array[r].data),
							(long)h->kq_array[r].data);
			}else{
				fm=(struct fd_map*)h->kq_array[r].udata;
				/* map the kqueue filter to the corresponding poll event;
				 * events for any other filter are ignored */
				if (likely(h->kq_array[r].filter==EVFILT_READ))
					revents=POLLIN;
				else if (h->kq_array[r].filter==EVFILT_WRITE)
					revents=POLLOUT;
				else
					continue;
				/* (!x - 1) is an all-ones mask if x is set and 0 otherwise:
				 * EV_EOF is reported as POLLHUP, EV_ERROR as POLLERR */
				revents|=
					(((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
					(((int)!(h->kq_array[r].flags & EV_ERROR)-1)&POLLERR);
				while(fm->type && (fm->events & revents) && 
						(handle_io(fm, revents, -1)>0) && repeat);
			}
		}
error:
	return n;
}
#endif
1155
1156
1157
#ifdef HAVE_SIGIO_RT
/* io_wait_loop_x style function, real time signals version
 * (it has no repeat parameter, it doesn't make sense here)
 * params: h - io_wait handle
 *         t - timeout in s
 * returns: 1 if a signal was received (1 event per call normally),
 *          0 on timeout, -1 on error; on signal queue overflow the result
 *          of the fallback io_wait_loop_poll() call is returned
 */
inline static int io_wait_loop_sigio_rt(io_wait_h* h, int t)
{
	struct timespec wait_ts;
	siginfo_t info;
	struct fd_map* entry;
	int sig;
	int band;
	int src_fd;
	int revents;
#ifdef SIGINFO64_WORKARROUND
	int* words;
#endif
	
	wait_ts.tv_sec=t;
	wait_ts.tv_nsec=0;
	/* both the real time signal and SIGIO must be in the waited set */
	if (unlikely(!sigismember(&h->sset, h->signo) ||
					!sigismember(&h->sset, SIGIO))) {
		LOG(L_CRIT, "BUG: io_wait_loop_sigio_rt: the signal mask"
				" is not properly set!\n");
		return -1;
	}
	/* wait for a signal, retrying if interrupted by some other signal */
	do{
		sig=sigtimedwait(&h->sset, &info, &wait_ts);
	}while(unlikely(sig==-1) && (errno==EINTR));
	if (unlikely(sig==-1)){
		if (errno==EAGAIN)
			return 0; /* timeout */
		LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: sigtimed_wait"
				" %s [%d]\n", strerror(errno), errno);
		return -1;
	}
	if (unlikely(sig==SIGIO)){
		/* signal queue overflow 
		 * TODO: increase signal queue size: 2.4x /proc/.., 2.6x -rlimits */
		LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: signal queue overflowed"
					"- falling back to poll\n");
		/* clear real-time signal queue
		 * both SIG_IGN and SIG_DFL are needed , it doesn't work
		 * only with SIG_DFL  */
		if (signal(h->signo, SIG_IGN)==SIG_ERR){
			LOG(L_CRIT, "BUG: do_poll: couldn't reset signal to IGN\n");
		}
		
		if (signal(h->signo, SIG_DFL)==SIG_ERR){
			LOG(L_CRIT, "BUG: do_poll: couldn't reset signal to DFL\n");
		}
		/* falling back to normal poll */
		return io_wait_loop_poll(h, -1, 1);
	}
	
	/* normal case: extract the event bitmap and the fd from the siginfo */
#ifdef SIGINFO64_WORKARROUND
	/* on linux siginfo.si_band is defined as long in userspace
	 * and as int in kernel (< 2.6.5) => on 64 bits things will break!
	 * (si_band will include si_fd, and si_fd will contain
	 *  garbage).
	 *  see /usr/src/linux/include/asm-generic/siginfo.h and
	 *      /usr/include/bits/siginfo.h
	 *  On newer kernels this is fixed (si_band is long in the kernel too).
	 * -- andrei */
	if  ((_os_ver<0x020605) && (sizeof(info.si_band)>sizeof(int))){
		words=(int*)(void*)&info.si_band; /* avoid type punning warnings */
		band=*words;
		src_fd=*(words+1);
	}else
#endif
	{
		band=info.si_band;
		src_fd=info.si_fd;
	}
	
	if (unlikely(info.si_code==SI_SIGIO)){
		/* old style, we don't know the event (linux 2.2.?) */
		LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: old style sigio"
				" interface\n");
		entry=get_fd_map(h, src_fd);
		/* we can have queued signals generated by fds not watched
		 * any more, or by fds in transition, to a child => ignore them*/
		if (entry->type)
			handle_io(entry, POLLIN|POLLOUT, -1);
		return 1;
	}
	
	/* si_code contains the SIGPOLL reason: POLL_IN, POLL_OUT,
	 *  POLL_MSG, POLL_ERR, POLL_PRI or POLL_HUP
	 * and si_band the translated poll event bitmap:
	 *  POLLIN|POLLRDNORM  (=POLL_IN),
	 *  POLLOUT|POLLWRNORM|POLLWRBAND (=POLL_OUT),
	 *  POLLIN|POLLRDNORM|POLLMSG (=POLL_MSG),
	 *  POLLERR (=POLL_ERR),
	 *  POLLPRI|POLLRDBAND (=POLL_PRI),
	 *  POLLHUP|POLLERR (=POLL_HUP) 
	 *  [linux 2.6.22 fs/fcntl.c:447]
	 */
#ifdef EXTRA_DEBUG
	DBG("io_wait_loop_sigio_rt: siginfo: signal=%d (%d),"
			" si_code=%d, si_band=0x%x,"
			" si_fd=%d\n",
			info.si_signo, sig, info.si_code, 
			(unsigned)band,
			src_fd);
#endif
	/* on some errors (e.g. when receving TCP RST), sigio_band will
	 * be set to 0x08 (POLLERR) or 0x18 (POLLERR|POLLHUP - on stream
	 *  unix socket close) , so better catch all events --andrei */
	if (unlikely(band==0)){
		LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: unexpected event"
					" on fd %d: %x\n", src_fd, band);
		return 1;
	}
	entry=get_fd_map(h, src_fd);
	revents=band;
	/* fix revents==POLLPRI case: report it as POLLIN too */
	if (revents & POLLPRI)
		revents|=POLLIN;
	/* we can have queued signals generated by fds not watched
	 * any more, or by fds in transition, to a child 
	 * => ignore them */
	if (entry->type && ((entry->events|POLLERR|POLLHUP) & revents))
		handle_io(entry, revents, -1);
	else
		DBG("WARNING: io_wait_loop_sigio_rt: ignoring event"
				" %x on fd %d, watching for %x, si_code=%x "
				"(fm->type=%d, fm->fd=%d, fm->data=%p)\n",
				band, src_fd, entry->events, info.si_code,
				entry->type, entry->fd, entry->data);
	return 1;
}
#endif
1294
1295
1296
#ifdef HAVE_DEVPOLL
/* io_wait_loop_x style function
 * wait for io using the /dev/poll DP_POLL ioctl
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io will be called until it returns <=0
 * returns: the value returned by the ioctl: number of fds with events
 *          (can be 0), -1 on error
 */
inline static int io_wait_loop_devpoll(io_wait_h* h, int t, int repeat)
{
	struct dvpoll req;
	struct fd_map* entry;
	int ready;
	int ret;
	int i;

	/* the results are written into the handler's fd_array */
	req.dp_timeout=t*1000;
	req.dp_nfds=h->fd_no;
	req.dp_fds=h->fd_array;
	/* retry if interrupted by a signal */
	do{
		ret=ready=ioctl(h->dpoll_fd, DP_POLL, &req);
	}while(unlikely(ready==-1) && (errno==EINTR));
	if (unlikely(ready==-1)){
		LOG(L_ERR, "ERROR:io_wait_loop_devpoll: ioctl: %s [%d]\n",
				strerror(errno), errno);
		return ret;
	}
	for (i=0; i<ready; i++){
		/* invalid fd / error conditions are only logged here */
		if (h->fd_array[i].revents & (POLLNVAL|POLLERR)){
			LOG(L_ERR, "ERROR: io_wait_loop_devpoll: pollinval returned"
						" for fd %d, revents=%x\n",
						h->fd_array[i].fd, h->fd_array[i].revents);
		}
		/* POLLIN|POLLHUP just go through */
		entry=get_fd_map(h, h->fd_array[i].fd);
		while(entry->type && (entry->events & h->fd_array[i].revents)){
			if (handle_io(entry, h->fd_array[i].revents, i)<=0)
				break;
			if (!repeat)
				break;
		}
	}
	return ret;
}
#endif
1333
1334
1335
1336 /* init */
1337
1338
1339 /* initializes the static vars/arrays
1340  * params:      h - pointer to the io_wait_h that will be initialized
1341  *         max_fd - maximum allowed fd number
1342  *    poll_method - poll method (0 for automatic best fit)
1343  */
1344 int init_io_wait(io_wait_h* h, int max_fd, enum poll_types poll_method);
1345
1346 /* destroys everything init_io_wait allocated */
1347 void destroy_io_wait(io_wait_h* h);
1348
1349
1350 #endif