1 /* 
2  * $Id$
3  * 
4  * Copyright (C) 2005 iptelorg GmbH
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 /*
19  * tcp io wait common stuff used by tcp_main.c & tcp_read.c
20  * All the functions are inline for speed reasons and because they are
21  * used from only 2 places.
22  * You also have to define:
23  *     int handle_io(struct fd_map* fm, short events, int idx) (see below)
24  *     (this could be trivially replaced by a callback pointer entry attached
25  *      to the io_wait handler if more flexibility rather than performance
26  *      is needed)
27  *      fd_type - define to some enum of your choice and also define
28  *                FD_TYPE_DEFINED (if you don't, fd_type will be defined
29  *                to int). 0 has a special not set/not initialized meaning
30  *                (a lot of sanity checks and the sigio_rt code are based on
31  *                 this assumption)
32  *     local_malloc (defaults to pkg_malloc)
33  *     local_free   (defaults to pkg_free)
34  *  
35  */
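
/* Illustrative only: a sketch of the definitions an including file is
 * expected to provide before including io_wait.h (the fd type enum, the
 * FD_TYPE_DEFINED guard and the optional local_malloc/local_free overrides
 * mentioned above). The enum members below are made-up example names, not
 * part of this header; see tcp_main.c for the real ones. Kept inside #if 0
 * so it does not affect compilation.
 */
#if 0
enum fd_types{ F_NONE=0,               /* 0 must keep the not used/empty meaning */
               F_LISTENER, F_TCPCONN };     /* example members (hypothetical) */
typedef enum fd_types fd_type;
#define FD_TYPE_DEFINED
#define HANDLE_IO_INLINE        /* handle_io() is defined inline by the includer */
#define local_malloc pkg_malloc /* optional: this is already the default */
#define local_free   pkg_free
#include "io_wait.h"
#endif
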
36 /* 
37  * History:
38  * --------
39  *  2005-06-13  created by andrei
40  *  2005-06-26  added kqueue (andrei)
41  *  2005-07-01  added /dev/poll (andrei)
42  *  2006-05-30  sigio 64 bit workaround enabled for kernels < 2.6.5 (andrei)
43  *  2007-11-22  when handle_io() is called in a loop check & stop if the fd was
44  *               removed inside handle_io() (andrei)
45  *  2007-11-29  support for write (POLLOUT); added io_watch_chg() (andrei)
46  *  2008-02-04  POLLRDHUP & EPOLLRDHUP support (automatically enabled if POLLIN
47  *               is set) (andrei)
48  *  2010-06-17  re-enabled & enhanced the EV_ERROR for kqueue (andrei)
49  */
50
51
52
53 #ifndef _io_wait_h
54 #define _io_wait_h
55
56 #include <errno.h>
57 #include <string.h>
58 #ifdef HAVE_SIGIO_RT
59 #define __USE_GNU /* or else F_SETSIG won't be included */
60 #include <sys/types.h> /* recv */
61 #include <sys/socket.h> /* recv */
62 #include <signal.h> /* sigprocmask, sigwait a.s.o */
63 #endif
64
65 #define _GNU_SOURCE  /* for POLLRDHUP on linux */
66 #include <sys/poll.h>
67 #include <fcntl.h>
68
69 #ifdef HAVE_EPOLL
70 #include <sys/epoll.h>
71 #endif
72 #ifdef HAVE_KQUEUE
73 #include <sys/types.h> /* needed on freebsd */
74 #include <sys/event.h>
75 #include <sys/time.h>
76 #endif
77 #ifdef HAVE_DEVPOLL
78 #include <sys/devpoll.h>
79 #endif
80 #ifdef HAVE_SELECT
81 /* needed on openbsd for select*/
82 #include <sys/time.h> 
83 #include <sys/types.h> 
84 #include <unistd.h>
85 /* needed according to POSIX for select*/
86 #include <sys/select.h>
87 #endif
88
89 #include "dprint.h"
90
91 #include "poll_types.h" /* poll_types*/
92 #ifdef HAVE_SIGIO_RT
93 #include "pt.h" /* mypid() */
94 #endif
95
96 #include "compiler_opt.h"
97
98
99 #ifdef HAVE_EPOLL
100 /* fix defines for EPOLL */
101 #if defined POLLRDHUP && ! defined EPOLLRDHUP
102 #define EPOLLRDHUP POLLRDHUP  /* should work on all linuxes */
103 #endif /* POLLRDHUP && EPOLLRDHUP */
104 #endif /* HAVE_EPOLL */
105
106
107 extern int _os_ver; /* os version number, needed to select bug workarounds */
108
109
110 #if 0
111 enum fd_types; /* this should be defined from the including file,
112                                   see tcp_main.c for an example, 
113                                   0 has a special meaning: not used/empty*/
114 #endif
115
116 #ifndef FD_TYPE_DEFINED
117 typedef int fd_type;
118 #define FD_TYPE_DEFINED
119 #endif
120
121 /* maps a fd to some other structure; used in almost all cases
122  * except epoll and maybe kqueue or /dev/poll */
123 struct fd_map{
124         int fd;               /* fd no */
125         fd_type type;         /* "data" type */
126         void* data;           /* pointer to the corresponding structure */
127         short events;         /* events we are interested in */
128 };
129
130
131 #ifdef HAVE_KQUEUE
132 #ifndef KQ_CHANGES_ARRAY_SIZE
133 #define KQ_CHANGES_ARRAY_SIZE 256
134 #endif
135
136 #ifdef __OS_netbsd
137 #define KEV_UDATA_CAST (intptr_t)
138 #else
139 #define KEV_UDATA_CAST
140 #endif
141
142 #endif
143
144
145 /* handler structure */
146 struct io_wait_handler{
147         enum poll_types poll_method;
148         int flags;
149         struct fd_map* fd_hash;
150         int fd_no; /*  current index used in fd_array and the passed size for 
151                                    ep_array & kq_array*/
152         int max_fd_no; /* maximum fd no, is also the size of fd_array,
153                                                        fd_hash  and ep_array*/
154         /* common stuff for POLL, SIGIO_RT and SELECT
155          * since poll support is always compiled => this will always be compiled */
156         struct pollfd* fd_array; /* used also by devpoll as devpoll array */
157         int crt_fd_array_idx; /*  crt idx for which handle_io is called
158                                                          (updated also by del -> internal optimization) */
159         /* end of common stuff */
160 #ifdef HAVE_EPOLL
161         int epfd; /* epoll ctrl fd */
162         struct epoll_event* ep_array;
163 #endif
164 #ifdef HAVE_SIGIO_RT
165         sigset_t sset; /* signal mask for sigio & sigrtmin */
166         int signo;     /* real time signal used */
167 #endif
168 #ifdef HAVE_KQUEUE
169         int kq_fd;
170         struct kevent* kq_array;   /* used for the eventlist*/
171         struct kevent* kq_changes; /* used for the changelist */
172         size_t kq_nchanges;
173         size_t kq_changes_size; /* size of the changes array */
174 #endif
175 #ifdef HAVE_DEVPOLL
176         int dpoll_fd;
177 #endif
178 #ifdef HAVE_SELECT
179         fd_set master_rset; /* read set */
180         fd_set master_wset; /* write set */
181         int max_fd_select; /* maximum select used fd */
182 #endif
183 };
184
185 typedef struct io_wait_handler io_wait_h;
186
187
188 /* get the corresponding fd_map structure pointer */
189 #define get_fd_map(h, fd)               (&(h)->fd_hash[(fd)])
190 /* remove a fd_map structure from the hash; the pointer must be one
191  * returned by get_fd_map or hash_fd_map */
192 #define unhash_fd_map(pfm)      \
193         do{ \
194                 (pfm)->type=0 /*F_NONE */; \
195                 (pfm)->fd=-1; \
196         }while(0)
197
198 /* add a fd_map structure to the fd hash */
199 static inline struct fd_map* hash_fd_map(       io_wait_h* h,
200                                                                                         int fd,
201                                                                                         short events,
202                                                                                         fd_type type,
203                                                                                         void* data)
204 {
205         h->fd_hash[fd].fd=fd;
206         h->fd_hash[fd].events=events;
207         h->fd_hash[fd].type=type;
208         h->fd_hash[fd].data=data;
209         return &h->fd_hash[fd];
210 }
211
212
213
214 #ifdef HANDLE_IO_INLINE
215 /* generic handle io routine, this must be defined in the including file
216  * (faster than registering a callback pointer)
217  *
218  * params:  fm     - pointer to a fd hash entry
219  *          events - combinations of POLLIN, POLLOUT, POLLERR & POLLHUP
220  *          idx    - index in the fd_array (or -1 if not known)
221  * return: -1 on error
222  *          0 on EAGAIN or when it is otherwise known that no more
223  *            io events are queued on the fd (the receive buffer is empty).
224  *            Useful to detect when there are no more io events queued for
225  *            sigio_rt, epoll_et, kqueue.
226  *         >0 on successful read from the fd (when there might be more io
227  *            queued -- the receive buffer might still be non-empty)
228  */
229 inline static int handle_io(struct fd_map* fm, short events, int idx);
230 #else
231 int handle_io(struct fd_map* fm, short events, int idx);
232 #endif
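
/* Illustrative only: a minimal handle_io() sketch that follows the return
 * convention documented above. F_TCPCONN and read_from_conn() are
 * hypothetical placeholders, not names used by this header; a real
 * implementation lives in the including file (see tcp_main.c). Kept inside
 * #if 0 so it does not affect compilation.
 */
#if 0
inline static int handle_io(struct fd_map* fm, short events, int idx)
{
	int bytes;

	switch(fm->type){
		case F_TCPCONN:                      /* hypothetical fd type */
			if (events & (POLLERR|POLLHUP))
				return -1;                   /* error on the fd */
			bytes=read_from_conn(fm->data);  /* hypothetical helper */
			if (bytes<0)
				return (errno==EAGAIN || errno==EWOULDBLOCK)? 0 : -1;
			return (bytes>0)? 1 : 0;         /* >0 => more io might be queued */
		case 0: /* F_NONE: fd was removed while events were still pending */
		default:
			return 0;
	}
}
#endif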
233
234
235
236 #ifdef HAVE_KQUEUE
237 /*
238  * kqueue specific function: register a change
239  * (adds a change to the kevent change array, and if full flushes it first)
240  *
241  * TODO: check if the event already exists in the change list or if it's
242  *       complementary to an event in the list (e.g. EVFILT_WRITE, EV_DELETE
243  *       and EVFILT_WRITE, EV_ADD for the same fd).
244  * returns: -1 on error, 0 on success
245  */
246 static inline int kq_ev_change(io_wait_h* h, int fd, int filter, int flag, 
247                                                                 void* data)
248 {
249         int n;
250         int r;
251         struct timespec tspec;
252
253         if (h->kq_nchanges>=h->kq_changes_size){
254                 /* changes array full ! */
255                 LOG(L_WARN, "WARNING: kq_ev_change: kqueue changes array full,"
256                                         " trying to flush...\n");
257                 tspec.tv_sec=0;
258                 tspec.tv_nsec=0;
259 again:
260                 n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges, 0, 0, &tspec);
261                 if (unlikely(n == -1)){
262                         if (likely(errno == EBADF)) {
263                                 /* one of the file descriptors is bad, probably already
264                                    closed => try to apply changes one-by-one */
265                                 for (r = 0; r < h->kq_nchanges; r++) {
266 retry2:
267                                         n = kevent(h->kq_fd, &h->kq_changes[r], 1, 0, 0, &tspec);
268                                         if (n==-1) {
269                                                 if (errno == EBADF)
270                                                         continue; /* skip over it */
271                                                 if (errno == EINTR)
272                                                         goto retry2;
273                                                 LOG(L_ERR, "ERROR: kq_ev_change: kevent flush changes"
274                                                                         " failed: %s [%d]\n",
275                                                                                 strerror(errno), errno);
276                                                 /* shift the array */
277                                                 memmove(&h->kq_changes[0], &h->kq_changes[r+1],
278                                                                         sizeof(h->kq_changes[0])*
279                                                                                 (h->kq_nchanges-r-1));
280                                                 h->kq_nchanges-=(r+1);
281                                                 return -1;
282                                         }
283                                 }
284                         } else if (errno == EINTR) goto again;
285                         else {
286                                 LOG(L_ERR, "ERROR: kq_ev_change: kevent flush changes"
287                                                 " failed: %s [%d]\n", strerror(errno), errno);
288                                 h->kq_nchanges=0; /* reset changes array */
289                                 return -1;
290                         }
291                 }
292                 h->kq_nchanges=0; /* changes array is empty */
293         }
294         EV_SET(&h->kq_changes[h->kq_nchanges], fd, filter, flag, 0, 0,
295                         KEV_UDATA_CAST data);
296         h->kq_nchanges++;
297         return 0;
298 }
299 #endif
300
301
302
303 /* generic io_watch_add function
304  * Params:
305  *     h      - pointer to initialized io_wait handle
306  *     fd     - fd to watch
307  *     events - bitmap with the fd events for which the fd should be watched
308  *              (combination of POLLIN and POLLOUT)
309  *     type   - fd type (non 0 value, returned in the call to handle_io)
310  *     data   - pointer/private data returned in the handle_io call
311  * returns 0 on success, -1 on error
312  *
313  * WARNING: handle_io() can be called immediately (from io_watch_add()) so
314  *  make sure that any dependent init. (e.g. data stuff) is made before
315  *  calling io_watch_add
316  *
317  * this version should be faster than pointers to poll_method specific
318  * functions (it avoids function calls, the overhead being only an extra
319  *  switch())*/
320 inline static int io_watch_add( io_wait_h* h,
321                                                                 int fd,
322                                                                 short events,
323                                                                 fd_type type,
324                                                                 void* data)
325 {
326
327         /* helper macros */
328 #define fd_array_setup(ev) \
329         do{ \
330                 h->fd_array[h->fd_no].fd=fd; \
331                 h->fd_array[h->fd_no].events=(ev); /* useless for select */ \
332                 h->fd_array[h->fd_no].revents=0;     /* useless for select */ \
333         }while(0)
334         
335 #define set_fd_flags(f) \
336         do{ \
337                         flags=fcntl(fd, F_GETFL); \
338                         if (flags==-1){ \
339                                                 LOG(L_ERR, "ERROR: io_watch_add: fcntl: GETFL failed:" \
340                                                 " %s [%d]\n", strerror(errno), errno); \
341                                 goto error; \
342                         } \
343                         if (fcntl(fd, F_SETFL, flags|(f))==-1){ \
344                                 LOG(L_ERR, "ERROR: io_watch_add: fcntl: SETFL" \
345                                                         " failed: %s [%d]\n", strerror(errno), errno); \
346                                 goto error; \
347                         } \
348         }while(0)
349         
350         
351         struct fd_map* e;
352         int flags;
353 #ifdef HAVE_EPOLL
354         struct epoll_event ep_event;
355 #endif
356 #ifdef HAVE_DEVPOLL
357         struct pollfd pfd;
358 #endif
359 #if defined(HAVE_SIGIO_RT) || defined (HAVE_EPOLL)
360         int n;
361 #endif
362 #if defined(HAVE_SIGIO_RT)
363         int idx;
364         int check_io;
365         struct pollfd pf;
366         
367         check_io=0; /* set to 1 if we need to check for pre-existing queued
368                                    io/data on the fd */
369         idx=-1;
370 #endif
371         e=0;
372         /* sanity checks */
373         if (unlikely(fd==-1)){
374                 LOG(L_CRIT, "BUG: io_watch_add: fd is -1!\n");
375                 goto error;
376         }
377         if (unlikely((events&(POLLIN|POLLOUT))==0)){
378                 LOG(L_CRIT, "BUG: io_watch_add: invalid events: 0x%0x\n", events);
379                 goto error;
380         }
381         /* check if not too big */
382         if (unlikely(h->fd_no>=h->max_fd_no)){
383                 LOG(L_CRIT, "ERROR: io_watch_add: maximum fd number exceeded:"
384                                 " %d/%d\n", h->fd_no, h->max_fd_no);
385                 goto error;
386         }
387         DBG("DBG: io_watch_add(%p, %d, %d, %p), fd_no=%d\n",
388                         h, fd, type, data, h->fd_no);
389         /*  hash sanity check */
390         e=get_fd_map(h, fd);
391         if (unlikely(e && (e->type!=0 /*F_NONE*/))){
392                 LOG(L_ERR, "ERROR: io_watch_add: trying to overwrite entry %d"
393                                 " watched for %x in the hash(%d, %d, %p) with (%d, %d, %p)\n",
394                                 fd, events, e->fd, e->type, e->data, fd, type, data);
395                 e=0;
396                 goto error;
397         }
398         
399         if (unlikely((e=hash_fd_map(h, fd, events, type, data))==0)){
400                 LOG(L_ERR, "ERROR: io_watch_add: failed to hash the fd %d\n", fd);
401                 goto error;
402         }
403         switch(h->poll_method){ /* faster than pointers to functions */
404                 case POLL_POLL:
405 #ifdef POLLRDHUP
406                         /* listen to POLLRDHUP by default (if POLLIN) */
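			/* branchless: ((int)!(events & POLLIN) - 1) is -1 (all bits set)
			 * when POLLIN is requested and 0 otherwise, so POLLRDHUP is
			 * or-ed in only together with POLLIN (the same trick is used
			 * for the EPOLL* event masks below) */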
407                         events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
408 #endif /* POLLRDHUP */
409                         fd_array_setup(events);
410                         set_fd_flags(O_NONBLOCK);
411                         break;
412 #ifdef HAVE_SELECT
413                 case POLL_SELECT:
414                         fd_array_setup(events);
415                         if (likely(events & POLLIN))
416                                 FD_SET(fd, &h->master_rset);
417                         if (unlikely(events & POLLOUT))
418                                 FD_SET(fd, &h->master_wset);
419                         if (h->max_fd_select<fd) h->max_fd_select=fd;
420                         break;
421 #endif
422 #ifdef HAVE_SIGIO_RT
423                 case POLL_SIGIO_RT:
424                         fd_array_setup(events);
425                         /* re-setting O_ASYNC might be needed, if it was not done from
426                          * io_watch_del (or if somebody wants to add a fd which already
427                          * has O_ASYNC/F_SETSIG set on a duplicate)
428                          */
429                         /* set async & signal */
430                         if (fcntl(fd, F_SETOWN, my_pid())==-1){
431                                 LOG(L_ERR, "ERROR: io_watch_add: fcntl: SETOWN"
432                                 " failed: %s [%d]\n", strerror(errno), errno);
433                                 goto error;
434                         }
435                         if (fcntl(fd, F_SETSIG, h->signo)==-1){
436                                 LOG(L_ERR, "ERROR: io_watch_add: fcntl: SETSIG"
437                                         " failed: %s [%d]\n", strerror(errno), errno);
438                                 goto error;
439                         }
440                         /* set both non-blocking and async */
441                         set_fd_flags(O_ASYNC| O_NONBLOCK);
442 #ifdef EXTRA_DEBUG
443                         DBG("io_watch_add: sigio_rt on f %d, signal %d to pid %d\n",
444                                         fd,  h->signo, my_pid());
445 #endif
446                         /* empty the socket receive buffer: if the buffer is already
447                          * full there is no space left for new packets
448                          * => no more signals are ever generated;
449                          * also when moving fds, the freshly moved fd might already
450                          *  have some bytes queued, and we want to get them now
451                          *  and not later -- andrei */
452                         idx=h->fd_no;
453                         check_io=1;
454                         break;
455 #endif
456 #ifdef HAVE_EPOLL
457                 case POLL_EPOLL_LT:
458                         ep_event.events=
459 #ifdef POLLRDHUP
460                                                 /* listen for EPOLLRDHUP too */
461                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
462 #else /* POLLRDHUP */
463                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
464 #endif /* POLLRDHUP */
465                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
466                         ep_event.data.ptr=e;
467 again1:
468                         n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
469                         if (unlikely(n==-1)){
470                                 if (errno==EAGAIN) goto again1;
471                                 LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
472                                         strerror(errno), errno);
473                                 goto error;
474                         }
475                         break;
476                 case POLL_EPOLL_ET:
477                         set_fd_flags(O_NONBLOCK);
478                         ep_event.events=
479 #ifdef POLLRDHUP
480                                                 /* listen for EPOLLRDHUP too */
481                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
482 #else /* POLLRDHUP */
483                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
484 #endif /* POLLRDHUP */
485                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
486                                                 EPOLLET;
487                         ep_event.data.ptr=e;
488 again2:
489                         n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
490                         if (unlikely(n==-1)){
491                                 if (errno==EAGAIN) goto again2;
492                                 LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
493                                         strerror(errno), errno);
494                                 goto error;
495                         }
496                         break;
497 #endif
498 #ifdef HAVE_KQUEUE
499                 case POLL_KQUEUE:
500                         if (likely( events & POLLIN)){
501                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e)==-1))
502                                 goto error;
503                         }
504                         if (unlikely( events & POLLOUT)){
505                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
506                                 {
507                                         if (likely(events & POLLIN)){
508                                                 kq_ev_change(h, fd, EVFILT_READ, EV_DELETE, 0);
509                                         }
510                                         goto error;
511                                 }
512                         }
513                         break;
514 #endif
515 #ifdef HAVE_DEVPOLL
516                 case POLL_DEVPOLL:
517                         pfd.fd=fd;
518                         pfd.events=events;
519                         pfd.revents=0;
520 again_devpoll:
521                         if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
522                                 if (errno==EAGAIN) goto again_devpoll;
523                                 LOG(L_ERR, "ERROR: io_watch_add: /dev/poll write failed:"
524                                                         "%s [%d]\n", strerror(errno), errno);
525                                 goto error;
526                         }
527                         break;
528 #endif
529                         
530                 default:
531                         LOG(L_CRIT, "BUG: io_watch_add: no support for poll method "
532                                         " %s (%d)\n", poll_method_str[h->poll_method],
533                                         h->poll_method);
534                         goto error;
535         }
536         
537         h->fd_no++; /* "activate" changes, for epoll/kqueue/devpoll it
538                                    has only informative value */
539 #if defined(HAVE_SIGIO_RT)
540         if (check_io){
541                 /* handle possible pre-existing events */
542                 pf.fd=fd;
543                 pf.events=events;
544 check_io_again:
545                 n=0;
546                 while(e->type && ((n=poll(&pf, 1, 0))>0) && 
547                                 (handle_io(e, pf.revents, idx)>0) &&
548                                 (pf.revents & (e->events|POLLERR|POLLHUP)));
549                 if (unlikely(e->type && (n==-1))){
550                         if (errno==EINTR) goto check_io_again;
551                         LOG(L_ERR, "ERROR: io_watch_add: check_io poll: %s [%d]\n",
552                                                 strerror(errno), errno);
553                 }
554         }
555 #endif
556         return 0;
557 error:
558         if (e) unhash_fd_map(e);
559         return -1;
560 #undef fd_array_setup
561 #undef set_fd_flags 
562 }
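
/* Illustrative only: a sketch of registering a socket with io_watch_add().
 * io_h, sock_fd, conn_ptr and F_LISTENER are made-up names, and the handler
 * is assumed to be already initialized (init_io_wait() in io_wait.c). Per
 * the WARNING above, conn_ptr must be fully set up before the call, since
 * handle_io() may fire from inside io_watch_add(). Kept inside #if 0 so it
 * does not affect compilation.
 */
#if 0
	if (io_watch_add(&io_h, sock_fd, POLLIN, F_LISTENER, conn_ptr)==-1){
		LOG(L_ERR, "ERROR: io_watch_add failed for fd %d\n", sock_fd);
		goto error;
	}
#endif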
563
564
565
566 #define IO_FD_CLOSING 16
567 /* parameters:    h - handler 
568  *               fd - file descriptor
569  *            index - index in the fd_array if known, -1 if not
570  *                    (if index==-1 fd_array will be searched for the
571  *                     corresponding fd* entry -- slower but unavoidable in 
572  *                     some cases). index is not used (no fd_array) for epoll,
573  *                     /dev/poll and kqueue
574  *            flags - optimization flags, e.g. IO_FD_CLOSING, the fd was 
575  *                    or will shortly be closed, in some cases we can avoid
576  *                    extra remove operations (e.g.: epoll, kqueue, sigio)
577  * returns 0 if ok, -1 on error */
578 inline static int io_watch_del(io_wait_h* h, int fd, int idx, int flags)
579 {
580         
581 #define fix_fd_array \
582         do{\
583                         if (unlikely(idx==-1)){ \
584                                 /* fix idx if -1 and needed */ \
585                                 for (idx=0; (idx<h->fd_no) && \
586                                                         (h->fd_array[idx].fd!=fd); idx++); \
587                         } \
588                         if (likely(idx<h->fd_no)){ \
589                                 memmove(&h->fd_array[idx], &h->fd_array[idx+1], \
590                                         (h->fd_no-(idx+1))*sizeof(*(h->fd_array))); \
591                                 if ((idx<=h->crt_fd_array_idx) && (h->crt_fd_array_idx>=0)) \
592                                         h->crt_fd_array_idx--; \
593                         } \
594         }while(0)
595         
596         struct fd_map* e;
597         int events;
598 #ifdef HAVE_EPOLL
599         int n;
600         struct epoll_event ep_event;
601 #endif
602 #ifdef HAVE_DEVPOLL
603         struct pollfd pfd;
604 #endif
605 #ifdef HAVE_SIGIO_RT
606         int fd_flags;
607 #endif
608         
609         if (unlikely((fd<0) || (fd>=h->max_fd_no))){
610                 LOG(L_CRIT, "BUG: io_watch_del: invalid fd %d, not in [0, %d) \n",
611                                                 fd, h->max_fd_no);
612                 goto error;
613         }
614         DBG("DBG: io_watch_del (%p, %d, %d, 0x%x) fd_no=%d called\n",
615                         h, fd, idx, flags, h->fd_no);
616         e=get_fd_map(h, fd);
617         /* more sanity checks */
618         if (unlikely(e==0)){
619                 LOG(L_CRIT, "BUG: io_watch_del: no corresponding hash entry for %d\n",
620                                         fd);
621                 goto error;
622         }
623         if (unlikely(e->type==0 /*F_NONE*/)){
624                 LOG(L_ERR, "ERROR: io_watch_del: trying to delete already erased"
625                                 " entry %d in the hash(%d, %d, %p) flags %x)\n",
626                                 fd, e->fd, e->type, e->data, flags);
627                 goto error;
628         }
629         events=e->events;
630         
631         switch(h->poll_method){
632                 case POLL_POLL:
633                         fix_fd_array;
634                         break;
635 #ifdef HAVE_SELECT
636                 case POLL_SELECT:
637                         if (likely(events & POLLIN))
638                                 FD_CLR(fd, &h->master_rset);
639                         if (unlikely(events & POLLOUT))
640                                 FD_CLR(fd, &h->master_wset);
641                         if (unlikely(h->max_fd_select && (h->max_fd_select==fd)))
642                                 /* we don't know the prev. max, so we just decrement it */
643                                 h->max_fd_select--; 
644                         fix_fd_array;
645                         break;
646 #endif
647 #ifdef HAVE_SIGIO_RT
648                 case POLL_SIGIO_RT:
649                         /* the O_ASYNC flag must be reset all the time, the fd
650                          *  can be changed only if  O_ASYNC is reset (if not and
651                          *  the fd is a duplicate, you will get signals from the dup. fd
652                          *  and not from the original, even if the dup. fd was closed
653                          *  and the signals re-set on the original) -- andrei
654                          */
655                         /*if (!(flags & IO_FD_CLOSING)){*/
656                                 /* reset ASYNC */
657                                 fd_flags=fcntl(fd, F_GETFL); 
658                                 if (unlikely(fd_flags==-1)){ 
659                                         LOG(L_ERR, "ERROR: io_watch_del: fcntl: GETFL failed:" 
660                                                         " %s [%d]\n", strerror(errno), errno); 
661                                         goto error; 
662                                 } 
663                                 if (unlikely(fcntl(fd, F_SETFL, fd_flags&(~O_ASYNC))==-1)){ 
664                                         LOG(L_ERR, "ERROR: io_watch_del: fcntl: SETFL" 
665                                                                 " failed: %s [%d]\n", strerror(errno), errno); 
666                                         goto error; 
667                                 } 
668                         fix_fd_array; /* only on success */
669                         break;
670 #endif
671 #ifdef HAVE_EPOLL
672                 case POLL_EPOLL_LT:
673                 case POLL_EPOLL_ET:
674                         /* epoll doesn't seem to automatically remove sockets,
675                          * if the socket is a duplicate/moved and the original
676                          * is still open. The fd is removed from the epoll set
677                          * only when the original (and all the  copies?) is/are 
678                          * closed. This is probably a bug in epoll. --andrei */
679 #ifdef EPOLL_NO_CLOSE_BUG
680                         if (!(flags & IO_FD_CLOSING)){
681 #endif
682 again_epoll:
683                                 n=epoll_ctl(h->epfd, EPOLL_CTL_DEL, fd, &ep_event);
684                                 if (unlikely(n==-1)){
685                                         if (errno==EAGAIN) goto again_epoll;
686                                         LOG(L_ERR, "ERROR: io_watch_del: removing fd from epoll "
687                                                         "list failed: %s [%d]\n", strerror(errno), errno);
688                                         goto error;
689                                 }
690 #ifdef EPOLL_NO_CLOSE_BUG
691                         }
692 #endif
693                         break;
694 #endif
695 #ifdef HAVE_KQUEUE
696                 case POLL_KQUEUE:
697                         if (!(flags & IO_FD_CLOSING)){
698                                 if (likely(events & POLLIN)){
699                                         if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
700                                                                                                         EV_DELETE, 0) ==-1)){
701                                                 /* try to delete the write filter anyway */
702                                                 if (events & POLLOUT){
703                                                         kq_ev_change(h, fd, EVFILT_WRITE, EV_DELETE, 0);
704                                                 }
705                                                 goto error;
706                                         }
707                                 }
708                                 if (unlikely(events & POLLOUT)){
709                                         if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
710                                                                                                         EV_DELETE, 0) ==-1))
711                                                 goto error;
712                                 }
713                         }
714                         break;
715 #endif
716 #ifdef HAVE_DEVPOLL
717                 case POLL_DEVPOLL:
718                                 /* for /dev/poll the closed fds _must_ be removed
719                                    (they are not removed automatically on close()) */
720                                 pfd.fd=fd;
721                                 pfd.events=POLLREMOVE;
722                                 pfd.revents=0;
723 again_devpoll:
724                                 if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
725                                         if (errno==EINTR) goto again_devpoll;
726                                         LOG(L_ERR, "ERROR: io_watch_del: removing fd from "
727                                                                 "/dev/poll failed: %s [%d]\n", 
728                                                                 strerror(errno), errno);
729                                         goto error;
730                                 }
731                                 break;
732 #endif
733                 default:
734                         LOG(L_CRIT, "BUG: io_watch_del: no support for poll method "
735                                         " %s (%d)\n", poll_method_str[h->poll_method], 
736                                         h->poll_method);
737                         goto error;
738         }
739         unhash_fd_map(e); /* only on success */
740         h->fd_no--;
741         return 0;
742 error:
743         return -1;
744 #undef fix_fd_array
745 }
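
/* Illustrative only: removing a fd that is about to be closed. Passing
 * IO_FD_CLOSING lets some poll methods (epoll, kqueue, sigio) skip the
 * explicit remove operation; idx is -1 because the fd_array index is not
 * known at this call site. io_h and sock_fd are made-up names. Kept inside
 * #if 0 so it does not affect compilation.
 */
#if 0
	if (io_watch_del(&io_h, sock_fd, -1, IO_FD_CLOSING)==-1)
		LOG(L_ERR, "ERROR: io_watch_del failed for fd %d\n", sock_fd);
	close(sock_fd);
#endif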
746
747
748
749 /* parameters:    h - handler 
750  *               fd - file descriptor
751  *           events - new events to watch for
752  *              idx - index in the fd_array if known, -1 if not
753  *                    (if index==-1 fd_array will be searched for the
754  *                     corresponding fd* entry -- slower but unavoidable in 
755  *                     some cases). index is not used (no fd_array) for epoll,
756  *                     /dev/poll and kqueue
757  * returns 0 if ok, -1 on error */
758 inline static int io_watch_chg(io_wait_h* h, int fd, short events, int idx )
759 {
760         
761 #define fd_array_chg(ev) \
762         do{\
763                         if (unlikely(idx==-1)){ \
764                                 /* fix idx if -1 and needed */ \
765                                 for (idx=0; (idx<h->fd_no) && \
766                                                         (h->fd_array[idx].fd!=fd); idx++); \
767                         } \
768                         if (likely(idx<h->fd_no)){ \
769                                 h->fd_array[idx].events=(ev); \
770                         } \
771         }while(0)
772         
773         struct fd_map* e;
774         int add_events;
775         int del_events;
776 #ifdef HAVE_DEVPOLL
777         struct pollfd pfd;
778 #endif
779 #ifdef HAVE_EPOLL
780         int n;
781         struct epoll_event ep_event;
782 #endif
783         
784         if (unlikely((fd<0) || (fd>=h->max_fd_no))){
785                 LOG(L_CRIT, "BUG: io_watch_chg: invalid fd %d, not in [0, %d) \n",
786                                                 fd, h->max_fd_no);
787                 goto error;
788         }
789         if (unlikely((events&(POLLIN|POLLOUT))==0)){
790                 LOG(L_CRIT, "BUG: io_watch_chg: invalid events: 0x%0x\n", events);
791                 goto error;
792         }
793         DBG("DBG: io_watch_chg (%p, %d, 0x%x, 0x%x) fd_no=%d called\n",
794                         h, fd, events, idx, h->fd_no);
795         e=get_fd_map(h, fd);
796         /* more sanity checks */
797         if (unlikely(e==0)){
798                 LOG(L_CRIT, "BUG: io_watch_chg: no corresponding hash entry for %d\n",
799                                         fd);
800                 goto error;
801         }
802         if (unlikely(e->type==0 /*F_NONE*/)){
803                 LOG(L_ERR, "ERROR: io_watch_chg: trying to change an already erased"
804                                 " entry %d in the hash(%d, %d, %p) )\n",
805                                 fd, e->fd, e->type, e->data);
806                 goto error;
807         }
808         
809         add_events=events & ~e->events;
810         del_events=e->events & ~events;
811         switch(h->poll_method){
812                 case POLL_POLL:
813                         fd_array_chg(events
814 #ifdef POLLRDHUP
815                                                         /* listen to POLLRDHUP by default (if POLLIN) */
816                                                         | (((int)!(events & POLLIN) - 1) & POLLRDHUP)
817 #endif /* POLLRDHUP */
818                                                 );
819                         break;
820 #ifdef HAVE_SELECT
821                 case POLL_SELECT:
822                         fd_array_chg(events);
823                         if (unlikely(del_events & POLLIN))
824                                 FD_CLR(fd, &h->master_rset);
825                         else if (unlikely(add_events & POLLIN))
826                                 FD_SET(fd, &h->master_rset);
827                         if (likely(del_events & POLLOUT))
828                                 FD_CLR(fd, &h->master_wset);
829                         else if (likely(add_events & POLLOUT))
830                                 FD_SET(fd, &h->master_wset);
831                         break;
832 #endif
833 #ifdef HAVE_SIGIO_RT
834                 case POLL_SIGIO_RT:
835                         fd_array_chg(events);
836                         /* no need for check_io, since SIGIO_RT listens by default for all
837                          * the events */
838                         break;
839 #endif
840 #ifdef HAVE_EPOLL
841                 case POLL_EPOLL_LT:
842                                 ep_event.events=
843 #ifdef POLLRDHUP
844                                                 /* listen for EPOLLRDHUP too */
845                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
846 #else /* POLLRDHUP */
847                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
848 #endif /* POLLRDHUP */
849                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
850                                 ep_event.data.ptr=e;
851 again_epoll_lt:
852                                 n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
853                                 if (unlikely(n==-1)){
854                                         if (errno==EAGAIN) goto again_epoll_lt;
855                                         LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
856                                                         " failed: %s [%d]\n", strerror(errno), errno);
857                                         goto error;
858                                 }
859                         break;
860                 case POLL_EPOLL_ET:
861                                 ep_event.events=
862 #ifdef POLLRDHUP
863                                                 /* listen for EPOLLRDHUP too */
864                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
865 #else /* POLLRDHUP */
866                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
867 #endif /* POLLRDHUP */
868                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
869                                                 EPOLLET;
870                                 ep_event.data.ptr=e;
871 again_epoll_et:
872                                 n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
873                                 if (unlikely(n==-1)){
874                                         if (errno==EAGAIN) goto again_epoll_et;
875                                         LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
876                                                         " failed: %s [%d]\n", strerror(errno), errno);
877                                         goto error;
878                                 }
879                         break;
880 #endif
881 #ifdef HAVE_KQUEUE
882                 case POLL_KQUEUE:
883                         if (unlikely(del_events & POLLIN)){
884                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
885                                                                                                                 EV_DELETE, 0) ==-1))
886                                                 goto error;
887                         }else if (unlikely(add_events & POLLIN)){
888                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e) ==-1))
889                                         goto error;
890                         }
891                         if (likely(del_events & POLLOUT)){
892                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
893                                                                                                                 EV_DELETE, 0) ==-1))
894                                                 goto error;
895                         }else if (likely(add_events & POLLOUT)){
896                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
897                                         goto error;
898                         }
899                         break;
900 #endif
901 #ifdef HAVE_DEVPOLL
902                 case POLL_DEVPOLL:
903                                 /* for /dev/poll the closed fds _must_ be removed
904                                    (they are not removed automatically on close()) */
905                                 pfd.fd=fd;
906                                 pfd.events=POLLREMOVE;
907                                 pfd.revents=0;
908 again_devpoll1:
909                                 if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
910                                         if (errno==EINTR) goto again_devpoll1;
911                                         LOG(L_ERR, "ERROR: io_watch_chg: removing fd from "
912                                                                 "/dev/poll failed: %s [%d]\n", 
913                                                                 strerror(errno), errno);
914                                         goto error;
915                                 }
916 again_devpoll2:
917                                 pfd.events=events;
918                                 pfd.revents=0;
919                                 if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
920                                         if (errno==EINTR) goto again_devpoll2;
921                                         LOG(L_ERR, "ERROR: io_watch_chg: re-adding fd to "
922                                                                 "/dev/poll failed: %s [%d]\n", 
923                                                                 strerror(errno), errno);
924                                         /* error re-adding the fd => mark it as removed/unhash */
925                                         unhash_fd_map(e);
926                                         goto error;
927                                 }
928                                 break;
929 #endif
930                 default:
931                         LOG(L_CRIT, "BUG: io_watch_chg: no support for poll method "
932                                         " %s (%d)\n", poll_method_str[h->poll_method], 
933                                         h->poll_method);
934                         goto error;
935         }
936         e->events=events; /* only on success */
937         return 0;
938 error:
939         return -1;
940 #undef fd_array_chg
941 }
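
/* Illustrative only: switching an already watched fd from read-only to
 * read+write interest, e.g. after a partial non-blocking write. io_h and
 * sock_fd are made-up names; idx is -1 because the fd_array index is not
 * known here. Kept inside #if 0 so it does not affect compilation.
 */
#if 0
	if (io_watch_chg(&io_h, sock_fd, POLLIN|POLLOUT, -1)==-1)
		LOG(L_ERR, "ERROR: io_watch_chg failed for fd %d\n", sock_fd);
#endif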
942
943
944
945 /* io_wait_loop_x style function 
946  * wait for io using poll()
947  * params: h      - io_wait handle
948  *         t      - timeout in s
949  *         repeat - if !=0 handle_io will be called until it returns <=0
950  * returns: number of IO events handled on success (can be 0), -1 on error
951  */
952 inline static int io_wait_loop_poll(io_wait_h* h, int t, int repeat)
953 {
954         int n, r;
955         int ret;
956         struct fd_map* fm;
957         
958 again:
959                 ret=n=poll(h->fd_array, h->fd_no, t*1000);
960                 if (n==-1){
961                         if (errno==EINTR) goto again; /* signal, ignore it */
962                         else{
963                                 LOG(L_ERR, "ERROR:io_wait_loop_poll: poll: %s [%d]\n",
964                                                 strerror(errno), errno);
965                                 goto error;
966                         }
967                 }
968                 for (r=0; (r<h->fd_no) && n; r++){
969                         fm=get_fd_map(h, h->fd_array[r].fd);
970                         if (h->fd_array[r].revents & (fm->events|POLLERR|POLLHUP)){
971                                 n--;
972                                 /* sanity checks */
973                                 if (unlikely((h->fd_array[r].fd >= h->max_fd_no)||
974                                                                 (h->fd_array[r].fd < 0))){
975                                         LOG(L_CRIT, "BUG: io_wait_loop_poll: bad fd %d "
976                                                         "(not in the 0 - %d range)\n",
977                                                         h->fd_array[r].fd, h->max_fd_no);
978                                         /* try to continue anyway */
979                                         h->fd_array[r].events=0; /* clear the events */
980                                         continue;
981                                 }
982                                 h->crt_fd_array_idx=r;
983                                 /* repeat handle_io if repeat, fd still watched (not deleted
984                                  *  inside handle_io), handle_io returns that there's still
985                                  *  IO and the fd is still watched for the triggering event */
986                                 while(fm->type && 
987                                                 (handle_io(fm, h->fd_array[r].revents, r) > 0) &&
988                                                 repeat && ((fm->events|POLLERR|POLLHUP) &
989                                                                                                         h->fd_array[r].revents));
990                                 r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd) 
991                                                                                   array shifting */
992                         }
993                 }
994 error:
995         return ret;
996 }
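
/* Illustrative only: a minimal event loop built on io_wait_loop_poll(),
 * using a 5 s timeout and repeat!=0 so handle_io() drains each ready fd.
 * A real loop would dispatch on h->poll_method to the matching
 * io_wait_loop_* variant (see tcp_main.c). io_h is a made-up name. Kept
 * inside #if 0 so it does not affect compilation.
 */
#if 0
	for(;;){
		if (io_wait_loop_poll(&io_h, 5, 1)<0)
			LOG(L_ERR, "ERROR: io_wait_loop_poll failed\n");
	}
#endif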
997
998
999
1000 #ifdef HAVE_SELECT
1001 /* wait for io using select */
1002 inline static int io_wait_loop_select(io_wait_h* h, int t, int repeat)
1003 {
1004         fd_set sel_rset;
1005         fd_set sel_wset;
1006         int n, ret;
1007         struct timeval timeout;
1008         int r;
1009         struct fd_map* fm;
1010         int revents;
1011         
1012 again:
1013                 sel_rset=h->master_rset;
1014                 sel_wset=h->master_wset;
1015                 timeout.tv_sec=t;
1016                 timeout.tv_usec=0;
1017                 ret=n=select(h->max_fd_select+1, &sel_rset, &sel_wset, 0, &timeout);
1018                 if (n<0){
1019                         if (errno==EINTR) goto again; /* just a signal */
1020                         LOG(L_ERR, "ERROR: io_wait_loop_select: select: %s [%d]\n",
1021                                         strerror(errno), errno);
1022                         n=0;
1023                         /* continue */
1024                 }
1025                 /* use poll fd array */
1026                 for(r=0; (r<h->fd_no) && n; r++){
1027                         revents=0;
1028                         if (likely(FD_ISSET(h->fd_array[r].fd, &sel_rset)))
1029                                 revents|=POLLIN;
1030                         if (unlikely(FD_ISSET(h->fd_array[r].fd, &sel_wset)))
1031                                 revents|=POLLOUT;
1032                         if (unlikely(revents)){
1033                                 h->crt_fd_array_idx=r;
1034                                 fm=get_fd_map(h, h->fd_array[r].fd);
1035                                 while(fm->type && (fm->events & revents) && 
1036                                                 (handle_io(fm, revents, r)>0) && repeat);
1037                                 r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd) 
1038                                                                                   array shifting */
1039                                 n--;
1040                         }
1041                 };
1042         return ret;
1043 }
1044 #endif
1045
1046
1047
1048 #ifdef HAVE_EPOLL
1049 inline static int io_wait_loop_epoll(io_wait_h* h, int t, int repeat)
1050 {
1051         int n, r;
1052         struct fd_map* fm;
1053         int revents;
1054         
1055 again:
1056                 n=epoll_wait(h->epfd, h->ep_array, h->fd_no, t*1000);
1057                 if (unlikely(n==-1)){
1058                         if (errno==EINTR) goto again; /* signal, ignore it */
1059                         else{
1060                                 LOG(L_ERR, "ERROR:io_wait_loop_epoll: "
1061                                                 "epoll_wait(%d, %p, %d, %d): %s [%d]\n", 
1062                                                 h->epfd, h->ep_array, h->fd_no, t*1000,
1063                                                 strerror(errno), errno);
1064                                 goto error;
1065                         }
1066                 }
1067 #if 0
1068                 if (n>1){
1069                         for(r=0; r<n; r++){
1070                                 LOG(L_ERR, "WARNING: ep_array[%d]= %x, %p\n",
1071                                                 r, h->ep_array[r].events, h->ep_array[r].data.ptr);
1072                         }
1073                 }
1074 #endif
1075                 for (r=0; r<n; r++){
1076                         revents= (POLLIN & (!(h->ep_array[r].events & (EPOLLIN|EPOLLPRI))
1077                                                 -1)) |
1078                                          (POLLOUT & (!(h->ep_array[r].events & EPOLLOUT)-1)) |
1079                                          (POLLERR & (!(h->ep_array[r].events & EPOLLERR)-1)) |
1080                                          (POLLHUP & (!(h->ep_array[r].events & EPOLLHUP)-1))
1081 #ifdef POLLRDHUP
1082                                         | (POLLRDHUP & (!(h->ep_array[r].events & EPOLLRDHUP)-1))
1083 #endif
1084                                         ;
1085                         if (likely(revents)){
1086                                 fm=(struct fd_map*)h->ep_array[r].data.ptr;
1087                                 while(fm->type && ((fm->events|POLLERR|POLLHUP) & revents) && 
1088                                                 (handle_io(fm, revents, -1)>0) && repeat);
1089                         }else{
1090                                 LOG(L_ERR, "ERROR:io_wait_loop_epoll: unexpected event %x"
1091                                                         " on %d/%d, data=%p\n", h->ep_array[r].events,
1092                                                         r+1, n, h->ep_array[r].data.ptr);
1093                         }
1094                 }
1095 error:
1096         return n;
1097 }
1098 #endif
1099
1100
1101
1102 #ifdef HAVE_KQUEUE
1103 inline static int io_wait_loop_kqueue(io_wait_h* h, int t, int repeat)
1104 {
1105         int n, r;
1106         struct timespec tspec;
1107         struct fd_map* fm;
1108         int orig_changes;
1109         int apply_changes;
1110         int revents;
1111         
1112         tspec.tv_sec=t;
1113         tspec.tv_nsec=0;
1114         orig_changes=h->kq_nchanges;
1115         apply_changes=orig_changes;
1116         do {
1117 again:
1118                 n=kevent(h->kq_fd, h->kq_changes, apply_changes,  h->kq_array,
1119                                         h->fd_no, &tspec);
1120                 if (unlikely(n==-1)){
1121                         if (errno==EINTR) goto again; /* signal, ignore it */
1122                         else if (errno==EBADF) {
1123                                 /* some of the FDs in kq_changes are bad (already closed)
1124                                    and there is not enough space in kq_array to return all
1125                                    of them back */
1126                                 apply_changes = h->fd_no;
1127                                 goto again;
1128                         }else{
1129                                 LOG(L_ERR, "ERROR: io_wait_loop_kqueue: kevent:"
1130                                                 " %s [%d]\n", strerror(errno), errno);
1131                                 goto error;
1132                         }
1133                 }
1134                 /* remove applied changes */
1135                 h->kq_nchanges -= apply_changes;
1136                 if (unlikely(apply_changes < orig_changes)) {
1137                         orig_changes -= apply_changes;
1138                         memmove(&h->kq_changes[0], &h->kq_changes[apply_changes],
1139                                                                         sizeof(h->kq_changes[0])*h->kq_nchanges);
1140                         apply_changes = orig_changes<h->fd_no ? orig_changes : h->fd_no;
1141                 } else {
1142                         orig_changes = 0;
1143                         apply_changes = 0;
1144                 }
1145                 for (r=0; r<n; r++){
1146 #ifdef EXTRA_DEBUG
1147                         DBG("DBG: kqueue: event %d/%d: fd=%d, udata=%lx, flags=0x%x\n",
1148                                         r, n, h->kq_array[r].ident, (long)h->kq_array[r].udata,
1149                                         h->kq_array[r].flags);
1150 #endif
1151                         if (unlikely((h->kq_array[r].flags & EV_ERROR) &&
1152                                                         (h->kq_array[r].data == EBADF ||
1153                                                          h->kq_array[r].udata == 0))){
1154                                 /* error in changes: we ignore it if it has to do with a
1155                                    bad fd or udata==0. It can be caused by trying to remove an
1156                                    already closed fd: race between adding something to the
1157                                    changes array, close() and applying the changes.
1158                                    E.g. for ser tcp: tcp_main sends a fd to a child for reading
1159                                     => deletes it from the watched fds => the changes array
1160                                         will contain an EV_DELETE for it. Before the changes
1161                                         are applied (they are at the end of the main io_wait loop,
1162                                         after all the fd events were processed), a CON_ERR sent
1163                                         to tcp_main by a sender (send fail) is processed and causes
1164                                         the fd to be closed. When the changes are applied =>
1165                                         error for the EV_DELETE attempt of a closed fd.
1166                                 */
1167                                 /*
1168                                         example EV_ERROR for trying to delete a read watched fd,
1169                                         that was already closed:
1170                                         {
1171                                                 ident = 63,  [fd]
1172                                                 filter = -1, [EVFILT_READ]
1173                                                 flags = 16384, [EV_ERROR]
1174                                                 fflags = 0,
1175                                                 data = 9, [errno = EBADF]
1176                                                 udata = 0x0
1177                                         }
1178                                 */
1179                                 if (h->kq_array[r].data != EBADF)
1180                                         LOG(L_INFO, "INFO: io_wait_loop_kqueue: kevent error on "
1181                                                         "fd %ld: %s [%ld]\n", (long)h->kq_array[r].ident,
1182                                                         strerror(h->kq_array[r].data),
1183                                                         (long)h->kq_array[r].data);
1184                         }else{
1185                                 fm=(struct fd_map*)h->kq_array[r].udata;
1186                                 if (likely(h->kq_array[r].filter==EVFILT_READ)){
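                                        /* branchless translation of the kevent flags: when EV_EOF
                                         * (resp. EV_ERROR) is set, !(flags & EV_x) is 0 and 0-1
                                         * yields an all-ones mask that keeps POLLHUP (resp.
                                         * POLLERR); otherwise the mask is 0 */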
1187                                         revents=POLLIN |
1188                                                 (((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
1189                                                 (((int)!(h->kq_array[r].flags & EV_ERROR)-1)&POLLERR);
1190                                         while(fm->type && (fm->events & revents) && 
1191                                                         (handle_io(fm, revents, -1)>0) && repeat);
1192                                 }else if (h->kq_array[r].filter==EVFILT_WRITE){
1193                                         revents=POLLOUT |
1194                                                 (((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
1195                                                 (((int)!(h->kq_array[r].flags & EV_ERROR)-1)&POLLERR);
1196                                         while(fm->type && (fm->events & revents) && 
1197                                                         (handle_io(fm, revents, -1)>0) && repeat);
1198                                 }
1199                         }
1200                 }
1201         } while(unlikely(orig_changes));
1202 error:
1203         return n;
1204 }
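

/* Illustrative helper (a sketch only, not part of the original API): the
 * EV_ERROR tolerance test used in io_wait_loop_kqueue() above, factored out
 * for clarity.  The name kq_ignorable_error() is made up for this example.
 * An EV_ERROR entry whose data is EBADF or whose udata is 0 comes from a
 * change (typically an EV_DELETE) that raced with a close() of the same fd;
 * such an entry is treated as harmless (at most logged) instead of being
 * dispatched to handle_io(). */
inline static int kq_ignorable_error(const struct kevent* ev)
{
        return (ev->flags & EV_ERROR) &&
                        (ev->data == EBADF || ev->udata == 0);
}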
1205 #endif
1206
1207
1208
1209 #ifdef HAVE_SIGIO_RT
1210 /* the sigio rt version has no repeat option (it would not make sense here) */
1211 inline static int io_wait_loop_sigio_rt(io_wait_h* h, int t)
1212 {
1213         int n;
1214         int ret;
1215         struct timespec ts;
1216         siginfo_t siginfo;
1217         int sigio_band;
1218         int sigio_fd;
1219         struct fd_map* fm;
1220         int revents;
1221 #ifdef SIGINFO64_WORKARROUND
1222         int* pi;
1223 #endif
1224         
1225         
1226         ret=1; /* 1 event per call normally */
1227         ts.tv_sec=t;
1228         ts.tv_nsec=0;
1229         if (unlikely(!sigismember(&h->sset, h->signo) ||
1230                                         !sigismember(&h->sset, SIGIO))) {
1231                 LOG(L_CRIT, "BUG: io_wait_loop_sigio_rt: the signal mask"
1232                                 " is not properly set!\n");
1233                 goto error;
1234         }
1235 again:
1236         n=sigtimedwait(&h->sset, &siginfo, &ts);
1237         if (unlikely(n==-1)){
1238                 if (errno==EINTR) goto again; /* some other signal, ignore it */
1239                 else if (errno==EAGAIN){ /* timeout */
1240                         ret=0;
1241                         goto end;
1242                 }else{
1243                         LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: sigtimedwait"
1244                                         " %s [%d]\n", strerror(errno), errno);
1245                         goto error;
1246                 }
1247         }
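        /* n is the signal number that was actually dequeued: h->signo (the
         * queued rt signal carrying the fd event) in the normal case, or plain
         * SIGIO when the rt signal queue overflowed */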
1248         if (likely(n!=SIGIO)){
1249 #ifdef SIGINFO64_WORKARROUND
1250                 /* on linux siginfo.si_band is defined as long in userspace
1251                  * and as int in kernel (< 2.6.5) => on 64 bits things will break!
1252                  * (si_band will include si_fd, and si_fd will contain
1253                  *  garbage).
1254                  *  see /usr/src/linux/include/asm-generic/siginfo.h and
1255                  *      /usr/include/bits/siginfo.h
1256                  *  On newer kernels this is fixed (si_band is long in the kernel too).
1257                  * -- andrei */
1258                 if  ((_os_ver<0x020605) && (sizeof(siginfo.si_band)>sizeof(int))){
1259                         pi=(int*)(void*)&siginfo.si_band; /* avoid type punning warnings */
1260                         sigio_band=*pi;
1261                         sigio_fd=*(pi+1);
1262                 }else
1263 #endif
1264                 {
1265                         sigio_band=siginfo.si_band;
1266                         sigio_fd=siginfo.si_fd;
1267                 }
1268                 if (unlikely(siginfo.si_code==SI_SIGIO)){
1269                         /* old style, we don't know the event (linux 2.2.?) */
1270                         LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: old style sigio"
1271                                         " interface\n");
1272                         fm=get_fd_map(h, sigio_fd);
1273                         /* we can have queued signals generated by fds not watched
1274                          * any more, or by fds in transition to a child => ignore them */
1275                         if (fm->type)
1276                                 handle_io(fm, POLLIN|POLLOUT, -1);
1277                 }else{
1278                         /* si_code contains the SIGPOLL reason: POLL_IN, POLL_OUT,
1279                          *  POLL_MSG, POLL_ERR, POLL_PRI or POLL_HUP
1280                          * and si_band the translated poll event bitmap:
1281                          *  POLLIN|POLLRDNORM  (=POLL_IN),
1282                          *  POLLOUT|POLLWRNORM|POLLWRBAND (=POLL_OUT),
1283                          *  POLLIN|POLLRDNORM|POLLMSG (=POLL_MSG),
1284                          *  POLLERR (=POLL_ERR),
1285                          *  POLLPRI|POLLRDBAND (=POLL_PRI),
1286                          *  POLLHUP|POLLERR (=POLL_HUP) 
1287                          *  [linux 2.6.22 fs/fcntl.c:447]
1288                          */
1289 #ifdef EXTRA_DEBUG
1290                         DBG("io_wait_loop_sigio_rt: siginfo: signal=%d (%d),"
1291                                         " si_code=%d, si_band=0x%x,"
1292                                         " si_fd=%d\n",
1293                                         siginfo.si_signo, n, siginfo.si_code, 
1294                                         (unsigned)sigio_band,
1295                                         sigio_fd);
1296 #endif
1297                         /* on some errors (e.g. when receiving a TCP RST), sigio_band will
1298                          * be set to 0x08 (POLLERR) or 0x18 (POLLERR|POLLHUP - on stream
1299                          *  unix socket close) , so better catch all events --andrei */
1300                         if (likely(sigio_band)){
1301                                 fm=get_fd_map(h, sigio_fd);
1302                                 revents=sigio_band;
1303                                 /* fix revents==POLLPRI case */
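                                /* branchless: when POLLPRI is set, !(revents & POLLPRI) is 0 and
                                 * 0-1 yields an all-ones mask, so POLLIN is or-ed in too (urgent
                                 * data is also reported as readable) */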
1304                                 revents |= (!(revents & POLLPRI)-1) & POLLIN;
1305                                 /* we can have queued signals generated by fds not watched
1306                                  * any more, or by fds in transition to a child
1307                                  * => ignore them */
1308                                 if (fm->type && ((fm->events|POLLERR|POLLHUP) & revents))
1309                                         handle_io(fm, revents, -1);
1310                                 else
1311                                         DBG("WARNING: io_wait_loop_sigio_rt: ignoring event"
1312                                                         " %x on fd %d, watching for %x, si_code=%x "
1313                                                         "(fm->type=%d, fm->fd=%d, fm->data=%p)\n",
1314                                                         sigio_band, sigio_fd, fm->events, siginfo.si_code,
1315                                                         fm->type, fm->fd, fm->data);
1316                         }else{
1317                                 LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: unexpected event"
1318                                                         " on fd %d: %x\n", sigio_fd, sigio_band);
1319                         }
1320                 }
1321         }else{
1322                 /* signal queue overflow 
1323                  * TODO: increase the signal queue size (2.4.x kernels: /proc/..,
1324                  *  2.6.x kernels: rlimits) */
1324                 LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: signal queue overflowed"
1325                                         " - falling back to poll\n");
1326                 /* clear real-time signal queue
1327                  * both SIG_IGN and SIG_DFL are needed: setting only SIG_DFL
1328                  * does not flush the already queued signals */
1329                 if (signal(h->signo, SIG_IGN)==SIG_ERR){
1330                         LOG(L_CRIT, "BUG: io_wait_loop_sigio_rt: couldn't reset signal to IGN\n");
1331                 }
1332                 
1333                 if (signal(h->signo, SIG_DFL)==SIG_ERR){
1334                         LOG(L_CRIT, "BUG: io_wait_loop_sigio_rt: couldn't reset signal to DFL\n");
1335                 }
1336                 /* falling back to normal poll */
1337                 ret=io_wait_loop_poll(h, -1, 1);
1338         }
1339 end:
1340         return ret;
1341 error:
1342         return -1;
1343 }
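

/* Illustrative sketch (example only): the 64-bit si_band workaround shown in
 * isolation.  On a 64-bit userspace running a pre-2.6.5 kernel, the kernel
 * fills si_band as a 32-bit int, so the long si_band slot seen by userspace
 * really holds { int band; int fd; }.  The helper name sigio_unpack_band_fd()
 * is made up for this example; real callers would still have to guard it with
 * the _os_ver and sizeof() checks used above. */
#ifdef SIGINFO64_WORKARROUND
inline static void sigio_unpack_band_fd(siginfo_t* si, int* band, int* fd)
{
        int* pi;

        pi=(int*)(void*)&si->si_band; /* avoid type punning warnings */
        *band=pi[0];
        *fd=pi[1];
}
#endif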
1344 #endif
1345
1346
1347
1348 #ifdef HAVE_DEVPOLL
1349 inline static int io_wait_loop_devpoll(io_wait_h* h, int t, int repeat)
1350 {
1351         int n, r;
1352         int ret;
1353         struct dvpoll dpoll;
1354         struct fd_map* fm;
1355
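                /* DP_POLL semantics: dp_fds points to a buffer where up to dp_nfds
                 * ready pollfd entries are returned, dp_timeout is in milliseconds
                 * (hence t*1000) and the ioctl returns the number of ready fds
                 * (0 on timeout, -1 on error) */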
1356                 dpoll.dp_timeout=t*1000;
1357                 dpoll.dp_nfds=h->fd_no;
1358                 dpoll.dp_fds=h->fd_array;
1359 again:
1360                 ret=n=ioctl(h->dpoll_fd, DP_POLL, &dpoll);
1361                 if (unlikely(n==-1)){
1362                         if (errno==EINTR) goto again; /* signal, ignore it */
1363                         else{
1364                                 LOG(L_ERR, "ERROR: io_wait_loop_devpoll: ioctl: %s [%d]\n",
1365                                                 strerror(errno), errno);
1366                                 goto error;
1367                         }
1368                 }
1369                 for (r=0; r< n; r++){
1370                         if (h->fd_array[r].revents & (POLLNVAL|POLLERR)){
1371                                 LOG(L_ERR, "ERROR: io_wait_loop_devpoll: POLLERR or POLLNVAL returned"
1372                                                         " for fd %d, revents=%x\n",
1373                                                         h->fd_array[r].fd, h->fd_array[r].revents);
1374                         }
1375                         /* POLLIN|POLLHUP are simply passed through to handle_io() */
1376                         fm=get_fd_map(h, h->fd_array[r].fd);
1377                         while(fm->type && (fm->events & h->fd_array[r].revents) &&
1378                                         (handle_io(fm, h->fd_array[r].revents, r) > 0) && repeat);
1379                 }
1380 error:
1381         return ret;
1382 }
1383 #endif
1384
1385
1386
1387 /* init */
1388
1389
1390 /* initializes the static vars/arrays
1391  * params:      h - pointer to the io_wait_h that will be initialized
1392  *         max_fd - maximum allowed fd number
1393  *    poll_method - poll method (0 = automatically choose the best available one)
1394  */
1395 int init_io_wait(io_wait_h* h, int max_fd, enum poll_types poll_method);
1396
1397 /* destroys everything init_io_wait allocated */
1398 void destroy_io_wait(io_wait_h* h);
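

/* Usage sketch (illustrative only, not part of this header's API): the
 * typical lifecycle of an io_wait_h instance.  It assumes that init_io_wait()
 * and io_wait_loop_poll() return a negative value on failure, that POLL_POLL
 * is one of the enum poll_types values from poll_types.h and that the fds to
 * watch are registered with io_watch_add() (defined earlier in this file)
 * before entering the loop; the name example_io_run() is made up for this
 * example. */
inline static int example_io_run(void)
{
        io_wait_h io;

        if (init_io_wait(&io, 1024 /* max. fd */, POLL_POLL)<0)
                return -1;
        /* ... register the fds to watch with io_watch_add() and make sure
         *     handle_io() is implemented by the includer ... */
        for(;;){
                /* wait up to 5 s per call; handle_io() is repeated while it
                 * returns >0 */
                if (io_wait_loop_poll(&io, 5, 1)<0)
                        break;
        }
        destroy_io_wait(&io);
        return 0;
}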
1399
1400
1401 #endif