- automatically "listen" for POLLRDHUP or EPOLLRDHUP if supported (linux
[sip-router] / io_wait.h
1 /* 
2  * $Id$
3  * 
4  * Copyright (C) 2005 iptelorg GmbH
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
/*
 * tcp io wait common stuff used by tcp_main.c & tcp_read.c
 * All the functions are inline for speed reasons and because they are
 * used only from 2 places.
 * You also have to define:
 *     int handle_io(struct fd_map* fm, short events, int idx) (see below)
 *     (this could be trivially replaced by a callback pointer entry attached
 *      to the io_wait handler if more flexibility rather than performance
 *      is needed)
 *     fd_type - define it to some enum of your choice and also define
 *               FD_TYPE_DEFINED (if you don't, fd_type will be defined
 *               as int). 0 has a special "not set / not initialized" meaning
 *               (a lot of sanity checks and the sigio_rt code are based on
 *                this assumption)
 *     local_malloc (defaults to pkg_malloc)
 *     local_free   (defaults to pkg_free)
 *
 */
36 /* 
37  * History:
38  * --------
39  *  2005-06-13  created by andrei
40  *  2005-06-26  added kqueue (andrei)
41  *  2005-07-01  added /dev/poll (andrei)
42  *  2006-05-30  sigio 64 bit workarround enabled for kernels < 2.6.5 (andrei)
43  *  2007-11-22  when handle_io() is called in a loop check & stop if the fd was
44  *               removed inside handle_io() (andrei)
45  *  2007-11-29  support for write (POLLOUT); added io_watch_chg() (andrei)
46  *  2008-02-04  POLLRDHUP & EPOLLRDHUP support (automatically enabled if POLLIN
47  *               is set) (andrei)
48  */
49
50
51
52 #ifndef _io_wait_h
53 #define _io_wait_h
54
/* Feature-test macros are only honoured if they are defined before the
 * first system header is included, so they have to precede every #include
 * below. They are guarded because the build flags or the including file
 * may already have defined them. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE  /* for POLLRDHUP on linux */
#endif

#include <errno.h>
#include <string.h>
#ifdef HAVE_SIGIO_RT
#ifndef __USE_GNU
#define __USE_GNU /* or else F_SETSIG won't be included */
#endif
#include <sys/types.h> /* recv */
#include <sys/socket.h> /* recv */
#include <signal.h> /* sigprocmask, sigwait a.s.o */
#endif

#include <sys/poll.h>
#include <fcntl.h>
67
68 #ifdef HAVE_EPOLL
69 #include <sys/epoll.h>
70 #endif
71 #ifdef HAVE_KQUEUE
72 #include <sys/types.h> /* needed on freebsd */
73 #include <sys/event.h>
74 #include <sys/time.h>
75 #endif
76 #ifdef HAVE_DEVPOLL
77 #include <sys/devpoll.h>
78 #endif
79 #ifdef HAVE_SELECT
80 /* needed on openbsd for select*/
81 #include <sys/time.h> 
82 #include <sys/types.h> 
83 #include <unistd.h>
84 /* needed according to POSIX for select*/
85 #include <sys/select.h>
86 #endif
87
88 #include "dprint.h"
89
90 #include "poll_types.h" /* poll_types*/
91 #ifdef HAVE_SIGIO_RT
92 #include "pt.h" /* mypid() */
93 #endif
94
95 #include "compiler_opt.h"
96
97
98 #ifdef HAVE_EPOLL
99 /* fix defines for EPOLL */
100 #if defined POLLRDHUP && ! defined EPOLLRDHUP
101 #define EPOLLRDHUP POLLRDHUP  /* should work on all linuxes */
102 #endif /* POLLRDHUP && EPOLLRDHUP */
103 #endif /* HAVE_EPOLL */
104
105
/* os version number (defined elsewhere); used to decide which os specific
 * bug workarounds have to be enabled */
extern int _os_ver;
107
108
109 #if 0
110 enum fd_types; /* this should be defined from the including file,
111                                   see tcp_main.c for an example, 
112                                   0 has a special meaning: not used/empty*/
113 #endif
114
/* fallback used when the including file did not provide its own fd_type
 * (and did not define FD_TYPE_DEFINED): a plain int */
#if !defined(FD_TYPE_DEFINED)
#define FD_TYPE_DEFINED
typedef int fd_type;
#endif
119
120 /* maps a fd to some other structure; used in almost all cases
121  * except epoll and maybe kqueue or /dev/poll */
122 struct fd_map{
123         int fd;               /* fd no */
124         fd_type type;         /* "data" type */
125         void* data;           /* pointer to the corresponding structure */
126         short events;         /* events we are interested int */
127 };
128
129
#ifdef HAVE_KQUEUE
/* default size of the kqueue changes array; the build may override it by
 * defining KQ_CHANGES_ARRAY_SIZE before this header is included */
#ifndef KQ_CHANGES_ARRAY_SIZE
#define KQ_CHANGES_ARRAY_SIZE 256
#endif

/* cast applied to the udata argument of EV_SET() in kq_ev_change().
 * It is defined independently of KQ_CHANGES_ARRAY_SIZE, so that overriding
 * the array size cannot leave the cast macro undefined.
 * NOTE(review): netbsd builds cast to intptr_t, presumably because udata
 * is an integer type there; this needs <stdint.h> -- confirm it is pulled
 * in by the system headers on that platform. */
#ifndef KEV_UDATA_CAST
#ifdef __OS_netbsd
#define KEV_UDATA_CAST (intptr_t)
#else
#define KEV_UDATA_CAST
#endif
#endif

#endif /* HAVE_KQUEUE */
142
143
144 /* handler structure */
145 struct io_wait_handler{
146         enum poll_types poll_method;
147         int flags;
148         struct fd_map* fd_hash;
149         int fd_no; /*  current index used in fd_array and the passed size for 
150                                    ep_array & kq_array*/
151         int max_fd_no; /* maximum fd no, is also the size of fd_array,
152                                                        fd_hash  and ep_array*/
153         /* common stuff for POLL, SIGIO_RT and SELECT
154          * since poll support is always compiled => this will always be compiled */
155         struct pollfd* fd_array; /* used also by devpoll as devpoll array */
156         int crt_fd_array_idx; /*  crt idx for which handle_io is called
157                                                          (updated also by del -> internal optimization) */
158         /* end of common stuff */
159 #ifdef HAVE_EPOLL
160         int epfd; /* epoll ctrl fd */
161         struct epoll_event* ep_array;
162 #endif
163 #ifdef HAVE_SIGIO_RT
164         sigset_t sset; /* signal mask for sigio & sigrtmin */
165         int signo;     /* real time signal used */
166 #endif
167 #ifdef HAVE_KQUEUE
168         int kq_fd;
169         struct kevent* kq_array;   /* used for the eventlist*/
170         struct kevent* kq_changes; /* used for the changelist */
171         size_t kq_nchanges;
172         size_t kq_changes_size; /* size of the changes array */
173 #endif
174 #ifdef HAVE_DEVPOLL
175         int dpoll_fd;
176 #endif
177 #ifdef HAVE_SELECT
178         fd_set master_rset; /* read set */
179         fd_set master_wset; /* write set */
180         int max_fd_select; /* maximum select used fd */
181 #endif
182 };
183
184 typedef struct io_wait_handler io_wait_h;
185
186
/* returns a pointer to the fd_map slot of fd (the fd itself is the index
 * in the handler's fd_hash) */
#define get_fd_map(h, fd)		(&((h)->fd_hash[(fd)]))
/* marks a fd_map slot as unused (type 0, fd -1); the pointer must be one
 * returned by get_fd_map or hash_fd_map */
#define unhash_fd_map(pfm)	\
	do{ \
		(pfm)->fd=-1; \
		(pfm)->type=0 /*F_NONE */; \
	}while(0)
196
197 /* add a fd_map structure to the fd hash */
198 static inline struct fd_map* hash_fd_map(       io_wait_h* h,
199                                                                                         int fd,
200                                                                                         short events,
201                                                                                         fd_type type,
202                                                                                         void* data)
203 {
204         h->fd_hash[fd].fd=fd;
205         h->fd_hash[fd].events=events;
206         h->fd_hash[fd].type=type;
207         h->fd_hash[fd].data=data;
208         return &h->fd_hash[fd];
209 }
210
211
212
/* generic io event handler; it has to be provided by the including file
 * (calling it directly is faster than registering a callback pointer).
 * If HANDLE_IO_INLINE is defined it is declared as a static inline
 * function, otherwise as a normal external one.
 *
 * params:  fm     - pointer to a fd hash entry
 *          events - combinations of POLLIN, POLLOUT, POLLERR & POLLHUP
 *          idx    - index in the fd_array (or -1 if not known)
 * return: -1 on error
 *          0 on EAGAIN or when it is known in some other way that no more
 *            io events are queued on the fd (the receive buffer is empty).
 *            Useful for detecting that there are no more io events queued
 *            for sigio_rt, epoll_et, kqueue.
 *         >0 on successful read from the fd (when there might be more io
 *            queued -- the receive buffer might still be non-empty)
 */
#ifndef HANDLE_IO_INLINE
int handle_io(struct fd_map* fm, short events, int idx);
#else
static inline int handle_io(struct fd_map* fm, short events, int idx);
#endif
232
233
234
#ifdef HAVE_KQUEUE
/*
 * kqueue only: queue one change in the handler's kevent changes array.
 * If the array is already full, the pending changes are first submitted
 * to the kernel with a zero timeout (retrying on EINTR) and the array is
 * reset.
 *
 * TODO: check if the event already exists in the change list or if it's
 *       complementary to an event in the list (e.g. EVFILT_WRITE, EV_DELETE
 *       and EVFILT_WRITE, EV_ADD for the same fd).
 * returns: -1 on error (the flush failed, the new change is not queued),
 *           0 on success
 */
static inline int kq_ev_change(io_wait_h* h, int fd, int filter, int flag, 
								void* data)
{
	struct timespec no_wait;
	int ret;

	if (h->kq_nchanges>=h->kq_changes_size){
		/* no room left => flush the queued changes */
		LOG(L_WARN, "WARNING: kq_ev_change: kqueue changes array full"
					" trying to flush...\n");
		no_wait.tv_sec=0;
		no_wait.tv_nsec=0;
		do{
			ret=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges, 0, 0,
						&no_wait);
		}while((ret==-1) && (errno==EINTR));
		if (ret==-1){
			LOG(L_ERR, "ERROR: io_watch_add: kevent flush changes "
						" failed: %s [%d]\n", strerror(errno), errno);
			return -1;
		}
		/* everything was submitted, start again from the beginning */
		h->kq_nchanges=0;
	}
	EV_SET(&h->kq_changes[h->kq_nchanges], fd, filter, flag, 0, 0,
			KEV_UDATA_CAST data);
	h->kq_nchanges++;
	return 0;
}
#endif
273
274
275
276 /* generic io_watch_add function
277  * Params:
278  *     h      - pointer to initialized io_wait handle
279  *     fd     - fd to watch
280  *     events - bitmap with the fd events for which the fd should be watched
281  *              (combination of POLLIN and POLLOUT)
282  *     type   - fd type (non 0 value, returned in the call to handle_io)
283  *     data   - pointer/private data returned in the handle_io call
284  * returns 0 on success, -1 on error
285  *
286  * WARNING: handle_io() can be called immediately (from io_watch_add()) so
287  *  make sure that any dependent init. (e.g. data stuff) is made before
288  *  calling io_watch_add
289  *
290  * this version should be faster than pointers to poll_method specific
291  * functions (it avoids functions calls, the overhead being only an extra
292  *  switch())*/
293 inline static int io_watch_add( io_wait_h* h,
294                                                                 int fd,
295                                                                 short events,
296                                                                 fd_type type,
297                                                                 void* data)
298 {
299
300         /* helper macros */
301 #define fd_array_setup(ev) \
302         do{ \
303                 h->fd_array[h->fd_no].fd=fd; \
304                 h->fd_array[h->fd_no].events=(ev); /* useless for select */ \
305                 h->fd_array[h->fd_no].revents=0;     /* useless for select */ \
306         }while(0)
307         
308 #define set_fd_flags(f) \
309         do{ \
310                         flags=fcntl(fd, F_GETFL); \
311                         if (flags==-1){ \
312                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: GETFL failed:" \
313                                                 " %s [%d]\n", strerror(errno), errno); \
314                                 goto error; \
315                         } \
316                         if (fcntl(fd, F_SETFL, flags|(f))==-1){ \
317                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETFL" \
318                                                         " failed: %s [%d]\n", strerror(errno), errno); \
319                                 goto error; \
320                         } \
321         }while(0)
322         
323         
324         struct fd_map* e;
325         int flags;
326 #ifdef HAVE_EPOLL
327         struct epoll_event ep_event;
328 #endif
329 #ifdef HAVE_DEVPOLL
330         struct pollfd pfd;
331 #endif
332 #if defined(HAVE_SIGIO_RT)
333         int n;
334         int idx;
335         int check_io;
336         struct pollfd pf;
337         
338         check_io=0; /* set to 1 if we need to check for pre-existing queued
339                                    io/data on the fd */
340         idx=-1;
341 #endif
342         e=0;
343         /* sanity checks */
344         if (unlikely(fd==-1)){
345                 LOG(L_CRIT, "BUG: io_watch_add: fd is -1!\n");
346                 goto error;
347         }
348         if (unlikely((events&(POLLIN|POLLOUT))==0)){
349                 LOG(L_CRIT, "BUG: io_watch_add: invalid events: 0x%0x\n", events);
350                 goto error;
351         }
352         /* check if not too big */
353         if (unlikely(h->fd_no>=h->max_fd_no)){
354                 LOG(L_CRIT, "ERROR: io_watch_add: maximum fd number exceeded:"
355                                 " %d/%d\n", h->fd_no, h->max_fd_no);
356                 goto error;
357         }
358         DBG("DBG: io_watch_add(%p, %d, %d, %p), fd_no=%d\n",
359                         h, fd, type, data, h->fd_no);
360         /*  hash sanity check */
361         e=get_fd_map(h, fd);
362         if (unlikely(e && (e->type!=0 /*F_NONE*/))){
363                 LOG(L_ERR, "ERROR: io_watch_add: trying to overwrite entry %d"
364                                 " watched for %x in the hash(%d, %d, %p) with (%d, %d, %p)\n",
365                                 fd, events, e->fd, e->type, e->data, fd, type, data);
366                 e=0;
367                 goto error;
368         }
369         
370         if (unlikely((e=hash_fd_map(h, fd, events, type, data))==0)){
371                 LOG(L_ERR, "ERROR: io_watch_add: failed to hash the fd %d\n", fd);
372                 goto error;
373         }
374         switch(h->poll_method){ /* faster then pointer to functions */
375                 case POLL_POLL:
376 #ifdef POLLRDHUP
377                         /* listen to POLLRDHUP by default (if POLLIN) */
378                         events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
379 #endif /* POLLRDHUP */
380                         fd_array_setup(events);
381                         set_fd_flags(O_NONBLOCK);
382                         break;
383 #ifdef HAVE_SELECT
384                 case POLL_SELECT:
385                         fd_array_setup(events);
386                         if (likely(events & POLLIN))
387                                 FD_SET(fd, &h->master_rset);
388                         if (unlikely(events & POLLOUT))
389                                 FD_SET(fd, &h->master_wset);
390                         if (h->max_fd_select<fd) h->max_fd_select=fd;
391                         break;
392 #endif
393 #ifdef HAVE_SIGIO_RT
394                 case POLL_SIGIO_RT:
395                         fd_array_setup(events);
396                         /* re-set O_ASYNC might be needed, if not done from 
397                          * io_watch_del (or if somebody wants to add a fd which has
398                          * already O_ASYNC/F_SETSIG set on a duplicate)
399                          */
400                         /* set async & signal */
401                         if (fcntl(fd, F_SETOWN, my_pid())==-1){
402                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETOWN"
403                                 " failed: %s [%d]\n", strerror(errno), errno);
404                                 goto error;
405                         }
406                         if (fcntl(fd, F_SETSIG, h->signo)==-1){
407                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETSIG"
408                                         " failed: %s [%d]\n", strerror(errno), errno);
409                                 goto error;
410                         }
411                         /* set both non-blocking and async */
412                         set_fd_flags(O_ASYNC| O_NONBLOCK);
413 #ifdef EXTRA_DEBUG
414                         DBG("io_watch_add: sigio_rt on f %d, signal %d to pid %d\n",
415                                         fd,  h->signo, my_pid());
416 #endif
417                         /* empty socket receive buffer, if buffer is already full
418                          * no more space to put packets
419                          * => no more signals are ever generated
420                          * also when moving fds, the freshly moved fd might have
421                          *  already some bytes queued, we want to get them now
422                          *  and not later -- andrei */
423                         idx=h->fd_no;
424                         check_io=1;
425                         break;
426 #endif
427 #ifdef HAVE_EPOLL
428                 case POLL_EPOLL_LT:
429                         ep_event.events=
430 #ifdef POLLRDHUP
431                                                 /* listen for EPOLLRDHUP too */
432                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
433 #else /* POLLRDHUP */
434                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
435 #endif /* POLLRDHUP */
436                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
437                         ep_event.data.ptr=e;
438 again1:
439                         n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
440                         if (unlikely(n==-1)){
441                                 if (errno==EAGAIN) goto again1;
442                                 LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
443                                         strerror(errno), errno);
444                                 goto error;
445                         }
446                         break;
447                 case POLL_EPOLL_ET:
448                         set_fd_flags(O_NONBLOCK);
449                         ep_event.events=
450 #ifdef POLLRDHUP
451                                                 /* listen for EPOLLRDHUP too */
452                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
453 #else /* POLLRDHUP */
454                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
455 #endif /* POLLRDHUP */
456                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
457                                                 EPOLLET;
458                         ep_event.data.ptr=e;
459 again2:
460                         n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
461                         if (unlikely(n==-1)){
462                                 if (errno==EAGAIN) goto again2;
463                                 LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
464                                         strerror(errno), errno);
465                                 goto error;
466                         }
467                         idx=-1;
468                         break;
469 #endif
470 #ifdef HAVE_KQUEUE
471                 case POLL_KQUEUE:
472                         if (likely( events & POLLIN)){
473                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e)==-1))
474                                 goto error;
475                         }
476                         if (unlikely( events & POLLOUT)){
477                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
478                                 {
479                                         if (likely(events & POLLIN)){
480                                                 kq_ev_change(h, fd, EVFILT_READ, EV_DELETE, 0);
481                                         }
482                                 }
483                                 goto error;
484                         }
485                         break;
486 #endif
487 #ifdef HAVE_DEVPOLL
488                 case POLL_DEVPOLL:
489                         pfd.fd=fd;
490                         pfd.events=events;
491                         pfd.revents=0;
492 again_devpoll:
493                         if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
494                                 if (errno==EAGAIN) goto again_devpoll;
495                                 LOG(L_ERR, "ERROR: io_watch_add: /dev/poll write failed:"
496                                                         "%s [%d]\n", strerror(errno), errno);
497                                 goto error;
498                         }
499                         break;
500 #endif
501                         
502                 default:
503                         LOG(L_CRIT, "BUG: io_watch_add: no support for poll method "
504                                         " %s (%d)\n", poll_method_str[h->poll_method],
505                                         h->poll_method);
506                         goto error;
507         }
508         
509         h->fd_no++; /* "activate" changes, for epoll/kqueue/devpoll it
510                                    has only informative value */
511 #if defined(HAVE_SIGIO_RT)
512         if (check_io){
513                 /* handle possible pre-existing events */
514                 pf.fd=fd;
515                 pf.events=events;
516 check_io_again:
517                 n=0;
518                 while(e->type && ((n=poll(&pf, 1, 0))>0) && 
519                                 (handle_io(e, pf.revents, idx)>0) &&
520                                 (pf.revents & (e->events|POLLERR|POLLHUP)));
521                 if (unlikely(e->type && (n==-1))){
522                         if (errno==EINTR) goto check_io_again;
523                         LOG(L_ERR, "ERROR: io_watch_add: check_io poll: %s [%d]\n",
524                                                 strerror(errno), errno);
525                 }
526         }
527 #endif
528         return 0;
529 error:
530         if (e) unhash_fd_map(e);
531         return -1;
532 #undef fd_array_setup
533 #undef set_fd_flags 
534 }
535
536
537
538 #define IO_FD_CLOSING 16
539 /* parameters:    h - handler 
540  *               fd - file descriptor
541  *            index - index in the fd_array if known, -1 if not
542  *                    (if index==-1 fd_array will be searched for the
543  *                     corresponding fd* entry -- slower but unavoidable in 
544  *                     some cases). index is not used (no fd_array) for epoll,
545  *                     /dev/poll and kqueue
546  *            flags - optimization flags, e.g. IO_FD_CLOSING, the fd was 
547  *                    or will shortly be closed, in some cases we can avoid
548  *                    extra remove operations (e.g.: epoll, kqueue, sigio)
549  * returns 0 if ok, -1 on error */
550 inline static int io_watch_del(io_wait_h* h, int fd, int idx, int flags)
551 {
552         
553 #define fix_fd_array \
554         do{\
555                         if (unlikely(idx==-1)){ \
556                                 /* fix idx if -1 and needed */ \
557                                 for (idx=0; (idx<h->fd_no) && \
558                                                         (h->fd_array[idx].fd!=fd); idx++); \
559                         } \
560                         if (likely(idx<h->fd_no)){ \
561                                 memmove(&h->fd_array[idx], &h->fd_array[idx+1], \
562                                         (h->fd_no-(idx+1))*sizeof(*(h->fd_array))); \
563                                 if ((idx<=h->crt_fd_array_idx) && (h->crt_fd_array_idx>=0)) \
564                                         h->crt_fd_array_idx--; \
565                         } \
566         }while(0)
567         
568         struct fd_map* e;
569         int events;
570 #ifdef HAVE_EPOLL
571         int n;
572         struct epoll_event ep_event;
573 #endif
574 #ifdef HAVE_DEVPOLL
575         struct pollfd pfd;
576 #endif
577 #ifdef HAVE_SIGIO_RT
578         int fd_flags;
579 #endif
580         
581         if (unlikely((fd<0) || (fd>=h->max_fd_no))){
582                 LOG(L_CRIT, "BUG: io_watch_del: invalid fd %d, not in [0, %d) \n",
583                                                 fd, h->fd_no);
584                 goto error;
585         }
586         DBG("DBG: io_watch_del (%p, %d, %d, 0x%x) fd_no=%d called\n",
587                         h, fd, idx, flags, h->fd_no);
588         e=get_fd_map(h, fd);
589         /* more sanity checks */
590         if (unlikely(e==0)){
591                 LOG(L_CRIT, "BUG: io_watch_del: no corresponding hash entry for %d\n",
592                                         fd);
593                 goto error;
594         }
595         if (unlikely(e->type==0 /*F_NONE*/)){
596                 LOG(L_ERR, "ERROR: io_watch_del: trying to delete already erased"
597                                 " entry %d in the hash(%d, %d, %p) flags %x)\n",
598                                 fd, e->fd, e->type, e->data, flags);
599                 goto error;
600         }
601         events=e->events;
602         unhash_fd_map(e);
603         
604         switch(h->poll_method){
605                 case POLL_POLL:
606                         fix_fd_array;
607                         break;
608 #ifdef HAVE_SELECT
609                 case POLL_SELECT:
610                         if (likely(events & POLLIN))
611                                 FD_CLR(fd, &h->master_rset);
612                         if (unlikely(events & POLLOUT))
613                                 FD_CLR(fd, &h->master_wset);
614                         if (unlikely(h->max_fd_select && (h->max_fd_select==fd)))
615                                 /* we don't know the prev. max, so we just decrement it */
616                                 h->max_fd_select--; 
617                         fix_fd_array;
618                         break;
619 #endif
620 #ifdef HAVE_SIGIO_RT
621                 case POLL_SIGIO_RT:
622                         fix_fd_array;
623                         /* the O_ASYNC flag must be reset all the time, the fd
624                          *  can be changed only if  O_ASYNC is reset (if not and
625                          *  the fd is a duplicate, you will get signals from the dup. fd
626                          *  and not from the original, even if the dup. fd was closed
627                          *  and the signals re-set on the original) -- andrei
628                          */
629                         /*if (!(flags & IO_FD_CLOSING)){*/
630                                 /* reset ASYNC */
631                                 fd_flags=fcntl(fd, F_GETFL); 
632                                 if (unlikely(fd_flags==-1)){ 
633                                         LOG(L_ERR, "ERROR: io_watch_del: fnctl: GETFL failed:" 
634                                                         " %s [%d]\n", strerror(errno), errno); 
635                                         goto error; 
636                                 } 
637                                 if (unlikely(fcntl(fd, F_SETFL, fd_flags&(~O_ASYNC))==-1)){ 
638                                         LOG(L_ERR, "ERROR: io_watch_del: fnctl: SETFL" 
639                                                                 " failed: %s [%d]\n", strerror(errno), errno); 
640                                         goto error; 
641                                 } 
642                         break;
643 #endif
644 #ifdef HAVE_EPOLL
645                 case POLL_EPOLL_LT:
646                 case POLL_EPOLL_ET:
647                         /* epoll doesn't seem to automatically remove sockets,
648                          * if the socket is a duplicate/moved and the original
649                          * is still open. The fd is removed from the epoll set
650                          * only when the original (and all the  copies?) is/are 
651                          * closed. This is probably a bug in epoll. --andrei */
652 #ifdef EPOLL_NO_CLOSE_BUG
653                         if (!(flags & IO_FD_CLOSING)){
654 #endif
655 again_epoll:
656                                 n=epoll_ctl(h->epfd, EPOLL_CTL_DEL, fd, &ep_event);
657                                 if (unlikely(n==-1)){
658                                         if (errno==EAGAIN) goto again_epoll;
659                                         LOG(L_ERR, "ERROR: io_watch_del: removing fd from epoll "
660                                                         "list failed: %s [%d]\n", strerror(errno), errno);
661                                         goto error;
662                                 }
663 #ifdef EPOLL_NO_CLOSE_BUG
664                         }
665 #endif
666                         break;
667 #endif
668 #ifdef HAVE_KQUEUE
669                 case POLL_KQUEUE:
670                         if (!(flags & IO_FD_CLOSING)){
671                                 if (likely(events & POLLIN)){
672                                         if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
673                                                                                                         EV_DELETE, 0) ==-1)){
674                                                 /* try to delete the write filter anyway */
675                                                 if (events & POLLOUT){
676                                                         kq_ev_change(h, fd, EVFILT_WRITE, EV_DELETE, 0);
677                                                 }
678                                                 goto error;
679                                         }
680                                 }
681                                 if (unlikely(events & POLLOUT)){
682                                         if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
683                                                                                                         EV_DELETE, 0) ==-1))
684                                                 goto error;
685                                 }
686                         }
687                         break;
688 #endif
689 #ifdef HAVE_DEVPOLL
690                 case POLL_DEVPOLL:
691                                 /* for /dev/poll the closed fds _must_ be removed
692                                    (they are not removed automatically on close()) */
693                                 pfd.fd=fd;
694                                 pfd.events=POLLREMOVE;
695                                 pfd.revents=0;
696 again_devpoll:
697                                 if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
698                                         if (errno==EINTR) goto again_devpoll;
699                                         LOG(L_ERR, "ERROR: io_watch_del: removing fd from "
700                                                                 "/dev/poll failed: %s [%d]\n", 
701                                                                 strerror(errno), errno);
702                                         goto error;
703                                 }
704                                 break;
705 #endif
706                 default:
707                         LOG(L_CRIT, "BUG: io_watch_del: no support for poll method "
708                                         " %s (%d)\n", poll_method_str[h->poll_method], 
709                                         h->poll_method);
710                         goto error;
711         }
712         h->fd_no--;
713         return 0;
714 error:
715         return -1;
716 #undef fix_fd_array
717 }
718
719
720
721 /* parameters:    h - handler 
722  *               fd - file descriptor
723  *           events - new events to watch for
724  *              idx - index in the fd_array if known, -1 if not
725  *                    (if index==-1 fd_array will be searched for the
726  *                     corresponding fd* entry -- slower but unavoidable in 
727  *                     some cases). index is not used (no fd_array) for epoll,
728  *                     /dev/poll and kqueue
729  * returns 0 if ok, -1 on error */
730 inline static int io_watch_chg(io_wait_h* h, int fd, short events, int idx )
731 {
732         
733 #define fd_array_chg(ev) \
734         do{\
735                         if (unlikely(idx==-1)){ \
736                                 /* fix idx if -1 and needed */ \
737                                 for (idx=0; (idx<h->fd_no) && \
738                                                         (h->fd_array[idx].fd!=fd); idx++); \
739                         } \
740                         if (likely(idx<h->fd_no)){ \
741                                 h->fd_array[idx].events=(ev); \
742                         } \
743         }while(0)
744         
745         struct fd_map* e;
746         int add_events;
747         int del_events;
748 #ifdef HAVE_DEVPOLL
749         struct pollfd pfd;
750 #endif
751 #ifdef HAVE_EPOLL
752         int n;
753         struct epoll_event ep_event;
754 #endif
755         
756         if (unlikely((fd<0) || (fd>=h->max_fd_no))){
757                 LOG(L_CRIT, "BUG: io_watch_chg: invalid fd %d, not in [0, %d) \n",
758                                                 fd, h->fd_no);
759                 goto error;
760         }
761         if (unlikely((events&(POLLIN|POLLOUT))==0)){
762                 LOG(L_CRIT, "BUG: io_watch_chg: invalid events: 0x%0x\n", events);
763                 goto error;
764         }
765         DBG("DBG: io_watch_chg (%p, %d, 0x%x, 0x%x) fd_no=%d called\n",
766                         h, fd, events, idx, h->fd_no);
767         e=get_fd_map(h, fd);
768         /* more sanity checks */
769         if (unlikely(e==0)){
770                 LOG(L_CRIT, "BUG: io_watch_chg: no corresponding hash entry for %d\n",
771                                         fd);
772                 goto error;
773         }
774         if (unlikely(e->type==0 /*F_NONE*/)){
775                 LOG(L_ERR, "ERROR: io_watch_chg: trying to change an already erased"
776                                 " entry %d in the hash(%d, %d, %p) )\n",
777                                 fd, e->fd, e->type, e->data);
778                 goto error;
779         }
780         
781         add_events=events & ~e->events;
782         del_events=e->events & ~events;
783         e->events=events;
784         switch(h->poll_method){
785                 case POLL_POLL:
786 #ifdef POLLRDHUP
787                         /* listen to POLLRDHUP by default (if POLLIN) */
788                         events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
789 #endif /* POLLRDHUP */
790                         fd_array_chg(events);
791                         break;
792 #ifdef HAVE_SELECT
793                 case POLL_SELECT:
794                         fd_array_chg(events);
795                         if (unlikely(del_events & POLLIN))
796                                 FD_CLR(fd, &h->master_rset);
797                         else if (unlikely(add_events & POLLIN))
798                                 FD_SET(fd, &h->master_rset);
799                         if (likely(del_events & POLLOUT))
800                                 FD_CLR(fd, &h->master_wset);
801                         else if (likely(add_events & POLLOUT))
802                                 FD_SET(fd, &h->master_wset);
803                         break;
804 #endif
805 #ifdef HAVE_SIGIO_RT
806                 case POLL_SIGIO_RT:
807                         fd_array_chg(events);
808                         /* no need for check_io, since SIGIO_RT listens by default for all
809                          * the events */
810                         break;
811 #endif
812 #ifdef HAVE_EPOLL
813                 case POLL_EPOLL_LT:
814                                 ep_event.events=
815 #ifdef POLLRDHUP
816                                                 /* listen for EPOLLRDHUP too */
817                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
818 #else /* POLLRDHUP */
819                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
820 #endif /* POLLRDHUP */
821                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
822                                 ep_event.data.ptr=e;
823 again_epoll_lt:
824                                 n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
825                                 if (unlikely(n==-1)){
826                                         if (errno==EAGAIN) goto again_epoll_lt;
827                                         LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
828                                                         " failed: %s [%d]\n", strerror(errno), errno);
829                                         goto error;
830                                 }
831                         break;
832                 case POLL_EPOLL_ET:
833                                 ep_event.events=
834 #ifdef POLLRDHUP
835                                                 /* listen for EPOLLRDHUP too */
836                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
837 #else /* POLLRDHUP */
838                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
839 #endif /* POLLRDHUP */
840                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
841                                                 EPOLLET;
842                                 ep_event.data.ptr=e;
843 again_epoll_et:
844                                 n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
845                                 if (unlikely(n==-1)){
846                                         if (errno==EAGAIN) goto again_epoll_et;
847                                         LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
848                                                         " failed: %s [%d]\n", strerror(errno), errno);
849                                         goto error;
850                                 }
851                         break;
852 #endif
853 #ifdef HAVE_KQUEUE
854                 case POLL_KQUEUE:
855                         if (unlikely(del_events & POLLIN)){
856                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
857                                                                                                                 EV_DELETE, 0) ==-1))
858                                                 goto error;
859                         }else if (unlikely(add_events & POLLIN)){
860                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e) ==-1))
861                                         goto error;
862                         }
863                         if (likely(del_events & POLLOUT)){
864                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
865                                                                                                                 EV_DELETE, 0) ==-1))
866                                                 goto error;
867                         }else if (likely(add_events & POLLOUT)){
868                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
869                                         goto error;
870                         }
871                         break;
872 #endif
873 #ifdef HAVE_DEVPOLL
874                 case POLL_DEVPOLL:
875                                 /* for /dev/poll the closed fds _must_ be removed
876                                    (they are not removed automatically on close()) */
877                                 pfd.fd=fd;
878                                 pfd.events=POLLREMOVE;
879                                 pfd.revents=0;
880 again_devpoll1:
881                                 if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
882                                         if (errno==EINTR) goto again_devpoll1;
883                                         LOG(L_ERR, "ERROR: io_watch_chg: removing fd from "
884                                                                 "/dev/poll failed: %s [%d]\n", 
885                                                                 strerror(errno), errno);
886                                         goto error;
887                                 }
888 again_devpoll2:
889                                 pfd.events=events;
890                                 pfd.revents=0;
891                                 if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
892                                         if (errno==EINTR) goto again_devpoll2;
893                                         LOG(L_ERR, "ERROR: io_watch_chg: re-adding fd to "
894                                                                 "/dev/poll failed: %s [%d]\n", 
895                                                                 strerror(errno), errno);
896                                         goto error;
897                                 }
898                                 break;
899 #endif
900                 default:
901                         LOG(L_CRIT, "BUG: io_watch_chg: no support for poll method "
902                                         " %s (%d)\n", poll_method_str[h->poll_method], 
903                                         h->poll_method);
904                         goto error;
905         }
906         return 0;
907 error:
908         return -1;
909 #undef fix_fd_array
910 }
911
912
913
914 /* io_wait_loop_x style function 
915  * wait for io using poll()
916  * params: h      - io_wait handle
917  *         t      - timeout in s
918  *         repeat - if !=0 handle_io will be called until it returns <=0
919  * returns: number of IO events handled on success (can be 0), -1 on error
920  */
921 inline static int io_wait_loop_poll(io_wait_h* h, int t, int repeat)
922 {
923         int n, r;
924         int ret;
925         struct fd_map* fm;
926         
927 again:
928                 ret=n=poll(h->fd_array, h->fd_no, t*1000);
929                 if (n==-1){
930                         if (errno==EINTR) goto again; /* signal, ignore it */
931                         else{
932                                 LOG(L_ERR, "ERROR:io_wait_loop_poll: poll: %s [%d]\n",
933                                                 strerror(errno), errno);
934                                 goto error;
935                         }
936                 }
937                 for (r=0; (r<h->fd_no) && n; r++){
938                         fm=get_fd_map(h, h->fd_array[r].fd);
939                         if (h->fd_array[r].revents & (fm->events|POLLERR|POLLHUP)){
940                                 n--;
941                                 /* sanity checks */
942                                 if (unlikely((h->fd_array[r].fd >= h->max_fd_no)||
943                                                                 (h->fd_array[r].fd < 0))){
944                                         LOG(L_CRIT, "BUG: io_wait_loop_poll: bad fd %d "
945                                                         "(no in the 0 - %d range)\n",
946                                                         h->fd_array[r].fd, h->max_fd_no);
947                                         /* try to continue anyway */
948                                         h->fd_array[r].events=0; /* clear the events */
949                                         continue;
950                                 }
951                                 h->crt_fd_array_idx=r;
952                                 /* repeat handle_io if repeat, fd still watched (not deleted
953                                  *  inside handle_io), handle_io returns that there's still
954                                  *  IO and the fd is still watched for the triggering event */
955                                 while(fm->type && 
956                                                 (handle_io(fm, h->fd_array[r].revents, r) > 0) &&
957                                                 repeat && ((fm->events|POLLERR|POLLHUP) &
958                                                                                                         h->fd_array[r].revents));
959                                 r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd) 
960                                                                                   array shifting */
961                         }
962                 }
963 error:
964         return ret;
965 }
966
967
968
969 #ifdef HAVE_SELECT
970 /* wait for io using select */
971 inline static int io_wait_loop_select(io_wait_h* h, int t, int repeat)
972 {
973         fd_set sel_rset;
974         fd_set sel_wset;
975         int n, ret;
976         struct timeval timeout;
977         int r;
978         struct fd_map* fm;
979         int revents;
980         
981 again:
982                 sel_rset=h->master_rset;
983                 sel_wset=h->master_wset;
984                 timeout.tv_sec=t;
985                 timeout.tv_usec=0;
986                 ret=n=select(h->max_fd_select+1, &sel_rset, &sel_wset, 0, &timeout);
987                 if (n<0){
988                         if (errno==EINTR) goto again; /* just a signal */
989                         LOG(L_ERR, "ERROR: io_wait_loop_select: select: %s [%d]\n",
990                                         strerror(errno), errno);
991                         n=0;
992                         /* continue */
993                 }
994                 /* use poll fd array */
995                 for(r=0; (r<h->fd_no) && n; r++){
996                         revents=0;
997                         if (likely(FD_ISSET(h->fd_array[r].fd, &sel_rset)))
998                                 revents|=POLLIN;
999                         if (unlikely(FD_ISSET(h->fd_array[r].fd, &sel_wset)))
1000                                 revents|=POLLOUT;
1001                         if (unlikely(revents)){
1002                                 h->crt_fd_array_idx=r;
1003                                 fm=get_fd_map(h, h->fd_array[r].fd);
1004                                 while(fm->type && (fm->events & revents) && 
1005                                                 (handle_io(fm, revents, r)>0) && repeat);
1006                                 r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd) 
1007                                                                                   array shifting */
1008                                 n--;
1009                         }
1010                 };
1011         return ret;
1012 }
1013 #endif
1014
1015
1016
1017 #ifdef HAVE_EPOLL
1018 inline static int io_wait_loop_epoll(io_wait_h* h, int t, int repeat)
1019 {
1020         int n, r;
1021         struct fd_map* fm;
1022         int revents;
1023         
1024 again:
1025                 n=epoll_wait(h->epfd, h->ep_array, h->fd_no, t*1000);
1026                 if (unlikely(n==-1)){
1027                         if (errno==EINTR) goto again; /* signal, ignore it */
1028                         else{
1029                                 LOG(L_ERR, "ERROR:io_wait_loop_epoll: "
1030                                                 "epoll_wait(%d, %p, %d, %d): %s [%d]\n", 
1031                                                 h->epfd, h->ep_array, h->fd_no, t*1000,
1032                                                 strerror(errno), errno);
1033                                 goto error;
1034                         }
1035                 }
1036 #if 0
1037                 if (n>1){
1038                         for(r=0; r<n; r++){
1039                                 LOG(L_ERR, "WARNING: ep_array[%d]= %x, %p\n",
1040                                                 r, h->ep_array[r].events, h->ep_array[r].data.ptr);
1041                         }
1042                 }
1043 #endif
1044                 for (r=0; r<n; r++){
1045                         revents= (POLLIN & (!(h->ep_array[r].events & (EPOLLIN|EPOLLPRI))
1046                                                 -1)) |
1047                                          (POLLOUT & (!(h->ep_array[r].events & EPOLLOUT)-1)) |
1048                                          (POLLERR & (!(h->ep_array[r].events & EPOLLERR)-1)) |
1049                                          (POLLHUP & (!(h->ep_array[r].events & EPOLLHUP)-1))
1050 #ifdef POLLRDHUP
1051                                         | (POLLRDHUP & (!(h->ep_array[r].events & EPOLLRDHUP)-1))
1052 #endif
1053                                         ;
1054                         if (likely(revents)){
1055                                 fm=(struct fd_map*)h->ep_array[r].data.ptr;
1056                                 while(fm->type && ((fm->events|POLLERR|POLLHUP) & revents) && 
1057                                                 (handle_io(fm, revents, -1)>0) && repeat);
1058                         }else{
1059                                 LOG(L_ERR, "ERROR:io_wait_loop_epoll: unexpected event %x"
1060                                                         " on %d/%d, data=%p\n", h->ep_array[r].events,
1061                                                         r+1, n, h->ep_array[r].data.ptr);
1062                         }
1063                 }
1064 error:
1065         return n;
1066 }
1067 #endif
1068
1069
1070
1071 #ifdef HAVE_KQUEUE
1072 inline static int io_wait_loop_kqueue(io_wait_h* h, int t, int repeat)
1073 {
1074         int n, r;
1075         struct timespec tspec;
1076         struct fd_map* fm;
1077         int revents;
1078         
1079         tspec.tv_sec=t;
1080         tspec.tv_nsec=0;
1081 again:
1082                 n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges,  h->kq_array,
1083                                         h->fd_no, &tspec);
1084                 if (unlikely(n==-1)){
1085                         if (errno==EINTR) goto again; /* signal, ignore it */
1086                         else{
1087                                 LOG(L_ERR, "ERROR: io_wait_loop_kqueue: kevent:"
1088                                                 " %s [%d]\n", strerror(errno), errno);
1089                                 goto error;
1090                         }
1091                 }
1092                 h->kq_nchanges=0; /* reset changes array */
1093                 for (r=0; r<n; r++){
1094 #ifdef EXTRA_DEBUG
1095                         DBG("DBG: kqueue: event %d/%d: fd=%d, udata=%lx, flags=0x%x\n",
1096                                         r, n, h->kq_array[r].ident, (long)h->kq_array[r].udata,
1097                                         h->kq_array[r].flags);
1098 #endif
1099 #if 0
1100                         if (unlikely(h->kq_array[r].flags & EV_ERROR)){
1101                                 /* error in changes: we ignore it, it can be caused by
1102                                    trying to remove an already closed fd: race between
1103                                    adding something to the changes array, close() and
1104                                    applying the changes */
1105                                 LOG(L_INFO, "INFO: io_wait_loop_kqueue: kevent error on "
1106                                                         "fd %d: %s [%ld]\n", h->kq_array[r].ident,
1107                                                         strerror(h->kq_array[r].data),
1108                                                         (long)h->kq_array[r].data);
1109                         }else{ 
1110 #endif
1111                                 fm=(struct fd_map*)h->kq_array[r].udata;
1112                                 if (likely(h->kq_array[r].filter==EVFILT_READ)){
1113                                         revents=POLLIN | 
1114                                                 (((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP);
1115                                         while(fm->type && (fm->events & revents) && 
1116                                                         (handle_io(fm, revents, -1)>0) && repeat);
1117                                 }else if (h->kq_array[r].filter==EVFILT_WRITE){
1118                                         revents=POLLOUT | 
1119                                                 (((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP);
1120                                         while(fm->type && (fm->events & revents) && 
1121                                                         (handle_io(fm, revents, -1)>0) && repeat);
1122                                 }
1123                         /*} */
1124                 }
1125 error:
1126         return n;
1127 }
1128 #endif
1129
1130
1131
1132 #ifdef HAVE_SIGIO_RT
1133 /* sigio rt version has no repeat (it doesn't make sense)*/
1134 inline static int io_wait_loop_sigio_rt(io_wait_h* h, int t)
1135 {
1136         int n;
1137         int ret;
1138         struct timespec ts;
1139         siginfo_t siginfo;
1140         int sigio_band;
1141         int sigio_fd;
1142         struct fd_map* fm;
1143         int revents;
1144         
1145         
1146         ret=1; /* 1 event per call normally */
1147         ts.tv_sec=t;
1148         ts.tv_nsec=0;
1149         if (unlikely(!sigismember(&h->sset, h->signo) ||
1150                                         !sigismember(&h->sset, SIGIO))) {
1151                 LOG(L_CRIT, "BUG: io_wait_loop_sigio_rt: the signal mask"
1152                                 " is not properly set!\n");
1153                 goto error;
1154         }
1155 again:
1156         n=sigtimedwait(&h->sset, &siginfo, &ts);
1157         if (unlikely(n==-1)){
1158                 if (errno==EINTR) goto again; /* some other signal, ignore it */
1159                 else if (errno==EAGAIN){ /* timeout */
1160                         ret=0;
1161                         goto end;
1162                 }else{
1163                         LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: sigtimed_wait"
1164                                         " %s [%d]\n", strerror(errno), errno);
1165                         goto error;
1166                 }
1167         }
1168         if (likely(n!=SIGIO)){
1169 #ifdef SIGINFO64_WORKARROUND
1170                 /* on linux siginfo.si_band is defined as long in userspace
1171                  * and as int in kernel (< 2.6.5) => on 64 bits things will break!
1172                  * (si_band will include si_fd, and si_fd will contain
1173                  *  garbage).
1174                  *  see /usr/src/linux/include/asm-generic/siginfo.h and
1175                  *      /usr/include/bits/siginfo.h
1176                  *  On newer kernels this is fixed (si_band is long in the kernel too).
1177                  * -- andrei */
1178                 if  ((_os_ver<0x020605) && (sizeof(siginfo.si_band)>sizeof(int))){
1179                         sigio_band=*((int*)(void*)&siginfo.si_band);
1180                         sigio_fd=*(((int*)(void*)&siginfo.si_band)+1);
1181                 }else
1182 #endif
1183                 {
1184                         sigio_band=siginfo.si_band;
1185                         sigio_fd=siginfo.si_fd;
1186                 }
1187                 if (unlikely(siginfo.si_code==SI_SIGIO)){
1188                         /* old style, we don't know the event (linux 2.2.?) */
1189                         LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: old style sigio"
1190                                         " interface\n");
1191                         fm=get_fd_map(h, sigio_fd);
1192                         /* we can have queued signals generated by fds not watched
1193                          * any more, or by fds in transition, to a child => ignore them*/
1194                         if (fm->type)
1195                                 handle_io(fm, POLLIN|POLLOUT, -1);
1196                 }else{
1197                         /* si_code contains the SIGPOLL reason: POLL_IN, POLL_OUT,
1198                          *  POLL_MSG, POLL_ERR, POLL_PRI or POLL_HUP
1199                          * and si_band the translated poll event bitmap:
1200                          *  POLLIN|POLLRDNORM  (=POLL_IN),
1201                          *  POLLOUT|POLLWRNORM|POLLWRBAND (=POLL_OUT),
1202                          *  POLLIN|POLLRDNORM|POLLMSG (=POLL_MSG),
1203                          *  POLLERR (=POLL_ERR),
1204                          *  POLLPRI|POLLRDBAND (=POLL_PRI),
1205                          *  POLLHUP|POLLERR (=POLL_HUP) 
1206                          *  [linux 2.6.22 fs/fcntl.c:447]
1207                          */
1208 #ifdef EXTRA_DEBUG
1209                         DBG("io_wait_loop_sigio_rt: siginfo: signal=%d (%d),"
1210                                         " si_code=%d, si_band=0x%x,"
1211                                         " si_fd=%d\n",
1212                                         siginfo.si_signo, n, siginfo.si_code, 
1213                                         (unsigned)sigio_band,
1214                                         sigio_fd);
1215 #endif
1216                         /* on some errors (e.g. when receving TCP RST), sigio_band will
1217                          * be set to 0x08 (POLLERR) or 0x18 (POLLERR|POLLHUP - on stream
1218                          *  unix socket close) , so better catch all events --andrei */
1219                         if (likely(sigio_band)){
1220                                 fm=get_fd_map(h, sigio_fd);
1221                                 revents=sigio_band;
1222                                 /* fix revents==POLLPRI case */
1223                                 revents |= (!(revents & POLLPRI)-1) & POLLIN;
1224                                 /* we can have queued signals generated by fds not watched
1225                                  * any more, or by fds in transition, to a child 
1226                                  * => ignore them */
1227                                 if (fm->type && ((fm->events|POLLERR|POLLHUP) & revents))
1228                                         handle_io(fm, revents, -1);
1229                                 else
1230                                         DBG("WARNING: io_wait_loop_sigio_rt: ignoring event"
1231                                                         " %x on fd %d, watching for %x, si_code=%x "
1232                                                         "(fm->type=%d, fm->fd=%d, fm->data=%p)\n",
1233                                                         sigio_band, sigio_fd, fm->events, siginfo.si_code,
1234                                                         fm->type, fm->fd, fm->data);
1235                         }else{
1236                                 LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: unexpected event"
1237                                                         " on fd %d: %x\n", sigio_fd, sigio_band);
1238                         }
1239                 }
1240         }else{
1241                 /* signal queue overflow 
1242                  * TODO: increase signal queue size: 2.4x /proc/.., 2.6x -rlimits */
1243                 LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: signal queue overflowed"
1244                                         "- falling back to poll\n");
1245                 /* clear real-time signal queue
1246                  * both SIG_IGN and SIG_DFL are needed , it doesn't work
1247                  * only with SIG_DFL  */
1248                 if (signal(h->signo, SIG_IGN)==SIG_ERR){
1249                         LOG(L_CRIT, "BUG: do_poll: couldn't reset signal to IGN\n");
1250                 }
1251                 
1252                 if (signal(h->signo, SIG_DFL)==SIG_ERR){
1253                         LOG(L_CRIT, "BUG: do_poll: couldn't reset signal to DFL\n");
1254                 }
1255                 /* falling back to normal poll */
1256                 ret=io_wait_loop_poll(h, -1, 1);
1257         }
1258 end:
1259         return ret;
1260 error:
1261         return -1;
1262 }
1263 #endif
1264
1265
1266
1267 #ifdef HAVE_DEVPOLL
1268 inline static int io_wait_loop_devpoll(io_wait_h* h, int t, int repeat)
1269 {
1270         int n, r;
1271         int ret;
1272         struct dvpoll dpoll;
1273         struct fd_map* fm;
1274
1275                 dpoll.dp_timeout=t*1000;
1276                 dpoll.dp_nfds=h->fd_no;
1277                 dpoll.dp_fds=h->fd_array;
1278 again:
1279                 ret=n=ioctl(h->dpoll_fd, DP_POLL, &dpoll);
1280                 if (unlikely(n==-1)){
1281                         if (errno==EINTR) goto again; /* signal, ignore it */
1282                         else{
1283                                 LOG(L_ERR, "ERROR:io_wait_loop_devpoll: ioctl: %s [%d]\n",
1284                                                 strerror(errno), errno);
1285                                 goto error;
1286                         }
1287                 }
1288                 for (r=0; r< n; r++){
1289                         if (h->fd_array[r].revents & (POLLNVAL|POLLERR)){
1290                                 LOG(L_ERR, "ERROR: io_wait_loop_devpoll: pollinval returned"
1291                                                         " for fd %d, revents=%x\n",
1292                                                         h->fd_array[r].fd, h->fd_array[r].revents);
1293                         }
1294                         /* POLLIN|POLLHUP just go through */
1295                         fm=get_fd_map(h, h->fd_array[r].fd);
1296                         while(fm->type && (fm->events & h->fd_array[r].revents) &&
1297                                         (handle_io(fm, h->fd_array[r].revents, r) > 0) && repeat);
1298                 }
1299 error:
1300         return ret;
1301 }
1302 #endif
1303
1304
1305
1306 /* init */
1307
1308
/* initializes the static vars/arrays
 * params:           h - pointer to the io_wait_h that will be initialized
 *              max_fd - maximum allowed fd number
 *         poll_method - poll method (0 for automatic best fit)
 */
1314 int init_io_wait(io_wait_h* h, int max_fd, enum poll_types poll_method);
1315
1316 /* destroys everything init_io_wait allocated */
1317 void destroy_io_wait(io_wait_h* h);
1318
1319
1320 #endif