0557c00d50484fc102d79710dd244bfe4721fded
[sip-router] / io_wait.h
1 /* 
2  * $Id$
3  * 
4  * Copyright (C) 2005 iptelorg GmbH
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
/*
 * tcp io wait common stuff used by tcp_main.c & tcp_read.c
 * All the functions are inline because of speed reasons and because they are
 * used only from 2 places.
 * You also have to define:
 *     int handle_io(struct fd_map* fm, short events, int idx) (see below)
 *     (this could be trivially replaced by a callback pointer entry attached
 *      to the io_wait handler if more flexibility rather than performance
 *      is needed)
 *      fd_type - define to some enum of your choice and define also
 *                FD_TYPE_DEFINED (if you don't do it fd_type will be defined
 *                to int). 0 has a special not set/not init. meaning
 *                (a lot of sanity checks and the sigio_rt code are based on
 *                 this assumption)
 *     local_malloc (defaults to pkg_malloc)
 *     local_free   (defaults to pkg_free)
 *
 */
36 /* 
37  * History:
38  * --------
39  *  2005-06-13  created by andrei
40  *  2005-06-26  added kqueue (andrei)
41  *  2005-07-01  added /dev/poll (andrei)
42  *  2006-05-30  sigio 64 bit workarround enabled for kernels < 2.6.5 (andrei)
43  *  2007-11-22  when handle_io() is called in a loop check & stop if the fd was
44  *               removed inside handle_io() (andrei)
45  *  2007-11-29  support for write (POLLOUT); added io_watch_chg() (andrei)
46  *  2008-02-04  POLLRDHUP & EPOLLRDHUP support (automatically enabled if POLLIN
47  *               is set) (andrei)
48  */
49
50
51
52 #ifndef _io_wait_h
53 #define _io_wait_h
54
55 #include <errno.h>
56 #include <string.h>
57 #ifdef HAVE_SIGIO_RT
58 #define __USE_GNU /* or else F_SETSIG won't be included */
59 #include <sys/types.h> /* recv */
60 #include <sys/socket.h> /* recv */
61 #include <signal.h> /* sigprocmask, sigwait a.s.o */
62 #endif
63
64 #define _GNU_SOURCE  /* for POLLRDHUP on linux */
65 #include <sys/poll.h>
66 #include <fcntl.h>
67
68 #ifdef HAVE_EPOLL
69 #include <sys/epoll.h>
70 #endif
71 #ifdef HAVE_KQUEUE
72 #include <sys/types.h> /* needed on freebsd */
73 #include <sys/event.h>
74 #include <sys/time.h>
75 #endif
76 #ifdef HAVE_DEVPOLL
77 #include <sys/devpoll.h>
78 #endif
79 #ifdef HAVE_SELECT
80 /* needed on openbsd for select*/
81 #include <sys/time.h> 
82 #include <sys/types.h> 
83 #include <unistd.h>
84 /* needed according to POSIX for select*/
85 #include <sys/select.h>
86 #endif
87
88 #include "dprint.h"
89
90 #include "poll_types.h" /* poll_types*/
91 #ifdef HAVE_SIGIO_RT
92 #include "pt.h" /* mypid() */
93 #endif
94
95 #include "compiler_opt.h"
96
97
98 #ifdef HAVE_EPOLL
99 /* fix defines for EPOLL */
100 #if defined POLLRDHUP && ! defined EPOLLRDHUP
101 #define EPOLLRDHUP POLLRDHUP  /* should work on all linuxes */
102 #endif /* POLLRDHUP && EPOLLRDHUP */
103 #endif /* HAVE_EPOLL */
104
105
106 extern int _os_ver; /* os version number, needed to select bugs workarrounds */
107
108
109 #if 0
110 enum fd_types; /* this should be defined from the including file,
111                                   see tcp_main.c for an example, 
112                                   0 has a special meaning: not used/empty*/
113 #endif
114
115 #ifndef FD_TYPE_DEFINED
116 typedef int fd_type;
117 #define FD_TYPE_DEFINED
118 #endif
119
120 /* maps a fd to some other structure; used in almost all cases
121  * except epoll and maybe kqueue or /dev/poll */
122 struct fd_map{
123         int fd;               /* fd no */
124         fd_type type;         /* "data" type */
125         void* data;           /* pointer to the corresponding structure */
126         short events;         /* events we are interested int */
127 };
128
129
#ifdef HAVE_KQUEUE

/* default number of entries in the kevent changes array; can be overridden
 * by defining KQ_CHANGES_ARRAY_SIZE before including this file */
#ifndef KQ_CHANGES_ARRAY_SIZE
#define KQ_CHANGES_ARRAY_SIZE 256
#endif

/* cast applied to the udata argument of EV_SET(): an (intptr_t) cast when
 * __OS_netbsd is defined, nothing otherwise.
 * It is defined independently of KQ_CHANGES_ARRAY_SIZE, so that overriding
 * the array size does not leave KEV_UDATA_CAST undefined. */
#ifndef KEV_UDATA_CAST
#ifdef __OS_netbsd
#define KEV_UDATA_CAST (intptr_t)
#else
#define KEV_UDATA_CAST
#endif
#endif

#endif /* HAVE_KQUEUE */
142
143
144 /* handler structure */
145 struct io_wait_handler{
146         enum poll_types poll_method;
147         int flags;
148         struct fd_map* fd_hash;
149         int fd_no; /*  current index used in fd_array and the passed size for 
150                                    ep_array & kq_array*/
151         int max_fd_no; /* maximum fd no, is also the size of fd_array,
152                                                        fd_hash  and ep_array*/
153         /* common stuff for POLL, SIGIO_RT and SELECT
154          * since poll support is always compiled => this will always be compiled */
155         struct pollfd* fd_array; /* used also by devpoll as devpoll array */
156         int crt_fd_array_idx; /*  crt idx for which handle_io is called
157                                                          (updated also by del -> internal optimization) */
158         /* end of common stuff */
159 #ifdef HAVE_EPOLL
160         int epfd; /* epoll ctrl fd */
161         struct epoll_event* ep_array;
162 #endif
163 #ifdef HAVE_SIGIO_RT
164         sigset_t sset; /* signal mask for sigio & sigrtmin */
165         int signo;     /* real time signal used */
166 #endif
167 #ifdef HAVE_KQUEUE
168         int kq_fd;
169         struct kevent* kq_array;   /* used for the eventlist*/
170         struct kevent* kq_changes; /* used for the changelist */
171         size_t kq_nchanges;
172         size_t kq_changes_size; /* size of the changes array */
173 #endif
174 #ifdef HAVE_DEVPOLL
175         int dpoll_fd;
176 #endif
177 #ifdef HAVE_SELECT
178         fd_set master_rset; /* read set */
179         fd_set master_wset; /* write set */
180         int max_fd_select; /* maximum select used fd */
181 #endif
182 };
183
184 typedef struct io_wait_handler io_wait_h;
185
186
/* returns a pointer to the fd_map entry for fd (fd_hash is indexed
 * directly by the fd value; no range check is performed here) */
#define get_fd_map(h, fd)	((h)->fd_hash + (fd))
/* marks a fd_map entry as free (fd=-1, type=0); pfm must be a pointer
 * obtained from get_fd_map or hash_fd_map */
#define unhash_fd_map(pfm) \
	do{ \
		(pfm)->fd=-1; \
		(pfm)->type=0 /* F_NONE */; \
	}while(0)
196
197 /* add a fd_map structure to the fd hash */
198 static inline struct fd_map* hash_fd_map(       io_wait_h* h,
199                                                                                         int fd,
200                                                                                         short events,
201                                                                                         fd_type type,
202                                                                                         void* data)
203 {
204         h->fd_hash[fd].fd=fd;
205         h->fd_hash[fd].events=events;
206         h->fd_hash[fd].type=type;
207         h->fd_hash[fd].data=data;
208         return &h->fd_hash[fd];
209 }
210
211
212
/* generic io handling routine; it is only declared here and must be defined
 * by the including file (faster than registering a callback pointer).
 * With HANDLE_IO_INLINE defined it is declared static inline, otherwise as
 * a normal external function.
 *
 * params:  fm     - pointer to a fd hash entry
 *          events - combinations of POLLIN, POLLOUT, POLLERR & POLLHUP
 *          idx    - index in the fd_array (or -1 if not known)
 * return: -1 on error
 *          0 on EAGAIN or when it is known in some other way that no more
 *            io events are queued on the fd (the receive buffer is empty).
 *            Useful to detect when there are no more io events queued for
 *            sigio_rt, epoll_et, kqueue.
 *         >0 on successful read from the fd (when there might be more io
 *            queued -- the receive buffer might still be non-empty)
 */
#ifdef HANDLE_IO_INLINE
static inline int handle_io(struct fd_map *fm, short events, int idx);
#else
int handle_io(struct fd_map *fm, short events, int idx);
#endif
232
233
234
#ifdef HAVE_KQUEUE
/*
 * kqueue specific function: register a change
 * (adds a change to the kevent change array, and if full flushes it first)
 *
 * params: h      - io_wait handle (kq_fd, kq_changes, kq_nchanges and
 *                  kq_changes_size are used)
 *         fd     - fd the change refers to
 *         filter - kevent filter (e.g. EVFILT_READ, EVFILT_WRITE)
 *         flag   - kevent flags (e.g. EV_ADD, EV_DELETE)
 *         data   - pointer stored as the kevent udata
 *
 * TODO: check if the event already exists in the change list or if it's
 *       complementary to an event in the list (e.g. EVFILT_WRITE, EV_DELETE
 *       and EVFILT_WRITE, EV_ADD for the same fd).
 * returns: -1 on error (flushing the full changes array failed; the new
 *          change is not queued), 0 on success
 */
static inline int kq_ev_change(io_wait_h* h, int fd, int filter, int flag,
								void* data)
{
	int n;
	struct timespec tspec;

	if (h->kq_nchanges>=h->kq_changes_size){
		/* changes array full ! */
		LOG(L_WARN, "WARNING: kq_ev_change: kqueue changes array full"
					" trying to flush...\n");
		/* zero timeout: only submit the changes, don't wait for events */
		tspec.tv_sec=0;
		tspec.tv_nsec=0;
		do{
			n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges, 0, 0, &tspec);
		}while((n==-1) && (errno==EINTR)); /* retry if interrupted */
		if (n==-1){
			LOG(L_ERR, "ERROR: kq_ev_change: kevent flush changes"
						" failed: %s [%d]\n", strerror(errno), errno);
			return -1;
		}
		h->kq_nchanges=0; /* changes array is empty */
	}
	EV_SET(&h->kq_changes[h->kq_nchanges], fd, filter, flag, 0, 0,
			KEV_UDATA_CAST data);
	h->kq_nchanges++;
	return 0;
}
#endif
273
274
275
276 /* generic io_watch_add function
277  * Params:
278  *     h      - pointer to initialized io_wait handle
279  *     fd     - fd to watch
280  *     events - bitmap with the fd events for which the fd should be watched
281  *              (combination of POLLIN and POLLOUT)
282  *     type   - fd type (non 0 value, returned in the call to handle_io)
283  *     data   - pointer/private data returned in the handle_io call
284  * returns 0 on success, -1 on error
285  *
286  * WARNING: handle_io() can be called immediately (from io_watch_add()) so
287  *  make sure that any dependent init. (e.g. data stuff) is made before
288  *  calling io_watch_add
289  *
290  * this version should be faster than pointers to poll_method specific
291  * functions (it avoids functions calls, the overhead being only an extra
292  *  switch())*/
293 inline static int io_watch_add( io_wait_h* h,
294                                                                 int fd,
295                                                                 short events,
296                                                                 fd_type type,
297                                                                 void* data)
298 {
299
300         /* helper macros */
301 #define fd_array_setup(ev) \
302         do{ \
303                 h->fd_array[h->fd_no].fd=fd; \
304                 h->fd_array[h->fd_no].events=(ev); /* useless for select */ \
305                 h->fd_array[h->fd_no].revents=0;     /* useless for select */ \
306         }while(0)
307         
308 #define set_fd_flags(f) \
309         do{ \
310                         flags=fcntl(fd, F_GETFL); \
311                         if (flags==-1){ \
312                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: GETFL failed:" \
313                                                 " %s [%d]\n", strerror(errno), errno); \
314                                 goto error; \
315                         } \
316                         if (fcntl(fd, F_SETFL, flags|(f))==-1){ \
317                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETFL" \
318                                                         " failed: %s [%d]\n", strerror(errno), errno); \
319                                 goto error; \
320                         } \
321         }while(0)
322         
323         
324         struct fd_map* e;
325         int flags;
326 #ifdef HAVE_EPOLL
327         struct epoll_event ep_event;
328 #endif
329 #ifdef HAVE_DEVPOLL
330         struct pollfd pfd;
331 #endif
332 #if defined(HAVE_SIGIO_RT) || defined (HAVE_EPOLL)
333         int n;
334 #endif
335 #if defined(HAVE_SIGIO_RT)
336         int idx;
337         int check_io;
338         struct pollfd pf;
339         
340         check_io=0; /* set to 1 if we need to check for pre-existing queued
341                                    io/data on the fd */
342         idx=-1;
343 #endif
344         e=0;
345         /* sanity checks */
346         if (unlikely(fd==-1)){
347                 LOG(L_CRIT, "BUG: io_watch_add: fd is -1!\n");
348                 goto error;
349         }
350         if (unlikely((events&(POLLIN|POLLOUT))==0)){
351                 LOG(L_CRIT, "BUG: io_watch_add: invalid events: 0x%0x\n", events);
352                 goto error;
353         }
354         /* check if not too big */
355         if (unlikely(h->fd_no>=h->max_fd_no)){
356                 LOG(L_CRIT, "ERROR: io_watch_add: maximum fd number exceeded:"
357                                 " %d/%d\n", h->fd_no, h->max_fd_no);
358                 goto error;
359         }
360         DBG("DBG: io_watch_add(%p, %d, %d, %p), fd_no=%d\n",
361                         h, fd, type, data, h->fd_no);
362         /*  hash sanity check */
363         e=get_fd_map(h, fd);
364         if (unlikely(e && (e->type!=0 /*F_NONE*/))){
365                 LOG(L_ERR, "ERROR: io_watch_add: trying to overwrite entry %d"
366                                 " watched for %x in the hash(%d, %d, %p) with (%d, %d, %p)\n",
367                                 fd, events, e->fd, e->type, e->data, fd, type, data);
368                 e=0;
369                 goto error;
370         }
371         
372         if (unlikely((e=hash_fd_map(h, fd, events, type, data))==0)){
373                 LOG(L_ERR, "ERROR: io_watch_add: failed to hash the fd %d\n", fd);
374                 goto error;
375         }
376         switch(h->poll_method){ /* faster then pointer to functions */
377                 case POLL_POLL:
378 #ifdef POLLRDHUP
379                         /* listen to POLLRDHUP by default (if POLLIN) */
380                         events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
381 #endif /* POLLRDHUP */
382                         fd_array_setup(events);
383                         set_fd_flags(O_NONBLOCK);
384                         break;
385 #ifdef HAVE_SELECT
386                 case POLL_SELECT:
387                         fd_array_setup(events);
388                         if (likely(events & POLLIN))
389                                 FD_SET(fd, &h->master_rset);
390                         if (unlikely(events & POLLOUT))
391                                 FD_SET(fd, &h->master_wset);
392                         if (h->max_fd_select<fd) h->max_fd_select=fd;
393                         break;
394 #endif
395 #ifdef HAVE_SIGIO_RT
396                 case POLL_SIGIO_RT:
397                         fd_array_setup(events);
398                         /* re-set O_ASYNC might be needed, if not done from 
399                          * io_watch_del (or if somebody wants to add a fd which has
400                          * already O_ASYNC/F_SETSIG set on a duplicate)
401                          */
402                         /* set async & signal */
403                         if (fcntl(fd, F_SETOWN, my_pid())==-1){
404                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETOWN"
405                                 " failed: %s [%d]\n", strerror(errno), errno);
406                                 goto error;
407                         }
408                         if (fcntl(fd, F_SETSIG, h->signo)==-1){
409                                 LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETSIG"
410                                         " failed: %s [%d]\n", strerror(errno), errno);
411                                 goto error;
412                         }
413                         /* set both non-blocking and async */
414                         set_fd_flags(O_ASYNC| O_NONBLOCK);
415 #ifdef EXTRA_DEBUG
416                         DBG("io_watch_add: sigio_rt on f %d, signal %d to pid %d\n",
417                                         fd,  h->signo, my_pid());
418 #endif
419                         /* empty socket receive buffer, if buffer is already full
420                          * no more space to put packets
421                          * => no more signals are ever generated
422                          * also when moving fds, the freshly moved fd might have
423                          *  already some bytes queued, we want to get them now
424                          *  and not later -- andrei */
425                         idx=h->fd_no;
426                         check_io=1;
427                         break;
428 #endif
429 #ifdef HAVE_EPOLL
430                 case POLL_EPOLL_LT:
431                         ep_event.events=
432 #ifdef POLLRDHUP
433                                                 /* listen for EPOLLRDHUP too */
434                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
435 #else /* POLLRDHUP */
436                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
437 #endif /* POLLRDHUP */
438                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
439                         ep_event.data.ptr=e;
440 again1:
441                         n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
442                         if (unlikely(n==-1)){
443                                 if (errno==EAGAIN) goto again1;
444                                 LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
445                                         strerror(errno), errno);
446                                 goto error;
447                         }
448                         break;
449                 case POLL_EPOLL_ET:
450                         set_fd_flags(O_NONBLOCK);
451                         ep_event.events=
452 #ifdef POLLRDHUP
453                                                 /* listen for EPOLLRDHUP too */
454                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
455 #else /* POLLRDHUP */
456                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
457 #endif /* POLLRDHUP */
458                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
459                                                 EPOLLET;
460                         ep_event.data.ptr=e;
461 again2:
462                         n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
463                         if (unlikely(n==-1)){
464                                 if (errno==EAGAIN) goto again2;
465                                 LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
466                                         strerror(errno), errno);
467                                 goto error;
468                         }
469                         break;
470 #endif
471 #ifdef HAVE_KQUEUE
472                 case POLL_KQUEUE:
473                         if (likely( events & POLLIN)){
474                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e)==-1))
475                                 goto error;
476                         }
477                         if (unlikely( events & POLLOUT)){
478                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
479                                 {
480                                         if (likely(events & POLLIN)){
481                                                 kq_ev_change(h, fd, EVFILT_READ, EV_DELETE, 0);
482                                         }
483                                 }
484                                 goto error;
485                         }
486                         break;
487 #endif
488 #ifdef HAVE_DEVPOLL
489                 case POLL_DEVPOLL:
490                         pfd.fd=fd;
491                         pfd.events=events;
492                         pfd.revents=0;
493 again_devpoll:
494                         if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
495                                 if (errno==EAGAIN) goto again_devpoll;
496                                 LOG(L_ERR, "ERROR: io_watch_add: /dev/poll write failed:"
497                                                         "%s [%d]\n", strerror(errno), errno);
498                                 goto error;
499                         }
500                         break;
501 #endif
502                         
503                 default:
504                         LOG(L_CRIT, "BUG: io_watch_add: no support for poll method "
505                                         " %s (%d)\n", poll_method_str[h->poll_method],
506                                         h->poll_method);
507                         goto error;
508         }
509         
510         h->fd_no++; /* "activate" changes, for epoll/kqueue/devpoll it
511                                    has only informative value */
512 #if defined(HAVE_SIGIO_RT)
513         if (check_io){
514                 /* handle possible pre-existing events */
515                 pf.fd=fd;
516                 pf.events=events;
517 check_io_again:
518                 n=0;
519                 while(e->type && ((n=poll(&pf, 1, 0))>0) && 
520                                 (handle_io(e, pf.revents, idx)>0) &&
521                                 (pf.revents & (e->events|POLLERR|POLLHUP)));
522                 if (unlikely(e->type && (n==-1))){
523                         if (errno==EINTR) goto check_io_again;
524                         LOG(L_ERR, "ERROR: io_watch_add: check_io poll: %s [%d]\n",
525                                                 strerror(errno), errno);
526                 }
527         }
528 #endif
529         return 0;
530 error:
531         if (e) unhash_fd_map(e);
532         return -1;
533 #undef fd_array_setup
534 #undef set_fd_flags 
535 }
536
537
538
539 #define IO_FD_CLOSING 16
540 /* parameters:    h - handler 
541  *               fd - file descriptor
542  *            index - index in the fd_array if known, -1 if not
543  *                    (if index==-1 fd_array will be searched for the
544  *                     corresponding fd* entry -- slower but unavoidable in 
545  *                     some cases). index is not used (no fd_array) for epoll,
546  *                     /dev/poll and kqueue
547  *            flags - optimization flags, e.g. IO_FD_CLOSING, the fd was 
548  *                    or will shortly be closed, in some cases we can avoid
549  *                    extra remove operations (e.g.: epoll, kqueue, sigio)
550  * returns 0 if ok, -1 on error */
551 inline static int io_watch_del(io_wait_h* h, int fd, int idx, int flags)
552 {
553         
554 #define fix_fd_array \
555         do{\
556                         if (unlikely(idx==-1)){ \
557                                 /* fix idx if -1 and needed */ \
558                                 for (idx=0; (idx<h->fd_no) && \
559                                                         (h->fd_array[idx].fd!=fd); idx++); \
560                         } \
561                         if (likely(idx<h->fd_no)){ \
562                                 memmove(&h->fd_array[idx], &h->fd_array[idx+1], \
563                                         (h->fd_no-(idx+1))*sizeof(*(h->fd_array))); \
564                                 if ((idx<=h->crt_fd_array_idx) && (h->crt_fd_array_idx>=0)) \
565                                         h->crt_fd_array_idx--; \
566                         } \
567         }while(0)
568         
569         struct fd_map* e;
570         int events;
571 #ifdef HAVE_EPOLL
572         int n;
573         struct epoll_event ep_event;
574 #endif
575 #ifdef HAVE_DEVPOLL
576         struct pollfd pfd;
577 #endif
578 #ifdef HAVE_SIGIO_RT
579         int fd_flags;
580 #endif
581         
582         if (unlikely((fd<0) || (fd>=h->max_fd_no))){
583                 LOG(L_CRIT, "BUG: io_watch_del: invalid fd %d, not in [0, %d) \n",
584                                                 fd, h->fd_no);
585                 goto error;
586         }
587         DBG("DBG: io_watch_del (%p, %d, %d, 0x%x) fd_no=%d called\n",
588                         h, fd, idx, flags, h->fd_no);
589         e=get_fd_map(h, fd);
590         /* more sanity checks */
591         if (unlikely(e==0)){
592                 LOG(L_CRIT, "BUG: io_watch_del: no corresponding hash entry for %d\n",
593                                         fd);
594                 goto error;
595         }
596         if (unlikely(e->type==0 /*F_NONE*/)){
597                 LOG(L_ERR, "ERROR: io_watch_del: trying to delete already erased"
598                                 " entry %d in the hash(%d, %d, %p) flags %x)\n",
599                                 fd, e->fd, e->type, e->data, flags);
600                 goto error;
601         }
602         events=e->events;
603         unhash_fd_map(e);
604         
605         switch(h->poll_method){
606                 case POLL_POLL:
607                         fix_fd_array;
608                         break;
609 #ifdef HAVE_SELECT
610                 case POLL_SELECT:
611                         if (likely(events & POLLIN))
612                                 FD_CLR(fd, &h->master_rset);
613                         if (unlikely(events & POLLOUT))
614                                 FD_CLR(fd, &h->master_wset);
615                         if (unlikely(h->max_fd_select && (h->max_fd_select==fd)))
616                                 /* we don't know the prev. max, so we just decrement it */
617                                 h->max_fd_select--; 
618                         fix_fd_array;
619                         break;
620 #endif
621 #ifdef HAVE_SIGIO_RT
622                 case POLL_SIGIO_RT:
623                         fix_fd_array;
624                         /* the O_ASYNC flag must be reset all the time, the fd
625                          *  can be changed only if  O_ASYNC is reset (if not and
626                          *  the fd is a duplicate, you will get signals from the dup. fd
627                          *  and not from the original, even if the dup. fd was closed
628                          *  and the signals re-set on the original) -- andrei
629                          */
630                         /*if (!(flags & IO_FD_CLOSING)){*/
631                                 /* reset ASYNC */
632                                 fd_flags=fcntl(fd, F_GETFL); 
633                                 if (unlikely(fd_flags==-1)){ 
634                                         LOG(L_ERR, "ERROR: io_watch_del: fnctl: GETFL failed:" 
635                                                         " %s [%d]\n", strerror(errno), errno); 
636                                         goto error; 
637                                 } 
638                                 if (unlikely(fcntl(fd, F_SETFL, fd_flags&(~O_ASYNC))==-1)){ 
639                                         LOG(L_ERR, "ERROR: io_watch_del: fnctl: SETFL" 
640                                                                 " failed: %s [%d]\n", strerror(errno), errno); 
641                                         goto error; 
642                                 } 
643                         break;
644 #endif
645 #ifdef HAVE_EPOLL
646                 case POLL_EPOLL_LT:
647                 case POLL_EPOLL_ET:
648                         /* epoll doesn't seem to automatically remove sockets,
649                          * if the socket is a duplicate/moved and the original
650                          * is still open. The fd is removed from the epoll set
651                          * only when the original (and all the  copies?) is/are 
652                          * closed. This is probably a bug in epoll. --andrei */
653 #ifdef EPOLL_NO_CLOSE_BUG
654                         if (!(flags & IO_FD_CLOSING)){
655 #endif
656 again_epoll:
657                                 n=epoll_ctl(h->epfd, EPOLL_CTL_DEL, fd, &ep_event);
658                                 if (unlikely(n==-1)){
659                                         if (errno==EAGAIN) goto again_epoll;
660                                         LOG(L_ERR, "ERROR: io_watch_del: removing fd from epoll "
661                                                         "list failed: %s [%d]\n", strerror(errno), errno);
662                                         goto error;
663                                 }
664 #ifdef EPOLL_NO_CLOSE_BUG
665                         }
666 #endif
667                         break;
668 #endif
669 #ifdef HAVE_KQUEUE
670                 case POLL_KQUEUE:
671                         if (!(flags & IO_FD_CLOSING)){
672                                 if (likely(events & POLLIN)){
673                                         if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
674                                                                                                         EV_DELETE, 0) ==-1)){
675                                                 /* try to delete the write filter anyway */
676                                                 if (events & POLLOUT){
677                                                         kq_ev_change(h, fd, EVFILT_WRITE, EV_DELETE, 0);
678                                                 }
679                                                 goto error;
680                                         }
681                                 }
682                                 if (unlikely(events & POLLOUT)){
683                                         if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
684                                                                                                         EV_DELETE, 0) ==-1))
685                                                 goto error;
686                                 }
687                         }
688                         break;
689 #endif
690 #ifdef HAVE_DEVPOLL
691                 case POLL_DEVPOLL:
692                                 /* for /dev/poll the closed fds _must_ be removed
693                                    (they are not removed automatically on close()) */
694                                 pfd.fd=fd;
695                                 pfd.events=POLLREMOVE;
696                                 pfd.revents=0;
697 again_devpoll:
698                                 if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
699                                         if (errno==EINTR) goto again_devpoll;
700                                         LOG(L_ERR, "ERROR: io_watch_del: removing fd from "
701                                                                 "/dev/poll failed: %s [%d]\n", 
702                                                                 strerror(errno), errno);
703                                         goto error;
704                                 }
705                                 break;
706 #endif
707                 default:
708                         LOG(L_CRIT, "BUG: io_watch_del: no support for poll method "
709                                         " %s (%d)\n", poll_method_str[h->poll_method], 
710                                         h->poll_method);
711                         goto error;
712         }
713         h->fd_no--;
714         return 0;
715 error:
716         return -1;
717 #undef fix_fd_array
718 }
719
720
721
722 /* parameters:    h - handler 
723  *               fd - file descriptor
724  *           events - new events to watch for
725  *              idx - index in the fd_array if known, -1 if not
726  *                    (if index==-1 fd_array will be searched for the
727  *                     corresponding fd* entry -- slower but unavoidable in 
728  *                     some cases). index is not used (no fd_array) for epoll,
729  *                     /dev/poll and kqueue
730  * returns 0 if ok, -1 on error */
731 inline static int io_watch_chg(io_wait_h* h, int fd, short events, int idx )
732 {
733         
734 #define fd_array_chg(ev) \
735         do{\
736                         if (unlikely(idx==-1)){ \
737                                 /* fix idx if -1 and needed */ \
738                                 for (idx=0; (idx<h->fd_no) && \
739                                                         (h->fd_array[idx].fd!=fd); idx++); \
740                         } \
741                         if (likely(idx<h->fd_no)){ \
742                                 h->fd_array[idx].events=(ev); \
743                         } \
744         }while(0)
745         
746         struct fd_map* e;
747         int add_events;
748         int del_events;
749 #ifdef HAVE_DEVPOLL
750         struct pollfd pfd;
751 #endif
752 #ifdef HAVE_EPOLL
753         int n;
754         struct epoll_event ep_event;
755 #endif
756         
757         if (unlikely((fd<0) || (fd>=h->max_fd_no))){
758                 LOG(L_CRIT, "BUG: io_watch_chg: invalid fd %d, not in [0, %d) \n",
759                                                 fd, h->fd_no);
760                 goto error;
761         }
762         if (unlikely((events&(POLLIN|POLLOUT))==0)){
763                 LOG(L_CRIT, "BUG: io_watch_chg: invalid events: 0x%0x\n", events);
764                 goto error;
765         }
766         DBG("DBG: io_watch_chg (%p, %d, 0x%x, 0x%x) fd_no=%d called\n",
767                         h, fd, events, idx, h->fd_no);
768         e=get_fd_map(h, fd);
769         /* more sanity checks */
770         if (unlikely(e==0)){
771                 LOG(L_CRIT, "BUG: io_watch_chg: no corresponding hash entry for %d\n",
772                                         fd);
773                 goto error;
774         }
775         if (unlikely(e->type==0 /*F_NONE*/)){
776                 LOG(L_ERR, "ERROR: io_watch_chg: trying to change an already erased"
777                                 " entry %d in the hash(%d, %d, %p) )\n",
778                                 fd, e->fd, e->type, e->data);
779                 goto error;
780         }
781         
782         add_events=events & ~e->events;
783         del_events=e->events & ~events;
784         e->events=events;
785         switch(h->poll_method){
786                 case POLL_POLL:
787 #ifdef POLLRDHUP
788                         /* listen to POLLRDHUP by default (if POLLIN) */
789                         events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
790 #endif /* POLLRDHUP */
791                         fd_array_chg(events);
792                         break;
793 #ifdef HAVE_SELECT
794                 case POLL_SELECT:
795                         fd_array_chg(events);
796                         if (unlikely(del_events & POLLIN))
797                                 FD_CLR(fd, &h->master_rset);
798                         else if (unlikely(add_events & POLLIN))
799                                 FD_SET(fd, &h->master_rset);
800                         if (likely(del_events & POLLOUT))
801                                 FD_CLR(fd, &h->master_wset);
802                         else if (likely(add_events & POLLOUT))
803                                 FD_SET(fd, &h->master_wset);
804                         break;
805 #endif
806 #ifdef HAVE_SIGIO_RT
807                 case POLL_SIGIO_RT:
808                         fd_array_chg(events);
809                         /* no need for check_io, since SIGIO_RT listens by default for all
810                          * the events */
811                         break;
812 #endif
813 #ifdef HAVE_EPOLL
814                 case POLL_EPOLL_LT:
815                                 ep_event.events=
816 #ifdef POLLRDHUP
817                                                 /* listen for EPOLLRDHUP too */
818                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
819 #else /* POLLRDHUP */
820                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
821 #endif /* POLLRDHUP */
822                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
823                                 ep_event.data.ptr=e;
824 again_epoll_lt:
825                                 n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
826                                 if (unlikely(n==-1)){
827                                         if (errno==EAGAIN) goto again_epoll_lt;
828                                         LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
829                                                         " failed: %s [%d]\n", strerror(errno), errno);
830                                         goto error;
831                                 }
832                         break;
833                 case POLL_EPOLL_ET:
834                                 ep_event.events=
835 #ifdef POLLRDHUP
836                                                 /* listen for EPOLLRDHUP too */
837                                                 ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
838 #else /* POLLRDHUP */
839                                                 (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
840 #endif /* POLLRDHUP */
841                                                 (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
842                                                 EPOLLET;
843                                 ep_event.data.ptr=e;
844 again_epoll_et:
845                                 n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
846                                 if (unlikely(n==-1)){
847                                         if (errno==EAGAIN) goto again_epoll_et;
848                                         LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
849                                                         " failed: %s [%d]\n", strerror(errno), errno);
850                                         goto error;
851                                 }
852                         break;
853 #endif
854 #ifdef HAVE_KQUEUE
855                 case POLL_KQUEUE:
856                         if (unlikely(del_events & POLLIN)){
857                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
858                                                                                                                 EV_DELETE, 0) ==-1))
859                                                 goto error;
860                         }else if (unlikely(add_events & POLLIN)){
861                                 if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e) ==-1))
862                                         goto error;
863                         }
864                         if (likely(del_events & POLLOUT)){
865                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
866                                                                                                                 EV_DELETE, 0) ==-1))
867                                                 goto error;
868                         }else if (likely(add_events & POLLOUT)){
869                                 if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
870                                         goto error;
871                         }
872                         break;
873 #endif
874 #ifdef HAVE_DEVPOLL
875                 case POLL_DEVPOLL:
876                                 /* for /dev/poll the closed fds _must_ be removed
877                                    (they are not removed automatically on close()) */
878                                 pfd.fd=fd;
879                                 pfd.events=POLLREMOVE;
880                                 pfd.revents=0;
881 again_devpoll1:
882                                 if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
883                                         if (errno==EINTR) goto again_devpoll1;
884                                         LOG(L_ERR, "ERROR: io_watch_chg: removing fd from "
885                                                                 "/dev/poll failed: %s [%d]\n", 
886                                                                 strerror(errno), errno);
887                                         goto error;
888                                 }
889 again_devpoll2:
890                                 pfd.events=events;
891                                 pfd.revents=0;
892                                 if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
893                                         if (errno==EINTR) goto again_devpoll2;
894                                         LOG(L_ERR, "ERROR: io_watch_chg: re-adding fd to "
895                                                                 "/dev/poll failed: %s [%d]\n", 
896                                                                 strerror(errno), errno);
897                                         goto error;
898                                 }
899                                 break;
900 #endif
901                 default:
902                         LOG(L_CRIT, "BUG: io_watch_chg: no support for poll method "
903                                         " %s (%d)\n", poll_method_str[h->poll_method], 
904                                         h->poll_method);
905                         goto error;
906         }
907         return 0;
908 error:
909         return -1;
910 #undef fix_fd_array
911 }
912
913
914
915 /* io_wait_loop_x style function 
916  * wait for io using poll()
917  * params: h      - io_wait handle
918  *         t      - timeout in s
919  *         repeat - if !=0 handle_io will be called until it returns <=0
920  * returns: number of IO events handled on success (can be 0), -1 on error
921  */
922 inline static int io_wait_loop_poll(io_wait_h* h, int t, int repeat)
923 {
924         int n, r;
925         int ret;
926         struct fd_map* fm;
927         
928 again:
929                 ret=n=poll(h->fd_array, h->fd_no, t*1000);
930                 if (n==-1){
931                         if (errno==EINTR) goto again; /* signal, ignore it */
932                         else{
933                                 LOG(L_ERR, "ERROR:io_wait_loop_poll: poll: %s [%d]\n",
934                                                 strerror(errno), errno);
935                                 goto error;
936                         }
937                 }
938                 for (r=0; (r<h->fd_no) && n; r++){
939                         fm=get_fd_map(h, h->fd_array[r].fd);
940                         if (h->fd_array[r].revents & (fm->events|POLLERR|POLLHUP)){
941                                 n--;
942                                 /* sanity checks */
943                                 if (unlikely((h->fd_array[r].fd >= h->max_fd_no)||
944                                                                 (h->fd_array[r].fd < 0))){
945                                         LOG(L_CRIT, "BUG: io_wait_loop_poll: bad fd %d "
946                                                         "(no in the 0 - %d range)\n",
947                                                         h->fd_array[r].fd, h->max_fd_no);
948                                         /* try to continue anyway */
949                                         h->fd_array[r].events=0; /* clear the events */
950                                         continue;
951                                 }
952                                 h->crt_fd_array_idx=r;
953                                 /* repeat handle_io if repeat, fd still watched (not deleted
954                                  *  inside handle_io), handle_io returns that there's still
955                                  *  IO and the fd is still watched for the triggering event */
956                                 while(fm->type && 
957                                                 (handle_io(fm, h->fd_array[r].revents, r) > 0) &&
958                                                 repeat && ((fm->events|POLLERR|POLLHUP) &
959                                                                                                         h->fd_array[r].revents));
960                                 r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd) 
961                                                                                   array shifting */
962                         }
963                 }
964 error:
965         return ret;
966 }
967
968
969
#ifdef HAVE_SELECT
/* io_wait_loop_x style function, waits for io using select()
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io will be called again for the same fd
 *                  for as long as it returns >0 and the fd is still watched
 *                  for the received events
 * returns: the value returned by select() (a select() failure other than
 *          EINTR is logged and its negative value is returned)
 */
inline static int io_wait_loop_select(io_wait_h* h, int t, int repeat)
{
	fd_set read_set;
	fd_set write_set;
	struct timeval tv;
	struct fd_map* map;
	int pending;
	int ret;
	int i;
	int fd;
	int revents;

	for(;;){
		/* select() modifies its arguments => start each try from fresh
		 * copies of the master sets and a fresh timeout */
		read_set=h->master_rset;
		write_set=h->master_wset;
		tv.tv_sec=t;
		tv.tv_usec=0;
		ret=pending=select(h->max_fd_select+1, &read_set, &write_set, 0, &tv);
		if (pending>=0)
			break;
		if (errno==EINTR)
			continue; /* just a signal, retry */
		LOG(L_ERR, "ERROR: io_wait_loop_select: select: %s [%d]\n",
				strerror(errno), errno);
		pending=0; /* nothing to process, skip the fd scan */
		break;
	}
	/* the poll fd array is used to find out which fds are watched */
	for(i=0; (i<h->fd_no) && pending; i++){
		fd=h->fd_array[i].fd;
		revents=0;
		if (likely(FD_ISSET(fd, &read_set)))
			revents|=POLLIN;
		if (unlikely(FD_ISSET(fd, &write_set)))
			revents|=POLLOUT;
		if (likely(revents==0))
			continue;
		h->crt_fd_array_idx=i;
		map=get_fd_map(h, fd);
		while(map->type && (map->events & revents)){
			if (handle_io(map, revents, i)<=0)
				break;
			if (!repeat)
				break;
		}
		/* the index can change due to io_watch_del(fd) array shifting */
		i=h->crt_fd_array_idx;
		pending--;
	}
	return ret;
}
#endif
1015
1016
1017
#ifdef HAVE_EPOLL
/* io_wait_loop_x style function, waits for io using epoll_wait()
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io will be called again for the same event
 *                  for as long as it returns >0 and the fd is still watched
 *                  for the received events
 * returns: the value returned by epoll_wait() (number of events, -1 on
 *          error)
 */
inline static int io_wait_loop_epoll(io_wait_h* h, int t, int repeat)
{
	struct epoll_event* ev;
	struct fd_map* map;
	int ev_no;
	int i;
	int revents;

	for(;;){
		ev_no=epoll_wait(h->epfd, h->ep_array, h->fd_no, t*1000);
		if (likely(ev_no!=-1))
			break;
		if (errno==EINTR)
			continue; /* signal, ignore it */
		LOG(L_ERR, "ERROR:io_wait_loop_epoll: "
				"epoll_wait(%d, %p, %d, %d): %s [%d]\n", 
				h->epfd, h->ep_array, h->fd_no, t*1000,
				strerror(errno), errno);
		return ev_no;
	}
	for (i=0; i<ev_no; i++){
		ev=&h->ep_array[i];
		/* translate the epoll event bits into their poll equivalents
		 * (EPOLLPRI is reported as POLLIN) */
		revents=0;
		if (ev->events & (EPOLLIN|EPOLLPRI))
			revents|=POLLIN;
		if (ev->events & EPOLLOUT)
			revents|=POLLOUT;
		if (ev->events & EPOLLERR)
			revents|=POLLERR;
		if (ev->events & EPOLLHUP)
			revents|=POLLHUP;
#ifdef POLLRDHUP
		if (ev->events & EPOLLRDHUP)
			revents|=POLLRDHUP;
#endif
		if (unlikely(revents==0)){
			LOG(L_ERR, "ERROR:io_wait_loop_epoll: unexpected event %x"
						" on %d/%d, data=%p\n", h->ep_array[i].events,
						i+1, ev_no, h->ep_array[i].data.ptr);
			continue;
		}
		/* the fd_map entry was attached to the event as user data */
		map=(struct fd_map*)ev->data.ptr;
		while(map->type && ((map->events|POLLERR|POLLHUP) & revents)){
			if (handle_io(map, revents, -1)<=0)
				break;
			if (!repeat)
				break;
		}
	}
	return ev_no;
}
#endif
1069
1070
1071
#ifdef HAVE_KQUEUE
/* io_wait_loop_x style function, waits for io using kevent()
 * (the changes queued in h->kq_changes are submitted by the same call)
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io will be called again for the same event
 *                  for as long as it returns >0 and the fd is still watched
 *                  for the received events
 * returns: the value returned by kevent() (number of events, -1 on error)
 */
inline static int io_wait_loop_kqueue(io_wait_h* h, int t, int repeat)
{
	int n, r;
	struct timespec tspec;
	struct fd_map* fm;
	int revents;
	
	tspec.tv_sec=t;
	tspec.tv_nsec=0;
again:
		n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges,  h->kq_array,
					h->fd_no, &tspec);
		if (unlikely(n==-1)){
			if (errno==EINTR) goto again; /* signal, ignore it */
			else{
				LOG(L_ERR, "ERROR: io_wait_loop_kqueue: kevent:"
						" %s [%d]\n", strerror(errno), errno);
				goto error;
			}
		}
		h->kq_nchanges=0; /* reset changes array */
		for (r=0; r<n; r++){
#ifdef EXTRA_DEBUG
			DBG("DBG: kqueue: event %d/%d: fd=%d, udata=%lx, flags=0x%x\n",
					r, n, (int)h->kq_array[r].ident,
					(long)h->kq_array[r].udata,
					h->kq_array[r].flags);
#endif
			fm=(struct fd_map*)h->kq_array[r].udata;
			if (unlikely(fm==0)){
				/* no fd_map attached: EV_DELETE changes are queued with
				 * udata 0 and can come back as error events (e.g. when
				 * trying to remove an already closed fd: race between
				 * adding something to the changes array, close() and
				 * applying the changes) => nothing to handle, ignore it */
				LOG(L_INFO, "INFO: io_wait_loop_kqueue: ignoring event with"
							" no fd_map on fd %ld (flags 0x%x, data %ld)\n",
							(long)h->kq_array[r].ident,
							(unsigned)h->kq_array[r].flags,
							(long)h->kq_array[r].data);
				continue;
			}
			if (likely(h->kq_array[r].filter==EVFILT_READ)){
				/* EV_EOF is reported to handle_io as POLLHUP */
				revents=POLLIN | 
					(((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP);
				while(fm->type && (fm->events & revents) && 
						(handle_io(fm, revents, -1)>0) && repeat);
			}else if (h->kq_array[r].filter==EVFILT_WRITE){
				revents=POLLOUT | 
					(((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP);
				while(fm->type && (fm->events & revents) && 
						(handle_io(fm, revents, -1)>0) && repeat);
			}
		}
error:
	return n;
}
#endif
1130
1131
1132
#ifdef HAVE_SIGIO_RT
/* io_wait_loop_x style function, waits for io using real time signals
 * (sigtimedwait()); falls back to io_wait_loop_poll() when SIGIO is
 * received (signal queue overflow).
 * sigio rt version has no repeat (it doesn't make sense)
 * params: h - io_wait handle
 *         t - timeout in s
 * returns: 1 if a signal was received (1 event per call normally), 0 on
 *          timeout, the io_wait_loop_poll() result on signal queue overflow
 *          and -1 on error
 */
inline static int io_wait_loop_sigio_rt(io_wait_h* h, int t)
{
	int n;
	int ret;
	struct timespec ts;
	siginfo_t siginfo;
	int sigio_band;
	int sigio_fd;
	struct fd_map* fm;
	int revents;
#ifdef SIGINFO64_WORKARROUND
	int* pi;
#endif
	
	
	ret=1; /* 1 event per call normally */
	ts.tv_sec=t;
	ts.tv_nsec=0;
	if (unlikely(!sigismember(&h->sset, h->signo) ||
					!sigismember(&h->sset, SIGIO))) {
		LOG(L_CRIT, "BUG: io_wait_loop_sigio_rt: the signal mask"
				" is not properly set!\n");
		goto error;
	}
again:
	n=sigtimedwait(&h->sset, &siginfo, &ts);
	if (unlikely(n==-1)){
		if (errno==EINTR) goto again; /* some other signal, ignore it */
		else if (errno==EAGAIN){ /* timeout */
			ret=0;
			goto end;
		}else{
			LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: sigtimed_wait"
					" %s [%d]\n", strerror(errno), errno);
			goto error;
		}
	}
	if (likely(n!=SIGIO)){
#ifdef SIGINFO64_WORKARROUND
		/* on linux siginfo.si_band is defined as long in userspace
		 * and as int in kernel (< 2.6.5) => on 64 bits things will break!
		 * (si_band will include si_fd, and si_fd will contain
		 *  garbage).
		 *  see /usr/src/linux/include/asm-generic/siginfo.h and
		 *      /usr/include/bits/siginfo.h
		 *  On newer kernels this is fixed (si_band is long in the kernel too).
		 * -- andrei */
		if  ((_os_ver<0x020605) && (sizeof(siginfo.si_band)>sizeof(int))){
			pi=(int*)(void*)&siginfo.si_band; /* avoid type punning warnings */
			sigio_band=*pi;
			sigio_fd=*(pi+1);
		}else
#endif
		{
			sigio_band=siginfo.si_band;
			sigio_fd=siginfo.si_fd;
		}
		if (unlikely(siginfo.si_code==SI_SIGIO)){
			/* old style, we don't know the event (linux 2.2.?) */
			LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: old style sigio"
					" interface\n");
			/* never look up a fd outside the range accepted by the
			 * io_watch_* functions */
			if (unlikely((sigio_fd<0) || (sigio_fd>=h->max_fd_no))){
				LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: ignoring signal"
						" for invalid fd %d (not in [0, %d))\n",
						sigio_fd, h->max_fd_no);
				goto end;
			}
			fm=get_fd_map(h, sigio_fd);
			/* we can have queued signals generated by fds not watched
			 * any more, or by fds in transition, to a child => ignore them*/
			if (fm->type)
				handle_io(fm, POLLIN|POLLOUT, -1);
		}else{
			/* si_code contains the SIGPOLL reason: POLL_IN, POLL_OUT,
			 *  POLL_MSG, POLL_ERR, POLL_PRI or POLL_HUP
			 * and si_band the translated poll event bitmap:
			 *  POLLIN|POLLRDNORM  (=POLL_IN),
			 *  POLLOUT|POLLWRNORM|POLLWRBAND (=POLL_OUT),
			 *  POLLIN|POLLRDNORM|POLLMSG (=POLL_MSG),
			 *  POLLERR (=POLL_ERR),
			 *  POLLPRI|POLLRDBAND (=POLL_PRI),
			 *  POLLHUP|POLLERR (=POLL_HUP) 
			 *  [linux 2.6.22 fs/fcntl.c:447]
			 */
#ifdef EXTRA_DEBUG
			DBG("io_wait_loop_sigio_rt: siginfo: signal=%d (%d),"
					" si_code=%d, si_band=0x%x,"
					" si_fd=%d\n",
					siginfo.si_signo, n, siginfo.si_code, 
					(unsigned)sigio_band,
					sigio_fd);
#endif
			/* on some errors (e.g. when receving TCP RST), sigio_band will
			 * be set to 0x08 (POLLERR) or 0x18 (POLLERR|POLLHUP - on stream
			 *  unix socket close) , so better catch all events --andrei */
			if (likely(sigio_band)){
				/* never look up a fd outside the range accepted by the
				 * io_watch_* functions */
				if (unlikely((sigio_fd<0) || (sigio_fd>=h->max_fd_no))){
					LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: ignoring signal"
							" for invalid fd %d (not in [0, %d))\n",
							sigio_fd, h->max_fd_no);
					goto end;
				}
				fm=get_fd_map(h, sigio_fd);
				revents=sigio_band;
				/* fix revents==POLLPRI case */
				revents |= (!(revents & POLLPRI)-1) & POLLIN;
				/* we can have queued signals generated by fds not watched
				 * any more, or by fds in transition, to a child 
				 * => ignore them */
				if (fm->type && ((fm->events|POLLERR|POLLHUP) & revents))
					handle_io(fm, revents, -1);
				else
					DBG("WARNING: io_wait_loop_sigio_rt: ignoring event"
							" %x on fd %d, watching for %x, si_code=%x "
							"(fm->type=%d, fm->fd=%d, fm->data=%p)\n",
							sigio_band, sigio_fd, fm->events, siginfo.si_code,
							fm->type, fm->fd, fm->data);
			}else{
				LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: unexpected event"
							" on fd %d: %x\n", sigio_fd, sigio_band);
			}
		}
	}else{
		/* signal queue overflow 
		 * TODO: increase signal queue size: 2.4x /proc/.., 2.6x -rlimits */
		LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: signal queue overflowed"
					"- falling back to poll\n");
		/* clear real-time signal queue
		 * both SIG_IGN and SIG_DFL are needed , it doesn't work
		 * only with SIG_DFL  */
		if (signal(h->signo, SIG_IGN)==SIG_ERR){
			LOG(L_CRIT, "BUG: do_poll: couldn't reset signal to IGN\n");
		}
		
		if (signal(h->signo, SIG_DFL)==SIG_ERR){
			LOG(L_CRIT, "BUG: do_poll: couldn't reset signal to DFL\n");
		}
		/* falling back to normal poll */
		ret=io_wait_loop_poll(h, -1, 1);
	}
end:
	return ret;
error:
	return -1;
}
#endif
1269
1270
1271
#ifdef HAVE_DEVPOLL
/* io_wait_loop_x style function, waits for io using /dev/poll
 * (DP_POLL ioctl); h->fd_array is used as the result buffer
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io will be called again for the same fd
 *                  for as long as it returns >0 and the fd is still watched
 *                  for the received events
 * returns: the value returned by the ioctl (number of returned entries,
 *          -1 on error)
 */
inline static int io_wait_loop_devpoll(io_wait_h* h, int t, int repeat)
{
	struct dvpoll dp_req;
	struct fd_map* map;
	int ev_no;
	int ret;
	int i;

	dp_req.dp_fds=h->fd_array;
	dp_req.dp_nfds=h->fd_no;
	dp_req.dp_timeout=t*1000;
	do{
		ret=ev_no=ioctl(h->dpoll_fd, DP_POLL, &dp_req);
		/* retry if interrupted by a signal */
	}while(unlikely(ev_no==-1) && (errno==EINTR));
	if (unlikely(ev_no==-1)){
		LOG(L_ERR, "ERROR:io_wait_loop_devpoll: ioctl: %s [%d]\n",
				strerror(errno), errno);
		return ret;
	}
	for (i=0; i<ev_no; i++){
		if (h->fd_array[i].revents & (POLLNVAL|POLLERR)){
			/* only logged, the entry is still passed to the handler loop
			 * below */
			LOG(L_ERR, "ERROR: io_wait_loop_devpoll: pollinval returned"
						" for fd %d, revents=%x\n",
						h->fd_array[i].fd, h->fd_array[i].revents);
		}
		/* POLLIN|POLLHUP just go through */
		map=get_fd_map(h, h->fd_array[i].fd);
		while(map->type && (map->events & h->fd_array[i].revents)){
			if (handle_io(map, h->fd_array[i].revents, i)<=0)
				break;
			if (!repeat)
				break;
		}
	}
	return ret;
}
#endif
1308
1309
1310
1311 /* init */
1312
1313
1314 /* initializes the static vars/arrays
1315  * params:      h - pointer to the io_wait_h that will be initialized
1316  *         max_fd - maximum allowed fd number
1317  *         poll_m - poll method (0 for automatic best fit)
1318  */
1319 int init_io_wait(io_wait_h* h, int max_fd, enum poll_types poll_method);
1320
1321 /* destroys everything init_io_wait allocated */
1322 void destroy_io_wait(io_wait_h* h);
1323
1324
1325 #endif