| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu> | |
| 3 * All rights reserved. | |
| 4 * | |
| 5 * Redistribution and use in source and binary forms, with or without | |
| 6 * modification, are permitted provided that the following conditions | |
| 7 * are met: | |
| 8 * 1. Redistributions of source code must retain the above copyright | |
| 9 * notice, this list of conditions and the following disclaimer. | |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | |
| 11 * notice, this list of conditions and the following disclaimer in the | |
| 12 * documentation and/or other materials provided with the distribution. | |
| 13 * 3. The name of the author may not be used to endorse or promote products | |
| 14 * derived from this software without specific prior written permission. | |
| 15 * | |
| 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
| 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
| 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
| 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
| 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
| 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 26 */ | |
| 27 #ifdef HAVE_CONFIG_H | |
| 28 #include "config.h" | |
| 29 #endif | |
| 30 | |
| 31 #include <stdint.h> | |
| 32 #include <sys/types.h> | |
| 33 #include <sys/resource.h> | |
| 34 #ifdef HAVE_SYS_TIME_H | |
| 35 #include <sys/time.h> | |
| 36 #else | |
| 37 #include <sys/_libevent_time.h> | |
| 38 #endif | |
| 39 #include <sys/queue.h> | |
| 40 #include <sys/epoll.h> | |
| 41 #include <signal.h> | |
| 42 #include <stdio.h> | |
| 43 #include <stdlib.h> | |
| 44 #include <string.h> | |
| 45 #include <unistd.h> | |
| 46 #include <errno.h> | |
| 47 #ifdef HAVE_FCNTL_H | |
| 48 #include <fcntl.h> | |
| 49 #endif | |
| 50 | |
| 51 #include "event.h" | |
| 52 #include "event-internal.h" | |
| 53 #include "evsignal.h" | |
| 54 #include "log.h" | |
| 55 | |
/*
 * The epoll interface does not hand back the events registered for a
 * descriptor, so we keep track of them ourselves: one slot per file
 * descriptor.
 */
struct evepoll {
	struct event *evread;	/* event set for reading on this fd, or NULL */
	struct event *evwrite;	/* event set for writing on this fd, or NULL */
};
| 63 | |
/* State owned by one instance of the epoll backend. */
struct epollop {
	struct evepoll *fds;		/* per-descriptor table, indexed by fd */
	int nfds;			/* number of slots allocated in fds */
	struct epoll_event *events;	/* result buffer passed to epoll_wait() */
	int nevents;			/* capacity of events, in entries */
	int epfd;			/* descriptor returned by epoll_create() */
};
| 71 | |
/* Backend entry points; they are wired into the epollops table below. */
static void *epoll_init(struct event_base *base);
static int epoll_add(void *arg, struct event *ev);
static int epoll_del(void *arg, struct event *ev);
static int epoll_dispatch(struct event_base *base, void *arg,
    struct timeval *tv);
static void epoll_dealloc(struct event_base *base, void *arg);
| 77 | |
| 78 const struct eventop epollops = { | |
| 79 "epoll", | |
| 80 epoll_init, | |
| 81 epoll_add, | |
| 82 epoll_del, | |
| 83 epoll_dispatch, | |
| 84 epoll_dealloc, | |
| 85 1 /* need reinit */ | |
| 86 }; | |
| 87 | |
/*
 * FD_CLOSEONEXEC(x): set the close-on-exec flag on descriptor x and log a
 * warning if fcntl() fails.  Expands to nothing when HAVE_SETFD is not
 * defined.  FD_CLOEXEC is used instead of a bare 1 so the flag is named,
 * and the argument is parenthesized so any expression can be passed.
 */
#ifdef HAVE_SETFD
#define FD_CLOSEONEXEC(x) do { \
	if (fcntl((x), F_SETFD, FD_CLOEXEC) == -1) \
		event_warn("fcntl(%d, F_SETFD)", (x)); \
} while (0)
#else
#define FD_CLOSEONEXEC(x)
#endif
| 96 | |
/*
 * Upper bound for the timeout handed to epoll_wait(), in milliseconds.
 *
 * Linux kernels at least up to 2.6.24.4 cannot handle epoll timeouts
 * larger than (LONG_MAX - 999ULL)/HZ.  HZ can be as large as 1000 in the
 * wild and LONG_MAX as small as (1<<31)-1, which caps the supported
 * timeout at 2147482 msec.  We round that down by about 47 seconds to an
 * even 35 minutes.
 */
#define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
| 104 | |
/* Starting number of slots in the per-descriptor table. */
#define INITIAL_NFILES 32
/* Starting capacity of the epoll_wait() result buffer. */
#define INITIAL_NEVENTS 32
/* The result buffer is no longer doubled once it has reached this size. */
#define MAX_NEVENTS 4096
| 108 | |
| 109 static void * | |
| 110 epoll_init(struct event_base *base) | |
| 111 { | |
| 112 int epfd; | |
| 113 struct epollop *epollop; | |
| 114 | |
| 115 /* Disable epollueue when this environment variable is set */ | |
| 116 if (evutil_getenv("EVENT_NOEPOLL")) | |
| 117 return (NULL); | |
| 118 | |
| 119 /* Initalize the kernel queue */ | |
| 120 if ((epfd = epoll_create(32000)) == -1) { | |
| 121 if (errno != ENOSYS) | |
| 122 event_warn("epoll_create"); | |
| 123 return (NULL); | |
| 124 } | |
| 125 | |
| 126 FD_CLOSEONEXEC(epfd); | |
| 127 | |
| 128 if (!(epollop = calloc(1, sizeof(struct epollop)))) | |
| 129 return (NULL); | |
| 130 | |
| 131 epollop->epfd = epfd; | |
| 132 | |
| 133 /* Initalize fields */ | |
| 134 epollop->events = malloc(INITIAL_NEVENTS * sizeof(struct epoll_event)); | |
| 135 if (epollop->events == NULL) { | |
| 136 free(epollop); | |
| 137 return (NULL); | |
| 138 } | |
| 139 epollop->nevents = INITIAL_NEVENTS; | |
| 140 | |
| 141 epollop->fds = calloc(INITIAL_NFILES, sizeof(struct evepoll)); | |
| 142 if (epollop->fds == NULL) { | |
| 143 free(epollop->events); | |
| 144 free(epollop); | |
| 145 return (NULL); | |
| 146 } | |
| 147 epollop->nfds = INITIAL_NFILES; | |
| 148 | |
| 149 evsignal_init(base); | |
| 150 | |
| 151 return (epollop); | |
| 152 } | |
| 153 | |
| 154 static int | |
| 155 epoll_recalc(struct event_base *base, void *arg, int max) | |
| 156 { | |
| 157 struct epollop *epollop = arg; | |
| 158 | |
| 159 if (max >= epollop->nfds) { | |
| 160 struct evepoll *fds; | |
| 161 int nfds; | |
| 162 | |
| 163 nfds = epollop->nfds; | |
| 164 while (nfds <= max) | |
| 165 nfds <<= 1; | |
| 166 | |
| 167 fds = realloc(epollop->fds, nfds * sizeof(struct evepoll)); | |
| 168 if (fds == NULL) { | |
| 169 event_warn("realloc"); | |
| 170 return (-1); | |
| 171 } | |
| 172 epollop->fds = fds; | |
| 173 memset(fds + epollop->nfds, 0, | |
| 174 (nfds - epollop->nfds) * sizeof(struct evepoll)); | |
| 175 epollop->nfds = nfds; | |
| 176 } | |
| 177 | |
| 178 return (0); | |
| 179 } | |
| 180 | |
| 181 static int | |
| 182 epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv) | |
| 183 { | |
| 184 struct epollop *epollop = arg; | |
| 185 struct epoll_event *events = epollop->events; | |
| 186 struct evepoll *evep; | |
| 187 int i, res, timeout = -1; | |
| 188 | |
| 189 if (tv != NULL) | |
| 190 timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000; | |
| 191 | |
| 192 if (timeout > MAX_EPOLL_TIMEOUT_MSEC) { | |
| 193 /* Linux kernels can wait forever if the timeout is too big; | |
| 194 * see comment on MAX_EPOLL_TIMEOUT_MSEC. */ | |
| 195 timeout = MAX_EPOLL_TIMEOUT_MSEC; | |
| 196 } | |
| 197 | |
| 198 res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout); | |
| 199 | |
| 200 if (res == -1) { | |
| 201 if (errno != EINTR) { | |
| 202 event_warn("epoll_wait"); | |
| 203 return (-1); | |
| 204 } | |
| 205 | |
| 206 evsignal_process(base); | |
| 207 return (0); | |
| 208 } else if (base->sig.evsignal_caught) { | |
| 209 evsignal_process(base); | |
| 210 } | |
| 211 | |
| 212 event_debug(("%s: epoll_wait reports %d", __func__, res)); | |
| 213 | |
| 214 for (i = 0; i < res; i++) { | |
| 215 int what = events[i].events; | |
| 216 struct event *evread = NULL, *evwrite = NULL; | |
| 217 int fd = events[i].data.fd; | |
| 218 | |
| 219 if (fd < 0 || fd >= epollop->nfds) | |
| 220 continue; | |
| 221 evep = &epollop->fds[fd]; | |
| 222 | |
| 223 if (what & (EPOLLHUP|EPOLLERR)) { | |
| 224 evread = evep->evread; | |
| 225 evwrite = evep->evwrite; | |
| 226 } else { | |
| 227 if (what & EPOLLIN) { | |
| 228 evread = evep->evread; | |
| 229 } | |
| 230 | |
| 231 if (what & EPOLLOUT) { | |
| 232 evwrite = evep->evwrite; | |
| 233 } | |
| 234 } | |
| 235 | |
| 236 if (!(evread||evwrite)) | |
| 237 continue; | |
| 238 | |
| 239 if (evread != NULL) | |
| 240 event_active(evread, EV_READ, 1); | |
| 241 if (evwrite != NULL) | |
| 242 event_active(evwrite, EV_WRITE, 1); | |
| 243 } | |
| 244 | |
| 245 if (res == epollop->nevents && epollop->nevents < MAX_NEVENTS) { | |
| 246 /* We used all of the event space this time. We should | |
| 247 be ready for more events next time. */ | |
| 248 int new_nevents = epollop->nevents * 2; | |
| 249 struct epoll_event *new_events; | |
| 250 | |
| 251 new_events = realloc(epollop->events, | |
| 252 new_nevents * sizeof(struct epoll_event)); | |
| 253 if (new_events) { | |
| 254 epollop->events = new_events; | |
| 255 epollop->nevents = new_nevents; | |
| 256 } | |
| 257 } | |
| 258 | |
| 259 return (0); | |
| 260 } | |
| 261 | |
| 262 | |
| 263 static int | |
| 264 epoll_add(void *arg, struct event *ev) | |
| 265 { | |
| 266 struct epollop *epollop = arg; | |
| 267 struct epoll_event epev = {0, {0}}; | |
| 268 struct evepoll *evep; | |
| 269 int fd, op, events; | |
| 270 | |
| 271 if (ev->ev_events & EV_SIGNAL) | |
| 272 return (evsignal_add(ev)); | |
| 273 | |
| 274 fd = ev->ev_fd; | |
| 275 if (fd >= epollop->nfds) { | |
| 276 /* Extent the file descriptor array as necessary */ | |
| 277 if (epoll_recalc(ev->ev_base, epollop, fd) == -1) | |
| 278 return (-1); | |
| 279 } | |
| 280 evep = &epollop->fds[fd]; | |
| 281 op = EPOLL_CTL_ADD; | |
| 282 events = 0; | |
| 283 if (evep->evread != NULL) { | |
| 284 events |= EPOLLIN; | |
| 285 op = EPOLL_CTL_MOD; | |
| 286 } | |
| 287 if (evep->evwrite != NULL) { | |
| 288 events |= EPOLLOUT; | |
| 289 op = EPOLL_CTL_MOD; | |
| 290 } | |
| 291 | |
| 292 if (ev->ev_events & EV_READ) | |
| 293 events |= EPOLLIN; | |
| 294 if (ev->ev_events & EV_WRITE) | |
| 295 events |= EPOLLOUT; | |
| 296 | |
| 297 epev.data.fd = fd; | |
| 298 epev.events = events; | |
| 299 if (epoll_ctl(epollop->epfd, op, ev->ev_fd, &epev) == -1) | |
| 300 return (-1); | |
| 301 | |
| 302 /* Update events responsible */ | |
| 303 if (ev->ev_events & EV_READ) | |
| 304 evep->evread = ev; | |
| 305 if (ev->ev_events & EV_WRITE) | |
| 306 evep->evwrite = ev; | |
| 307 | |
| 308 return (0); | |
| 309 } | |
| 310 | |
| 311 static int | |
| 312 epoll_del(void *arg, struct event *ev) | |
| 313 { | |
| 314 struct epollop *epollop = arg; | |
| 315 struct epoll_event epev = {0, {0}}; | |
| 316 struct evepoll *evep; | |
| 317 int fd, events, op; | |
| 318 int needwritedelete = 1, needreaddelete = 1; | |
| 319 | |
| 320 if (ev->ev_events & EV_SIGNAL) | |
| 321 return (evsignal_del(ev)); | |
| 322 | |
| 323 fd = ev->ev_fd; | |
| 324 if (fd >= epollop->nfds) | |
| 325 return (0); | |
| 326 evep = &epollop->fds[fd]; | |
| 327 | |
| 328 op = EPOLL_CTL_DEL; | |
| 329 events = 0; | |
| 330 | |
| 331 if (ev->ev_events & EV_READ) | |
| 332 events |= EPOLLIN; | |
| 333 if (ev->ev_events & EV_WRITE) | |
| 334 events |= EPOLLOUT; | |
| 335 | |
| 336 if ((events & (EPOLLIN|EPOLLOUT)) != (EPOLLIN|EPOLLOUT)) { | |
| 337 if ((events & EPOLLIN) && evep->evwrite != NULL) { | |
| 338 needwritedelete = 0; | |
| 339 events = EPOLLOUT; | |
| 340 op = EPOLL_CTL_MOD; | |
| 341 } else if ((events & EPOLLOUT) && evep->evread != NULL) { | |
| 342 needreaddelete = 0; | |
| 343 events = EPOLLIN; | |
| 344 op = EPOLL_CTL_MOD; | |
| 345 } | |
| 346 } | |
| 347 | |
| 348 epev.events = events; | |
| 349 epev.data.fd = fd; | |
| 350 | |
| 351 if (needreaddelete) | |
| 352 evep->evread = NULL; | |
| 353 if (needwritedelete) | |
| 354 evep->evwrite = NULL; | |
| 355 | |
| 356 if (epoll_ctl(epollop->epfd, op, fd, &epev) == -1) | |
| 357 return (-1); | |
| 358 | |
| 359 return (0); | |
| 360 } | |
| 361 | |
| 362 static void | |
| 363 epoll_dealloc(struct event_base *base, void *arg) | |
| 364 { | |
| 365 struct epollop *epollop = arg; | |
| 366 | |
| 367 evsignal_dealloc(base); | |
| 368 if (epollop->fds) | |
| 369 free(epollop->fds); | |
| 370 if (epollop->events) | |
| 371 free(epollop->events); | |
| 372 if (epollop->epfd >= 0) | |
| 373 close(epollop->epfd); | |
| 374 | |
| 375 memset(epollop, 0, sizeof(struct epollop)); | |
| 376 free(epollop); | |
| 377 } | |
| OLD | NEW |