OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu> | |
3 * All rights reserved. | |
4 * | |
5 * Redistribution and use in source and binary forms, with or without | |
6 * modification, are permitted provided that the following conditions | |
7 * are met: | |
8 * 1. Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * 2. Redistributions in binary form must reproduce the above copyright | |
11 * notice, this list of conditions and the following disclaimer in the | |
12 * documentation and/or other materials provided with the distribution. | |
13 * 3. The name of the author may not be used to endorse or promote products | |
14 * derived from this software without specific prior written permission. | |
15 * | |
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
26 */ | |
27 #ifdef HAVE_CONFIG_H | |
28 #include "config.h" | |
29 #endif | |
30 | |
31 #include <stdint.h> | |
32 #include <sys/types.h> | |
33 #include <sys/resource.h> | |
34 #ifdef HAVE_SYS_TIME_H | |
35 #include <sys/time.h> | |
36 #else | |
37 #include <sys/_libevent_time.h> | |
38 #endif | |
39 #include <sys/queue.h> | |
40 #include <sys/epoll.h> | |
41 #include <signal.h> | |
42 #include <stdio.h> | |
43 #include <stdlib.h> | |
44 #include <string.h> | |
45 #include <unistd.h> | |
46 #include <errno.h> | |
47 #ifdef HAVE_FCNTL_H | |
48 #include <fcntl.h> | |
49 #endif | |
50 | |
51 #include "event.h" | |
52 #include "event-internal.h" | |
53 #include "evsignal.h" | |
54 #include "log.h" | |
55 | |
/*
 * Due to limitations in the epoll interface, we need to keep track of
 * all file descriptors ourselves.
 *
 * One of these records exists per file descriptor slot; the backend
 * indexes the table by descriptor number.
 */
struct evepoll {
	struct event *evread;	/* event registered for reading, or NULL */
	struct event *evwrite;	/* event registered for writing, or NULL */
};
63 | |
/* Private state of the epoll backend, created by epoll_init(). */
struct epollop {
	struct evepoll *fds;		/* per-descriptor table, indexed by fd */
	int nfds;			/* number of slots allocated in fds */
	struct epoll_event *events;	/* result buffer handed to epoll_wait() */
	int nevents;			/* capacity of the events buffer */
	int epfd;			/* descriptor returned by epoll_create() */
};
71 | |
72 static void *epoll_init (struct event_base *); | |
73 static int epoll_add (void *, struct event *); | |
74 static int epoll_del (void *, struct event *); | |
75 static int epoll_dispatch (struct event_base *, void *, struct timeval *); | |
76 static void epoll_dealloc (struct event_base *, void *); | |
77 | |
78 const struct eventop epollops = { | |
79 "epoll", | |
80 epoll_init, | |
81 epoll_add, | |
82 epoll_del, | |
83 epoll_dispatch, | |
84 epoll_dealloc, | |
85 1 /* need reinit */ | |
86 }; | |
87 | |
/*
 * Mark descriptor x close-on-exec, warning (but not failing) if fcntl()
 * reports an error.  The flag value 1 is presumably FD_CLOEXEC; the
 * literal is kept because <fcntl.h> is only included conditionally.
 * Expands to a no-op when HAVE_SETFD is not defined.
 */
#ifdef HAVE_SETFD
#define FD_CLOSEONEXEC(x) do {					\
	if (fcntl((x), F_SETFD, 1) == -1)			\
		event_warn("fcntl(%d, F_SETFD)", (x));		\
} while (0)
#else
#define FD_CLOSEONEXEC(x) ((void)0)
#endif
96 | |
/*
 * On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
 * values bigger than (LONG_MAX - 999ULL)/HZ.  HZ in the wild can be
 * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
 * largest number of msec we can support here is 2147482.  Let's
 * round that down by 47 seconds, which gives 35 minutes.
 */
#define MAX_EPOLL_TIMEOUT_MSEC	(35 * 60 * 1000)

/* Initial number of slots in the per-descriptor table. */
#define INITIAL_NFILES		32
/* Initial capacity of the epoll_wait() result buffer. */
#define INITIAL_NEVENTS		32
/* The result buffer stops doubling once it has reached this capacity. */
#define MAX_NEVENTS		4096
108 | |
109 static void * | |
110 epoll_init(struct event_base *base) | |
111 { | |
112 int epfd; | |
113 struct epollop *epollop; | |
114 | |
115 /* Disable epollueue when this environment variable is set */ | |
116 if (evutil_getenv("EVENT_NOEPOLL")) | |
117 return (NULL); | |
118 | |
119 /* Initalize the kernel queue */ | |
120 if ((epfd = epoll_create(32000)) == -1) { | |
121 if (errno != ENOSYS) | |
122 event_warn("epoll_create"); | |
123 return (NULL); | |
124 } | |
125 | |
126 FD_CLOSEONEXEC(epfd); | |
127 | |
128 if (!(epollop = calloc(1, sizeof(struct epollop)))) | |
129 return (NULL); | |
130 | |
131 epollop->epfd = epfd; | |
132 | |
133 /* Initalize fields */ | |
134 epollop->events = malloc(INITIAL_NEVENTS * sizeof(struct epoll_event)); | |
135 if (epollop->events == NULL) { | |
136 free(epollop); | |
137 return (NULL); | |
138 } | |
139 epollop->nevents = INITIAL_NEVENTS; | |
140 | |
141 epollop->fds = calloc(INITIAL_NFILES, sizeof(struct evepoll)); | |
142 if (epollop->fds == NULL) { | |
143 free(epollop->events); | |
144 free(epollop); | |
145 return (NULL); | |
146 } | |
147 epollop->nfds = INITIAL_NFILES; | |
148 | |
149 evsignal_init(base); | |
150 | |
151 return (epollop); | |
152 } | |
153 | |
154 static int | |
155 epoll_recalc(struct event_base *base, void *arg, int max) | |
156 { | |
157 struct epollop *epollop = arg; | |
158 | |
159 if (max >= epollop->nfds) { | |
160 struct evepoll *fds; | |
161 int nfds; | |
162 | |
163 nfds = epollop->nfds; | |
164 while (nfds <= max) | |
165 nfds <<= 1; | |
166 | |
167 fds = realloc(epollop->fds, nfds * sizeof(struct evepoll)); | |
168 if (fds == NULL) { | |
169 event_warn("realloc"); | |
170 return (-1); | |
171 } | |
172 epollop->fds = fds; | |
173 memset(fds + epollop->nfds, 0, | |
174 (nfds - epollop->nfds) * sizeof(struct evepoll)); | |
175 epollop->nfds = nfds; | |
176 } | |
177 | |
178 return (0); | |
179 } | |
180 | |
181 static int | |
182 epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv) | |
183 { | |
184 struct epollop *epollop = arg; | |
185 struct epoll_event *events = epollop->events; | |
186 struct evepoll *evep; | |
187 int i, res, timeout = -1; | |
188 | |
189 if (tv != NULL) | |
190 timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000; | |
191 | |
192 if (timeout > MAX_EPOLL_TIMEOUT_MSEC) { | |
193 /* Linux kernels can wait forever if the timeout is too big; | |
194 * see comment on MAX_EPOLL_TIMEOUT_MSEC. */ | |
195 timeout = MAX_EPOLL_TIMEOUT_MSEC; | |
196 } | |
197 | |
198 res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout); | |
199 | |
200 if (res == -1) { | |
201 if (errno != EINTR) { | |
202 event_warn("epoll_wait"); | |
203 return (-1); | |
204 } | |
205 | |
206 evsignal_process(base); | |
207 return (0); | |
208 } else if (base->sig.evsignal_caught) { | |
209 evsignal_process(base); | |
210 } | |
211 | |
212 event_debug(("%s: epoll_wait reports %d", __func__, res)); | |
213 | |
214 for (i = 0; i < res; i++) { | |
215 int what = events[i].events; | |
216 struct event *evread = NULL, *evwrite = NULL; | |
217 int fd = events[i].data.fd; | |
218 | |
219 if (fd < 0 || fd >= epollop->nfds) | |
220 continue; | |
221 evep = &epollop->fds[fd]; | |
222 | |
223 if (what & (EPOLLHUP|EPOLLERR)) { | |
224 evread = evep->evread; | |
225 evwrite = evep->evwrite; | |
226 } else { | |
227 if (what & EPOLLIN) { | |
228 evread = evep->evread; | |
229 } | |
230 | |
231 if (what & EPOLLOUT) { | |
232 evwrite = evep->evwrite; | |
233 } | |
234 } | |
235 | |
236 if (!(evread||evwrite)) | |
237 continue; | |
238 | |
239 if (evread != NULL) | |
240 event_active(evread, EV_READ, 1); | |
241 if (evwrite != NULL) | |
242 event_active(evwrite, EV_WRITE, 1); | |
243 } | |
244 | |
245 if (res == epollop->nevents && epollop->nevents < MAX_NEVENTS) { | |
246 /* We used all of the event space this time. We should | |
247 be ready for more events next time. */ | |
248 int new_nevents = epollop->nevents * 2; | |
249 struct epoll_event *new_events; | |
250 | |
251 new_events = realloc(epollop->events, | |
252 new_nevents * sizeof(struct epoll_event)); | |
253 if (new_events) { | |
254 epollop->events = new_events; | |
255 epollop->nevents = new_nevents; | |
256 } | |
257 } | |
258 | |
259 return (0); | |
260 } | |
261 | |
262 | |
263 static int | |
264 epoll_add(void *arg, struct event *ev) | |
265 { | |
266 struct epollop *epollop = arg; | |
267 struct epoll_event epev = {0, {0}}; | |
268 struct evepoll *evep; | |
269 int fd, op, events; | |
270 | |
271 if (ev->ev_events & EV_SIGNAL) | |
272 return (evsignal_add(ev)); | |
273 | |
274 fd = ev->ev_fd; | |
275 if (fd >= epollop->nfds) { | |
276 /* Extent the file descriptor array as necessary */ | |
277 if (epoll_recalc(ev->ev_base, epollop, fd) == -1) | |
278 return (-1); | |
279 } | |
280 evep = &epollop->fds[fd]; | |
281 op = EPOLL_CTL_ADD; | |
282 events = 0; | |
283 if (evep->evread != NULL) { | |
284 events |= EPOLLIN; | |
285 op = EPOLL_CTL_MOD; | |
286 } | |
287 if (evep->evwrite != NULL) { | |
288 events |= EPOLLOUT; | |
289 op = EPOLL_CTL_MOD; | |
290 } | |
291 | |
292 if (ev->ev_events & EV_READ) | |
293 events |= EPOLLIN; | |
294 if (ev->ev_events & EV_WRITE) | |
295 events |= EPOLLOUT; | |
296 | |
297 epev.data.fd = fd; | |
298 epev.events = events; | |
299 if (epoll_ctl(epollop->epfd, op, ev->ev_fd, &epev) == -1) | |
300 return (-1); | |
301 | |
302 /* Update events responsible */ | |
303 if (ev->ev_events & EV_READ) | |
304 evep->evread = ev; | |
305 if (ev->ev_events & EV_WRITE) | |
306 evep->evwrite = ev; | |
307 | |
308 return (0); | |
309 } | |
310 | |
311 static int | |
312 epoll_del(void *arg, struct event *ev) | |
313 { | |
314 struct epollop *epollop = arg; | |
315 struct epoll_event epev = {0, {0}}; | |
316 struct evepoll *evep; | |
317 int fd, events, op; | |
318 int needwritedelete = 1, needreaddelete = 1; | |
319 | |
320 if (ev->ev_events & EV_SIGNAL) | |
321 return (evsignal_del(ev)); | |
322 | |
323 fd = ev->ev_fd; | |
324 if (fd >= epollop->nfds) | |
325 return (0); | |
326 evep = &epollop->fds[fd]; | |
327 | |
328 op = EPOLL_CTL_DEL; | |
329 events = 0; | |
330 | |
331 if (ev->ev_events & EV_READ) | |
332 events |= EPOLLIN; | |
333 if (ev->ev_events & EV_WRITE) | |
334 events |= EPOLLOUT; | |
335 | |
336 if ((events & (EPOLLIN|EPOLLOUT)) != (EPOLLIN|EPOLLOUT)) { | |
337 if ((events & EPOLLIN) && evep->evwrite != NULL) { | |
338 needwritedelete = 0; | |
339 events = EPOLLOUT; | |
340 op = EPOLL_CTL_MOD; | |
341 } else if ((events & EPOLLOUT) && evep->evread != NULL) { | |
342 needreaddelete = 0; | |
343 events = EPOLLIN; | |
344 op = EPOLL_CTL_MOD; | |
345 } | |
346 } | |
347 | |
348 epev.events = events; | |
349 epev.data.fd = fd; | |
350 | |
351 if (needreaddelete) | |
352 evep->evread = NULL; | |
353 if (needwritedelete) | |
354 evep->evwrite = NULL; | |
355 | |
356 if (epoll_ctl(epollop->epfd, op, fd, &epev) == -1) | |
357 return (-1); | |
358 | |
359 return (0); | |
360 } | |
361 | |
362 static void | |
363 epoll_dealloc(struct event_base *base, void *arg) | |
364 { | |
365 struct epollop *epollop = arg; | |
366 | |
367 evsignal_dealloc(base); | |
368 if (epollop->fds) | |
369 free(epollop->fds); | |
370 if (epollop->events) | |
371 free(epollop->events); | |
372 if (epollop->epfd >= 0) | |
373 close(epollop->epfd); | |
374 | |
375 memset(epollop, 0, sizeof(struct epollop)); | |
376 free(epollop); | |
377 } | |
OLD | NEW |