blob: 140bd130dd5aaec87b8add74d1a98dff6d8aaf10 [file] [log] [blame]
/* $Id$
 */
/*
 * ioqueue_epoll.c
 *
 * This is the implementation of IOQueue framework using /dev/epoll
 * API in _both_ Linux user-mode and kernel-mode.
 */
9
10#include <pj/ioqueue.h>
11#include <pj/os.h>
12#include <pj/lock.h>
13#include <pj/log.h>
14#include <pj/list.h>
15#include <pj/pool.h>
16#include <pj/string.h>
17#include <pj/assert.h>
18#include <pj/errno.h>
19#include <pj/sock.h>
20#include <pj/compat/socket.h>
21
22#if !defined(PJ_LINUX_KERNEL) || PJ_LINUX_KERNEL==0
23 /*
24 * Linux user mode
25 */
26# include <sys/epoll.h>
27# include <errno.h>
28# include <unistd.h>
29
30# define epoll_data data.ptr
31# define epoll_data_type void*
Benny Prijonobc986152005-11-06 16:50:38 +000032# define ioctl_val_type unsigned long
Benny Prijonodd859a62005-11-01 16:42:51 +000033# define getsockopt_val_ptr int*
34# define os_getsockopt getsockopt
35# define os_ioctl ioctl
36# define os_read read
37# define os_close close
38# define os_epoll_create epoll_create
39# define os_epoll_ctl epoll_ctl
40# define os_epoll_wait epoll_wait
41#else
42 /*
43 * Linux kernel mode.
44 */
45# include <linux/config.h>
46# include <linux/version.h>
47# if defined(MODVERSIONS)
48# include <linux/modversions.h>
49# endif
50# include <linux/kernel.h>
51# include <linux/poll.h>
52# include <linux/eventpoll.h>
53# include <linux/syscalls.h>
54# include <linux/errno.h>
55# include <linux/unistd.h>
56# include <asm/ioctls.h>
57 enum EPOLL_EVENTS
58 {
59 EPOLLIN = 0x001,
60 EPOLLOUT = 0x004,
61 EPOLLERR = 0x008,
62 };
63# define os_epoll_create sys_epoll_create
64 static int os_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
65 {
66 long rc;
67 mm_segment_t oldfs = get_fs();
68 set_fs(KERNEL_DS);
69 rc = sys_epoll_ctl(epfd, op, fd, event);
70 set_fs(oldfs);
71 if (rc) {
72 errno = -rc;
73 return -1;
74 } else {
75 return 0;
76 }
77 }
78 static int os_epoll_wait(int epfd, struct epoll_event *events,
79 int maxevents, int timeout)
80 {
81 int count;
82 mm_segment_t oldfs = get_fs();
83 set_fs(KERNEL_DS);
84 count = sys_epoll_wait(epfd, events, maxevents, timeout);
85 set_fs(oldfs);
86 return count;
87 }
88# define os_close sys_close
89# define os_getsockopt pj_sock_getsockopt
90 static int os_read(int fd, void *buf, size_t len)
91 {
92 long rc;
93 mm_segment_t oldfs = get_fs();
94 set_fs(KERNEL_DS);
95 rc = sys_read(fd, buf, len);
96 set_fs(oldfs);
97 if (rc) {
98 errno = -rc;
99 return -1;
100 } else {
101 return 0;
102 }
103 }
104# define socklen_t unsigned
105# define ioctl_val_type unsigned long
106 int ioctl(int fd, int opt, ioctl_val_type value);
107 static int os_ioctl(int fd, int opt, ioctl_val_type value)
108 {
109 int rc;
110 mm_segment_t oldfs = get_fs();
111 set_fs(KERNEL_DS);
112 rc = ioctl(fd, opt, value);
113 set_fs(oldfs);
114 if (rc < 0) {
115 errno = -rc;
116 return rc;
117 } else
118 return rc;
119 }
120# define getsockopt_val_ptr char*
121
122# define epoll_data data
123# define epoll_data_type __u32
124#endif
125
126#define THIS_FILE "ioq_epoll"
127
Benny Prijonodd859a62005-11-01 16:42:51 +0000128//#define TRACE_(expr) PJ_LOG(3,expr)
129#define TRACE_(expr)
130
Benny Prijonobc986152005-11-06 16:50:38 +0000131/*
132 * Include common ioqueue abstraction.
133 */
134#include "ioqueue_common_abs.h"
Benny Prijonodd859a62005-11-01 16:42:51 +0000135
136/*
137 * This describes each key.
138 */
139struct pj_ioqueue_key_t
140{
Benny Prijonobc986152005-11-06 16:50:38 +0000141 DECLARE_COMMON_KEY
Benny Prijonodd859a62005-11-01 16:42:51 +0000142};
143
144/*
145 * This describes the I/O queue.
146 */
147struct pj_ioqueue_t
148{
Benny Prijonobc986152005-11-06 16:50:38 +0000149 DECLARE_COMMON_IOQUEUE
150
Benny Prijonodd859a62005-11-01 16:42:51 +0000151 unsigned max, count;
152 pj_ioqueue_key_t hlist;
153 int epfd;
154};
155
Benny Prijonobc986152005-11-06 16:50:38 +0000156/* Include implementation for common abstraction after we declare
157 * pj_ioqueue_key_t and pj_ioqueue_t.
158 */
159#include "ioqueue_common_abs.c"
160
Benny Prijonodd859a62005-11-01 16:42:51 +0000161/*
Benny Prijono40ce3fb2005-11-07 18:14:08 +0000162 * pj_ioqueue_name()
163 */
164PJ_DEF(const char*) pj_ioqueue_name(void)
165{
166#if defined(PJ_LINUX_KERNEL) && PJ_LINUX_KERNEL!=0
167 return "epoll-kernel";
168#else
169 return "epoll";
170#endif
171}
172
173/*
Benny Prijonodd859a62005-11-01 16:42:51 +0000174 * pj_ioqueue_create()
175 *
176 * Create select ioqueue.
177 */
178PJ_DEF(pj_status_t) pj_ioqueue_create( pj_pool_t *pool,
179 pj_size_t max_fd,
Benny Prijonodd859a62005-11-01 16:42:51 +0000180 pj_ioqueue_t **p_ioqueue)
181{
Benny Prijonobc986152005-11-06 16:50:38 +0000182 pj_ioqueue_t *ioqueue;
Benny Prijonodd859a62005-11-01 16:42:51 +0000183 pj_status_t rc;
Benny Prijonobc986152005-11-06 16:50:38 +0000184 pj_lock_t *lock;
Benny Prijonodd859a62005-11-01 16:42:51 +0000185
Benny Prijonobc986152005-11-06 16:50:38 +0000186 /* Check that arguments are valid. */
187 PJ_ASSERT_RETURN(pool != NULL && p_ioqueue != NULL &&
188 max_fd > 0, PJ_EINVAL);
Benny Prijonodd859a62005-11-01 16:42:51 +0000189
Benny Prijonobc986152005-11-06 16:50:38 +0000190 /* Check that size of pj_ioqueue_op_key_t is sufficient */
191 PJ_ASSERT_RETURN(sizeof(pj_ioqueue_op_key_t)-sizeof(void*) >=
192 sizeof(union operation_key), PJ_EBUG);
Benny Prijonodd859a62005-11-01 16:42:51 +0000193
Benny Prijonobc986152005-11-06 16:50:38 +0000194 ioqueue = pj_pool_alloc(pool, sizeof(pj_ioqueue_t));
Benny Prijonodd859a62005-11-01 16:42:51 +0000195
Benny Prijonobc986152005-11-06 16:50:38 +0000196 ioqueue_init(ioqueue);
197
198 ioqueue->max = max_fd;
199 ioqueue->count = 0;
200 pj_list_init(&ioqueue->hlist);
201
202 rc = pj_lock_create_simple_mutex(pool, "ioq%p", &lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000203 if (rc != PJ_SUCCESS)
204 return rc;
205
Benny Prijonobc986152005-11-06 16:50:38 +0000206 rc = pj_ioqueue_set_lock(ioqueue, lock, PJ_TRUE);
207 if (rc != PJ_SUCCESS)
208 return rc;
209
210 ioqueue->epfd = os_epoll_create(max_fd);
211 if (ioqueue->epfd < 0) {
212 ioqueue_destroy(ioqueue);
Benny Prijonodd859a62005-11-01 16:42:51 +0000213 return PJ_RETURN_OS_ERROR(pj_get_native_os_error());
214 }
215
Benny Prijonobc986152005-11-06 16:50:38 +0000216 PJ_LOG(4, ("pjlib", "epoll I/O Queue created (%p)", ioqueue));
Benny Prijonodd859a62005-11-01 16:42:51 +0000217
Benny Prijonobc986152005-11-06 16:50:38 +0000218 *p_ioqueue = ioqueue;
Benny Prijonodd859a62005-11-01 16:42:51 +0000219 return PJ_SUCCESS;
220}
221
222/*
223 * pj_ioqueue_destroy()
224 *
225 * Destroy ioqueue.
226 */
Benny Prijonobc986152005-11-06 16:50:38 +0000227PJ_DEF(pj_status_t) pj_ioqueue_destroy(pj_ioqueue_t *ioqueue)
Benny Prijonodd859a62005-11-01 16:42:51 +0000228{
Benny Prijonobc986152005-11-06 16:50:38 +0000229 PJ_ASSERT_RETURN(ioqueue, PJ_EINVAL);
230 PJ_ASSERT_RETURN(ioqueue->epfd > 0, PJ_EINVALIDOP);
Benny Prijonodd859a62005-11-01 16:42:51 +0000231
Benny Prijonobc986152005-11-06 16:50:38 +0000232 pj_lock_acquire(ioqueue->lock);
233 os_close(ioqueue->epfd);
234 ioqueue->epfd = 0;
235 return ioqueue_destroy(ioqueue);
Benny Prijonodd859a62005-11-01 16:42:51 +0000236}
237
238/*
Benny Prijonodd859a62005-11-01 16:42:51 +0000239 * pj_ioqueue_register_sock()
240 *
241 * Register a socket to ioqueue.
242 */
243PJ_DEF(pj_status_t) pj_ioqueue_register_sock( pj_pool_t *pool,
Benny Prijonobc986152005-11-06 16:50:38 +0000244 pj_ioqueue_t *ioqueue,
Benny Prijonodd859a62005-11-01 16:42:51 +0000245 pj_sock_t sock,
246 void *user_data,
247 const pj_ioqueue_callback *cb,
248 pj_ioqueue_key_t **p_key)
249{
250 pj_ioqueue_key_t *key = NULL;
251 pj_uint32_t value;
252 struct epoll_event ev;
253 int status;
254 pj_status_t rc = PJ_SUCCESS;
255
Benny Prijonobc986152005-11-06 16:50:38 +0000256 PJ_ASSERT_RETURN(pool && ioqueue && sock != PJ_INVALID_SOCKET &&
Benny Prijonodd859a62005-11-01 16:42:51 +0000257 cb && p_key, PJ_EINVAL);
258
Benny Prijonobc986152005-11-06 16:50:38 +0000259 pj_lock_acquire(ioqueue->lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000260
Benny Prijonobc986152005-11-06 16:50:38 +0000261 if (ioqueue->count >= ioqueue->max) {
Benny Prijonodd859a62005-11-01 16:42:51 +0000262 rc = PJ_ETOOMANY;
263 TRACE_((THIS_FILE, "pj_ioqueue_register_sock error: too many files"));
264 goto on_return;
265 }
266
267 /* Set socket to nonblocking. */
268 value = 1;
269 if ((rc=os_ioctl(sock, FIONBIO, (ioctl_val_type)&value))) {
270 TRACE_((THIS_FILE, "pj_ioqueue_register_sock error: ioctl rc=%d",
271 rc));
272 rc = pj_get_netos_error();
273 goto on_return;
274 }
275
276 /* Create key. */
277 key = (pj_ioqueue_key_t*)pj_pool_zalloc(pool, sizeof(pj_ioqueue_key_t));
Benny Prijonobc986152005-11-06 16:50:38 +0000278 rc = ioqueue_init_key(pool, ioqueue, key, sock, user_data, cb);
279 if (rc != PJ_SUCCESS) {
280 key = NULL;
281 goto on_return;
282 }
Benny Prijonodd859a62005-11-01 16:42:51 +0000283
284 /* os_epoll_ctl. */
285 ev.events = EPOLLIN | EPOLLOUT | EPOLLERR;
286 ev.epoll_data = (epoll_data_type)key;
Benny Prijonobc986152005-11-06 16:50:38 +0000287 status = os_epoll_ctl(ioqueue->epfd, EPOLL_CTL_ADD, sock, &ev);
Benny Prijonodd859a62005-11-01 16:42:51 +0000288 if (status < 0) {
289 rc = pj_get_os_error();
Benny Prijonobc986152005-11-06 16:50:38 +0000290 key = NULL;
Benny Prijonodd859a62005-11-01 16:42:51 +0000291 TRACE_((THIS_FILE,
292 "pj_ioqueue_register_sock error: os_epoll_ctl rc=%d",
293 status));
294 goto on_return;
295 }
296
297 /* Register */
Benny Prijonobc986152005-11-06 16:50:38 +0000298 pj_list_insert_before(&ioqueue->hlist, key);
299 ++ioqueue->count;
Benny Prijonodd859a62005-11-01 16:42:51 +0000300
301on_return:
302 *p_key = key;
Benny Prijonobc986152005-11-06 16:50:38 +0000303 pj_lock_release(ioqueue->lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000304
305 return rc;
306}
307
308/*
309 * pj_ioqueue_unregister()
310 *
311 * Unregister handle from ioqueue.
312 */
Benny Prijonobc986152005-11-06 16:50:38 +0000313PJ_DEF(pj_status_t) pj_ioqueue_unregister( pj_ioqueue_key_t *key)
Benny Prijonodd859a62005-11-01 16:42:51 +0000314{
Benny Prijonobc986152005-11-06 16:50:38 +0000315 pj_ioqueue_t *ioqueue;
Benny Prijonodd859a62005-11-01 16:42:51 +0000316 struct epoll_event ev;
317 int status;
318
Benny Prijonobc986152005-11-06 16:50:38 +0000319 PJ_ASSERT_RETURN(key != NULL, PJ_EINVAL);
Benny Prijonodd859a62005-11-01 16:42:51 +0000320
Benny Prijonobc986152005-11-06 16:50:38 +0000321 ioqueue = key->ioqueue;
322 pj_lock_acquire(ioqueue->lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000323
Benny Prijonobc986152005-11-06 16:50:38 +0000324 pj_assert(ioqueue->count > 0);
325 --ioqueue->count;
Benny Prijonodd859a62005-11-01 16:42:51 +0000326 pj_list_erase(key);
327
328 ev.events = 0;
329 ev.epoll_data = (epoll_data_type)key;
Benny Prijonobc986152005-11-06 16:50:38 +0000330 status = os_epoll_ctl( ioqueue->epfd, EPOLL_CTL_DEL, key->fd, &ev);
Benny Prijonodd859a62005-11-01 16:42:51 +0000331 if (status != 0) {
332 pj_status_t rc = pj_get_os_error();
Benny Prijonobc986152005-11-06 16:50:38 +0000333 pj_lock_release(ioqueue->lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000334 return rc;
335 }
336
Benny Prijonobc986152005-11-06 16:50:38 +0000337 pj_lock_release(ioqueue->lock);
338
339 /* Destroy the key. */
340 ioqueue_destroy_key(key);
341
Benny Prijonodd859a62005-11-01 16:42:51 +0000342 return PJ_SUCCESS;
343}
344
Benny Prijonobc986152005-11-06 16:50:38 +0000345/* ioqueue_remove_from_set()
346 * This function is called from ioqueue_dispatch_event() to instruct
347 * the ioqueue to remove the specified descriptor from ioqueue's descriptor
348 * set for the specified event.
Benny Prijonodd859a62005-11-01 16:42:51 +0000349 */
Benny Prijonobc986152005-11-06 16:50:38 +0000350static void ioqueue_remove_from_set( pj_ioqueue_t *ioqueue,
351 pj_sock_t fd,
352 enum ioqueue_event_type event_type)
Benny Prijonodd859a62005-11-01 16:42:51 +0000353{
Benny Prijonodd859a62005-11-01 16:42:51 +0000354}
355
Benny Prijonobc986152005-11-06 16:50:38 +0000356/*
357 * ioqueue_add_to_set()
358 * This function is called from pj_ioqueue_recv(), pj_ioqueue_send() etc
359 * to instruct the ioqueue to add the specified handle to ioqueue's descriptor
360 * set for the specified event.
361 */
362static void ioqueue_add_to_set( pj_ioqueue_t *ioqueue,
363 pj_sock_t fd,
364 enum ioqueue_event_type event_type )
365{
366}
Benny Prijonodd859a62005-11-01 16:42:51 +0000367
368/*
369 * pj_ioqueue_poll()
370 *
371 */
Benny Prijonobc986152005-11-06 16:50:38 +0000372PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout)
Benny Prijonodd859a62005-11-01 16:42:51 +0000373{
374 int i, count, processed;
Benny Prijonobc986152005-11-06 16:50:38 +0000375 struct epoll_event events[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL];
Benny Prijonodd859a62005-11-01 16:42:51 +0000376 int msec;
Benny Prijonobc986152005-11-06 16:50:38 +0000377 struct queue {
378 pj_ioqueue_key_t *key;
379 enum ioqueue_event_type event_type;
380 } queue[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL];
Benny Prijonodd859a62005-11-01 16:42:51 +0000381
382 PJ_CHECK_STACK();
383
384 msec = timeout ? PJ_TIME_VAL_MSEC(*timeout) : 9000;
385
Benny Prijonobc986152005-11-06 16:50:38 +0000386 count = os_epoll_wait( ioqueue->epfd, events, PJ_ARRAY_SIZE(events), msec);
Benny Prijonodd859a62005-11-01 16:42:51 +0000387 if (count <= 0)
388 return count;
389
390 /* Lock ioqueue. */
Benny Prijonobc986152005-11-06 16:50:38 +0000391 pj_lock_acquire(ioqueue->lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000392
Benny Prijonobc986152005-11-06 16:50:38 +0000393 for (processed=0, i=0; i<count; ++i) {
Benny Prijonodd859a62005-11-01 16:42:51 +0000394 pj_ioqueue_key_t *h = (pj_ioqueue_key_t*)(epoll_data_type)
395 events[i].epoll_data;
Benny Prijonodd859a62005-11-01 16:42:51 +0000396
397 /*
Benny Prijonobc986152005-11-06 16:50:38 +0000398 * Check readability.
Benny Prijonodd859a62005-11-01 16:42:51 +0000399 */
Benny Prijonobc986152005-11-06 16:50:38 +0000400 if ((events[i].events & EPOLLIN) &&
401 (key_has_pending_read(h) || key_has_pending_accept(h))) {
402 queue[processed].key = h;
403 queue[processed].event_type = READABLE_EVENT;
Benny Prijonodd859a62005-11-01 16:42:51 +0000404 ++processed;
405 }
406
407 /*
Benny Prijonobc986152005-11-06 16:50:38 +0000408 * Check for writeability.
Benny Prijonodd859a62005-11-01 16:42:51 +0000409 */
Benny Prijonobc986152005-11-06 16:50:38 +0000410 if ((events[i].events & EPOLLOUT) && key_has_pending_write(h)) {
411 queue[processed].key = h;
412 queue[processed].event_type = WRITEABLE_EVENT;
Benny Prijonodd859a62005-11-01 16:42:51 +0000413 ++processed;
414 }
Benny Prijonobc986152005-11-06 16:50:38 +0000415
Benny Prijonodd859a62005-11-01 16:42:51 +0000416#if PJ_HAS_TCP
417 /*
418 * Check for completion of connect() operation.
419 */
Benny Prijonobc986152005-11-06 16:50:38 +0000420 if ((events[i].events & EPOLLOUT) && (h->connecting)) {
421 queue[processed].key = h;
422 queue[processed].event_type = WRITEABLE_EVENT;
Benny Prijonodd859a62005-11-01 16:42:51 +0000423 ++processed;
424 }
425#endif /* PJ_HAS_TCP */
426
427 /*
428 * Check for error condition.
429 */
Benny Prijonobc986152005-11-06 16:50:38 +0000430 if (events[i].events & EPOLLERR && (h->connecting)) {
431 queue[processed].key = h;
432 queue[processed].event_type = EXCEPTION_EVENT;
433 ++processed;
Benny Prijonodd859a62005-11-01 16:42:51 +0000434 }
435 }
Benny Prijonobc986152005-11-06 16:50:38 +0000436 pj_lock_release(ioqueue->lock);
437
438 /* Now process the events. */
439 for (i=0; i<processed; ++i) {
440 switch (queue[i].event_type) {
441 case READABLE_EVENT:
442 ioqueue_dispatch_read_event(ioqueue, queue[i].key);
443 break;
444 case WRITEABLE_EVENT:
445 ioqueue_dispatch_write_event(ioqueue, queue[i].key);
446 break;
447 case EXCEPTION_EVENT:
448 ioqueue_dispatch_exception_event(ioqueue, queue[i].key);
449 break;
450 case NO_EVENT:
451 pj_assert(!"Invalid event!");
452 break;
453 }
454 }
Benny Prijonodd859a62005-11-01 16:42:51 +0000455
456 return processed;
457}
458