blob: aa01253127609b59e32b0e826ceab66cf78e6e19 [file] [log] [blame]
Benny Prijono4766ffe2005-11-01 17:56:59 +00001/* $Id$
Benny Prijonodd859a62005-11-01 16:42:51 +00002 */
Benny Prijonodd859a62005-11-01 16:42:51 +00003/*
4 * ioqueue_epoll.c
5 *
6 * This is the implementation of IOQueue framework using /dev/epoll
7 * API in _both_ Linux user-mode and kernel-mode.
8 */
9
10#include <pj/ioqueue.h>
11#include <pj/os.h>
12#include <pj/lock.h>
13#include <pj/log.h>
14#include <pj/list.h>
15#include <pj/pool.h>
16#include <pj/string.h>
17#include <pj/assert.h>
18#include <pj/errno.h>
19#include <pj/sock.h>
20#include <pj/compat/socket.h>
21
22#if !defined(PJ_LINUX_KERNEL) || PJ_LINUX_KERNEL==0
23 /*
24 * Linux user mode
25 */
26# include <sys/epoll.h>
27# include <errno.h>
28# include <unistd.h>
29
30# define epoll_data data.ptr
31# define epoll_data_type void*
Benny Prijonobc986152005-11-06 16:50:38 +000032# define ioctl_val_type unsigned long
Benny Prijonodd859a62005-11-01 16:42:51 +000033# define getsockopt_val_ptr int*
34# define os_getsockopt getsockopt
35# define os_ioctl ioctl
36# define os_read read
37# define os_close close
38# define os_epoll_create epoll_create
39# define os_epoll_ctl epoll_ctl
40# define os_epoll_wait epoll_wait
41#else
42 /*
43 * Linux kernel mode.
44 */
45# include <linux/config.h>
46# include <linux/version.h>
47# if defined(MODVERSIONS)
48# include <linux/modversions.h>
49# endif
50# include <linux/kernel.h>
51# include <linux/poll.h>
52# include <linux/eventpoll.h>
53# include <linux/syscalls.h>
54# include <linux/errno.h>
55# include <linux/unistd.h>
56# include <asm/ioctls.h>
/*
 * Event bits used by the kernel-mode build of this file.
 *
 * These are the three bits this file passes in epoll_event.events when
 * registering a socket and tests again when polling.
 */
enum EPOLL_EVENTS
{
    EPOLLIN  = 0x001,   /* tested by pj_ioqueue_poll() for read/accept    */
    EPOLLOUT = 0x004,   /* tested by pj_ioqueue_poll() for write/connect  */
    EPOLLERR = 0x008    /* tested by pj_ioqueue_poll() for error          */
};
63# define os_epoll_create sys_epoll_create
64 static int os_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
65 {
66 long rc;
67 mm_segment_t oldfs = get_fs();
68 set_fs(KERNEL_DS);
69 rc = sys_epoll_ctl(epfd, op, fd, event);
70 set_fs(oldfs);
71 if (rc) {
72 errno = -rc;
73 return -1;
74 } else {
75 return 0;
76 }
77 }
78 static int os_epoll_wait(int epfd, struct epoll_event *events,
79 int maxevents, int timeout)
80 {
81 int count;
82 mm_segment_t oldfs = get_fs();
83 set_fs(KERNEL_DS);
84 count = sys_epoll_wait(epfd, events, maxevents, timeout);
85 set_fs(oldfs);
86 return count;
87 }
88# define os_close sys_close
89# define os_getsockopt pj_sock_getsockopt
90 static int os_read(int fd, void *buf, size_t len)
91 {
92 long rc;
93 mm_segment_t oldfs = get_fs();
94 set_fs(KERNEL_DS);
95 rc = sys_read(fd, buf, len);
96 set_fs(oldfs);
97 if (rc) {
98 errno = -rc;
99 return -1;
100 } else {
101 return 0;
102 }
103 }
104# define socklen_t unsigned
105# define ioctl_val_type unsigned long
106 int ioctl(int fd, int opt, ioctl_val_type value);
107 static int os_ioctl(int fd, int opt, ioctl_val_type value)
108 {
109 int rc;
110 mm_segment_t oldfs = get_fs();
111 set_fs(KERNEL_DS);
112 rc = ioctl(fd, opt, value);
113 set_fs(oldfs);
114 if (rc < 0) {
115 errno = -rc;
116 return rc;
117 } else
118 return rc;
119 }
120# define getsockopt_val_ptr char*
121
122# define epoll_data data
123# define epoll_data_type __u32
124#endif
125
126#define THIS_FILE "ioq_epoll"
127
Benny Prijonodd859a62005-11-01 16:42:51 +0000128//#define TRACE_(expr) PJ_LOG(3,expr)
129#define TRACE_(expr)
130
Benny Prijonobc986152005-11-06 16:50:38 +0000131/*
132 * Include common ioqueue abstraction.
133 */
134#include "ioqueue_common_abs.h"
Benny Prijonodd859a62005-11-01 16:42:51 +0000135
136/*
137 * This describes each key.
138 */
139struct pj_ioqueue_key_t
140{
Benny Prijonobc986152005-11-06 16:50:38 +0000141 DECLARE_COMMON_KEY
Benny Prijonodd859a62005-11-01 16:42:51 +0000142};
143
144/*
145 * This describes the I/O queue.
146 */
147struct pj_ioqueue_t
148{
Benny Prijonobc986152005-11-06 16:50:38 +0000149 DECLARE_COMMON_IOQUEUE
150
Benny Prijonodd859a62005-11-01 16:42:51 +0000151 unsigned max, count;
152 pj_ioqueue_key_t hlist;
153 int epfd;
154};
155
Benny Prijonobc986152005-11-06 16:50:38 +0000156/* Include implementation for common abstraction after we declare
157 * pj_ioqueue_key_t and pj_ioqueue_t.
158 */
159#include "ioqueue_common_abs.c"
160
Benny Prijonodd859a62005-11-01 16:42:51 +0000161/*
162 * pj_ioqueue_create()
163 *
164 * Create select ioqueue.
165 */
166PJ_DEF(pj_status_t) pj_ioqueue_create( pj_pool_t *pool,
167 pj_size_t max_fd,
Benny Prijonodd859a62005-11-01 16:42:51 +0000168 pj_ioqueue_t **p_ioqueue)
169{
Benny Prijonobc986152005-11-06 16:50:38 +0000170 pj_ioqueue_t *ioqueue;
Benny Prijonodd859a62005-11-01 16:42:51 +0000171 pj_status_t rc;
Benny Prijonobc986152005-11-06 16:50:38 +0000172 pj_lock_t *lock;
Benny Prijonodd859a62005-11-01 16:42:51 +0000173
Benny Prijonobc986152005-11-06 16:50:38 +0000174 /* Check that arguments are valid. */
175 PJ_ASSERT_RETURN(pool != NULL && p_ioqueue != NULL &&
176 max_fd > 0, PJ_EINVAL);
Benny Prijonodd859a62005-11-01 16:42:51 +0000177
Benny Prijonobc986152005-11-06 16:50:38 +0000178 /* Check that size of pj_ioqueue_op_key_t is sufficient */
179 PJ_ASSERT_RETURN(sizeof(pj_ioqueue_op_key_t)-sizeof(void*) >=
180 sizeof(union operation_key), PJ_EBUG);
Benny Prijonodd859a62005-11-01 16:42:51 +0000181
Benny Prijonobc986152005-11-06 16:50:38 +0000182 ioqueue = pj_pool_alloc(pool, sizeof(pj_ioqueue_t));
Benny Prijonodd859a62005-11-01 16:42:51 +0000183
Benny Prijonobc986152005-11-06 16:50:38 +0000184 ioqueue_init(ioqueue);
185
186 ioqueue->max = max_fd;
187 ioqueue->count = 0;
188 pj_list_init(&ioqueue->hlist);
189
190 rc = pj_lock_create_simple_mutex(pool, "ioq%p", &lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000191 if (rc != PJ_SUCCESS)
192 return rc;
193
Benny Prijonobc986152005-11-06 16:50:38 +0000194 rc = pj_ioqueue_set_lock(ioqueue, lock, PJ_TRUE);
195 if (rc != PJ_SUCCESS)
196 return rc;
197
198 ioqueue->epfd = os_epoll_create(max_fd);
199 if (ioqueue->epfd < 0) {
200 ioqueue_destroy(ioqueue);
Benny Prijonodd859a62005-11-01 16:42:51 +0000201 return PJ_RETURN_OS_ERROR(pj_get_native_os_error());
202 }
203
Benny Prijonobc986152005-11-06 16:50:38 +0000204 PJ_LOG(4, ("pjlib", "epoll I/O Queue created (%p)", ioqueue));
Benny Prijonodd859a62005-11-01 16:42:51 +0000205
Benny Prijonobc986152005-11-06 16:50:38 +0000206 *p_ioqueue = ioqueue;
Benny Prijonodd859a62005-11-01 16:42:51 +0000207 return PJ_SUCCESS;
208}
209
210/*
211 * pj_ioqueue_destroy()
212 *
213 * Destroy ioqueue.
214 */
Benny Prijonobc986152005-11-06 16:50:38 +0000215PJ_DEF(pj_status_t) pj_ioqueue_destroy(pj_ioqueue_t *ioqueue)
Benny Prijonodd859a62005-11-01 16:42:51 +0000216{
Benny Prijonobc986152005-11-06 16:50:38 +0000217 PJ_ASSERT_RETURN(ioqueue, PJ_EINVAL);
218 PJ_ASSERT_RETURN(ioqueue->epfd > 0, PJ_EINVALIDOP);
Benny Prijonodd859a62005-11-01 16:42:51 +0000219
Benny Prijonobc986152005-11-06 16:50:38 +0000220 pj_lock_acquire(ioqueue->lock);
221 os_close(ioqueue->epfd);
222 ioqueue->epfd = 0;
223 return ioqueue_destroy(ioqueue);
Benny Prijonodd859a62005-11-01 16:42:51 +0000224}
225
226/*
Benny Prijonodd859a62005-11-01 16:42:51 +0000227 * pj_ioqueue_register_sock()
228 *
229 * Register a socket to ioqueue.
230 */
231PJ_DEF(pj_status_t) pj_ioqueue_register_sock( pj_pool_t *pool,
Benny Prijonobc986152005-11-06 16:50:38 +0000232 pj_ioqueue_t *ioqueue,
Benny Prijonodd859a62005-11-01 16:42:51 +0000233 pj_sock_t sock,
234 void *user_data,
235 const pj_ioqueue_callback *cb,
236 pj_ioqueue_key_t **p_key)
237{
238 pj_ioqueue_key_t *key = NULL;
239 pj_uint32_t value;
240 struct epoll_event ev;
241 int status;
242 pj_status_t rc = PJ_SUCCESS;
243
Benny Prijonobc986152005-11-06 16:50:38 +0000244 PJ_ASSERT_RETURN(pool && ioqueue && sock != PJ_INVALID_SOCKET &&
Benny Prijonodd859a62005-11-01 16:42:51 +0000245 cb && p_key, PJ_EINVAL);
246
Benny Prijonobc986152005-11-06 16:50:38 +0000247 pj_lock_acquire(ioqueue->lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000248
Benny Prijonobc986152005-11-06 16:50:38 +0000249 if (ioqueue->count >= ioqueue->max) {
Benny Prijonodd859a62005-11-01 16:42:51 +0000250 rc = PJ_ETOOMANY;
251 TRACE_((THIS_FILE, "pj_ioqueue_register_sock error: too many files"));
252 goto on_return;
253 }
254
255 /* Set socket to nonblocking. */
256 value = 1;
257 if ((rc=os_ioctl(sock, FIONBIO, (ioctl_val_type)&value))) {
258 TRACE_((THIS_FILE, "pj_ioqueue_register_sock error: ioctl rc=%d",
259 rc));
260 rc = pj_get_netos_error();
261 goto on_return;
262 }
263
264 /* Create key. */
265 key = (pj_ioqueue_key_t*)pj_pool_zalloc(pool, sizeof(pj_ioqueue_key_t));
Benny Prijonobc986152005-11-06 16:50:38 +0000266 rc = ioqueue_init_key(pool, ioqueue, key, sock, user_data, cb);
267 if (rc != PJ_SUCCESS) {
268 key = NULL;
269 goto on_return;
270 }
Benny Prijonodd859a62005-11-01 16:42:51 +0000271
272 /* os_epoll_ctl. */
273 ev.events = EPOLLIN | EPOLLOUT | EPOLLERR;
274 ev.epoll_data = (epoll_data_type)key;
Benny Prijonobc986152005-11-06 16:50:38 +0000275 status = os_epoll_ctl(ioqueue->epfd, EPOLL_CTL_ADD, sock, &ev);
Benny Prijonodd859a62005-11-01 16:42:51 +0000276 if (status < 0) {
277 rc = pj_get_os_error();
Benny Prijonobc986152005-11-06 16:50:38 +0000278 key = NULL;
Benny Prijonodd859a62005-11-01 16:42:51 +0000279 TRACE_((THIS_FILE,
280 "pj_ioqueue_register_sock error: os_epoll_ctl rc=%d",
281 status));
282 goto on_return;
283 }
284
285 /* Register */
Benny Prijonobc986152005-11-06 16:50:38 +0000286 pj_list_insert_before(&ioqueue->hlist, key);
287 ++ioqueue->count;
Benny Prijonodd859a62005-11-01 16:42:51 +0000288
289on_return:
290 *p_key = key;
Benny Prijonobc986152005-11-06 16:50:38 +0000291 pj_lock_release(ioqueue->lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000292
293 return rc;
294}
295
296/*
297 * pj_ioqueue_unregister()
298 *
299 * Unregister handle from ioqueue.
300 */
Benny Prijonobc986152005-11-06 16:50:38 +0000301PJ_DEF(pj_status_t) pj_ioqueue_unregister( pj_ioqueue_key_t *key)
Benny Prijonodd859a62005-11-01 16:42:51 +0000302{
Benny Prijonobc986152005-11-06 16:50:38 +0000303 pj_ioqueue_t *ioqueue;
Benny Prijonodd859a62005-11-01 16:42:51 +0000304 struct epoll_event ev;
305 int status;
306
Benny Prijonobc986152005-11-06 16:50:38 +0000307 PJ_ASSERT_RETURN(key != NULL, PJ_EINVAL);
Benny Prijonodd859a62005-11-01 16:42:51 +0000308
Benny Prijonobc986152005-11-06 16:50:38 +0000309 ioqueue = key->ioqueue;
310 pj_lock_acquire(ioqueue->lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000311
Benny Prijonobc986152005-11-06 16:50:38 +0000312 pj_assert(ioqueue->count > 0);
313 --ioqueue->count;
Benny Prijonodd859a62005-11-01 16:42:51 +0000314 pj_list_erase(key);
315
316 ev.events = 0;
317 ev.epoll_data = (epoll_data_type)key;
Benny Prijonobc986152005-11-06 16:50:38 +0000318 status = os_epoll_ctl( ioqueue->epfd, EPOLL_CTL_DEL, key->fd, &ev);
Benny Prijonodd859a62005-11-01 16:42:51 +0000319 if (status != 0) {
320 pj_status_t rc = pj_get_os_error();
Benny Prijonobc986152005-11-06 16:50:38 +0000321 pj_lock_release(ioqueue->lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000322 return rc;
323 }
324
Benny Prijonobc986152005-11-06 16:50:38 +0000325 pj_lock_release(ioqueue->lock);
326
327 /* Destroy the key. */
328 ioqueue_destroy_key(key);
329
Benny Prijonodd859a62005-11-01 16:42:51 +0000330 return PJ_SUCCESS;
331}
332
Benny Prijonobc986152005-11-06 16:50:38 +0000333/* ioqueue_remove_from_set()
334 * This function is called from ioqueue_dispatch_event() to instruct
335 * the ioqueue to remove the specified descriptor from ioqueue's descriptor
336 * set for the specified event.
Benny Prijonodd859a62005-11-01 16:42:51 +0000337 */
Benny Prijonobc986152005-11-06 16:50:38 +0000338static void ioqueue_remove_from_set( pj_ioqueue_t *ioqueue,
339 pj_sock_t fd,
340 enum ioqueue_event_type event_type)
Benny Prijonodd859a62005-11-01 16:42:51 +0000341{
Benny Prijonodd859a62005-11-01 16:42:51 +0000342}
343
Benny Prijonobc986152005-11-06 16:50:38 +0000344/*
345 * ioqueue_add_to_set()
346 * This function is called from pj_ioqueue_recv(), pj_ioqueue_send() etc
347 * to instruct the ioqueue to add the specified handle to ioqueue's descriptor
348 * set for the specified event.
349 */
350static void ioqueue_add_to_set( pj_ioqueue_t *ioqueue,
351 pj_sock_t fd,
352 enum ioqueue_event_type event_type )
353{
354}
Benny Prijonodd859a62005-11-01 16:42:51 +0000355
356/*
357 * pj_ioqueue_poll()
358 *
359 */
Benny Prijonobc986152005-11-06 16:50:38 +0000360PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout)
Benny Prijonodd859a62005-11-01 16:42:51 +0000361{
362 int i, count, processed;
Benny Prijonobc986152005-11-06 16:50:38 +0000363 struct epoll_event events[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL];
Benny Prijonodd859a62005-11-01 16:42:51 +0000364 int msec;
Benny Prijonobc986152005-11-06 16:50:38 +0000365 struct queue {
366 pj_ioqueue_key_t *key;
367 enum ioqueue_event_type event_type;
368 } queue[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL];
Benny Prijonodd859a62005-11-01 16:42:51 +0000369
370 PJ_CHECK_STACK();
371
372 msec = timeout ? PJ_TIME_VAL_MSEC(*timeout) : 9000;
373
Benny Prijonobc986152005-11-06 16:50:38 +0000374 count = os_epoll_wait( ioqueue->epfd, events, PJ_ARRAY_SIZE(events), msec);
Benny Prijonodd859a62005-11-01 16:42:51 +0000375 if (count <= 0)
376 return count;
377
378 /* Lock ioqueue. */
Benny Prijonobc986152005-11-06 16:50:38 +0000379 pj_lock_acquire(ioqueue->lock);
Benny Prijonodd859a62005-11-01 16:42:51 +0000380
Benny Prijonobc986152005-11-06 16:50:38 +0000381 for (processed=0, i=0; i<count; ++i) {
Benny Prijonodd859a62005-11-01 16:42:51 +0000382 pj_ioqueue_key_t *h = (pj_ioqueue_key_t*)(epoll_data_type)
383 events[i].epoll_data;
Benny Prijonodd859a62005-11-01 16:42:51 +0000384
385 /*
Benny Prijonobc986152005-11-06 16:50:38 +0000386 * Check readability.
Benny Prijonodd859a62005-11-01 16:42:51 +0000387 */
Benny Prijonobc986152005-11-06 16:50:38 +0000388 if ((events[i].events & EPOLLIN) &&
389 (key_has_pending_read(h) || key_has_pending_accept(h))) {
390 queue[processed].key = h;
391 queue[processed].event_type = READABLE_EVENT;
Benny Prijonodd859a62005-11-01 16:42:51 +0000392 ++processed;
393 }
394
395 /*
Benny Prijonobc986152005-11-06 16:50:38 +0000396 * Check for writeability.
Benny Prijonodd859a62005-11-01 16:42:51 +0000397 */
Benny Prijonobc986152005-11-06 16:50:38 +0000398 if ((events[i].events & EPOLLOUT) && key_has_pending_write(h)) {
399 queue[processed].key = h;
400 queue[processed].event_type = WRITEABLE_EVENT;
Benny Prijonodd859a62005-11-01 16:42:51 +0000401 ++processed;
402 }
Benny Prijonobc986152005-11-06 16:50:38 +0000403
Benny Prijonodd859a62005-11-01 16:42:51 +0000404#if PJ_HAS_TCP
405 /*
406 * Check for completion of connect() operation.
407 */
Benny Prijonobc986152005-11-06 16:50:38 +0000408 if ((events[i].events & EPOLLOUT) && (h->connecting)) {
409 queue[processed].key = h;
410 queue[processed].event_type = WRITEABLE_EVENT;
Benny Prijonodd859a62005-11-01 16:42:51 +0000411 ++processed;
412 }
413#endif /* PJ_HAS_TCP */
414
415 /*
416 * Check for error condition.
417 */
Benny Prijonobc986152005-11-06 16:50:38 +0000418 if (events[i].events & EPOLLERR && (h->connecting)) {
419 queue[processed].key = h;
420 queue[processed].event_type = EXCEPTION_EVENT;
421 ++processed;
Benny Prijonodd859a62005-11-01 16:42:51 +0000422 }
423 }
Benny Prijonobc986152005-11-06 16:50:38 +0000424 pj_lock_release(ioqueue->lock);
425
426 /* Now process the events. */
427 for (i=0; i<processed; ++i) {
428 switch (queue[i].event_type) {
429 case READABLE_EVENT:
430 ioqueue_dispatch_read_event(ioqueue, queue[i].key);
431 break;
432 case WRITEABLE_EVENT:
433 ioqueue_dispatch_write_event(ioqueue, queue[i].key);
434 break;
435 case EXCEPTION_EVENT:
436 ioqueue_dispatch_exception_event(ioqueue, queue[i].key);
437 break;
438 case NO_EVENT:
439 pj_assert(!"Invalid event!");
440 break;
441 }
442 }
Benny Prijonodd859a62005-11-01 16:42:51 +0000443
444 return processed;
445}
446