blob: 66dd13e50e8493d9ac7e4b97309cac2e85241bf5 [file] [log] [blame]
/* $Id$ */
/*
 * Copyright (C) 2003-2006 Benny Prijono <benny@prijono.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/*
 * ioqueue_epoll.c
 *
 * This is the implementation of the IOQueue framework using the epoll
 * API, in _both_ Linux user-mode and kernel-mode.
 */
25
26#include <pj/ioqueue.h>
27#include <pj/os.h>
28#include <pj/lock.h>
29#include <pj/log.h>
30#include <pj/list.h>
31#include <pj/pool.h>
32#include <pj/string.h>
33#include <pj/assert.h>
34#include <pj/errno.h>
35#include <pj/sock.h>
36#include <pj/compat/socket.h>
37
#if !defined(PJ_LINUX_KERNEL) || PJ_LINUX_KERNEL==0
    /*
     * Linux user mode: map the os_* names straight onto the libc/epoll
     * system call wrappers.
     */
#   include <sys/epoll.h>
#   include <errno.h>
#   include <unistd.h>

#   define epoll_data		data.ptr
#   define epoll_data_type	void*
#   define ioctl_val_type	unsigned long
#   define getsockopt_val_ptr	int*
#   define os_getsockopt	getsockopt
#   define os_ioctl		ioctl
#   define os_read		read
#   define os_close		close
#   define os_epoll_create	epoll_create
#   define os_epoll_ctl		epoll_ctl
#   define os_epoll_wait	epoll_wait
#else
    /*
     * Linux kernel mode: call the sys_* entry points directly.  The
     * address limit is widened (set_fs(KERNEL_DS)) around each call
     * because the buffers we pass live in kernel space.
     */
#   include <linux/config.h>
#   include <linux/version.h>
#   if defined(MODVERSIONS)
#	include <linux/modversions.h>
#   endif
#   include <linux/kernel.h>
#   include <linux/poll.h>
#   include <linux/eventpoll.h>
#   include <linux/syscalls.h>
#   include <linux/errno.h>
#   include <linux/unistd.h>
#   include <asm/ioctls.h>

    enum EPOLL_EVENTS
    {
	EPOLLIN	    = 0x001,
	EPOLLOUT    = 0x004,
	EPOLLERR    = 0x008,
    };

#   define os_epoll_create	sys_epoll_create

    /* sys_epoll_ctl() returns zero on success or a negative errno;
     * convert to the user-space convention (-1 + errno).
     */
    static int os_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
    {
	long rc;
	mm_segment_t oldfs = get_fs();
	set_fs(KERNEL_DS);
	rc = sys_epoll_ctl(epfd, op, fd, event);
	set_fs(oldfs);
	if (rc) {
	    errno = -rc;
	    return -1;
	} else {
	    return 0;
	}
    }

    static int os_epoll_wait(int epfd, struct epoll_event *events,
			     int maxevents, int timeout)
    {
	int count;
	mm_segment_t oldfs = get_fs();
	set_fs(KERNEL_DS);
	count = sys_epoll_wait(epfd, events, maxevents, timeout);
	set_fs(oldfs);
	return count;
    }

#   define os_close		sys_close
#   define os_getsockopt	pj_sock_getsockopt

    /* Read wrapper around sys_read().
     * BUG FIX: sys_read() returns the number of bytes read (>= 0) on
     * success and a negative errno on failure.  The previous code
     * treated ANY non-zero return -- including a successful read of one
     * or more bytes -- as an error, and returned 0 (losing the byte
     * count) on an empty read.
     */
    static int os_read(int fd, void *buf, size_t len)
    {
	long rc;
	mm_segment_t oldfs = get_fs();
	set_fs(KERNEL_DS);
	rc = sys_read(fd, buf, len);
	set_fs(oldfs);
	if (rc < 0) {
	    errno = -rc;
	    return -1;
	}
	return rc;
    }

#   define socklen_t		unsigned
#   define ioctl_val_type	unsigned long

    int ioctl(int fd, int opt, ioctl_val_type value);

    /* ioctl wrapper; mirrors the user-space convention of setting errno
     * on failure while returning the raw result either way (the old
     * if/else returned rc in both branches).
     */
    static int os_ioctl(int fd, int opt, ioctl_val_type value)
    {
	int rc;
	mm_segment_t oldfs = get_fs();
	set_fs(KERNEL_DS);
	rc = ioctl(fd, opt, value);
	set_fs(oldfs);
	if (rc < 0)
	    errno = -rc;
	return rc;
    }

#   define getsockopt_val_ptr	char*

#   define epoll_data		data
#   define epoll_data_type	__u32
#endif
141
142#define THIS_FILE "ioq_epoll"
143
Benny Prijono63ab3562006-07-08 19:46:43 +0000144//#define TRACE_(expr) PJ_LOG(3,expr)
145#define TRACE_(expr)
Benny Prijono9033e312005-11-21 02:08:39 +0000146
147/*
148 * Include common ioqueue abstraction.
149 */
150#include "ioqueue_common_abs.h"
151
/*
 * This describes each key.
 *
 * All real fields come from the DECLARE_COMMON_KEY macro defined in
 * ioqueue_common_abs.h; this file additionally relies on the key
 * exposing 'fd', 'mutex', 'connecting' and 'ioqueue' members (see
 * their uses below).
 */
struct pj_ioqueue_key_t
{
    DECLARE_COMMON_KEY
};
159
/* One event captured during pj_ioqueue_poll(): which key fired and
 * what kind of event it was.  Events are first collected under the
 * ioqueue lock and dispatched only after the lock is released.
 */
struct queue
{
    pj_ioqueue_key_t	    *key;
    enum ioqueue_event_type  event_type;
};
165
/*
 * This describes the I/O queue.
 */
struct pj_ioqueue_t
{
    /* Common ioqueue state (including the 'lock' used throughout this
     * file), from ioqueue_common_abs.h.
     */
    DECLARE_COMMON_IOQUEUE

    /* max: capacity (max_fd at creation); count: keys registered. */
    unsigned		max, count;
    pj_ioqueue_key_t	hlist;	    /* List head of registered keys.     */
    int			epfd;	    /* The epoll descriptor.             */
    struct epoll_event *events;	    /* Scratch array for os_epoll_wait(),
				     * allocated once in create().       */
    struct queue	*queue;	    /* Scratch array of events pending
				     * dispatch in pj_ioqueue_poll().    */
};
179
180/* Include implementation for common abstraction after we declare
181 * pj_ioqueue_key_t and pj_ioqueue_t.
182 */
183#include "ioqueue_common_abs.c"
184
185/*
186 * pj_ioqueue_name()
187 */
188PJ_DEF(const char*) pj_ioqueue_name(void)
189{
190#if defined(PJ_LINUX_KERNEL) && PJ_LINUX_KERNEL!=0
191 return "epoll-kernel";
192#else
193 return "epoll";
194#endif
195}
196
197/*
198 * pj_ioqueue_create()
199 *
200 * Create select ioqueue.
201 */
202PJ_DEF(pj_status_t) pj_ioqueue_create( pj_pool_t *pool,
203 pj_size_t max_fd,
204 pj_ioqueue_t **p_ioqueue)
205{
206 pj_ioqueue_t *ioqueue;
207 pj_status_t rc;
208 pj_lock_t *lock;
209
210 /* Check that arguments are valid. */
211 PJ_ASSERT_RETURN(pool != NULL && p_ioqueue != NULL &&
212 max_fd > 0, PJ_EINVAL);
213
214 /* Check that size of pj_ioqueue_op_key_t is sufficient */
215 PJ_ASSERT_RETURN(sizeof(pj_ioqueue_op_key_t)-sizeof(void*) >=
216 sizeof(union operation_key), PJ_EBUG);
217
218 ioqueue = pj_pool_alloc(pool, sizeof(pj_ioqueue_t));
219
220 ioqueue_init(ioqueue);
221
222 ioqueue->max = max_fd;
223 ioqueue->count = 0;
224 pj_list_init(&ioqueue->hlist);
225
226 rc = pj_lock_create_simple_mutex(pool, "ioq%p", &lock);
227 if (rc != PJ_SUCCESS)
228 return rc;
229
230 rc = pj_ioqueue_set_lock(ioqueue, lock, PJ_TRUE);
231 if (rc != PJ_SUCCESS)
232 return rc;
233
234 ioqueue->epfd = os_epoll_create(max_fd);
235 if (ioqueue->epfd < 0) {
236 ioqueue_destroy(ioqueue);
237 return PJ_RETURN_OS_ERROR(pj_get_native_os_error());
238 }
239
Benny Prijono63ab3562006-07-08 19:46:43 +0000240 ioqueue->events = pj_pool_calloc(pool, max_fd, sizeof(struct epoll_event));
241 PJ_ASSERT_RETURN(ioqueue->events != NULL, PJ_ENOMEM);
242
243 ioqueue->queue = pj_pool_calloc(pool, max_fd, sizeof(struct queue));
244 PJ_ASSERT_RETURN(ioqueue->queue != NULL, PJ_ENOMEM);
245
Benny Prijono9033e312005-11-21 02:08:39 +0000246 PJ_LOG(4, ("pjlib", "epoll I/O Queue created (%p)", ioqueue));
247
248 *p_ioqueue = ioqueue;
249 return PJ_SUCCESS;
250}
251
/*
 * pj_ioqueue_destroy()
 *
 * Destroy ioqueue.  Closes the epoll descriptor and tears down the
 * common ioqueue state; pool memory is reclaimed by the pool owner.
 */
PJ_DEF(pj_status_t) pj_ioqueue_destroy(pj_ioqueue_t *ioqueue)
{
    PJ_ASSERT_RETURN(ioqueue, PJ_EINVAL);
    /* epfd==0 means already destroyed (see below). */
    PJ_ASSERT_RETURN(ioqueue->epfd > 0, PJ_EINVALIDOP);

    /* Acquire the lock to block concurrent pollers.  The lock is
     * intentionally NOT released here: ioqueue_destroy() destroys it
     * while held (same pattern as the other ioqueue backends).
     */
    pj_lock_acquire(ioqueue->lock);
    os_close(ioqueue->epfd);
    ioqueue->epfd = 0;
    return ioqueue_destroy(ioqueue);
}
267
/*
 * pj_ioqueue_register_sock()
 *
 * Register a socket to ioqueue.  The socket is switched to non-blocking
 * mode and added to the epoll set for IN/ERR events (OUT is added
 * lazily by ioqueue_add_to_set() when a write is pending).
 *
 * On success *p_key receives the new key; on any failure *p_key is set
 * to NULL and an error status is returned.
 */
PJ_DEF(pj_status_t) pj_ioqueue_register_sock( pj_pool_t *pool,
					      pj_ioqueue_t *ioqueue,
					      pj_sock_t sock,
					      void *user_data,
					      const pj_ioqueue_callback *cb,
					      pj_ioqueue_key_t **p_key)
{
    pj_ioqueue_key_t *key = NULL;
    pj_uint32_t value;
    struct epoll_event ev;
    int status;
    pj_status_t rc = PJ_SUCCESS;

    PJ_ASSERT_RETURN(pool && ioqueue && sock != PJ_INVALID_SOCKET &&
                     cb && p_key, PJ_EINVAL);

    /* Hold the ioqueue lock for the whole registration so count/hlist
     * stay consistent with the epoll set.
     */
    pj_lock_acquire(ioqueue->lock);

    if (ioqueue->count >= ioqueue->max) {
        rc = PJ_ETOOMANY;
	TRACE_((THIS_FILE, "pj_ioqueue_register_sock error: too many files"));
	goto on_return;
    }

    /* Set socket to nonblocking. */
    value = 1;
    if ((rc=os_ioctl(sock, FIONBIO, (ioctl_val_type)&value))) {
	TRACE_((THIS_FILE, "pj_ioqueue_register_sock error: ioctl rc=%d",
                rc));
	/* Replace the raw ioctl result with the proper pj status. */
        rc = pj_get_netos_error();
	goto on_return;
    }

    /* Create key.  Pool-allocated, so failed registrations below leak
     * only pool memory (reclaimed with the pool).
     */
    key = (pj_ioqueue_key_t*)pj_pool_zalloc(pool, sizeof(pj_ioqueue_key_t));
    rc = ioqueue_init_key(pool, ioqueue, key, sock, user_data, cb);
    if (rc != PJ_SUCCESS) {
	key = NULL;
	goto on_return;
    }

    /* Create key's mutex */
    rc = pj_mutex_create_recursive(pool, NULL, &key->mutex);
    if (rc != PJ_SUCCESS) {
	key = NULL;
	goto on_return;
    }

    /* os_epoll_ctl. */
    ev.events = EPOLLIN | EPOLLERR;
    ev.epoll_data = (epoll_data_type)key;
    status = os_epoll_ctl(ioqueue->epfd, EPOLL_CTL_ADD, sock, &ev);
    if (status < 0) {
	rc = pj_get_os_error();
	/* Undo the mutex created above before discarding the key. */
	pj_mutex_destroy(key->mutex);
	key = NULL;
	TRACE_((THIS_FILE,
                "pj_ioqueue_register_sock error: os_epoll_ctl rc=%d",
                status));
	goto on_return;
    }

    /* Register */
    pj_list_insert_before(&ioqueue->hlist, key);
    ++ioqueue->count;

    //TRACE_((THIS_FILE, "socket registered, count=%d", ioqueue->count));

on_return:
    /* key is NULL here on every error path. */
    *p_key = key;
    pj_lock_release(ioqueue->lock);

    return rc;
}
347
348/*
349 * pj_ioqueue_unregister()
350 *
351 * Unregister handle from ioqueue.
352 */
353PJ_DEF(pj_status_t) pj_ioqueue_unregister( pj_ioqueue_key_t *key)
354{
355 pj_ioqueue_t *ioqueue;
356 struct epoll_event ev;
357 int status;
358
359 PJ_ASSERT_RETURN(key != NULL, PJ_EINVAL);
360
361 ioqueue = key->ioqueue;
362 pj_lock_acquire(ioqueue->lock);
363
364 pj_assert(ioqueue->count > 0);
365 --ioqueue->count;
366 pj_list_erase(key);
367
368 ev.events = 0;
369 ev.epoll_data = (epoll_data_type)key;
370 status = os_epoll_ctl( ioqueue->epfd, EPOLL_CTL_DEL, key->fd, &ev);
371 if (status != 0) {
372 pj_status_t rc = pj_get_os_error();
373 pj_lock_release(ioqueue->lock);
374 return rc;
375 }
376
377 pj_lock_release(ioqueue->lock);
378
379 /* Destroy the key. */
Benny Prijonofe9c9b62006-07-06 20:43:07 +0000380 pj_sock_close(key->fd);
381 pj_mutex_destroy(key->mutex);
Benny Prijono9033e312005-11-21 02:08:39 +0000382
383 return PJ_SUCCESS;
384}
385
386/* ioqueue_remove_from_set()
387 * This function is called from ioqueue_dispatch_event() to instruct
388 * the ioqueue to remove the specified descriptor from ioqueue's descriptor
389 * set for the specified event.
390 */
391static void ioqueue_remove_from_set( pj_ioqueue_t *ioqueue,
Benny Prijono63ab3562006-07-08 19:46:43 +0000392 pj_ioqueue_key_t *key,
Benny Prijono9033e312005-11-21 02:08:39 +0000393 enum ioqueue_event_type event_type)
394{
Benny Prijono63ab3562006-07-08 19:46:43 +0000395 if (event_type == WRITEABLE_EVENT) {
396 struct epoll_event ev;
397
398 ev.events = EPOLLIN | EPOLLERR;
399 ev.epoll_data = (epoll_data_type)key;
400 os_epoll_ctl( ioqueue->epfd, EPOLL_CTL_MOD, key->fd, &ev);
401 }
Benny Prijono9033e312005-11-21 02:08:39 +0000402}
403
404/*
405 * ioqueue_add_to_set()
406 * This function is called from pj_ioqueue_recv(), pj_ioqueue_send() etc
407 * to instruct the ioqueue to add the specified handle to ioqueue's descriptor
408 * set for the specified event.
409 */
410static void ioqueue_add_to_set( pj_ioqueue_t *ioqueue,
Benny Prijono63ab3562006-07-08 19:46:43 +0000411 pj_ioqueue_key_t *key,
Benny Prijono9033e312005-11-21 02:08:39 +0000412 enum ioqueue_event_type event_type )
413{
Benny Prijono63ab3562006-07-08 19:46:43 +0000414 if (event_type == WRITEABLE_EVENT) {
415 struct epoll_event ev;
416
417 ev.events = EPOLLIN | EPOLLOUT | EPOLLERR;
418 ev.epoll_data = (epoll_data_type)key;
419 os_epoll_ctl( ioqueue->epfd, EPOLL_CTL_MOD, key->fd, &ev);
420 }
Benny Prijono9033e312005-11-21 02:08:39 +0000421}
422
423/*
424 * pj_ioqueue_poll()
425 *
426 */
427PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout)
428{
429 int i, count, processed;
Benny Prijono9033e312005-11-21 02:08:39 +0000430 int msec;
Benny Prijono63ab3562006-07-08 19:46:43 +0000431 struct epoll_event *events = ioqueue->events;
432 struct queue *queue = ioqueue->queue;
433 pj_timestamp t1, t2;
Benny Prijono9033e312005-11-21 02:08:39 +0000434
435 PJ_CHECK_STACK();
436
437 msec = timeout ? PJ_TIME_VAL_MSEC(*timeout) : 9000;
Benny Prijono63ab3562006-07-08 19:46:43 +0000438
439 TRACE_((THIS_FILE, "start os_epoll_wait, msec=%d", msec));
440 pj_get_timestamp(&t1);
441
442 count = os_epoll_wait( ioqueue->epfd, events, ioqueue->max, msec);
443 if (count == 0) {
444 TRACE_((THIS_FILE, "os_epoll_wait timed out"));
Benny Prijono9033e312005-11-21 02:08:39 +0000445 return count;
Benny Prijono63ab3562006-07-08 19:46:43 +0000446 }
447 else if (count < 0) {
448 TRACE_((THIS_FILE, "os_epoll_wait error"));
Benny Prijono37e8d332006-01-20 21:03:36 +0000449 return -pj_get_netos_error();
Benny Prijono63ab3562006-07-08 19:46:43 +0000450 }
451
452 pj_get_timestamp(&t2);
453 TRACE_((THIS_FILE, "os_epoll_wait returns %d, time=%d usec",
454 count, pj_elapsed_usec(&t1, &t2)));
Benny Prijono9033e312005-11-21 02:08:39 +0000455
456 /* Lock ioqueue. */
457 pj_lock_acquire(ioqueue->lock);
458
459 for (processed=0, i=0; i<count; ++i) {
460 pj_ioqueue_key_t *h = (pj_ioqueue_key_t*)(epoll_data_type)
461 events[i].epoll_data;
462
Benny Prijono63ab3562006-07-08 19:46:43 +0000463 TRACE_((THIS_FILE, "event %d: events=%d", i, events[i].events));
464
Benny Prijono9033e312005-11-21 02:08:39 +0000465 /*
466 * Check readability.
467 */
468 if ((events[i].events & EPOLLIN) &&
469 (key_has_pending_read(h) || key_has_pending_accept(h))) {
470 queue[processed].key = h;
471 queue[processed].event_type = READABLE_EVENT;
472 ++processed;
473 }
474
475 /*
476 * Check for writeability.
477 */
478 if ((events[i].events & EPOLLOUT) && key_has_pending_write(h)) {
479 queue[processed].key = h;
480 queue[processed].event_type = WRITEABLE_EVENT;
481 ++processed;
482 }
483
484#if PJ_HAS_TCP
485 /*
486 * Check for completion of connect() operation.
487 */
488 if ((events[i].events & EPOLLOUT) && (h->connecting)) {
489 queue[processed].key = h;
490 queue[processed].event_type = WRITEABLE_EVENT;
491 ++processed;
492 }
493#endif /* PJ_HAS_TCP */
494
495 /*
496 * Check for error condition.
497 */
498 if (events[i].events & EPOLLERR && (h->connecting)) {
499 queue[processed].key = h;
500 queue[processed].event_type = EXCEPTION_EVENT;
501 ++processed;
502 }
503 }
504 pj_lock_release(ioqueue->lock);
505
506 /* Now process the events. */
507 for (i=0; i<processed; ++i) {
508 switch (queue[i].event_type) {
509 case READABLE_EVENT:
510 ioqueue_dispatch_read_event(ioqueue, queue[i].key);
511 break;
512 case WRITEABLE_EVENT:
513 ioqueue_dispatch_write_event(ioqueue, queue[i].key);
514 break;
515 case EXCEPTION_EVENT:
516 ioqueue_dispatch_exception_event(ioqueue, queue[i].key);
517 break;
518 case NO_EVENT:
519 pj_assert(!"Invalid event!");
520 break;
521 }
522 }
523
Benny Prijono09413ca2006-02-27 23:52:06 +0000524 /* Special case:
525 * When epoll returns > 0 but no descriptors are actually set!
526 */
527 if (count > 0 && !processed && msec > 0) {
528 pj_thread_sleep(msec);
529 }
Benny Prijono63ab3562006-07-08 19:46:43 +0000530
531 pj_get_timestamp(&t1);
532 TRACE_((THIS_FILE, "ioqueue_poll() returns %d, time=%d usec",
533 processed, pj_elapsed_usec(&t2, &t1)));
534
Benny Prijono9033e312005-11-21 02:08:39 +0000535 return processed;
536}
537