/* $Id$ */
/*
 * Copyright (C)2003-2006 Benny Prijono <benny@prijono.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
19
/*
 * sock_select.c
 *
 * This is the implementation of IOQueue using pj_sock_select().
 * It runs wherever pj_sock_select() is available (currently
 * Win32, Linux, Linux kernel, etc.).
 */
27
28#include <pj/ioqueue.h>
29#include <pj/os.h>
30#include <pj/lock.h>
31#include <pj/log.h>
32#include <pj/list.h>
33#include <pj/pool.h>
34#include <pj/string.h>
35#include <pj/assert.h>
36#include <pj/sock.h>
37#include <pj/compat/socket.h>
38#include <pj/sock_select.h>
39#include <pj/errno.h>
40
41/*
42 * Include declaration from common abstraction.
43 */
44#include "ioqueue_common_abs.h"
45
/*
 * ISSUES with ioqueue_select()
 *
 * EAGAIN/EWOULDBLOCK error in recv():
 *  - when multiple threads are working with the ioqueue, the application
 *    may receive EAGAIN or EWOULDBLOCK in the receive callback.
 *    This error happens because more than one thread is watching
 *    the same descriptor set, so when all of them call recv() or recvfrom()
 *    simultaneously, only one will succeed and the rest will get the error.
 *
 */
57#define THIS_FILE "ioq_select"
58
59/*
60 * The select ioqueue relies on socket functions (pj_sock_xxx()) to return
61 * the correct error code.
62 */
63#if PJ_RETURN_OS_ERROR(100) != PJ_STATUS_FROM_OS(100)
64# error "Error reporting must be enabled for this function to work!"
65#endif
66
67/**
68 * Get the number of descriptors in the set. This is defined in sock_select.c
69 * This function will only return the number of sockets set from PJ_FD_SET
70 * operation. When the set is modified by other means (such as by select()),
71 * the count will not be reflected here.
72 *
73 * That's why don't export this function in the header file, to avoid
74 * misunderstanding.
75 *
76 * @param fdsetp The descriptor set.
77 *
78 * @return Number of descriptors in the set.
79 */
80PJ_DECL(pj_size_t) PJ_FD_COUNT(const pj_fd_set_t *fdsetp);
81
82
/*
 * VALIDATE_FD_SET controls whether the validity of the fd_sets is checked
 * (see validate_sets()).
 *
 * The selection based on the debugging build is commented out, so the
 * check is unconditionally disabled:
 *
 *  #if defined(PJ_DEBUG) && PJ_DEBUG != 0
 *  #  define VALIDATE_FD_SET     1
 *  #else
 *  #  define VALIDATE_FD_SET     0
 *  #endif
 */
#define VALIDATE_FD_SET     0
95
96/*
97 * This describes each key.
98 */
99struct pj_ioqueue_key_t
100{
101 DECLARE_COMMON_KEY
102};
103
104/*
105 * This describes the I/O queue itself.
106 */
107struct pj_ioqueue_t
108{
109 DECLARE_COMMON_IOQUEUE
110
111 unsigned max, count;
112 pj_ioqueue_key_t key_list;
113 pj_fd_set_t rfdset;
114 pj_fd_set_t wfdset;
115#if PJ_HAS_TCP
116 pj_fd_set_t xfdset;
117#endif
118};
119
/* Include the implementation of the common abstraction only after
 * pj_ioqueue_key_t and pj_ioqueue_t have been declared.
 */
123#include "ioqueue_common_abs.c"
124
125/*
126 * pj_ioqueue_name()
127 */
128PJ_DEF(const char*) pj_ioqueue_name(void)
129{
130 return "select";
131}
132
133/*
134 * pj_ioqueue_create()
135 *
136 * Create select ioqueue.
137 */
138PJ_DEF(pj_status_t) pj_ioqueue_create( pj_pool_t *pool,
139 pj_size_t max_fd,
140 pj_ioqueue_t **p_ioqueue)
141{
142 pj_ioqueue_t *ioqueue;
143 pj_lock_t *lock;
144 pj_status_t rc;
145
146 /* Check that arguments are valid. */
147 PJ_ASSERT_RETURN(pool != NULL && p_ioqueue != NULL &&
148 max_fd > 0 && max_fd <= PJ_IOQUEUE_MAX_HANDLES,
149 PJ_EINVAL);
150
151 /* Check that size of pj_ioqueue_op_key_t is sufficient */
152 PJ_ASSERT_RETURN(sizeof(pj_ioqueue_op_key_t)-sizeof(void*) >=
153 sizeof(union operation_key), PJ_EBUG);
154
155 ioqueue = pj_pool_alloc(pool, sizeof(pj_ioqueue_t));
156
157 ioqueue_init(ioqueue);
158
159 ioqueue->max = max_fd;
160 ioqueue->count = 0;
161 PJ_FD_ZERO(&ioqueue->rfdset);
162 PJ_FD_ZERO(&ioqueue->wfdset);
163#if PJ_HAS_TCP
164 PJ_FD_ZERO(&ioqueue->xfdset);
165#endif
166 pj_list_init(&ioqueue->key_list);
167
168 rc = pj_lock_create_simple_mutex(pool, "ioq%p", &lock);
169 if (rc != PJ_SUCCESS)
170 return rc;
171
172 rc = pj_ioqueue_set_lock(ioqueue, lock, PJ_TRUE);
173 if (rc != PJ_SUCCESS)
174 return rc;
175
176 PJ_LOG(4, ("pjlib", "select() I/O Queue created (%p)", ioqueue));
177
178 *p_ioqueue = ioqueue;
179 return PJ_SUCCESS;
180}
181
182/*
183 * pj_ioqueue_destroy()
184 *
185 * Destroy ioqueue.
186 */
187PJ_DEF(pj_status_t) pj_ioqueue_destroy(pj_ioqueue_t *ioqueue)
188{
189 PJ_ASSERT_RETURN(ioqueue, PJ_EINVAL);
190
191 pj_lock_acquire(ioqueue->lock);
192 return ioqueue_destroy(ioqueue);
193}
194
195
196/*
197 * pj_ioqueue_register_sock()
198 *
199 * Register a handle to ioqueue.
200 */
201PJ_DEF(pj_status_t) pj_ioqueue_register_sock( pj_pool_t *pool,
202 pj_ioqueue_t *ioqueue,
203 pj_sock_t sock,
204 void *user_data,
205 const pj_ioqueue_callback *cb,
206 pj_ioqueue_key_t **p_key)
207{
208 pj_ioqueue_key_t *key = NULL;
209 pj_uint32_t value;
210 pj_status_t rc = PJ_SUCCESS;
211
212 PJ_ASSERT_RETURN(pool && ioqueue && sock != PJ_INVALID_SOCKET &&
213 cb && p_key, PJ_EINVAL);
214
215 pj_lock_acquire(ioqueue->lock);
216
217 if (ioqueue->count >= ioqueue->max) {
218 rc = PJ_ETOOMANY;
219 goto on_return;
220 }
221
222 /* Set socket to nonblocking. */
223 value = 1;
Benny Prijono9cf138e2006-01-19 03:58:29 +0000224#if defined(PJ_WIN32) && PJ_WIN32!=0 || \
225 defined(PJ_WIN32_WINCE) && PJ_WIN32_WINCE!=0
Benny Prijono9033e312005-11-21 02:08:39 +0000226 if (ioctlsocket(sock, FIONBIO, (u_long*)&value)) {
227#else
228 if (ioctl(sock, FIONBIO, &value)) {
229#endif
230 rc = pj_get_netos_error();
231 goto on_return;
232 }
233
234 /* Create key. */
235 key = (pj_ioqueue_key_t*)pj_pool_zalloc(pool, sizeof(pj_ioqueue_key_t));
236 rc = ioqueue_init_key(pool, ioqueue, key, sock, user_data, cb);
237 if (rc != PJ_SUCCESS) {
238 key = NULL;
239 goto on_return;
240 }
241
242 /* Register */
243 pj_list_insert_before(&ioqueue->key_list, key);
244 ++ioqueue->count;
245
246on_return:
247 /* On error, socket may be left in non-blocking mode. */
248 *p_key = key;
249 pj_lock_release(ioqueue->lock);
250
251 return rc;
252}
253
254/*
255 * pj_ioqueue_unregister()
256 *
257 * Unregister handle from ioqueue.
258 */
259PJ_DEF(pj_status_t) pj_ioqueue_unregister( pj_ioqueue_key_t *key)
260{
261 pj_ioqueue_t *ioqueue;
262
263 PJ_ASSERT_RETURN(key, PJ_EINVAL);
264
265 ioqueue = key->ioqueue;
266
267 pj_lock_acquire(ioqueue->lock);
268
269 pj_assert(ioqueue->count > 0);
270 --ioqueue->count;
271 pj_list_erase(key);
272 PJ_FD_CLR(key->fd, &ioqueue->rfdset);
273 PJ_FD_CLR(key->fd, &ioqueue->wfdset);
274#if PJ_HAS_TCP
275 PJ_FD_CLR(key->fd, &ioqueue->xfdset);
276#endif
277
278 /* ioqueue_destroy may try to acquire key's mutex.
279 * Since normally the order of locking is to lock key's mutex first
280 * then ioqueue's mutex, ioqueue_destroy may deadlock unless we
281 * release ioqueue's mutex first.
282 */
283 pj_lock_release(ioqueue->lock);
284
285 /* Destroy the key. */
286 ioqueue_destroy_key(key);
287
288 return PJ_SUCCESS;
289}
290
291
/* This is supposed to check whether the fd_set values are consistent
 * with the operations currently pending in each key. It is only compiled
 * when VALIDATE_FD_SET is non-zero.
 */
#if VALIDATE_FD_SET
static void validate_sets(const pj_ioqueue_t *ioqueue,
                          const pj_fd_set_t *rfdset,
                          const pj_fd_set_t *wfdset,
                          const pj_fd_set_t *xfdset)
{
    pj_ioqueue_key_t *k;

    /*
     * This basically does not work anymore.
     * The key would have to be locked before performing the check, but
     * that is not possible because the ioqueue mutex is being held.
     * Acquiring the key's mutex now would cause a deadlock.
     */
    pj_assert(0);

    for (k = ioqueue->key_list.next; k != &ioqueue->key_list; k = k->next) {
        pj_bool_t expect_read, expect_write;

        /* A pending read (or accept) must be mirrored in rfdset, and a
         * pending write (or connect) in wfdset.
         */
        expect_read = !pj_list_empty(&k->read_list);
        expect_write = !pj_list_empty(&k->write_list);
#if defined(PJ_HAS_TCP) && PJ_HAS_TCP != 0
        expect_read = expect_read || !pj_list_empty(&k->accept_list);
        expect_write = expect_write || k->connecting;
#endif

        if (expect_read) {
            pj_assert(PJ_FD_ISSET(k->fd, rfdset));
        } else {
            pj_assert(PJ_FD_ISSET(k->fd, rfdset) == 0);
        }

        if (expect_write) {
            pj_assert(PJ_FD_ISSET(k->fd, wfdset));
        } else {
            pj_assert(PJ_FD_ISSET(k->fd, wfdset) == 0);
        }

#if defined(PJ_HAS_TCP) && PJ_HAS_TCP != 0
        /* Only a connecting key is expected in the exception set. */
        if (k->connecting) {
            pj_assert(PJ_FD_ISSET(k->fd, xfdset));
        } else {
            pj_assert(PJ_FD_ISSET(k->fd, xfdset) == 0);
        }
#endif /* PJ_HAS_TCP */
    }
}
#endif /* VALIDATE_FD_SET */
349
350
351/* ioqueue_remove_from_set()
352 * This function is called from ioqueue_dispatch_event() to instruct
353 * the ioqueue to remove the specified descriptor from ioqueue's descriptor
354 * set for the specified event.
355 */
356static void ioqueue_remove_from_set( pj_ioqueue_t *ioqueue,
357 pj_sock_t fd,
358 enum ioqueue_event_type event_type)
359{
360 pj_lock_acquire(ioqueue->lock);
361
362 if (event_type == READABLE_EVENT)
363 PJ_FD_CLR((pj_sock_t)fd, &ioqueue->rfdset);
364 else if (event_type == WRITEABLE_EVENT)
365 PJ_FD_CLR((pj_sock_t)fd, &ioqueue->wfdset);
366 else if (event_type == EXCEPTION_EVENT)
367 PJ_FD_CLR((pj_sock_t)fd, &ioqueue->xfdset);
368 else
369 pj_assert(0);
370
371 pj_lock_release(ioqueue->lock);
372}
373
374/*
375 * ioqueue_add_to_set()
376 * This function is called from pj_ioqueue_recv(), pj_ioqueue_send() etc
377 * to instruct the ioqueue to add the specified handle to ioqueue's descriptor
378 * set for the specified event.
379 */
380static void ioqueue_add_to_set( pj_ioqueue_t *ioqueue,
381 pj_sock_t fd,
382 enum ioqueue_event_type event_type )
383{
384 pj_lock_acquire(ioqueue->lock);
385
386 if (event_type == READABLE_EVENT)
387 PJ_FD_SET((pj_sock_t)fd, &ioqueue->rfdset);
388 else if (event_type == WRITEABLE_EVENT)
389 PJ_FD_SET((pj_sock_t)fd, &ioqueue->wfdset);
390 else if (event_type == EXCEPTION_EVENT)
391 PJ_FD_SET((pj_sock_t)fd, &ioqueue->xfdset);
392 else
393 pj_assert(0);
394
395 pj_lock_release(ioqueue->lock);
396}
397
398/*
399 * pj_ioqueue_poll()
400 *
401 * Few things worth written:
402 *
403 * - we used to do only one callback called per poll, but it didn't go
404 * very well. The reason is because on some situation, the write
405 * callback gets called all the time, thus doesn't give the read
406 * callback to get called. This happens, for example, when user
407 * submit write operation inside the write callback.
408 * As the result, we changed the behaviour so that now multiple
409 * callbacks are called in a single poll. It should be fast too,
410 * just that we need to be carefull with the ioqueue data structs.
411 *
412 * - to guarantee preemptiveness etc, the poll function must strictly
413 * work on fd_set copy of the ioqueue (not the original one).
414 */
415PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout)
416{
417 pj_fd_set_t rfdset, wfdset, xfdset;
418 int count, counter;
419 pj_ioqueue_key_t *h;
420 struct event
421 {
422 pj_ioqueue_key_t *key;
423 enum ioqueue_event_type event_type;
424 } event[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL];
425
Benny Prijono37e8d332006-01-20 21:03:36 +0000426 PJ_ASSERT_RETURN(ioqueue, -PJ_EINVAL);
Benny Prijono9033e312005-11-21 02:08:39 +0000427
428 /* Lock ioqueue before making fd_set copies */
429 pj_lock_acquire(ioqueue->lock);
430
431 /* We will only do select() when there are sockets to be polled.
432 * Otherwise select() will return error.
433 */
434 if (PJ_FD_COUNT(&ioqueue->rfdset)==0 &&
435 PJ_FD_COUNT(&ioqueue->wfdset)==0 &&
436 PJ_FD_COUNT(&ioqueue->xfdset)==0)
437 {
438 pj_lock_release(ioqueue->lock);
439 if (timeout)
440 pj_thread_sleep(PJ_TIME_VAL_MSEC(*timeout));
441 return 0;
442 }
443
444 /* Copy ioqueue's pj_fd_set_t to local variables. */
445 pj_memcpy(&rfdset, &ioqueue->rfdset, sizeof(pj_fd_set_t));
446 pj_memcpy(&wfdset, &ioqueue->wfdset, sizeof(pj_fd_set_t));
447#if PJ_HAS_TCP
448 pj_memcpy(&xfdset, &ioqueue->xfdset, sizeof(pj_fd_set_t));
449#else
450 PJ_FD_ZERO(&xfdset);
451#endif
452
453#if VALIDATE_FD_SET
454 validate_sets(ioqueue, &rfdset, &wfdset, &xfdset);
455#endif
456
457 /* Unlock ioqueue before select(). */
458 pj_lock_release(ioqueue->lock);
459
460 count = pj_sock_select(FD_SETSIZE, &rfdset, &wfdset, &xfdset, timeout);
461
462 if (count <= 0)
Benny Prijono37e8d332006-01-20 21:03:36 +0000463 return -pj_get_netos_error();
Benny Prijono9033e312005-11-21 02:08:39 +0000464 else if (count > PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL)
465 count = PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL;
466
467 /* Scan descriptor sets for event and add the events in the event
468 * array to be processed later in this function. We do this so that
469 * events can be processed in parallel without holding ioqueue lock.
470 */
471 pj_lock_acquire(ioqueue->lock);
472
473 counter = 0;
474
475 /* Scan for writable sockets first to handle piggy-back data
476 * coming with accept().
477 */
478 h = ioqueue->key_list.next;
479 for ( ; h!=&ioqueue->key_list && counter<count; h = h->next) {
480 if ( (key_has_pending_write(h) || key_has_pending_connect(h))
481 && PJ_FD_ISSET(h->fd, &wfdset))
482 {
483 event[counter].key = h;
484 event[counter].event_type = WRITEABLE_EVENT;
485 ++counter;
486 }
487
488 /* Scan for readable socket. */
489 if ((key_has_pending_read(h) || key_has_pending_accept(h))
490 && PJ_FD_ISSET(h->fd, &rfdset))
491 {
492 event[counter].key = h;
493 event[counter].event_type = READABLE_EVENT;
494 ++counter;
495 }
496
497#if PJ_HAS_TCP
498 if (key_has_pending_connect(h) && PJ_FD_ISSET(h->fd, &xfdset)) {
499 event[counter].key = h;
500 event[counter].event_type = EXCEPTION_EVENT;
501 ++counter;
502 }
503#endif
504 }
505
506 pj_lock_release(ioqueue->lock);
507
508 count = counter;
509
510 /* Now process all events. The dispatch functions will take care
511 * of locking in each of the key
512 */
513 for (counter=0; counter<count; ++counter) {
514 switch (event[counter].event_type) {
515 case READABLE_EVENT:
516 ioqueue_dispatch_read_event(ioqueue, event[counter].key);
517 break;
518 case WRITEABLE_EVENT:
519 ioqueue_dispatch_write_event(ioqueue, event[counter].key);
520 break;
521 case EXCEPTION_EVENT:
522 ioqueue_dispatch_exception_event(ioqueue, event[counter].key);
523 break;
524 case NO_EVENT:
525 pj_assert(!"Invalid event!");
526 break;
527 }
528 }
529
530 return count;
531}
532