Branch data Line data Source code
1 : : /*
2 : : * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
3 : : *
4 : : * Licensed under the Apache License, Version 2.0 (the "License");
5 : : * you may not use this file except in compliance with the License.
6 : : * You may obtain a copy of the License at:
7 : : *
8 : : * http://www.apache.org/licenses/LICENSE-2.0
9 : : *
10 : : * Unless required by applicable law or agreed to in writing, software
11 : : * distributed under the License is distributed on an "AS IS" BASIS,
12 : : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 : : * See the License for the specific language governing permissions and
14 : : * limitations under the License.
15 : : */
16 : :
17 : : #include <config.h>
18 : : #include "poll-loop.h"
19 : : #include <errno.h>
20 : : #include <inttypes.h>
21 : : #include <poll.h>
22 : : #include <stdlib.h>
23 : : #include <string.h>
24 : : #include "coverage.h"
25 : : #include "openvswitch/dynamic-string.h"
26 : : #include "fatal-signal.h"
27 : : #include "openvswitch/list.h"
28 : : #include "ovs-thread.h"
29 : : #include "seq.h"
30 : : #include "socket-util.h"
31 : : #include "timeval.h"
32 : : #include "openvswitch/vlog.h"
33 : : #include "openvswitch/hmap.h"
34 : : #include "hash.h"
35 : :
/* Per-file logging module and the two coverage counters that this file
 * increments with COVERAGE_INC(): 'poll_create_node' in poll_create_node()
 * and 'poll_zero_timeout' in poll_block(). */
36 : 53956 : VLOG_DEFINE_THIS_MODULE(poll_loop);
37 : :
38 : 21412052 : COVERAGE_DEFINE(poll_create_node);
39 : 446962 : COVERAGE_DEFINE(poll_zero_timeout);
40 : :
41 : : struct poll_node {
42 : : struct hmap_node hmap_node;
43 : : struct pollfd pollfd; /* Events to pass to time_poll(). */
44 : : HANDLE wevent; /* Events for WaitForMultipleObjects(). */
45 : : const char *where; /* Where poll_node was created. */
46 : : };
47 : :
48 : : struct poll_loop {
49 : : /* All active poll waiters. */
50 : : struct hmap poll_nodes;
51 : :
52 : : /* Time at which to wake up the next call to poll_block(), LLONG_MIN to
53 : : * wake up immediately, or LLONG_MAX to wait forever. */
54 : : long long int timeout_when; /* In msecs as returned by time_msec(). */
55 : : const char *timeout_where; /* Where 'timeout_when' was set. */
56 : : };
57 : :
58 : : static struct poll_loop *poll_loop(void);
59 : :
60 : : /* Look up the node with same fd or wevent. */
61 : : static struct poll_node *
62 : 3545002 : find_poll_node(struct poll_loop *loop, int fd, HANDLE wevent)
63 : : {
64 : : struct poll_node *node;
65 : :
66 : : /* Both 'fd' and 'wevent' cannot be set. */
67 [ - + ]: 3545002 : ovs_assert(!fd != !wevent);
68 : :
69 [ + + ][ - + ]: 3545002 : HMAP_FOR_EACH_WITH_HASH (node, hmap_node,
70 : : hash_2words(fd, (uint32_t)wevent),
71 : : &loop->poll_nodes) {
72 [ + - ][ - + ]: 1557207 : if ((fd && node->pollfd.fd == fd)
73 [ # # ][ # # ]: 0 : || (wevent && node->wevent == wevent)) {
74 : 1557207 : return node;
75 : : }
76 : : }
77 : 1987791 : return NULL;
78 : : }
79 : :
80 : : /* On Unix based systems:
81 : : *
82 : : * Registers 'fd' as waiting for the specified 'events' (which should be
83 : : * POLLIN or POLLOUT or POLLIN | POLLOUT). The following call to
84 : : * poll_block() will wake up when 'fd' becomes ready for one or more of the
85 : : * requested events. The 'fd's are given to poll() function later.
86 : : *
87 : : * On Windows system:
88 : : *
89 : : * If 'fd' is specified, create a new 'wevent'. Association of 'fd' and
90 : : * 'wevent' for 'events' happens in poll_block(). If 'wevent' is specified,
91 : : * it is assumed that it is unrelated to any sockets and poll_block()
92 : : * will wake up on any event on that 'wevent'. It is an error to pass
93 : : * both 'wevent' and 'fd'.
94 : : *
95 : : * The event registration is one-shot: only the following call to
96 : : * poll_block() is affected. The event will need to be re-registered after
97 : : * poll_block() is called if it is to persist.
98 : : *
99 : : * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to
100 : : * automatically provide the caller's source file and line number for
101 : : * 'where'.) */
102 : : static void
103 : 3545006 : poll_create_node(int fd, HANDLE wevent, short int events, const char *where)
104 : : {
105 : 3545006 : struct poll_loop *loop = poll_loop();
106 : : struct poll_node *node;
107 : :
108 : 3545005 : COVERAGE_INC(poll_create_node);
109 : :
110 : : /* Both 'fd' and 'wevent' cannot be set. */
111 [ - + ]: 3545003 : ovs_assert(!fd != !wevent);
112 : :
113 : : /* Check for duplicate. If found, "or" the events. */
114 : 3545003 : node = find_poll_node(loop, fd, wevent);
115 [ + + ]: 3544998 : if (node) {
116 : 1557207 : node->pollfd.events |= events;
117 : : } else {
118 : 1987791 : node = xzalloc(sizeof *node);
119 : 1987799 : hmap_insert(&loop->poll_nodes, &node->hmap_node,
120 : : hash_2words(fd, (uint32_t)wevent));
121 : 1987799 : node->pollfd.fd = fd;
122 : 1987799 : node->pollfd.events = events;
123 : : #ifdef _WIN32
124 : : if (!wevent) {
125 : : wevent = CreateEvent(NULL, FALSE, FALSE, NULL);
126 : : }
127 : : #endif
128 : 1987799 : node->wevent = wevent;
129 : 1987799 : node->where = where;
130 : : }
131 : 3545006 : }
132 : :
133 : : /* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN
134 : : * or POLLOUT or POLLIN | POLLOUT). The following call to poll_block() will
135 : : * wake up when 'fd' becomes ready for one or more of the requested events.
136 : : *
137 : : * On Windows, 'fd' must be a socket.
138 : : *
139 : : * The event registration is one-shot: only the following call to poll_block()
140 : : * is affected. The event will need to be re-registered after poll_block() is
141 : : * called if it is to persist.
142 : : *
143 : : * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to
144 : : * automatically provide the caller's source file and line number for
145 : : * 'where'.) */
146 : : void
147 : 3545006 : poll_fd_wait_at(int fd, short int events, const char *where)
148 : : {
149 : 3545006 : poll_create_node(fd, 0, events, where);
150 : 3545001 : }
151 : :
#ifdef _WIN32
/* Asks the following call to poll_block() to wake up when 'wevent' is
 * signaled.
 *
 * The event registration is one-shot: it applies only to the following call
 * to poll_block() and must be repeated afterward if it is to persist.
 *
 * ('where' is used in debug logging.  Commonly one would use
 * poll_wevent_wait() to automatically provide the caller's source file and
 * line number for 'where'.) */
void
poll_wevent_wait_at(HANDLE wevent, const char *where)
{
    const int no_fd = 0;            /* Waiting on 'wevent', not on an fd. */
    const short int no_events = 0;  /* No poll events apply to a bare handle. */

    poll_create_node(no_fd, wevent, no_events, where);
}
#endif /* _WIN32 */
169 : :
/* Limits the following call to poll_block() to blocking for at most 'msec'
 * milliseconds.  A nonpositive 'msec' makes the following call to
 * poll_block() not block at all.
 *
 * The timer registration is one-shot: it applies only to the following call
 * to poll_block() and must be repeated afterward if it is to persist.
 *
 * ('where' is used in debug logging.  Commonly one would use
 * poll_timer_wait() to automatically provide the caller's source file and
 * line number for 'where'.) */
void
poll_timer_wait_at(long long int msec, const char *where)
{
    long long int now = time_msec();

    /* Convert the relative delay into an absolute deadline:
     *
     *     - LLONG_MIN to wake up immediately,
     *     - 'now + msec' in the normal case,
     *     - LLONG_MAX if 'now + msec' would overflow. */
    long long int deadline
        = (msec <= 0 ? LLONG_MIN
           : (unsigned long long int) now + msec <= LLONG_MAX ? now + msec
           : LLONG_MAX);

    poll_timer_wait_until_at(deadline, where);
}
200 : :
201 : : /* Causes the following call to poll_block() to wake up when the current time,
202 : : * as returned by time_msec(), reaches 'when' or later. If 'when' is earlier
203 : : * than the current time, the following call to poll_block() will not block at
204 : : * all.
205 : : *
206 : : * The timer registration is one-shot: only the following call to poll_block()
207 : : * is affected. The timer will need to be re-registered after poll_block() is
208 : : * called if it is to persist.
209 : : *
210 : : * ('where' is used in debug logging. Commonly one would use
211 : : * poll_timer_wait_until() to automatically provide the caller's source file
212 : : * and line number for 'where'.) */
213 : : void
214 : 578645 : poll_timer_wait_until_at(long long int when, const char *where)
215 : : {
216 : 578645 : struct poll_loop *loop = poll_loop();
217 [ + + ]: 578645 : if (when < loop->timeout_when) {
218 : 384580 : loop->timeout_when = when;
219 : 384580 : loop->timeout_where = where;
220 : : }
221 : 578645 : }
222 : :
/* Makes the following call to poll_block() return immediately instead of
 * blocking.
 *
 * ('where' is used in debug logging.  Commonly one would use
 * poll_immediate_wake() to automatically provide the caller's source file
 * and line number for 'where'.) */
void
poll_immediate_wake_at(const char *where)
{
    const long long int no_delay = 0;   /* Nonpositive: do not block. */

    poll_timer_wait_at(no_delay, where);
}
234 : :
235 : : /* Logs, if appropriate, that the poll loop was awakened by an event
236 : : * registered at 'where' (typically a source file and line number). The other
237 : : * arguments have two possible interpretations:
238 : : *
239 : : * - If 'pollfd' is nonnull then it should be the "struct pollfd" that caused
240 : : * the wakeup. 'timeout' is ignored.
241 : : *
242 : : * - If 'pollfd' is NULL then 'timeout' is the number of milliseconds after
243 : : * which the poll loop woke up.
244 : : */
245 : : static void
246 : 69952 : log_wakeup(const char *where, const struct pollfd *pollfd, int timeout)
247 : : {
248 : : static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
249 : : enum vlog_level level;
250 : : int cpu_usage;
251 : : struct ds s;
252 : :
253 : 69952 : cpu_usage = get_cpu_usage();
254 [ + + ]: 69952 : if (VLOG_IS_DBG_ENABLED()) {
255 : 79 : level = VLL_DBG;
256 [ + + ]: 69873 : } else if (cpu_usage > 50
257 [ + - ]: 1430 : && !thread_is_pmd()
258 [ + + ]: 1430 : && !VLOG_DROP_INFO(&rl)) {
259 : 10 : level = VLL_INFO;
260 : : } else {
261 : 69863 : return;
262 : : }
263 : :
264 : 89 : ds_init(&s);
265 : 89 : ds_put_cstr(&s, "wakeup due to ");
266 [ + + ]: 89 : if (pollfd) {
267 : 57 : char *description = describe_fd(pollfd->fd);
268 [ + - ]: 57 : if (pollfd->revents & POLLIN) {
269 : 57 : ds_put_cstr(&s, "[POLLIN]");
270 : : }
271 [ - + ]: 57 : if (pollfd->revents & POLLOUT) {
272 : 0 : ds_put_cstr(&s, "[POLLOUT]");
273 : : }
274 [ - + ]: 57 : if (pollfd->revents & POLLERR) {
275 : 0 : ds_put_cstr(&s, "[POLLERR]");
276 : : }
277 [ + + ]: 57 : if (pollfd->revents & POLLHUP) {
278 : 14 : ds_put_cstr(&s, "[POLLHUP]");
279 : : }
280 [ - + ]: 57 : if (pollfd->revents & POLLNVAL) {
281 : 0 : ds_put_cstr(&s, "[POLLNVAL]");
282 : : }
283 : 57 : ds_put_format(&s, " on fd %d (%s)", pollfd->fd, description);
284 : 57 : free(description);
285 : : } else {
286 : 32 : ds_put_format(&s, "%d-ms timeout", timeout);
287 : : }
288 [ + + ]: 89 : if (where) {
289 : 74 : ds_put_format(&s, " at %s", where);
290 : : }
291 [ + + ]: 89 : if (cpu_usage >= 0) {
292 : 10 : ds_put_format(&s, " (%d%% CPU usage)", cpu_usage);
293 : : }
294 [ + - ]: 89 : VLOG(level, "%s", ds_cstr(&s));
295 : 89 : ds_destroy(&s);
296 : : }
297 : :
298 : : static void
299 : 345495 : free_poll_nodes(struct poll_loop *loop)
300 : : {
301 : : struct poll_node *node, *next;
302 : :
303 [ + + ][ - + ]: 2330439 : HMAP_FOR_EACH_SAFE (node, next, hmap_node, &loop->poll_nodes) {
[ + + ]
304 : 1984921 : hmap_remove(&loop->poll_nodes, &node->hmap_node);
305 : : #ifdef _WIN32
306 : : if (node->wevent && node->pollfd.fd) {
307 : : WSAEventSelect(node->pollfd.fd, NULL, 0);
308 : : CloseHandle(node->wevent);
309 : : }
310 : : #endif
311 : 1984944 : free(node);
312 : : }
313 : 345500 : }
314 : :
315 : : /* Blocks until one or more of the events registered with poll_fd_wait()
316 : : * occurs, or until the minimum duration registered with poll_timer_wait()
317 : : * elapses, or not at all if poll_immediate_wake() has been called. */
318 : : void
319 : 344740 : poll_block(void)
320 : : {
321 : 344740 : struct poll_loop *loop = poll_loop();
322 : : struct poll_node *node;
323 : : struct pollfd *pollfds;
324 : 344740 : HANDLE *wevents = NULL;
325 : : int elapsed;
326 : : int retval;
327 : : int i;
328 : :
329 : : /* Register fatal signal events before actually doing any real work for
330 : : * poll_block. */
331 : 344740 : fatal_signal_wait();
332 : :
333 [ + + ]: 344740 : if (loop->timeout_when == LLONG_MIN) {
334 : 50821 : COVERAGE_INC(poll_zero_timeout);
335 : : }
336 : :
337 : 344740 : timewarp_run();
338 : 344740 : pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
339 : :
340 : : #ifdef _WIN32
341 : : wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
342 : : #endif
343 : :
344 : : /* Populate with all the fds and events. */
345 : 344740 : i = 0;
346 [ + + ][ - + ]: 2332536 : HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
347 : 1987797 : pollfds[i] = node->pollfd;
348 : : #ifdef _WIN32
349 : : wevents[i] = node->wevent;
350 : : if (node->pollfd.fd && node->wevent) {
351 : : short int wsa_events = 0;
352 : : if (node->pollfd.events & POLLIN) {
353 : : wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
354 : : }
355 : : if (node->pollfd.events & POLLOUT) {
356 : : wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
357 : : }
358 : : WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
359 : : }
360 : : #endif
361 : 1987797 : i++;
362 : : }
363 : :
364 : 344740 : retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
365 : : loop->timeout_when, &elapsed);
366 [ - + ]: 343574 : if (retval < 0) {
367 : : static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
368 [ # # ]: 0 : VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
369 [ + + ]: 343574 : } else if (!retval) {
370 : 69144 : log_wakeup(loop->timeout_where, NULL, elapsed);
371 [ + + ][ + + ]: 274430 : } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) {
372 : 808 : i = 0;
373 [ + + ][ - + ]: 9220 : HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
374 [ + + ]: 8412 : if (pollfds[i].revents) {
375 : 808 : log_wakeup(node->where, &pollfds[i], 0);
376 : : }
377 : 8412 : i++;
378 : : }
379 : : }
380 : :
381 : 343570 : free_poll_nodes(loop);
382 : 343574 : loop->timeout_when = LLONG_MAX;
383 : 343574 : loop->timeout_where = NULL;
384 : 343574 : free(pollfds);
385 : 343574 : free(wevents);
386 : :
387 : : /* Handle any pending signals before doing anything else. */
388 : 343574 : fatal_signal_run();
389 : :
390 : 343572 : seq_woke();
391 : 343574 : }
392 : :
393 : : static void
394 : 1926 : free_poll_loop(void *loop_)
395 : : {
396 : 1926 : struct poll_loop *loop = loop_;
397 : :
398 : 1926 : free_poll_nodes(loop);
399 : 1926 : hmap_destroy(&loop->poll_nodes);
400 : 1926 : free(loop);
401 : 1926 : }
402 : :
403 : : static struct poll_loop *
404 : 4468385 : poll_loop(void)
405 : : {
406 : : static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
407 : : static pthread_key_t key;
408 : : struct poll_loop *loop;
409 : :
410 [ + + ]: 4468385 : if (ovsthread_once_start(&once)) {
411 : 15692 : xpthread_key_create(&key, free_poll_loop);
412 : 15692 : ovsthread_once_done(&once);
413 : : }
414 : :
415 : 4468380 : loop = pthread_getspecific(key);
416 [ + + ]: 4468380 : if (!loop) {
417 : 18784 : loop = xzalloc(sizeof *loop);
418 : 18784 : hmap_init(&loop->poll_nodes);
419 : 18784 : xpthread_setspecific(key, loop);
420 : : }
421 : 4468378 : return loop;
422 : : }
423 : :
|