Branch data Line data Source code
1 : : /*
2 : : * Copyright (c) 2014, 2016 Nicira, Inc.
3 : : *
4 : : * Licensed under the Apache License, Version 2.0 (the "License");
5 : : * you may not use this file except in compliance with the License.
6 : : * You may obtain a copy of the License at:
7 : : *
8 : : * http://www.apache.org/licenses/LICENSE-2.0
9 : : *
10 : : * Unless required by applicable law or agreed to in writing, software
11 : : * distributed under the License is distributed on an "AS IS" BASIS,
12 : : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 : : * See the License for the specific language governing permissions and
14 : : * limitations under the License.
15 : : */
16 : :
17 : : #include <config.h>
18 : : #include "cmap.h"
19 : : #include "coverage.h"
20 : : #include "bitmap.h"
21 : : #include "hash.h"
22 : : #include "ovs-rcu.h"
23 : : #include "random.h"
24 : : #include "util.h"
25 : :
26 : 3911766 : COVERAGE_DEFINE(cmap_expand);
27 : 3866070 : COVERAGE_DEFINE(cmap_shrink);
28 : :
29 : : /* Optimistic Concurrent Cuckoo Hash
30 : : * =================================
31 : : *
32 : : * A "cuckoo hash" is an open addressing hash table schema, designed such that
33 : : * a given element can be in one of only a small number of buckets 'd', each of
34 : : * which holds up to a small number 'k' elements. Thus, the expected and
35 : : * worst-case lookup times are O(1) because they require comparing no more than
36 : : * a fixed number of elements (k * d). Inserting a new element can require
37 : : * moving around existing elements, but it is also O(1) amortized expected
38 : : * time.
39 : : *
40 : : * An optimistic concurrent hash table goes one step further, making it
41 : : * possible for a single writer to execute concurrently with any number of
42 : : * readers without requiring the readers to take any locks.
43 : : *
44 : : * This cuckoo hash implementation uses:
45 : : *
46 : : * - Two hash functions (d=2). More hash functions allow for a higher load
47 : : * factor, but increasing 'k' is easier and the benefits of increasing 'd'
48 : : * quickly fall off with the 'k' values used here. Also, the method of
49 : : * generating hashes used in this implementation is hard to reasonably
50 : : * extend beyond d=2. Finally, each additional hash function means that a
51 : : * lookup has to examine at least one extra cache line.
52 : : *
53 : : * - 5 or 7 elements per bucket (k=5 or k=7), chosen to make buckets
54 : : * exactly one cache line in size.
55 : : *
56 : : * According to Erlingsson [4], these parameters suggest a maximum load factor
57 : : * of about 93%. The current implementation is conservative, expanding the
58 : : * hash table when it is over 85% full.
59 : : *
60 : : * When the load factor drops below 20%, the hash table is shrunk to half
61 : : * its size. This reduces the hash table's memory utilization and avoids
62 : : * having the hash table occupy the chunk at the top of the heap, which
63 : : * would prevent the heap from being trimmed.
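 *
 * As a worked example of these thresholds (a sketch assuming the 64-bit
 * layout, k=5): with mask = 1023 there are 1024 buckets and 5120 slots, so
 * the table expands once it holds more than 5120 * 0.85 ~= 4352 elements
 * and shrinks once it drops below 5120 * 0.20 = 1024 elements.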
64 : : *
65 : : * Hash Functions
66 : : * ==============
67 : : *
68 : : * A cuckoo hash requires multiple hash functions. When reorganizing the hash
69 : : * becomes too difficult, it also requires the ability to change the hash
70 : : * functions. Requiring the client to provide multiple hashes and to be able
71 : : * to change them to new hashes upon insertion is inconvenient.
72 : : *
73 : : * This implementation takes another approach. The client provides a single,
74 : : * fixed hash. The cuckoo hash internally "rehashes" this hash against a
75 : : * randomly selected basis value (see rehash()). This rehashed value is one of
76 : : * the two hashes. The other hash is computed by 16-bit circular rotation of
77 : : * the rehashed value. Updating the basis changes the hash functions.
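 *
 * To illustrate (a sketch only; rehash() and other_hash() below are the
 * actual implementations), the two candidate buckets for a client-provided
 * 'hash' are derived as:
 *
 *     h1 = hash_finish(impl->basis, hash);
 *     h2 = (h1 << 16) | (h1 >> 16);
 *
 *     b1 = &impl->buckets[h1 & impl->mask];
 *     b2 = &impl->buckets[h2 & impl->mask];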
78 : : *
79 : : * To work properly, the hash functions used by a cuckoo hash must be
80 : : * independent. If one hash function is a function of the other (e.g. h2(x) =
81 : : * h1(x) + 1, or h2(x) = hash(h1(x))), then insertion will eventually fail
82 : : * catastrophically (loop forever) because of collisions. With this rehashing
83 : : * technique, the two hashes are completely independent for masks up to 16 bits
84 : : * wide. For masks wider than 16 bits, only 32-n bits are independent between
85 : : * the two hashes. Thus, it becomes risky to grow a cuckoo hash table beyond
86 : : * about 2**24 buckets (about 71 million elements with k=5 and maximum load
87 : : * 85%). Fortunately, Open vSwitch does not normally deal with hash tables
88 : : * this large.
89 : : *
90 : : *
91 : : * Handling Duplicates
92 : : * ===================
93 : : *
94 : : * This cuckoo hash table implementation deals with duplicate client-provided
95 : : * hash values by chaining: the second and subsequent cmap_nodes with a given
96 : : * hash are chained off the initially inserted node's 'next' member. The hash
97 : : * table maintains the invariant that a single client-provided hash value
98 : : * exists in only a single chain in a single bucket (even though that hash
99 : : * could be stored in two buckets).
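 *
 * A lookup thus returns the head of the chain for a given hash, and the
 * caller walks the chain to weed out hash collisions. A hypothetical caller
 * whose 'struct foo' embeds a cmap_node as 'node' might use the
 * CMAP_FOR_EACH_WITH_HASH macro from cmap.h:
 *
 *     struct foo *foo;
 *
 *     CMAP_FOR_EACH_WITH_HASH (foo, node, hash, &map) {
 *         if (foo->key == key) {
 *             return foo;
 *         }
 *     }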
100 : : *
101 : : *
102 : : * References
103 : : * ==========
104 : : *
105 : : * [1] D. Zhou, B. Fan, H. Lim, M. Kaminsky, D. G. Andersen, "Scalable, High
106 : : * Performance Ethernet Forwarding with CuckooSwitch". In Proc. 9th
107 : : * CoNEXT, Dec. 2013.
108 : : *
109 : : * [2] B. Fan, D. G. Andersen, and M. Kaminsky. "MemC3: Compact and concurrent
110 : : * memcache with dumber caching and smarter hashing". In Proc. 10th USENIX
111 : : * NSDI, Apr. 2013
112 : : *
113 : : * [3] R. Pagh and F. Rodler. "Cuckoo hashing". Journal of Algorithms, 51(2):
114 : : * 122-144, May 2004.
115 : : *
116 : : * [4] U. Erlingsson, M. Manasse, F. McSherry, "A Cool and Practical
117 : : * Alternative to Traditional Hash Tables". In Proc. 7th Workshop on
118 : : * Distributed Data and Structures (WDAS'06), 2006.
119 : : */
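
/* A minimal usage sketch (the 'struct foo' here is hypothetical; hash_int()
 * is from hash.h and cmap_remove() is declared in cmap.h):
 *
 *     struct foo {
 *         struct cmap_node node;
 *         int key;
 *     };
 *
 *     struct cmap map;
 *     struct foo *foo = xmalloc(sizeof *foo);
 *
 *     cmap_init(&map);
 *     foo->key = 42;
 *     cmap_insert(&map, &foo->node, hash_int(foo->key, 0));
 *     ...
 *     cmap_remove(&map, &foo->node, hash_int(foo->key, 0));
 *     ovsrcu_postpone(free, foo);
 *     cmap_destroy(&map);
 */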
120 : : /* An entry is an int and a pointer: 8 bytes on 32-bit, 12 bytes on 64-bit. */
121 : : #define CMAP_ENTRY_SIZE (4 + (UINTPTR_MAX == UINT32_MAX ? 4 : 8))
122 : :
123 : : /* Number of entries per bucket: 7 on 32-bit, 5 on 64-bit. */
124 : : #define CMAP_K ((CACHE_LINE_SIZE - 4) / CMAP_ENTRY_SIZE)
125 : :
126 : : /* Pad to make a bucket a full cache line in size: 4 on 32-bit, 0 on 64-bit. */
127 : : #define CMAP_PADDING ((CACHE_LINE_SIZE - 4) - (CMAP_K * CMAP_ENTRY_SIZE))
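
/* A worked example of the macros above, assuming CACHE_LINE_SIZE is 64 (its
 * usual value in this tree):
 *
 *     64-bit: CMAP_ENTRY_SIZE = 4 + 8 = 12, CMAP_K = 60 / 12 = 5,
 *             CMAP_PADDING = 60 - 5 * 12 = 0.
 *     32-bit: CMAP_ENTRY_SIZE = 4 + 4 = 8,  CMAP_K = 60 / 8 = 7,
 *             CMAP_PADDING = 60 - 7 * 8 = 4.
 */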
128 : :
129 : : /* A cuckoo hash bucket. Designed to be cache-aligned and exactly one cache
130 : : * line long. */
131 : : struct cmap_bucket {
132 : : /* Allows readers to track in-progress changes. Initially zero, each
133 : : * writer increments this value just before and just after each change (see
134 : : * cmap_set_bucket()). Thus, a reader can ensure that it gets a consistent
135 : : * snapshot by waiting for the counter to become even (see
136 : : * read_even_counter()), then checking that its value does not change while
137 : : * examining the bucket (see cmap_find()). */
138 : : atomic_uint32_t counter;
139 : :
140 : : /* (hash, node) slots. They are parallel arrays instead of an array of
141 : : * structs to reduce the amount of space lost to padding.
142 : : *
143 : : * The slots are in no particular order. A null pointer indicates that a
144 : : * pair is unused. In-use slots are not necessarily in the earliest
145 : : * slots. */
146 : : uint32_t hashes[CMAP_K];
147 : : struct cmap_node nodes[CMAP_K];
148 : :
149 : : /* Padding to make cmap_bucket exactly one cache line long. */
150 : : #if CMAP_PADDING > 0
151 : : uint8_t pad[CMAP_PADDING];
152 : : #endif
153 : : };
154 : : BUILD_ASSERT_DECL(sizeof(struct cmap_bucket) == CACHE_LINE_SIZE);
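
/* A sketch of the reader-side protocol described above (read_even_counter(),
 * counter_changed(), and cmap_find__() below are the real implementations):
 * wait out any in-progress write, scan the bucket, then retry if a writer
 * intervened in the meantime:
 *
 *     uint32_t c;
 *
 *     do {
 *         c = read_even_counter(b);
 *         ... scan b->hashes[] and b->nodes[] ...
 *     } while (counter_changed(b, c));
 */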
155 : :
156 : : /* Default maximum load factor (as a fraction of UINT32_MAX + 1) before
157 : : * enlarging a cmap. Reasonable values lie between about 75% and 93%. Smaller
158 : : * values waste memory; larger values increase the average insertion time. */
159 : : #define CMAP_MAX_LOAD ((uint32_t) (UINT32_MAX * .85))
160 : :
161 : : /* Default minimum load factor (as a fraction of UINT32_MAX + 1) before
162 : : * shrinking a cmap. The value is currently 20%, which means the cmap ends
163 : : * up at a 40% load factor after a shrink halves the bucket count. */
164 : : #define CMAP_MIN_LOAD ((uint32_t) (UINT32_MAX * .20))
165 : :
166 : : /* The implementation of a concurrent hash map. */
167 : : struct cmap_impl {
168 : : unsigned int n; /* Number of in-use elements. */
169 : : unsigned int max_n; /* Max elements before enlarging. */
170 : : unsigned int min_n; /* Min elements before shrinking. */
171 : : uint32_t mask; /* Number of 'buckets', minus one. */
172 : : uint32_t basis; /* Basis for rehashing client's hash values. */
173 : : uint8_t pad[CACHE_LINE_SIZE - 4 * 5]; /* Pad to end of cache line. */
174 : : struct cmap_bucket buckets[1];
175 : : };
176 : : BUILD_ASSERT_DECL(sizeof(struct cmap_impl) == CACHE_LINE_SIZE * 2);
177 : :
178 : : /* An empty cmap. */
179 : : OVS_ALIGNED_VAR(CACHE_LINE_SIZE) const struct cmap_impl empty_cmap;
180 : :
181 : : static struct cmap_impl *cmap_rehash(struct cmap *, uint32_t mask);
182 : :
183 : : /* Explicit inline keywords in utility functions seem to be necessary
184 : : * to prevent performance regression on cmap_find(). */
185 : :
186 : : /* Given a rehashed value 'hash', returns the other hash for that rehashed
187 : : * value. This is symmetric: other_hash(other_hash(x)) == x. (See also "Hash
188 : : * Functions" at the top of this file.) */
189 : : static inline uint32_t
190 : 73681118 : other_hash(uint32_t hash)
191 : : {
192 : 73681118 : return (hash << 16) | (hash >> 16);
193 : : }
194 : :
195 : : /* Returns the rehashed value for 'hash' within 'impl'. (See also "Hash
196 : : * Functions" at the top of this file.) */
197 : : static inline uint32_t
198 : 79291579 : rehash(const struct cmap_impl *impl, uint32_t hash)
199 : : {
200 : 79291579 : return hash_finish(impl->basis, hash);
201 : : }
202 : :
203 : : /* Not always inlined without the explicit 'inline' keyword. */
204 : : static inline struct cmap_impl *
205 : 100996103 : cmap_get_impl(const struct cmap *cmap)
206 : : {
207 : 100996103 : return ovsrcu_get(struct cmap_impl *, &cmap->impl);
208 : : }
209 : :
210 : : static uint32_t
211 : 1265381 : calc_max_n(uint32_t mask)
212 : : {
213 : 1265381 : return ((uint64_t) (mask + 1) * CMAP_K * CMAP_MAX_LOAD) >> 32;
214 : : }
215 : :
216 : : static uint32_t
217 : 1265381 : calc_min_n(uint32_t mask)
218 : : {
219 : 1265381 : return ((uint64_t) (mask + 1) * CMAP_K * CMAP_MIN_LOAD) >> 32;
220 : : }
221 : :
222 : : static struct cmap_impl *
223 : 1265381 : cmap_impl_create(uint32_t mask)
224 : : {
225 : : struct cmap_impl *impl;
226 : :
227 [ - + ]: 1265381 : ovs_assert(is_pow2(mask + 1));
228 : :
229 : : /* There are 'mask + 1' buckets but struct cmap_impl has one bucket built
230 : : * in, so we only need to add space for the extra 'mask' buckets. */
231 : 1265381 : impl = xzalloc_cacheline(sizeof *impl + mask * sizeof *impl->buckets);
232 : 1265381 : impl->n = 0;
233 : 1265381 : impl->max_n = calc_max_n(mask);
234 : 1265381 : impl->min_n = calc_min_n(mask);
235 : 1265381 : impl->mask = mask;
236 : 1265381 : impl->basis = random_uint32();
237 : :
238 : 1265381 : return impl;
239 : : }
240 : :
241 : : /* Initializes 'cmap' as an empty concurrent hash map. */
242 : : void
243 : 1161589 : cmap_init(struct cmap *cmap)
244 : : {
245 : 1161589 : ovsrcu_set(&cmap->impl, CONST_CAST(struct cmap_impl *, &empty_cmap));
246 : 1161589 : }
247 : :
248 : : /* Destroys 'cmap'.
249 : : *
250 : : * The client is responsible for destroying any data previously held in
251 : : * 'cmap'. */
252 : : void
253 : 993238 : cmap_destroy(struct cmap *cmap)
254 : : {
255 [ + - ]: 993238 : if (cmap) {
256 : 993238 : struct cmap_impl *impl = cmap_get_impl(cmap);
257 [ + + ]: 993238 : if (impl != &empty_cmap) {
258 : 621653 : ovsrcu_postpone(free_cacheline, impl);
259 : : }
260 : : }
261 : 993238 : }
262 : :
263 : : /* Returns the number of elements in 'cmap'. */
264 : : size_t
265 : 664385 : cmap_count(const struct cmap *cmap)
266 : : {
267 : 664385 : return cmap_get_impl(cmap)->n;
268 : : }
269 : :
270 : : /* Returns true if 'cmap' is empty, false otherwise. */
271 : : bool
272 : 500809 : cmap_is_empty(const struct cmap *cmap)
273 : : {
274 : 500809 : return cmap_count(cmap) == 0;
275 : : }
276 : :
277 : : static inline uint32_t
278 : 130523923 : read_counter(const struct cmap_bucket *bucket_)
279 : : {
280 : 130523923 : struct cmap_bucket *bucket = CONST_CAST(struct cmap_bucket *, bucket_);
281 : : uint32_t counter;
282 : :
283 : 130523923 : atomic_read_explicit(&bucket->counter, &counter, memory_order_acquire);
284 : :
285 : 130523923 : return counter;
286 : : }
287 : :
288 : : static inline uint32_t
289 : 130523922 : read_even_counter(const struct cmap_bucket *bucket)
290 : : {
291 : : uint32_t counter;
292 : :
293 : : do {
294 : 130523925 : counter = read_counter(bucket);
295 [ + + ]: 130523920 : } while (OVS_UNLIKELY(counter & 1));
296 : :
297 : 130523917 : return counter;
298 : : }
299 : :
300 : : static inline bool
301 : 182804996 : counter_changed(const struct cmap_bucket *b_, uint32_t c)
302 : : {
303 : 182804996 : struct cmap_bucket *b = CONST_CAST(struct cmap_bucket *, b_);
304 : : uint32_t counter;
305 : :
306 : : /* Need to make sure the counter read is not moved up, before the hash and
307 : : * cmap_node_next(). Using atomic_read_explicit with memory_order_acquire
308 : : * would allow prior reads to be moved after the barrier.
309 : : * atomic_thread_fence prevents all following memory accesses from moving
310 : : * prior to preceding loads. */
311 : 182804996 : atomic_thread_fence(memory_order_acquire);
312 : 182804996 : atomic_read_relaxed(&b->counter, &counter);
313 : :
314 : 182804996 : return OVS_UNLIKELY(counter != c);
315 : : }
316 : :
317 : : static inline const struct cmap_node *
318 : 130523893 : cmap_find_in_bucket(const struct cmap_bucket *bucket, uint32_t hash)
319 : : {
320 [ + + ]: 673608296 : for (int i = 0; i < CMAP_K; i++) {
321 [ + + ]: 568211845 : if (bucket->hashes[i] == hash) {
322 : 25127442 : return cmap_node_next(&bucket->nodes[i]);
323 : : }
324 : : }
325 : 105396451 : return NULL;
326 : : }
327 : :
328 : : static inline const struct cmap_node *
329 : 71333840 : cmap_find__(const struct cmap_bucket *b1, const struct cmap_bucket *b2,
330 : : uint32_t hash)
331 : : {
332 : : uint32_t c1, c2;
333 : : const struct cmap_node *node;
334 : :
335 : : do {
336 : : do {
337 : 71333842 : c1 = read_even_counter(b1);
338 : 71333832 : node = cmap_find_in_bucket(b1, hash);
339 [ - + ]: 71333824 : } while (OVS_UNLIKELY(counter_changed(b1, c1)));
340 [ + + ]: 71333821 : if (node) {
341 : 18609417 : break;
342 : : }
343 : : do {
344 : 52724404 : c2 = read_even_counter(b2);
345 : 52724414 : node = cmap_find_in_bucket(b2, hash);
346 [ - + ]: 52724414 : } while (OVS_UNLIKELY(counter_changed(b2, c2)));
347 [ + + ]: 52724414 : if (node) {
348 : 475489 : break;
349 : : }
350 [ + + ]: 52248925 : } while (OVS_UNLIKELY(counter_changed(b1, c1)));
351 : :
352 : 71333829 : return node;
353 : : }
354 : :
355 : : /* Searches 'cmap' for an element with the specified 'hash'. If one or more is
356 : : * found, returns a pointer to the first one, otherwise a null pointer. All of
357 : : * the nodes on the returned list are guaranteed to have exactly the given
358 : : * 'hash'.
359 : : *
360 : : * This function works even if 'cmap' is changing concurrently. If 'cmap' is
361 : : * not changing, then cmap_find_protected() is slightly faster.
362 : : *
363 : : * CMAP_FOR_EACH_WITH_HASH is usually more convenient. */
364 : : const struct cmap_node *
365 : 71333842 : cmap_find(const struct cmap *cmap, uint32_t hash)
366 : : {
367 : 71333842 : const struct cmap_impl *impl = cmap_get_impl(cmap);
368 : 71333847 : uint32_t h1 = rehash(impl, hash);
369 : 71333842 : uint32_t h2 = other_hash(h1);
370 : :
371 : 71333843 : return cmap_find__(&impl->buckets[h1 & impl->mask],
372 : 71333843 : &impl->buckets[h2 & impl->mask],
373 : : hash);
374 : : }
375 : :
376 : : /* Looks up each of the 'hashes' whose corresponding bit in 'map' is 1, and
377 : : * sets the corresponding pointer in 'nodes' if the hash value was found in
378 : : * the 'cmap'. In other cases the 'nodes' values are not changed, i.e., no
379 : : * NULL pointers are stored there.
380 : : * Returns a map in which a bit is set to 1 if the corresponding 'nodes'
381 : : * pointer was stored, 0 otherwise.
382 : : * Generally, the caller then wants to use CMAP_NODE_FOR_EACH to check for
383 : : * hash collisions. */
384 : : unsigned long
385 : 337208 : cmap_find_batch(const struct cmap *cmap, unsigned long map,
386 : : uint32_t hashes[], const struct cmap_node *nodes[])
387 : : {
388 : 337208 : const struct cmap_impl *impl = cmap_get_impl(cmap);
389 : 337208 : unsigned long result = map;
390 : : int i;
391 : : uint32_t h1s[sizeof map * CHAR_BIT];
392 : : const struct cmap_bucket *b1s[sizeof map * CHAR_BIT];
393 : : const struct cmap_bucket *b2s[sizeof map * CHAR_BIT];
394 : : uint32_t c1s[sizeof map * CHAR_BIT];
395 : :
396 : : /* Compute hashes and prefetch 1st buckets. */
397 [ + + ]: 6375271 : ULLONG_FOR_EACH_1(i, map) {
398 : 6038063 : h1s[i] = rehash(impl, hashes[i]);
399 : 6038063 : b1s[i] = &impl->buckets[h1s[i] & impl->mask];
400 : 6038063 : OVS_PREFETCH(b1s[i]);
401 : : }
402 : : /* Lookups, Round 1. Only look up at the first bucket. */
403 [ + + ]: 6375271 : ULLONG_FOR_EACH_1(i, map) {
404 : : uint32_t c1;
405 : 6038063 : const struct cmap_bucket *b1 = b1s[i];
406 : : const struct cmap_node *node;
407 : :
408 : : do {
409 : 6038063 : c1 = read_even_counter(b1);
410 : 6038063 : node = cmap_find_in_bucket(b1, hashes[i]);
411 [ - + ]: 6038063 : } while (OVS_UNLIKELY(counter_changed(b1, c1)));
412 : :
413 [ + + ]: 6038063 : if (!node) {
414 : : /* Not found (yet); Prefetch the 2nd bucket. */
415 : 427606 : b2s[i] = &impl->buckets[other_hash(h1s[i]) & impl->mask];
416 : 427606 : OVS_PREFETCH(b2s[i]);
417 : 427606 : c1s[i] = c1; /* We may need to check this after Round 2. */
418 : 427606 : continue;
419 : : }
420 : : /* Found. */
421 : 5610457 : ULLONG_SET0(map, i); /* Ignore this on round 2. */
422 : 5610457 : OVS_PREFETCH(node);
423 : 5610457 : nodes[i] = node;
424 : : }
425 : : /* Round 2. Look into the 2nd bucket, if needed. */
426 [ + + ]: 764814 : ULLONG_FOR_EACH_1(i, map) {
427 : : uint32_t c2;
428 : 427606 : const struct cmap_bucket *b2 = b2s[i];
429 : : const struct cmap_node *node;
430 : :
431 : : do {
432 : 427606 : c2 = read_even_counter(b2);
433 : 427606 : node = cmap_find_in_bucket(b2, hashes[i]);
434 [ - + ]: 427606 : } while (OVS_UNLIKELY(counter_changed(b2, c2)));
435 : :
436 [ + + ]: 427606 : if (!node) {
437 : : /* Not found, but the node may have been moved from b2 to b1 right
438 : : * after we finished with b1 earlier. We just got a clean reading
439 : : * of the 2nd bucket, so we check the counter of the 1st bucket
440 : : * only. However, we need to check both buckets again, as the
441 : : * entry may be moved again to the 2nd bucket. Basically, we
442 : : * need to loop as long as it takes to get stable readings of
443 : : * both buckets. cmap_find__() does that, and now that we have
444 : : * fetched both buckets we can just use it. */
445 [ - + ]: 32168 : if (OVS_UNLIKELY(counter_changed(b1s[i], c1s[i]))) {
446 : 0 : node = cmap_find__(b1s[i], b2s[i], hashes[i]);
447 [ # # ]: 0 : if (node) {
448 : 0 : goto found;
449 : : }
450 : : }
451 : : /* Not found. */
452 : 32168 : ULLONG_SET0(result, i); /* Fix the result. */
453 : 32168 : continue;
454 : : }
455 : : found:
456 : 395438 : OVS_PREFETCH(node);
457 : 395438 : nodes[i] = node;
458 : : }
459 : 337208 : return result;
460 : : }
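
/* A hypothetical caller sketch for cmap_find_batch(): look up two hashes at
 * once, with map bits corresponding to array indexes. 'hash_a' and 'hash_b'
 * are placeholders:
 *
 *     uint32_t hashes[2] = { hash_a, hash_b };
 *     const struct cmap_node *nodes[2];
 *     unsigned long found;
 *
 *     found = cmap_find_batch(&map, (1UL << 2) - 1, hashes, nodes);
 *     if (found & (1UL << 0)) {
 *         ... nodes[0] heads the chain of nodes with hash 'hash_a' ...
 *     }
 */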
461 : :
462 : : static int
463 : 920616 : cmap_find_slot_protected(struct cmap_bucket *b, uint32_t hash)
464 : : {
465 : : int i;
466 : :
467 [ + + ]: 1152449 : for (i = 0; i < CMAP_K; i++) {
468 [ + + ][ + - ]: 1149063 : if (b->hashes[i] == hash && cmap_node_next_protected(&b->nodes[i])) {
469 : 917230 : return i;
470 : : }
471 : : }
472 : 3386 : return -1;
473 : : }
474 : :
475 : : static struct cmap_node *
476 : 508 : cmap_find_bucket_protected(struct cmap_impl *impl, uint32_t hash, uint32_t h)
477 : : {
478 : 508 : struct cmap_bucket *b = &impl->buckets[h & impl->mask];
479 : : int i;
480 : :
481 [ + + ]: 3048 : for (i = 0; i < CMAP_K; i++) {
482 [ - + ]: 2540 : if (b->hashes[i] == hash) {
483 : 0 : return cmap_node_next_protected(&b->nodes[i]);
484 : : }
485 : : }
486 : 508 : return NULL;
487 : : }
488 : :
489 : : /* Like cmap_find(), but only for use if 'cmap' cannot change concurrently.
490 : : *
491 : : * CMAP_FOR_EACH_WITH_HASH_PROTECTED is usually more convenient. */
492 : : struct cmap_node *
493 : 254 : cmap_find_protected(const struct cmap *cmap, uint32_t hash)
494 : : {
495 : 254 : struct cmap_impl *impl = cmap_get_impl(cmap);
496 : 254 : uint32_t h1 = rehash(impl, hash);
497 : 254 : uint32_t h2 = other_hash(hash);
498 : : struct cmap_node *node;
499 : :
500 : 254 : node = cmap_find_bucket_protected(impl, hash, h1);
501 [ - + ]: 254 : if (node) {
502 : 0 : return node;
503 : : }
504 : 254 : return cmap_find_bucket_protected(impl, hash, h2);
505 : : }
506 : :
507 : : static int
508 : 6623 : cmap_find_empty_slot_protected(const struct cmap_bucket *b)
509 : : {
510 : : int i;
511 : :
512 [ + + ]: 32328 : for (i = 0; i < CMAP_K; i++) {
513 [ + + ]: 28652 : if (!cmap_node_next_protected(&b->nodes[i])) {
514 : 2947 : return i;
515 : : }
516 : : }
517 : 3676 : return -1;
518 : : }
519 : :
520 : : static void
521 : 980172 : cmap_set_bucket(struct cmap_bucket *b, int i,
522 : : struct cmap_node *node, uint32_t hash)
523 : : {
524 : : uint32_t c;
525 : :
526 : 980172 : atomic_read_explicit(&b->counter, &c, memory_order_acquire);
527 : 980172 : atomic_store_explicit(&b->counter, c + 1, memory_order_release);
528 : 980172 : ovsrcu_set(&b->nodes[i].next, node); /* Also atomic. */
529 : 980172 : b->hashes[i] = hash;
530 : 980172 : atomic_store_explicit(&b->counter, c + 2, memory_order_release);
531 : 980172 : }
532 : :
533 : : /* Searches 'b' for a node with the given 'hash'. If it finds one, adds
534 : : * 'new_node' to the node's linked list and returns true. If it does not find
535 : : * one, returns false. */
536 : : static bool
537 : 1968377 : cmap_insert_dup(struct cmap_node *new_node, uint32_t hash,
538 : : struct cmap_bucket *b)
539 : : {
540 : : int i;
541 : :
542 [ + + ]: 11743638 : for (i = 0; i < CMAP_K; i++) {
543 [ + + ]: 9789169 : if (b->hashes[i] == hash) {
544 : 13908 : struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);
545 : :
546 [ + + ]: 13908 : if (node) {
547 : : struct cmap_node *p;
548 : :
549 : : /* The common case is that 'new_node' is a singleton,
550 : : * with a null 'next' pointer. Rehashing can add a
551 : : * longer chain, but due to our invariant of always
552 : : * having all nodes with the same (user) hash value in
553 : : * a single chain, rehashing will always insert the
554 : : * chain into an empty slot. The only way we can end up
555 : : * here is by the user inserting a chain of nodes at
556 : : * once. Find the end of the chain starting at
557 : : * 'new_node', then splice 'node' to the end of that
558 : : * chain. */
559 : 1031 : p = new_node;
560 : : for (;;) {
561 : 1031 : struct cmap_node *next = cmap_node_next_protected(p);
562 : :
563 [ + - ]: 1031 : if (!next) {
564 : 1031 : break;
565 : : }
566 : 0 : p = next;
567 : 0 : }
568 : 1031 : ovsrcu_set_hidden(&p->next, node);
569 : : } else {
570 : : /* The hash value is there from some previous insertion, but
571 : : * the associated node has been removed. We're not really
572 : : * inserting a duplicate, but we can still reuse the slot.
573 : : * Carry on. */
574 : : }
575 : :
576 : : /* Change the bucket to point to 'new_node'. This is a degenerate
577 : : * form of cmap_set_bucket() that doesn't update the counter since
578 : : * we're only touching one field and in a way that doesn't change
579 : : * the bucket's meaning for readers. */
580 : 13908 : ovsrcu_set(&b->nodes[i].next, new_node);
581 : :
582 : 13908 : return true;
583 : : }
584 : : }
585 : 1954469 : return false;
586 : : }
587 : :
588 : : /* Searches 'b' for an empty slot. If successful, stores 'node' and 'hash' in
589 : : * the slot and returns true. Otherwise, returns false. */
590 : : static bool
591 : 986791 : cmap_insert_bucket(struct cmap_node *node, uint32_t hash,
592 : : struct cmap_bucket *b)
593 : : {
594 : : int i;
595 : :
596 [ + + ]: 1384423 : for (i = 0; i < CMAP_K; i++) {
597 [ + + ]: 1371888 : if (!cmap_node_next_protected(&b->nodes[i])) {
598 : 974256 : cmap_set_bucket(b, i, node, hash);
599 : 974257 : return true;
600 : : }
601 : : }
602 : 12535 : return false;
603 : : }
604 : :
605 : : /* Returns the other bucket that b->nodes[slot] could occupy in 'impl'. (This
606 : : * might be the same as 'b'.) */
607 : : static struct cmap_bucket *
608 : 11056 : other_bucket_protected(struct cmap_impl *impl, struct cmap_bucket *b, int slot)
609 : : {
610 : 11056 : uint32_t h1 = rehash(impl, b->hashes[slot]);
611 : 11056 : uint32_t h2 = other_hash(h1);
612 : 11056 : uint32_t b_idx = b - impl->buckets;
613 [ + + ]: 11056 : uint32_t other_h = (h1 & impl->mask) == b_idx ? h2 : h1;
614 : :
615 : 11056 : return &impl->buckets[other_h & impl->mask];
616 : : }
617 : :
618 : : /* 'new_node' is to be inserted into 'impl', but both candidate buckets 'b1'
619 : : * and 'b2' are full. This function attempts to rearrange buckets within
620 : : * 'impl' to make room for 'new_node'.
621 : : *
622 : : * The implementation is a general-purpose breadth-first search. At first
623 : : * glance, this is more complex than a random walk through 'impl' (suggested by
624 : : * some references), but random walks have a tendency to loop back through a
625 : : * single bucket. We have to move nodes backward along the path that we find,
626 : : * so that no node actually disappears from the hash table, which means a
627 : : * random walk would have to be careful to deal with loops. By contrast, a
628 : : * successful breadth-first search always finds a *shortest* path through the
629 : : * hash table, and a shortest path will never contain loops, so it avoids that
630 : : * problem entirely.
631 : : */
632 : : static bool
633 : 2964 : cmap_insert_bfs(struct cmap_impl *impl, struct cmap_node *new_node,
634 : : uint32_t hash, struct cmap_bucket *b1, struct cmap_bucket *b2)
635 : : {
636 : : enum { MAX_DEPTH = 4 };
637 : :
638 : : /* A path from 'start' to 'end' via the 'n' steps in 'slots[]'.
639 : : *
640 : : * One can follow the path via:
641 : : *
642 : : * struct cmap_bucket *b;
643 : : * int i;
644 : : *
645 : : * b = path->start;
646 : : * for (i = 0; i < path->n; i++) {
647 : : * b = other_bucket_protected(impl, b, path->slots[i]);
648 : : * }
649 : : * ovs_assert(b == path->end);
650 : : */
651 : : struct cmap_path {
652 : : struct cmap_bucket *start; /* First bucket along the path. */
653 : : struct cmap_bucket *end; /* Last bucket on the path. */
654 : : uint8_t slots[MAX_DEPTH]; /* Slots used for each hop. */
655 : : int n; /* Number of slots[]. */
656 : : };
657 : :
658 : : /* We need to limit the amount of work we do trying to find a path. It
659 : : * might actually be impossible to rearrange the cmap, and after some time
660 : : * it is likely to be easier to rehash the entire cmap.
661 : : *
662 : : * This value of MAX_QUEUE is an arbitrary limit suggested by one of the
663 : : * references. Empirically, it seems to work OK. */
664 : : enum { MAX_QUEUE = 500 };
665 : : struct cmap_path queue[MAX_QUEUE];
666 : 2964 : int head = 0;
667 : 2964 : int tail = 0;
668 : :
669 : : /* Add 'b1' and 'b2' as starting points for the search. */
670 : 2964 : queue[head].start = b1;
671 : 2964 : queue[head].end = b1;
672 : 2964 : queue[head].n = 0;
673 : 2964 : head++;
674 [ + + ]: 2964 : if (b1 != b2) {
675 : 2250 : queue[head].start = b2;
676 : 2250 : queue[head].end = b2;
677 : 2250 : queue[head].n = 0;
678 : 2250 : head++;
679 : : }
680 : :
681 [ + + ]: 3522 : while (tail < head) {
682 : 3505 : const struct cmap_path *path = &queue[tail++];
683 : 3505 : struct cmap_bucket *this = path->end;
684 : : int i;
685 : :
686 [ + + ]: 8645 : for (i = 0; i < CMAP_K; i++) {
687 : 8087 : struct cmap_bucket *next = other_bucket_protected(impl, this, i);
688 : : int j;
689 : :
690 [ + + ]: 8087 : if (this == next) {
691 : 1464 : continue;
692 : : }
693 : :
694 : 6623 : j = cmap_find_empty_slot_protected(next);
695 [ + + ]: 6623 : if (j >= 0) {
696 : : /* We've found a path along which we can rearrange the hash
697 : : * table: Start at path->start, follow all the slots in
698 : : * path->slots[], then follow slot 'i', then the bucket you
699 : : * arrive at has slot 'j' empty. */
700 : : struct cmap_bucket *buckets[MAX_DEPTH + 2];
701 : : int slots[MAX_DEPTH + 2];
702 : : int k;
703 : :
704 : : /* Figure out the full sequence of slots. */
705 [ + + ]: 2969 : for (k = 0; k < path->n; k++) {
706 : 22 : slots[k] = path->slots[k];
707 : : }
708 : 2947 : slots[path->n] = i;
709 : 2947 : slots[path->n + 1] = j;
710 : :
711 : : /* Figure out the full sequence of buckets. */
712 : 2947 : buckets[0] = path->start;
713 [ + + ]: 5916 : for (k = 0; k <= path->n; k++) {
714 : 2969 : buckets[k + 1] = other_bucket_protected(impl, buckets[k], slots[k]);
715 : : }
716 : :
717 : : /* Now the path is fully expressed. One can start from
718 : : * buckets[0], go via slots[0] to buckets[1], via slots[1] to
719 : : * buckets[2], and so on.
720 : : *
721 : : * Move all the nodes across the path "backward". After each
722 : : * step some node appears in two buckets. Thus, every node is
723 : : * always visible to a concurrent search. */
724 [ + + ]: 5916 : for (k = path->n + 1; k > 0; k--) {
725 : 2969 : int slot = slots[k - 1];
726 : :
727 : 2969 : cmap_set_bucket(
728 : : buckets[k], slots[k],
729 : 2969 : cmap_node_next_protected(&buckets[k - 1]->nodes[slot]),
730 : 2969 : buckets[k - 1]->hashes[slot]);
731 : : }
732 : :
733 : : /* Finally, replace the first node on the path by
734 : : * 'new_node'. */
735 : 2947 : cmap_set_bucket(buckets[0], slots[0], new_node, hash);
736 : :
737 : 2947 : return true;
738 : : }
739 : :
740 [ + + ][ + - ]: 3676 : if (path->n < MAX_DEPTH && head < MAX_QUEUE) {
741 : 2956 : struct cmap_path *new_path = &queue[head++];
742 : :
743 : 2956 : *new_path = *path;
744 : 2956 : new_path->end = next;
745 : 2956 : new_path->slots[new_path->n++] = i;
746 : : }
747 : : }
748 : : }
749 : :
750 : 2964 : return false;
751 : : }
752 : :
753 : : /* Adds 'node', with the given 'hash', to 'impl'.
754 : : *
755 : : * 'node' is ordinarily a single node, with a null 'next' pointer. When
756 : : * rehashing, however, it may be a longer chain of nodes. */
757 : : static bool
758 : 991129 : cmap_try_insert(struct cmap_impl *impl, struct cmap_node *node, uint32_t hash)
759 : : {
760 : 991129 : uint32_t h1 = rehash(impl, hash);
761 : 991129 : uint32_t h2 = other_hash(h1);
762 : 991129 : struct cmap_bucket *b1 = &impl->buckets[h1 & impl->mask];
763 : 991129 : struct cmap_bucket *b2 = &impl->buckets[h2 & impl->mask];
764 : :
765 : 2959506 : return (OVS_UNLIKELY(cmap_insert_dup(node, hash, b1) ||
766 [ + + + + ]: 986792 : cmap_insert_dup(node, hash, b2)) ||
767 : 986791 : OVS_LIKELY(cmap_insert_bucket(node, hash, b1) ||
768 [ + + + + : 2945597 : cmap_insert_bucket(node, hash, b2)) ||
+ + ]
769 : 2964 : cmap_insert_bfs(impl, node, hash, b1, b2));
770 : : }
771 : :
772 : : /* Inserts 'node', with the given 'hash', into 'cmap'. The caller must ensure
773 : : * that 'cmap' cannot change concurrently (from another thread). If duplicates
774 : : * are undesirable, the caller must have already verified that 'cmap' does not
775 : : * contain a duplicate of 'node'.
776 : : *
777 : : * Returns the current number of nodes in the cmap after the insertion. */
778 : : size_t
779 : 910605 : cmap_insert(struct cmap *cmap, struct cmap_node *node, uint32_t hash)
780 : : {
781 : 910605 : struct cmap_impl *impl = cmap_get_impl(cmap);
782 : :
783 : 910605 : ovsrcu_set_hidden(&node->next, NULL);
784 : :
785 [ + + ]: 910605 : if (OVS_UNLIKELY(impl->n >= impl->max_n)) {
786 : 636490 : COVERAGE_INC(cmap_expand);
787 : 636490 : impl = cmap_rehash(cmap, (impl->mask << 1) | 1);
788 : : }
789 : :
790 [ + + ]: 910622 : while (OVS_UNLIKELY(!cmap_try_insert(impl, node, hash))) {
791 : 17 : impl = cmap_rehash(cmap, impl->mask);
792 : : }
793 : 910605 : return ++impl->n;
794 : : }
795 : :
796 : : static bool
797 : 920616 : cmap_replace__(struct cmap_impl *impl, struct cmap_node *node,
798 : : struct cmap_node *replacement, uint32_t hash, uint32_t h)
799 : : {
800 : 920616 : struct cmap_bucket *b = &impl->buckets[h & impl->mask];
801 : : int slot;
802 : :
803 : 920616 : slot = cmap_find_slot_protected(b, hash);
804 [ + + ]: 920616 : if (slot < 0) {
805 : 3386 : return false;
806 : : }
807 : :
808 : : /* The pointer to 'node' is changed to point to 'replacement',
809 : : * which is the next node if no replacement node is given. */
810 [ + + ]: 917230 : if (!replacement) {
811 : 889181 : replacement = cmap_node_next_protected(node);
812 : : } else {
813 : : /* 'replacement' takes the position of 'node' in the list. */
814 : 28049 : ovsrcu_set_hidden(&replacement->next, cmap_node_next_protected(node));
815 : : }
816 : :
817 : 917230 : struct cmap_node *iter = &b->nodes[slot];
818 : : for (;;) {
819 : 1660603 : struct cmap_node *next = cmap_node_next_protected(iter);
820 : :
821 [ + + ]: 1660603 : if (next == node) {
822 : 917230 : ovsrcu_set(&iter->next, replacement);
823 : 917230 : return true;
824 : : }
825 : 743373 : iter = next;
826 : 743373 : }
827 : : }
828 : :
829 : : /* Replaces 'old_node' in 'cmap' with 'new_node'. The caller must
830 : : * ensure that 'cmap' cannot change concurrently (from another thread).
831 : : *
832 : : * 'old_node' must not be destroyed or modified or inserted back into 'cmap' or
833 : : * into any other concurrent hash map while any other thread might be accessing
834 : : * it. One correct way to do this is to free it from an RCU callback with
835 : : * ovsrcu_postpone().
836 : : *
837 : : * Returns the current number of nodes in the cmap after the replacement. The
838 : : * number of nodes decreases by one if 'new_node' is NULL. */
839 : : size_t
840 : 917230 : cmap_replace(struct cmap *cmap, struct cmap_node *old_node,
841 : : struct cmap_node *new_node, uint32_t hash)
842 : : {
843 : 917230 : struct cmap_impl *impl = cmap_get_impl(cmap);
844 : 917230 : uint32_t h1 = rehash(impl, hash);
845 : 917230 : uint32_t h2 = other_hash(h1);
846 : : bool ok;
847 : :
848 : 917230 : ok = cmap_replace__(impl, old_node, new_node, hash, h1)
849 [ + + ][ + - ]: 917230 : || cmap_replace__(impl, old_node, new_node, hash, h2);
850 [ - + ]: 917230 : ovs_assert(ok);
851 : :
852 [ + + ]: 917230 : if (!new_node) {
853 : 889181 : impl->n--;
854 [ + + ]: 889181 : if (OVS_UNLIKELY(impl->n < impl->min_n)) {
855 : 628874 : COVERAGE_INC(cmap_shrink);
856 : 628874 : impl = cmap_rehash(cmap, impl->mask >> 1);
857 : : }
858 : : }
859 : 917230 : return impl->n;
860 : : }
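
/* A hypothetical sketch of the pattern described above: publish 'new' (which
 * must have the same hash as 'old') in place of 'old', then free 'old' only
 * after all current readers are done with it:
 *
 *     cmap_replace(&map, &old->node, &new->node, hash_int(old->key, 0));
 *     ovsrcu_postpone(free, old);
 */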
861 : :
862 : : static bool
863 : 1265381 : cmap_try_rehash(const struct cmap_impl *old, struct cmap_impl *new)
864 : : {
865 : : const struct cmap_bucket *b;
866 : :
867 [ + + ]: 3190939 : for (b = old->buckets; b <= &old->buckets[old->mask]; b++) {
868 : : int i;
869 : :
870 [ + + ]: 11553348 : for (i = 0; i < CMAP_K; i++) {
871 : : /* possible optimization here because we know the hashes are
872 : : * unique */
873 : 9627790 : struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);
874 : :
875 [ + + ][ - + ]: 9627790 : if (node && !cmap_try_insert(new, node, b->hashes[i])) {
876 : 0 : return false;
877 : : }
878 : : }
879 : : }
880 : 1265381 : return true;
881 : : }
882 : :
883 : : static struct cmap_impl *
884 : 1265381 : cmap_rehash(struct cmap *cmap, uint32_t mask)
885 : : {
886 : 1265381 : struct cmap_impl *old = cmap_get_impl(cmap);
887 : : struct cmap_impl *new;
888 : :
889 : 1265381 : new = cmap_impl_create(mask);
890 [ - + ]: 1265381 : ovs_assert(old->n < new->max_n);
891 : :
892 [ - + ]: 1265381 : while (!cmap_try_rehash(old, new)) {
893 : 0 : memset(new->buckets, 0, (mask + 1) * sizeof *new->buckets);
894 : 0 : new->basis = random_uint32();
895 : : }
896 : :
897 : 1265381 : new->n = old->n;
898 : 1265381 : ovsrcu_set(&cmap->impl, new);
899 [ + + ]: 1265381 : if (old != &empty_cmap) {
900 : 632851 : ovsrcu_postpone(free_cacheline, old);
901 : : }
902 : :
903 : 1265381 : return new;
904 : : }
905 : :
906 : : struct cmap_cursor
907 : 18310197 : cmap_cursor_start(const struct cmap *cmap)
908 : : {
909 : : struct cmap_cursor cursor;
910 : :
911 : 18310197 : cursor.impl = cmap_get_impl(cmap);
912 : 18309085 : cursor.bucket_idx = 0;
913 : 18309085 : cursor.entry_idx = 0;
914 : 18309085 : cursor.node = NULL;
915 : 18309085 : cmap_cursor_advance(&cursor);
916 : :
917 : 18309025 : return cursor;
918 : : }
919 : :
920 : : void
921 : 28361731 : cmap_cursor_advance(struct cmap_cursor *cursor)
922 : : {
923 : 28361731 : const struct cmap_impl *impl = cursor->impl;
924 : :
925 [ + + ]: 28361731 : if (cursor->node) {
926 : 10052627 : cursor->node = cmap_node_next(cursor->node);
927 [ + + ]: 10052627 : if (cursor->node) {
928 : 1997001 : return;
929 : : }
930 : : }
931 : :
932 [ + + ]: 47780497 : while (cursor->bucket_idx <= impl->mask) {
933 : 29473074 : const struct cmap_bucket *b = &impl->buckets[cursor->bucket_idx];
934 : :
935 [ + + ]: 128489985 : while (cursor->entry_idx < CMAP_K) {
936 : 107074218 : cursor->node = cmap_node_next(&b->nodes[cursor->entry_idx++]);
937 [ + + ]: 107072547 : if (cursor->node) {
938 : 8055636 : return;
939 : : }
940 : : }
941 : :
942 : 21415767 : cursor->bucket_idx++;
943 : 21415767 : cursor->entry_idx = 0;
944 : : }
945 : : }
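
/* An iteration sketch: the CMAP_FOR_EACH macro in cmap.h drives the cursor
 * functions above, here for a hypothetical 'struct foo' embedding a
 * cmap_node as 'node':
 *
 *     struct foo *foo;
 *
 *     CMAP_FOR_EACH (foo, node, &map) {
 *         ... process 'foo' ...
 *     }
 */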
946 : :
947 : : /* Returns the next node in 'cmap' in hash order, or NULL if no nodes remain in
948 : : * 'cmap'. Uses '*pos' to determine where to begin iteration, and updates
949 : : * '*pos' before returning so that the next call resumes where this one left off.
950 : : *
951 : : * It's better to use plain CMAP_FOR_EACH and related functions, since they are
952 : : * faster and better at dealing with cmaps that change during iteration.
953 : : *
954 : : * Before beginning iteration, set '*pos' to all zeros. */
955 : : struct cmap_node *
956 : 6264287 : cmap_next_position(const struct cmap *cmap,
957 : : struct cmap_position *pos)
958 : : {
959 : 6264287 : struct cmap_impl *impl = cmap_get_impl(cmap);
960 : 6264287 : unsigned int bucket = pos->bucket;
961 : 6264287 : unsigned int entry = pos->entry;
962 : 6264287 : unsigned int offset = pos->offset;
963 : :
964 [ + + ]: 8402389 : while (bucket <= impl->mask) {
965 : 8341113 : const struct cmap_bucket *b = &impl->buckets[bucket];
966 : :
967 [ + + ]: 14835360 : while (entry < CMAP_K) {
968 : 12697258 : const struct cmap_node *node = cmap_node_next(&b->nodes[entry]);
969 : : unsigned int i;
970 : :
971 [ + + ]: 845030758 : for (i = 0; node; i++, node = cmap_node_next(node)) {
972 [ + + ]: 838536511 : if (i == offset) {
973 [ + + ]: 6203011 : if (cmap_node_next(node)) {
974 : 1999998 : offset++;
975 : : } else {
976 : 4203013 : entry++;
977 : 4203013 : offset = 0;
978 : : }
979 : 6203011 : pos->bucket = bucket;
980 : 6203011 : pos->entry = entry;
981 : 6203011 : pos->offset = offset;
982 : 6203011 : return CONST_CAST(struct cmap_node *, node);
983 : : }
984 : : }
985 : :
986 : 6494247 : entry++;
987 : 6494247 : offset = 0;
988 : : }
989 : :
990 : 2138102 : bucket++;
991 : 2138102 : entry = offset = 0;
992 : : }
993 : :
994 : 61276 : pos->bucket = pos->entry = pos->offset = 0;
995 : 61276 : return NULL;
996 : : }
|