lsquic_malo.c revision 2d296031
1/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_malo.c -- malo allocator implementation.
4 *
5 * The malo allocator is a pool of objects of fixed size.  It tries to
6 * allocate and deallocate objects as fast as possible.  To do so, it
7 * does the following:
8 *
9 *  1. Allocations occur 4 KB at a time.
10 *  2. No division or multiplication operations are performed.
11 *
12 * (In recent testing, malo was about 2.7 times faster than malloc for
13 * 64-byte objects.)
14 *
15 * Besides speed, two other important characteristics distinguish it
16 * from other pool allocators:
17 *
18 *  1. To free (put) an object, one does not need a pointer to the malo
19 *     object.  This makes this allocator easy to use.
20 *  2. A built-in iterator is provided to iterate over all allocated
21 *     objects (with ability to safely release objects while iterator
22 *     is active).  This may be useful in some circumstances.
23 *
24 * To gain all these advantages, there are trade-offs:
25 *
26 *  1. There are two memory penalties:
27 *      a. Per object overhead.  To avoid division and multiplication,
28 *         the object sizes is rounded up to the nearest power or two,
29 *         starting with 64 bytes (minumum) and up to 2 KB (maximum).
30 *         Thus, a 104-byte object will have a 24-byte overhead; a
31 *         130-byte object will have 126-byte overhead.  This is
32 *         something to keep in mind.
33 *      b. Per page overhead.  Page links occupy some bytes in the
34 *         page.  To keep things fast, at least one slot per page is
35 *         always occupied, independent of object size.  Thus, for a
36 *         1 KB object size, 25% of the page is used for the page
37 *         header.
38 *  2. 4 KB pages are not freed until the malo allocator is destroyed.
39 *     This is something to keep in mind.
40 *
41 * P.S. In Russian, "malo" (��������) means "little" or "few".  Thus, the
42 *      malo allocator aims to perform its job in as few CPU cycles as
43 *      possible.
44 */
45
46#include <assert.h>
47#include <errno.h>
48#include <stdint.h>
49#include <stdlib.h>
50#include <sys/queue.h>
51#ifdef WIN32
52#include <vc_compat.h>
53#include <intrin.h>
54#endif
55
56#include "fiu-local.h"
57#include "lsquic_malo.h"
58
59/* 64 slots in a 4KB page means that the smallest object is 64 bytes.
60 * The largest object is 2KB.
61 */
62#define MALO_MIN_NBITS 6
63#define MALO_MAX_NBITS 11
64
65/* A "free page" is a page with free slots available.
66 */
67
68static unsigned find_free_slot (uint64_t slots);
69static unsigned size_in_bits (size_t sz);
70
71struct malo_page {
72    SLIST_ENTRY(malo_page)  next_page;
73    LIST_ENTRY(malo_page)   next_free_page;
74    struct malo            *malo;
75    uint64_t                slots,
76                            full_slot_mask;
77    unsigned                nbits;
78    unsigned                initial_slot;
79};
80
81typedef char malo_header_fits_in_one_slot
82    [(sizeof(struct malo_page) > (1 << MALO_MAX_NBITS)) ? -1 : 1];
83
84struct malo {
85    struct malo_page        page_header;
86    SLIST_HEAD(, malo_page) all_pages;
87    LIST_HEAD(, malo_page)  free_pages;
88    struct {
89        struct malo_page   *cur_page;
90        unsigned            next_slot;
91    }                       iter;
92};
93
94struct malo *
95lsquic_malo_create (size_t obj_size)
96{
97    unsigned nbits = size_in_bits(obj_size);
98    if (nbits < MALO_MIN_NBITS)
99        nbits = MALO_MIN_NBITS;
100    else if (nbits > MALO_MAX_NBITS)
101    {
102        errno = EOVERFLOW;
103        return NULL;
104    }
105
106    struct malo *malo;
107    if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000))
108        return NULL;
109
110    SLIST_INIT(&malo->all_pages);
111    LIST_INIT(&malo->free_pages);
112    malo->iter.cur_page = &malo->page_header;
113    malo->iter.next_slot = 0;
114
115    int n_slots =   sizeof(*malo) / (1 << nbits)
116                + ((sizeof(*malo) % (1 << nbits)) > 0);
117
118    struct malo_page *const page = &malo->page_header;
119    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
120    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
121    page->malo = malo;
122    if (nbits == MALO_MIN_NBITS)
123        page->full_slot_mask = ~0ULL;
124    else
125        page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1;
126    page->slots = (1ULL << n_slots) - 1;
127    page->nbits = nbits;
128    page->initial_slot = n_slots;
129
130    return malo;
131}
132
133
134static struct malo_page *
135allocate_page (struct malo *malo)
136{
137    struct malo_page *page;
138    if (0 != posix_memalign((void **) &page, 0x1000, 0x1000))
139        return NULL;
140    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
141    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
142    page->slots = 1;
143    page->full_slot_mask = malo->page_header.full_slot_mask;
144    page->nbits = malo->page_header.nbits;
145    page->malo = malo;
146    page->initial_slot = 1;
147    return page;
148}
149
150
151#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0)
152
153/* Get a new object. */
154void *
155lsquic_malo_get (struct malo *malo)
156{
157    fiu_do_on("malo/get", FAIL_NOMEM);
158    struct malo_page *page = LIST_FIRST(&malo->free_pages);
159    if (!page)
160    {
161        page = allocate_page(malo);
162        if (!page)
163            return NULL;
164    }
165    unsigned slot = find_free_slot(page->slots);
166    page->slots |= (1ULL << slot);
167    if (page->full_slot_mask == page->slots)
168        LIST_REMOVE(page, next_free_page);
169    return (char *) page + (slot << page->nbits);
170}
171
172
173/* Return obj to the pool */
174void
175lsquic_malo_put (void *obj)
176{
177    uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1);
178    struct malo_page *page = (void *) page_addr;
179    unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits;
180    if (page->full_slot_mask == page->slots)
181        LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page);
182    page->slots &= ~(1ULL << slot);
183}
184
185
186void
187lsquic_malo_destroy (struct malo *malo)
188{
189    struct malo_page *page, *next;
190    page = SLIST_FIRST(&malo->all_pages);
191    while (page != &malo->page_header)
192    {
193        next = SLIST_NEXT(page, next_page);
194#ifndef WIN32
195        free(page);
196#else
197        _aligned_free(page);
198#endif
199        page = next;
200    }
201#ifndef WIN32
202    free(page);
203#else
204        _aligned_free(page);
205#endif
206}
207
208
209/* The iterator is built-in.  Usage:
210 * void *obj;
211 * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo))
212 *     do_stuff(obj);
213 */
214void *
215lsquic_malo_first (struct malo *malo)
216{
217    malo->iter.cur_page = SLIST_FIRST(&malo->all_pages);
218    malo->iter.next_slot = malo->iter.cur_page->initial_slot;
219    return lsquic_malo_next(malo);
220}
221
222
223void *
224lsquic_malo_next (struct malo *malo)
225{
226    struct malo_page *page;
227    unsigned max_slot, slot;
228
229    page = malo->iter.cur_page;
230    if (page)
231    {
232        max_slot = 1 << (12 - page->nbits);     /* Same for all pages */
233        slot = malo->iter.next_slot;
234        while (1)
235        {
236            for (; slot < max_slot; ++slot)
237            {
238                if (page->slots & (1ULL << slot))
239                {
240                    malo->iter.cur_page  = page;
241                    malo->iter.next_slot = slot + 1;
242                    return (char *) page + (slot << page->nbits);
243                }
244            }
245            page = SLIST_NEXT(page, next_page);
246            if (page)
247                slot = page->initial_slot;
248            else
249            {
250                malo->iter.cur_page = NULL;     /* Stop iterator */
251                return NULL;
252            }
253        }
254    }
255
256    return NULL;
257}
258
259
260static unsigned
261size_in_bits (size_t sz)
262{
263#if __GNUC__
264    unsigned clz = __builtin_clz(sz - 1);
265    return 32 - clz;
266#elif defined(WIN32)
267    unsigned char s;
268    unsigned long idx;
269    s = _BitScanReverse(&idx, sz);
270    assert(s);
271    return (unsigned) idx + 1;
272#else
273#error This function contains a bug!
274    unsigned clz;
275    size_t y;
276
277    --sz;
278    clz = 32;
279    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
280    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
281    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
282    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
283    y = sz >>  1;   if (y) return 32 - clz + 1;
284    return 32 - clz + sz;
285#endif
286}
287
288
289static unsigned
290find_free_slot (uint64_t slots)
291{
292#if __GNUC__
293    return __builtin_ffsll(~slots) - 1;
294#else
295    unsigned n;
296
297    slots =~ slots;
298    n = 0;
299
300    if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; }
301    if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; }
302    if (0 == (slots & ((1ULL <<  8) - 1))) { n +=  8; slots >>=  8; }
303    if (0 == (slots & ((1ULL <<  4) - 1))) { n +=  4; slots >>=  4; }
304    if (0 == (slots & ((1ULL <<  2) - 1))) { n +=  2; slots >>=  2; }
305    if (0 == (slots & ((1ULL <<  1) - 1))) { n +=  1; slots >>=  1; }
306    return n;
307#endif
308}
309
310
311size_t
312lsquic_malo_mem_used (const struct malo *malo)
313{
314    const struct malo_page *page;
315    size_t size;
316
317    size = 0;
318    SLIST_FOREACH(page, &malo->all_pages, next_page)
319        size += sizeof(*page);
320
321    return size;
322}
323