lsquic_malo.c revision 10c492f0
1/* Copyright (c) 2017 - 2018 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_malo.c -- malo allocator implementation.
4 *
5 * The malo allocator is a pool of objects of fixed size.  It tries to
6 * allocate and deallocate objects as fast as possible.  To do so, it
7 * does the following:
8 *
9 *  1. Allocations occur 4 KB at a time.
10 *  2. No division or multiplication operations are performed.
11 *
12 * (In recent testing, malo was about 2.7 times faster than malloc for
13 * 64-byte objects.)
14 *
15 * Besides speed, two other important characteristics distinguish it
16 * from other pool allocators:
17 *
18 *  1. To free (put) an object, one does not need a pointer to the malo
19 *     object.  This makes this allocator easy to use.
20 *  2. A built-in iterator is provided to iterate over all allocated
21 *     objects (with ability to safely release objects while iterator
22 *     is active).  This may be useful in some circumstances.
23 *
24 * To gain all these advantages, there are trade-offs:
25 *
26 *  1. There are two memory penalties:
27 *      a. Per object overhead.  To avoid division and multiplication,
28 *         the object sizes is rounded up to the nearest power or two,
29 *         starting with 64 bytes (minumum) and up to 2 KB (maximum).
30 *         Thus, a 104-byte object will have a 24-byte overhead; a
31 *         130-byte object will have 126-byte overhead.  This is
32 *         something to keep in mind.
33 *      b. Per page overhead.  Page links occupy some bytes in the
34 *         page.  To keep things fast, at least one slot per page is
35 *         always occupied, independent of object size.  Thus, for a
36 *         1 KB object size, 25% of the page is used for the page
37 *         header.
38 *  2. 4 KB pages are not freed until the malo allocator is destroyed.
39 *     This is something to keep in mind.
40 *
41 * P.S. In Russian, "malo" (��������) means "little" or "few".  Thus, the
42 *      malo allocator aims to perform its job in as few CPU cycles as
43 *      possible.
44 */
45
46#include <assert.h>
47#include <errno.h>
48#include <stdint.h>
49#include <stdlib.h>
50#include <sys/queue.h>
51#ifdef WIN32
52#include <vc_compat.h>
53#endif
54
55#include "fiu-local.h"
56#include "lsquic_malo.h"
57
58/* 64 slots in a 4KB page means that the smallest object is 64 bytes.
59 * The largest object is 2KB.
60 */
61#define MALO_MIN_NBITS 6
62#define MALO_MAX_NBITS 11
63
64/* A "free page" is a page with free slots available.
65 */
66
67static unsigned find_free_slot (uint64_t slots);
68static unsigned size_in_bits (size_t sz);
69
70struct malo_page {
71    SLIST_ENTRY(malo_page)  next_page;
72    LIST_ENTRY(malo_page)   next_free_page;
73    struct malo            *malo;
74    uint64_t                slots,
75                            full_slot_mask;
76    unsigned                nbits;
77    unsigned                initial_slot;
78};
79
80typedef char malo_header_fits_in_one_slot
81    [(sizeof(struct malo_page) > (1 << MALO_MAX_NBITS)) ? -1 : 1];
82
83struct malo {
84    struct malo_page        page_header;
85    SLIST_HEAD(, malo_page) all_pages;
86    LIST_HEAD(, malo_page)  free_pages;
87    struct {
88        struct malo_page   *cur_page;
89        unsigned            next_slot;
90    }                       iter;
91};
92
93struct malo *
94lsquic_malo_create (size_t obj_size)
95{
96    unsigned nbits = size_in_bits(obj_size);
97    if (nbits < MALO_MIN_NBITS)
98        nbits = MALO_MIN_NBITS;
99    else if (nbits > MALO_MAX_NBITS)
100    {
101        errno = EOVERFLOW;
102        return NULL;
103    }
104
105    struct malo *malo;
106    if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000))
107        return NULL;
108
109    SLIST_INIT(&malo->all_pages);
110    LIST_INIT(&malo->free_pages);
111    malo->iter.cur_page = &malo->page_header;
112    malo->iter.next_slot = 0;
113
114    int n_slots =   sizeof(*malo) / (1 << nbits)
115                + ((sizeof(*malo) % (1 << nbits)) > 0);
116
117    struct malo_page *const page = &malo->page_header;
118    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
119    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
120    page->malo = malo;
121    if (nbits == MALO_MIN_NBITS)
122        page->full_slot_mask = ~0ULL;
123    else
124        page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1;
125    page->slots = (1ULL << n_slots) - 1;
126    page->nbits = nbits;
127    page->initial_slot = n_slots;
128
129    return malo;
130}
131
132
133static struct malo_page *
134allocate_page (struct malo *malo)
135{
136    struct malo_page *page;
137    if (0 != posix_memalign((void **) &page, 0x1000, 0x1000))
138        return NULL;
139    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
140    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
141    page->slots = 1;
142    page->full_slot_mask = malo->page_header.full_slot_mask;
143    page->nbits = malo->page_header.nbits;
144    page->malo = malo;
145    page->initial_slot = 1;
146    return page;
147}
148
149
150#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0)
151
152/* Get a new object. */
153void *
154lsquic_malo_get (struct malo *malo)
155{
156    fiu_do_on("malo/get", FAIL_NOMEM);
157    struct malo_page *page = LIST_FIRST(&malo->free_pages);
158    if (!page)
159    {
160        page = allocate_page(malo);
161        if (!page)
162            return NULL;
163    }
164    unsigned slot = find_free_slot(page->slots);
165    page->slots |= (1ULL << slot);
166    if (page->full_slot_mask == page->slots)
167        LIST_REMOVE(page, next_free_page);
168    return (char *) page + (slot << page->nbits);
169}
170
171
172/* Return obj to the pool */
173void
174lsquic_malo_put (void *obj)
175{
176    uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1);
177    struct malo_page *page = (void *) page_addr;
178    unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits;
179    if (page->full_slot_mask == page->slots)
180        LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page);
181    page->slots &= ~(1ULL << slot);
182}
183
184
185void
186lsquic_malo_destroy (struct malo *malo)
187{
188    struct malo_page *page, *next;
189    page = SLIST_FIRST(&malo->all_pages);
190    while (page != &malo->page_header)
191    {
192        next = SLIST_NEXT(page, next_page);
193#ifndef WIN32
194        free(page);
195#else
196        _aligned_free(page);
197#endif
198        page = next;
199    }
200#ifndef WIN32
201    free(page);
202#else
203        _aligned_free(page);
204#endif
205}
206
207
208/* The iterator is built-in.  Usage:
209 * void *obj;
210 * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo))
211 *     do_stuff(obj);
212 */
213void *
214lsquic_malo_first (struct malo *malo)
215{
216    malo->iter.cur_page = SLIST_FIRST(&malo->all_pages);
217    malo->iter.next_slot = malo->iter.cur_page->initial_slot;
218    return lsquic_malo_next(malo);
219}
220
221
222void *
223lsquic_malo_next (struct malo *malo)
224{
225    struct malo_page *page;
226    unsigned max_slot, slot;
227
228    page = malo->iter.cur_page;
229    if (page)
230    {
231        max_slot = 1 << (12 - page->nbits);     /* Same for all pages */
232        slot = malo->iter.next_slot;
233        while (1)
234        {
235            for (; slot < max_slot; ++slot)
236            {
237                if (page->slots & (1ULL << slot))
238                {
239                    malo->iter.cur_page  = page;
240                    malo->iter.next_slot = slot + 1;
241                    return (char *) page + (slot << page->nbits);
242                }
243            }
244            page = SLIST_NEXT(page, next_page);
245            if (page)
246                slot = page->initial_slot;
247            else
248            {
249                malo->iter.cur_page = NULL;     /* Stop iterator */
250                return NULL;
251            }
252        }
253    }
254
255    return NULL;
256}
257
258
259static unsigned
260size_in_bits (size_t sz)
261{
262#if __GNUC__
263    unsigned clz = __builtin_clz(sz - 1);
264    return 32 - clz;
265#else
266    unsigned clz;
267    size_t y;
268
269    --sz;
270    clz = 32;
271    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
272    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
273    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
274    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
275    y = sz >>  1;   if (y) return 32 - clz + 1;
276    return 32 - clz + sz;
277#endif
278}
279
280
281static unsigned
282find_free_slot (uint64_t slots)
283{
284#if __GNUC__
285    return __builtin_ffsll(~slots) - 1;
286#else
287    unsigned n;
288
289    slots =~ slots;
290    n = 0;
291
292    if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; }
293    if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; }
294    if (0 == (slots & ((1ULL <<  8) - 1))) { n +=  8; slots >>=  8; }
295    if (0 == (slots & ((1ULL <<  4) - 1))) { n +=  4; slots >>=  4; }
296    if (0 == (slots & ((1ULL <<  2) - 1))) { n +=  2; slots >>=  2; }
297    if (0 == (slots & ((1ULL <<  1) - 1))) { n +=  1; slots >>=  1; }
298    return n;
299#endif
300}
301
302
303size_t
304lsquic_malo_mem_used (const struct malo *malo)
305{
306    const struct malo_page *page;
307    size_t size;
308
309    size = 0;
310    SLIST_FOREACH(page, &malo->all_pages, next_page)
311        size += sizeof(*page);
312
313    return size;
314}
315