lsquic_malo.c revision 50aadb33
1/* Copyright (c) 2017 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_malo.c -- malo allocator implementation.
4 *
5 * The malo allocator is a pool of objects of fixed size.  It tries to
6 * allocate and deallocate objects as fast as possible.  To do so, it
7 * does the following:
8 *
9 *  1. Allocations occur 4 KB at a time.
10 *  2. No division or multiplication operations are performed.
11 *
12 * (In recent testing, malo was about 2.7 times faster than malloc for
13 * 64-byte objects.)
14 *
15 * Besides speed, two other important characteristics distinguish it
16 * from other pool allocators:
17 *
18 *  1. To free (put) an object, one does not need a pointer to the malo
19 *     object.  This makes this allocator easy to use.
20 *  2. A built-in iterator is provided to iterate over all allocated
21 *     objects (with ability to safely release objects while iterator
22 *     is active).  This may be useful in some circumstances.
23 *
24 * To gain all these advantages, there are trade-offs:
25 *
26 *  1. There are two memory penalties:
27 *      a. Per object overhead.  To avoid division and multiplication,
28 *         the object sizes is rounded up to the nearest power or two,
29 *         starting with 64 bytes (minumum) and up to 2 KB (maximum).
30 *         Thus, a 104-byte object will have a 24-byte overhead; a
31 *         130-byte object will have 126-byte overhead.  This is
32 *         something to keep in mind.
33 *      b. Per page overhead.  Page links occupy some bytes in the
34 *         page.  To keep things fast, at least one slot per page is
35 *         always occupied, independent of object size.  Thus, for a
36 *         1 KB object size, 25% of the page is used for the page
37 *         header.
38 *  2. 4 KB pages are not freed until the malo allocator is destroyed.
39 *     This is something to keep in mind.
40 *
41 * P.S. In Russian, "malo" (��������) means "little" or "few".  Thus, the
42 *      malo allocator aims to perform its job in as few CPU cycles as
43 *      possible.
44 */
45
46#include <assert.h>
47#include <errno.h>
48#include <stdint.h>
49#include <stdlib.h>
50#include <sys/queue.h>
51
52#include "fiu-local.h"
53#include "lsquic_malo.h"
54
55/* 64 slots in a 4KB page means that the smallest object is 64 bytes.
56 * The largest object is 2KB.
57 */
58#define MALO_MIN_NBITS 6
59#define MALO_MAX_NBITS 11
60
61/* A "free page" is a page with free slots available.
62 */
63
64static unsigned find_free_slot (uint64_t slots);
65static unsigned size_in_bits (size_t sz);
66
67struct malo_page {
68    SLIST_ENTRY(malo_page)  next_page;
69    LIST_ENTRY(malo_page)   next_free_page;
70    struct malo            *malo;
71    uint64_t                slots,
72                            full_slot_mask;
73    unsigned                nbits;
74    unsigned                initial_slot;
75};
76
77typedef char malo_header_fits_in_one_slot
78    [0 - (sizeof(struct malo_page) > (1 << MALO_MAX_NBITS))];
79
80struct malo {
81    struct malo_page        page_header;
82    SLIST_HEAD(, malo_page) all_pages;
83    LIST_HEAD(, malo_page)  free_pages;
84    struct {
85        struct malo_page   *cur_page;
86        unsigned            next_slot;
87    }                       iter;
88};
89
90struct malo *
91lsquic_malo_create (size_t obj_size)
92{
93    unsigned nbits = size_in_bits(obj_size);
94    if (nbits < MALO_MIN_NBITS)
95        nbits = MALO_MIN_NBITS;
96    else if (nbits > MALO_MAX_NBITS)
97    {
98        errno = EOVERFLOW;
99        return NULL;
100    }
101
102    struct malo *malo;
103    if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000))
104        return NULL;
105
106    SLIST_INIT(&malo->all_pages);
107    LIST_INIT(&malo->free_pages);
108    malo->iter.cur_page = &malo->page_header;
109    malo->iter.next_slot = 0;
110
111    int n_slots =   sizeof(*malo) / (1 << nbits)
112                + ((sizeof(*malo) % (1 << nbits)) > 0);
113
114    struct malo_page *const page = &malo->page_header;
115    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
116    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
117    page->malo = malo;
118    if (nbits == MALO_MIN_NBITS)
119        page->full_slot_mask = ~0ULL;
120    else
121        page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1;
122    page->slots = (1ULL << n_slots) - 1;
123    page->nbits = nbits;
124    page->initial_slot = n_slots;
125
126    return malo;
127}
128
129
130static struct malo_page *
131allocate_page (struct malo *malo)
132{
133    struct malo_page *page;
134    if (0 != posix_memalign((void **) &page, 0x1000, 0x1000))
135        return NULL;
136    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
137    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
138    page->slots = 1;
139    page->full_slot_mask = malo->page_header.full_slot_mask;
140    page->nbits = malo->page_header.nbits;
141    page->malo = malo;
142    page->initial_slot = 1;
143    return page;
144}
145
146
147#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0)
148
149/* Get a new object. */
150void *
151lsquic_malo_get (struct malo *malo)
152{
153    fiu_do_on("malo/get", FAIL_NOMEM);
154    struct malo_page *page = LIST_FIRST(&malo->free_pages);
155    if (!page)
156    {
157        page = allocate_page(malo);
158        if (!page)
159            return NULL;
160    }
161    unsigned slot = find_free_slot(page->slots);
162    page->slots |= (1ULL << slot);
163    if (page->full_slot_mask == page->slots)
164        LIST_REMOVE(page, next_free_page);
165    return (char *) page + (slot << page->nbits);
166}
167
168
169/* Return obj to the pool */
170void
171lsquic_malo_put (void *obj)
172{
173    uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1);
174    struct malo_page *page = (void *) page_addr;
175    unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits;
176    if (page->full_slot_mask == page->slots)
177        LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page);
178    page->slots &= ~(1ULL << slot);
179}
180
181
182void
183lsquic_malo_destroy (struct malo *malo)
184{
185    struct malo_page *page, *next;
186    page = SLIST_FIRST(&malo->all_pages);
187    while (page != &malo->page_header)
188    {
189        next = SLIST_NEXT(page, next_page);
190        free(page);
191        page = next;
192    }
193    free(page);
194}
195
196
197/* The iterator is built-in.  Usage:
198 * void *obj;
199 * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo))
200 *     do_stuff(obj);
201 */
202void *
203lsquic_malo_first (struct malo *malo)
204{
205    malo->iter.cur_page = SLIST_FIRST(&malo->all_pages);
206    malo->iter.next_slot = malo->iter.cur_page->initial_slot;
207    return lsquic_malo_next(malo);
208}
209
210
211void *
212lsquic_malo_next (struct malo *malo)
213{
214    struct malo_page *page;
215    unsigned max_slot, slot;
216
217    page = malo->iter.cur_page;
218    if (page)
219    {
220        max_slot = 1 << (12 - page->nbits);     /* Same for all pages */
221        slot = malo->iter.next_slot;
222        while (1)
223        {
224            for (; slot < max_slot; ++slot)
225            {
226                if (page->slots & (1ULL << slot))
227                {
228                    malo->iter.cur_page  = page;
229                    malo->iter.next_slot = slot + 1;
230                    return (char *) page + (slot << page->nbits);
231                }
232            }
233            page = SLIST_NEXT(page, next_page);
234            if (page)
235                slot = page->initial_slot;
236            else
237            {
238                malo->iter.cur_page = NULL;     /* Stop iterator */
239                return NULL;
240            }
241        }
242    }
243
244    return NULL;
245}
246
247
248static unsigned
249size_in_bits (size_t sz)
250{
251#if __GNUC__
252    unsigned clz = __builtin_clz(sz - 1);
253    return 32 - clz;
254#else
255    unsigned clz;
256    size_t y;
257
258    --sz;
259    clz = 32;
260    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
261    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
262    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
263    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
264    y = sz >>  1;   if (y) return 32 - clz + 1;
265    return 32 - clz + sz;
266#endif
267}
268
269
270static unsigned
271find_free_slot (uint64_t slots)
272{
273#if __GNUC__
274    return __builtin_ffsll(~slots) - 1;
275#else
276    unsigned n;
277
278    slots =~ slots;
279    n = 0;
280
281    if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; }
282    if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; }
283    if (0 == (slots & ((1ULL <<  8) - 1))) { n +=  8; slots >>=  8; }
284    if (0 == (slots & ((1ULL <<  4) - 1))) { n +=  4; slots >>=  4; }
285    if (0 == (slots & ((1ULL <<  2) - 1))) { n +=  2; slots >>=  2; }
286    if (0 == (slots & ((1ULL <<  1) - 1))) { n +=  1; slots >>=  1; }
287    return n;
288#endif
289}
290