lsquic_malo.c revision c51ce338
150aadb33SDmitri Tikhonov/* Copyright (c) 2017 LiteSpeed Technologies Inc.  See LICENSE. */
250aadb33SDmitri Tikhonov/*
350aadb33SDmitri Tikhonov * lsquic_malo.c -- malo allocator implementation.
450aadb33SDmitri Tikhonov *
550aadb33SDmitri Tikhonov * The malo allocator is a pool of objects of fixed size.  It tries to
650aadb33SDmitri Tikhonov * allocate and deallocate objects as fast as possible.  To do so, it
750aadb33SDmitri Tikhonov * does the following:
850aadb33SDmitri Tikhonov *
950aadb33SDmitri Tikhonov *  1. Allocations occur 4 KB at a time.
1050aadb33SDmitri Tikhonov *  2. No division or multiplication operations are performed.
1150aadb33SDmitri Tikhonov *
1250aadb33SDmitri Tikhonov * (In recent testing, malo was about 2.7 times faster than malloc for
1350aadb33SDmitri Tikhonov * 64-byte objects.)
1450aadb33SDmitri Tikhonov *
1550aadb33SDmitri Tikhonov * Besides speed, two other important characteristics distinguish it
1650aadb33SDmitri Tikhonov * from other pool allocators:
1750aadb33SDmitri Tikhonov *
1850aadb33SDmitri Tikhonov *  1. To free (put) an object, one does not need a pointer to the malo
1950aadb33SDmitri Tikhonov *     object.  This makes this allocator easy to use.
2050aadb33SDmitri Tikhonov *  2. A built-in iterator is provided to iterate over all allocated
2150aadb33SDmitri Tikhonov *     objects (with ability to safely release objects while iterator
2250aadb33SDmitri Tikhonov *     is active).  This may be useful in some circumstances.
2350aadb33SDmitri Tikhonov *
2450aadb33SDmitri Tikhonov * To gain all these advantages, there are trade-offs:
2550aadb33SDmitri Tikhonov *
2650aadb33SDmitri Tikhonov *  1. There are two memory penalties:
2750aadb33SDmitri Tikhonov *      a. Per object overhead.  To avoid division and multiplication,
2850aadb33SDmitri Tikhonov *         the object sizes is rounded up to the nearest power or two,
2950aadb33SDmitri Tikhonov *         starting with 64 bytes (minumum) and up to 2 KB (maximum).
3050aadb33SDmitri Tikhonov *         Thus, a 104-byte object will have a 24-byte overhead; a
3150aadb33SDmitri Tikhonov *         130-byte object will have 126-byte overhead.  This is
3250aadb33SDmitri Tikhonov *         something to keep in mind.
3350aadb33SDmitri Tikhonov *      b. Per page overhead.  Page links occupy some bytes in the
3450aadb33SDmitri Tikhonov *         page.  To keep things fast, at least one slot per page is
3550aadb33SDmitri Tikhonov *         always occupied, independent of object size.  Thus, for a
3650aadb33SDmitri Tikhonov *         1 KB object size, 25% of the page is used for the page
3750aadb33SDmitri Tikhonov *         header.
3850aadb33SDmitri Tikhonov *  2. 4 KB pages are not freed until the malo allocator is destroyed.
3950aadb33SDmitri Tikhonov *     This is something to keep in mind.
4050aadb33SDmitri Tikhonov *
4150aadb33SDmitri Tikhonov * P.S. In Russian, "malo" (��������) means "little" or "few".  Thus, the
4250aadb33SDmitri Tikhonov *      malo allocator aims to perform its job in as few CPU cycles as
4350aadb33SDmitri Tikhonov *      possible.
4450aadb33SDmitri Tikhonov */
4550aadb33SDmitri Tikhonov
4650aadb33SDmitri Tikhonov#include <assert.h>
4750aadb33SDmitri Tikhonov#include <errno.h>
4850aadb33SDmitri Tikhonov#include <stdint.h>
4950aadb33SDmitri Tikhonov#include <stdlib.h>
5050aadb33SDmitri Tikhonov#include <sys/queue.h>
5150aadb33SDmitri Tikhonov
5250aadb33SDmitri Tikhonov#include "fiu-local.h"
5350aadb33SDmitri Tikhonov#include "lsquic_malo.h"
5450aadb33SDmitri Tikhonov
5550aadb33SDmitri Tikhonov/* 64 slots in a 4KB page means that the smallest object is 64 bytes.
5650aadb33SDmitri Tikhonov * The largest object is 2KB.
5750aadb33SDmitri Tikhonov */
5850aadb33SDmitri Tikhonov#define MALO_MIN_NBITS 6
5950aadb33SDmitri Tikhonov#define MALO_MAX_NBITS 11
6050aadb33SDmitri Tikhonov
6150aadb33SDmitri Tikhonov/* A "free page" is a page with free slots available.
6250aadb33SDmitri Tikhonov */
6350aadb33SDmitri Tikhonov
6450aadb33SDmitri Tikhonovstatic unsigned find_free_slot (uint64_t slots);
6550aadb33SDmitri Tikhonovstatic unsigned size_in_bits (size_t sz);
6650aadb33SDmitri Tikhonov
6750aadb33SDmitri Tikhonovstruct malo_page {
6850aadb33SDmitri Tikhonov    SLIST_ENTRY(malo_page)  next_page;
6950aadb33SDmitri Tikhonov    LIST_ENTRY(malo_page)   next_free_page;
7050aadb33SDmitri Tikhonov    struct malo            *malo;
7150aadb33SDmitri Tikhonov    uint64_t                slots,
7250aadb33SDmitri Tikhonov                            full_slot_mask;
7350aadb33SDmitri Tikhonov    unsigned                nbits;
7450aadb33SDmitri Tikhonov    unsigned                initial_slot;
7550aadb33SDmitri Tikhonov};
7650aadb33SDmitri Tikhonov
7750aadb33SDmitri Tikhonovtypedef char malo_header_fits_in_one_slot
7850aadb33SDmitri Tikhonov    [0 - (sizeof(struct malo_page) > (1 << MALO_MAX_NBITS))];
7950aadb33SDmitri Tikhonov
8050aadb33SDmitri Tikhonovstruct malo {
8150aadb33SDmitri Tikhonov    struct malo_page        page_header;
8250aadb33SDmitri Tikhonov    SLIST_HEAD(, malo_page) all_pages;
8350aadb33SDmitri Tikhonov    LIST_HEAD(, malo_page)  free_pages;
8450aadb33SDmitri Tikhonov    struct {
8550aadb33SDmitri Tikhonov        struct malo_page   *cur_page;
8650aadb33SDmitri Tikhonov        unsigned            next_slot;
8750aadb33SDmitri Tikhonov    }                       iter;
8850aadb33SDmitri Tikhonov};
8950aadb33SDmitri Tikhonov
9050aadb33SDmitri Tikhonovstruct malo *
9150aadb33SDmitri Tikhonovlsquic_malo_create (size_t obj_size)
9250aadb33SDmitri Tikhonov{
9350aadb33SDmitri Tikhonov    unsigned nbits = size_in_bits(obj_size);
9450aadb33SDmitri Tikhonov    if (nbits < MALO_MIN_NBITS)
9550aadb33SDmitri Tikhonov        nbits = MALO_MIN_NBITS;
9650aadb33SDmitri Tikhonov    else if (nbits > MALO_MAX_NBITS)
9750aadb33SDmitri Tikhonov    {
9850aadb33SDmitri Tikhonov        errno = EOVERFLOW;
9950aadb33SDmitri Tikhonov        return NULL;
10050aadb33SDmitri Tikhonov    }
10150aadb33SDmitri Tikhonov
10250aadb33SDmitri Tikhonov    struct malo *malo;
10350aadb33SDmitri Tikhonov    if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000))
10450aadb33SDmitri Tikhonov        return NULL;
10550aadb33SDmitri Tikhonov
10650aadb33SDmitri Tikhonov    SLIST_INIT(&malo->all_pages);
10750aadb33SDmitri Tikhonov    LIST_INIT(&malo->free_pages);
10850aadb33SDmitri Tikhonov    malo->iter.cur_page = &malo->page_header;
10950aadb33SDmitri Tikhonov    malo->iter.next_slot = 0;
11050aadb33SDmitri Tikhonov
11150aadb33SDmitri Tikhonov    int n_slots =   sizeof(*malo) / (1 << nbits)
11250aadb33SDmitri Tikhonov                + ((sizeof(*malo) % (1 << nbits)) > 0);
11350aadb33SDmitri Tikhonov
11450aadb33SDmitri Tikhonov    struct malo_page *const page = &malo->page_header;
11550aadb33SDmitri Tikhonov    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
11650aadb33SDmitri Tikhonov    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
11750aadb33SDmitri Tikhonov    page->malo = malo;
11850aadb33SDmitri Tikhonov    if (nbits == MALO_MIN_NBITS)
11950aadb33SDmitri Tikhonov        page->full_slot_mask = ~0ULL;
12050aadb33SDmitri Tikhonov    else
12150aadb33SDmitri Tikhonov        page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1;
12250aadb33SDmitri Tikhonov    page->slots = (1ULL << n_slots) - 1;
12350aadb33SDmitri Tikhonov    page->nbits = nbits;
12450aadb33SDmitri Tikhonov    page->initial_slot = n_slots;
12550aadb33SDmitri Tikhonov
12650aadb33SDmitri Tikhonov    return malo;
12750aadb33SDmitri Tikhonov}
12850aadb33SDmitri Tikhonov
12950aadb33SDmitri Tikhonov
13050aadb33SDmitri Tikhonovstatic struct malo_page *
13150aadb33SDmitri Tikhonovallocate_page (struct malo *malo)
13250aadb33SDmitri Tikhonov{
13350aadb33SDmitri Tikhonov    struct malo_page *page;
13450aadb33SDmitri Tikhonov    if (0 != posix_memalign((void **) &page, 0x1000, 0x1000))
13550aadb33SDmitri Tikhonov        return NULL;
13650aadb33SDmitri Tikhonov    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
13750aadb33SDmitri Tikhonov    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
13850aadb33SDmitri Tikhonov    page->slots = 1;
13950aadb33SDmitri Tikhonov    page->full_slot_mask = malo->page_header.full_slot_mask;
14050aadb33SDmitri Tikhonov    page->nbits = malo->page_header.nbits;
14150aadb33SDmitri Tikhonov    page->malo = malo;
14250aadb33SDmitri Tikhonov    page->initial_slot = 1;
14350aadb33SDmitri Tikhonov    return page;
14450aadb33SDmitri Tikhonov}
14550aadb33SDmitri Tikhonov
14650aadb33SDmitri Tikhonov
14750aadb33SDmitri Tikhonov#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0)
14850aadb33SDmitri Tikhonov
14950aadb33SDmitri Tikhonov/* Get a new object. */
15050aadb33SDmitri Tikhonovvoid *
15150aadb33SDmitri Tikhonovlsquic_malo_get (struct malo *malo)
15250aadb33SDmitri Tikhonov{
15350aadb33SDmitri Tikhonov    fiu_do_on("malo/get", FAIL_NOMEM);
15450aadb33SDmitri Tikhonov    struct malo_page *page = LIST_FIRST(&malo->free_pages);
15550aadb33SDmitri Tikhonov    if (!page)
15650aadb33SDmitri Tikhonov    {
15750aadb33SDmitri Tikhonov        page = allocate_page(malo);
15850aadb33SDmitri Tikhonov        if (!page)
15950aadb33SDmitri Tikhonov            return NULL;
16050aadb33SDmitri Tikhonov    }
16150aadb33SDmitri Tikhonov    unsigned slot = find_free_slot(page->slots);
16250aadb33SDmitri Tikhonov    page->slots |= (1ULL << slot);
16350aadb33SDmitri Tikhonov    if (page->full_slot_mask == page->slots)
16450aadb33SDmitri Tikhonov        LIST_REMOVE(page, next_free_page);
16550aadb33SDmitri Tikhonov    return (char *) page + (slot << page->nbits);
16650aadb33SDmitri Tikhonov}
16750aadb33SDmitri Tikhonov
16850aadb33SDmitri Tikhonov
16950aadb33SDmitri Tikhonov/* Return obj to the pool */
17050aadb33SDmitri Tikhonovvoid
17150aadb33SDmitri Tikhonovlsquic_malo_put (void *obj)
17250aadb33SDmitri Tikhonov{
17350aadb33SDmitri Tikhonov    uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1);
17450aadb33SDmitri Tikhonov    struct malo_page *page = (void *) page_addr;
17550aadb33SDmitri Tikhonov    unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits;
17650aadb33SDmitri Tikhonov    if (page->full_slot_mask == page->slots)
17750aadb33SDmitri Tikhonov        LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page);
17850aadb33SDmitri Tikhonov    page->slots &= ~(1ULL << slot);
17950aadb33SDmitri Tikhonov}
18050aadb33SDmitri Tikhonov
18150aadb33SDmitri Tikhonov
18250aadb33SDmitri Tikhonovvoid
18350aadb33SDmitri Tikhonovlsquic_malo_destroy (struct malo *malo)
18450aadb33SDmitri Tikhonov{
18550aadb33SDmitri Tikhonov    struct malo_page *page, *next;
18650aadb33SDmitri Tikhonov    page = SLIST_FIRST(&malo->all_pages);
18750aadb33SDmitri Tikhonov    while (page != &malo->page_header)
18850aadb33SDmitri Tikhonov    {
18950aadb33SDmitri Tikhonov        next = SLIST_NEXT(page, next_page);
19050aadb33SDmitri Tikhonov        free(page);
19150aadb33SDmitri Tikhonov        page = next;
19250aadb33SDmitri Tikhonov    }
19350aadb33SDmitri Tikhonov    free(page);
19450aadb33SDmitri Tikhonov}
19550aadb33SDmitri Tikhonov
19650aadb33SDmitri Tikhonov
19750aadb33SDmitri Tikhonov/* The iterator is built-in.  Usage:
19850aadb33SDmitri Tikhonov * void *obj;
19950aadb33SDmitri Tikhonov * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo))
20050aadb33SDmitri Tikhonov *     do_stuff(obj);
20150aadb33SDmitri Tikhonov */
20250aadb33SDmitri Tikhonovvoid *
20350aadb33SDmitri Tikhonovlsquic_malo_first (struct malo *malo)
20450aadb33SDmitri Tikhonov{
20550aadb33SDmitri Tikhonov    malo->iter.cur_page = SLIST_FIRST(&malo->all_pages);
20650aadb33SDmitri Tikhonov    malo->iter.next_slot = malo->iter.cur_page->initial_slot;
20750aadb33SDmitri Tikhonov    return lsquic_malo_next(malo);
20850aadb33SDmitri Tikhonov}
20950aadb33SDmitri Tikhonov
21050aadb33SDmitri Tikhonov
21150aadb33SDmitri Tikhonovvoid *
21250aadb33SDmitri Tikhonovlsquic_malo_next (struct malo *malo)
21350aadb33SDmitri Tikhonov{
21450aadb33SDmitri Tikhonov    struct malo_page *page;
21550aadb33SDmitri Tikhonov    unsigned max_slot, slot;
21650aadb33SDmitri Tikhonov
21750aadb33SDmitri Tikhonov    page = malo->iter.cur_page;
21850aadb33SDmitri Tikhonov    if (page)
21950aadb33SDmitri Tikhonov    {
22050aadb33SDmitri Tikhonov        max_slot = 1 << (12 - page->nbits);     /* Same for all pages */
22150aadb33SDmitri Tikhonov        slot = malo->iter.next_slot;
22250aadb33SDmitri Tikhonov        while (1)
22350aadb33SDmitri Tikhonov        {
22450aadb33SDmitri Tikhonov            for (; slot < max_slot; ++slot)
22550aadb33SDmitri Tikhonov            {
22650aadb33SDmitri Tikhonov                if (page->slots & (1ULL << slot))
22750aadb33SDmitri Tikhonov                {
22850aadb33SDmitri Tikhonov                    malo->iter.cur_page  = page;
22950aadb33SDmitri Tikhonov                    malo->iter.next_slot = slot + 1;
23050aadb33SDmitri Tikhonov                    return (char *) page + (slot << page->nbits);
23150aadb33SDmitri Tikhonov                }
23250aadb33SDmitri Tikhonov            }
23350aadb33SDmitri Tikhonov            page = SLIST_NEXT(page, next_page);
23450aadb33SDmitri Tikhonov            if (page)
23550aadb33SDmitri Tikhonov                slot = page->initial_slot;
23650aadb33SDmitri Tikhonov            else
23750aadb33SDmitri Tikhonov            {
23850aadb33SDmitri Tikhonov                malo->iter.cur_page = NULL;     /* Stop iterator */
23950aadb33SDmitri Tikhonov                return NULL;
24050aadb33SDmitri Tikhonov            }
24150aadb33SDmitri Tikhonov        }
24250aadb33SDmitri Tikhonov    }
24350aadb33SDmitri Tikhonov
24450aadb33SDmitri Tikhonov    return NULL;
24550aadb33SDmitri Tikhonov}
24650aadb33SDmitri Tikhonov
24750aadb33SDmitri Tikhonov
24850aadb33SDmitri Tikhonovstatic unsigned
24950aadb33SDmitri Tikhonovsize_in_bits (size_t sz)
25050aadb33SDmitri Tikhonov{
25150aadb33SDmitri Tikhonov#if __GNUC__
25250aadb33SDmitri Tikhonov    unsigned clz = __builtin_clz(sz - 1);
25350aadb33SDmitri Tikhonov    return 32 - clz;
25450aadb33SDmitri Tikhonov#else
25550aadb33SDmitri Tikhonov    unsigned clz;
25650aadb33SDmitri Tikhonov    size_t y;
25750aadb33SDmitri Tikhonov
25850aadb33SDmitri Tikhonov    --sz;
25950aadb33SDmitri Tikhonov    clz = 32;
26050aadb33SDmitri Tikhonov    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
26150aadb33SDmitri Tikhonov    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
26250aadb33SDmitri Tikhonov    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
26350aadb33SDmitri Tikhonov    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
26450aadb33SDmitri Tikhonov    y = sz >>  1;   if (y) return 32 - clz + 1;
26550aadb33SDmitri Tikhonov    return 32 - clz + sz;
26650aadb33SDmitri Tikhonov#endif
26750aadb33SDmitri Tikhonov}
26850aadb33SDmitri Tikhonov
26950aadb33SDmitri Tikhonov
27050aadb33SDmitri Tikhonovstatic unsigned
27150aadb33SDmitri Tikhonovfind_free_slot (uint64_t slots)
27250aadb33SDmitri Tikhonov{
27350aadb33SDmitri Tikhonov#if __GNUC__
27450aadb33SDmitri Tikhonov    return __builtin_ffsll(~slots) - 1;
27550aadb33SDmitri Tikhonov#else
27650aadb33SDmitri Tikhonov    unsigned n;
27750aadb33SDmitri Tikhonov
27850aadb33SDmitri Tikhonov    slots =~ slots;
27950aadb33SDmitri Tikhonov    n = 0;
28050aadb33SDmitri Tikhonov
28150aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; }
28250aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; }
28350aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  8) - 1))) { n +=  8; slots >>=  8; }
28450aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  4) - 1))) { n +=  4; slots >>=  4; }
28550aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  2) - 1))) { n +=  2; slots >>=  2; }
28650aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  1) - 1))) { n +=  1; slots >>=  1; }
28750aadb33SDmitri Tikhonov    return n;
28850aadb33SDmitri Tikhonov#endif
28950aadb33SDmitri Tikhonov}
290c51ce338SDmitri Tikhonov
291c51ce338SDmitri Tikhonov
292c51ce338SDmitri Tikhonovsize_t
293c51ce338SDmitri Tikhonovlsquic_malo_mem_used (const struct malo *malo)
294c51ce338SDmitri Tikhonov{
295c51ce338SDmitri Tikhonov    const struct malo_page *page;
296c51ce338SDmitri Tikhonov    size_t size;
297c51ce338SDmitri Tikhonov
298c51ce338SDmitri Tikhonov    size = 0;
299c51ce338SDmitri Tikhonov    SLIST_FOREACH(page, &malo->all_pages, next_page)
300c51ce338SDmitri Tikhonov        size += sizeof(*page);
301c51ce338SDmitri Tikhonov
302c51ce338SDmitri Tikhonov    return size;
303c51ce338SDmitri Tikhonov}
304