lsquic_malo.c revision 2d296031
1229fce07SDmitri Tikhonov/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc.  See LICENSE. */
250aadb33SDmitri Tikhonov/*
350aadb33SDmitri Tikhonov * lsquic_malo.c -- malo allocator implementation.
450aadb33SDmitri Tikhonov *
550aadb33SDmitri Tikhonov * The malo allocator is a pool of objects of fixed size.  It tries to
650aadb33SDmitri Tikhonov * allocate and deallocate objects as fast as possible.  To do so, it
750aadb33SDmitri Tikhonov * does the following:
850aadb33SDmitri Tikhonov *
950aadb33SDmitri Tikhonov *  1. Allocations occur 4 KB at a time.
1050aadb33SDmitri Tikhonov *  2. No division or multiplication operations are performed.
1150aadb33SDmitri Tikhonov *
1250aadb33SDmitri Tikhonov * (In recent testing, malo was about 2.7 times faster than malloc for
1350aadb33SDmitri Tikhonov * 64-byte objects.)
1450aadb33SDmitri Tikhonov *
1550aadb33SDmitri Tikhonov * Besides speed, two other important characteristics distinguish it
1650aadb33SDmitri Tikhonov * from other pool allocators:
1750aadb33SDmitri Tikhonov *
1850aadb33SDmitri Tikhonov *  1. To free (put) an object, one does not need a pointer to the malo
1950aadb33SDmitri Tikhonov *     object.  This makes this allocator easy to use.
2050aadb33SDmitri Tikhonov *  2. A built-in iterator is provided to iterate over all allocated
2150aadb33SDmitri Tikhonov *     objects (with ability to safely release objects while iterator
2250aadb33SDmitri Tikhonov *     is active).  This may be useful in some circumstances.
2350aadb33SDmitri Tikhonov *
2450aadb33SDmitri Tikhonov * To gain all these advantages, there are trade-offs:
2550aadb33SDmitri Tikhonov *
2650aadb33SDmitri Tikhonov *  1. There are two memory penalties:
2750aadb33SDmitri Tikhonov *      a. Per object overhead.  To avoid division and multiplication,
2850aadb33SDmitri Tikhonov *         the object sizes is rounded up to the nearest power or two,
2950aadb33SDmitri Tikhonov *         starting with 64 bytes (minumum) and up to 2 KB (maximum).
3050aadb33SDmitri Tikhonov *         Thus, a 104-byte object will have a 24-byte overhead; a
3150aadb33SDmitri Tikhonov *         130-byte object will have 126-byte overhead.  This is
3250aadb33SDmitri Tikhonov *         something to keep in mind.
3350aadb33SDmitri Tikhonov *      b. Per page overhead.  Page links occupy some bytes in the
3450aadb33SDmitri Tikhonov *         page.  To keep things fast, at least one slot per page is
3550aadb33SDmitri Tikhonov *         always occupied, independent of object size.  Thus, for a
3650aadb33SDmitri Tikhonov *         1 KB object size, 25% of the page is used for the page
3750aadb33SDmitri Tikhonov *         header.
3850aadb33SDmitri Tikhonov *  2. 4 KB pages are not freed until the malo allocator is destroyed.
3950aadb33SDmitri Tikhonov *     This is something to keep in mind.
4050aadb33SDmitri Tikhonov *
4150aadb33SDmitri Tikhonov * P.S. In Russian, "malo" (��������) means "little" or "few".  Thus, the
4250aadb33SDmitri Tikhonov *      malo allocator aims to perform its job in as few CPU cycles as
4350aadb33SDmitri Tikhonov *      possible.
4450aadb33SDmitri Tikhonov */
4550aadb33SDmitri Tikhonov
4650aadb33SDmitri Tikhonov#include <assert.h>
4750aadb33SDmitri Tikhonov#include <errno.h>
4850aadb33SDmitri Tikhonov#include <stdint.h>
4950aadb33SDmitri Tikhonov#include <stdlib.h>
5050aadb33SDmitri Tikhonov#include <sys/queue.h>
51461e84d8SAmol Deshpande#ifdef WIN32
52461e84d8SAmol Deshpande#include <vc_compat.h>
532d296031SDmitri Tikhonov#include <intrin.h>
54461e84d8SAmol Deshpande#endif
5550aadb33SDmitri Tikhonov
5650aadb33SDmitri Tikhonov#include "fiu-local.h"
5750aadb33SDmitri Tikhonov#include "lsquic_malo.h"
5850aadb33SDmitri Tikhonov
5950aadb33SDmitri Tikhonov/* 64 slots in a 4KB page means that the smallest object is 64 bytes.
6050aadb33SDmitri Tikhonov * The largest object is 2KB.
6150aadb33SDmitri Tikhonov */
6250aadb33SDmitri Tikhonov#define MALO_MIN_NBITS 6
6350aadb33SDmitri Tikhonov#define MALO_MAX_NBITS 11
6450aadb33SDmitri Tikhonov
6550aadb33SDmitri Tikhonov/* A "free page" is a page with free slots available.
6650aadb33SDmitri Tikhonov */
6750aadb33SDmitri Tikhonov
6850aadb33SDmitri Tikhonovstatic unsigned find_free_slot (uint64_t slots);
6950aadb33SDmitri Tikhonovstatic unsigned size_in_bits (size_t sz);
7050aadb33SDmitri Tikhonov
7150aadb33SDmitri Tikhonovstruct malo_page {
7250aadb33SDmitri Tikhonov    SLIST_ENTRY(malo_page)  next_page;
7350aadb33SDmitri Tikhonov    LIST_ENTRY(malo_page)   next_free_page;
7450aadb33SDmitri Tikhonov    struct malo            *malo;
7550aadb33SDmitri Tikhonov    uint64_t                slots,
7650aadb33SDmitri Tikhonov                            full_slot_mask;
7750aadb33SDmitri Tikhonov    unsigned                nbits;
7850aadb33SDmitri Tikhonov    unsigned                initial_slot;
7950aadb33SDmitri Tikhonov};
8050aadb33SDmitri Tikhonov
8150aadb33SDmitri Tikhonovtypedef char malo_header_fits_in_one_slot
82461e84d8SAmol Deshpande    [(sizeof(struct malo_page) > (1 << MALO_MAX_NBITS)) ? -1 : 1];
8350aadb33SDmitri Tikhonov
8450aadb33SDmitri Tikhonovstruct malo {
8550aadb33SDmitri Tikhonov    struct malo_page        page_header;
8650aadb33SDmitri Tikhonov    SLIST_HEAD(, malo_page) all_pages;
8750aadb33SDmitri Tikhonov    LIST_HEAD(, malo_page)  free_pages;
8850aadb33SDmitri Tikhonov    struct {
8950aadb33SDmitri Tikhonov        struct malo_page   *cur_page;
9050aadb33SDmitri Tikhonov        unsigned            next_slot;
9150aadb33SDmitri Tikhonov    }                       iter;
9250aadb33SDmitri Tikhonov};
9350aadb33SDmitri Tikhonov
9450aadb33SDmitri Tikhonovstruct malo *
9550aadb33SDmitri Tikhonovlsquic_malo_create (size_t obj_size)
9650aadb33SDmitri Tikhonov{
9750aadb33SDmitri Tikhonov    unsigned nbits = size_in_bits(obj_size);
9850aadb33SDmitri Tikhonov    if (nbits < MALO_MIN_NBITS)
9950aadb33SDmitri Tikhonov        nbits = MALO_MIN_NBITS;
10050aadb33SDmitri Tikhonov    else if (nbits > MALO_MAX_NBITS)
10150aadb33SDmitri Tikhonov    {
10250aadb33SDmitri Tikhonov        errno = EOVERFLOW;
10350aadb33SDmitri Tikhonov        return NULL;
10450aadb33SDmitri Tikhonov    }
10550aadb33SDmitri Tikhonov
10650aadb33SDmitri Tikhonov    struct malo *malo;
10750aadb33SDmitri Tikhonov    if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000))
10850aadb33SDmitri Tikhonov        return NULL;
10950aadb33SDmitri Tikhonov
11050aadb33SDmitri Tikhonov    SLIST_INIT(&malo->all_pages);
11150aadb33SDmitri Tikhonov    LIST_INIT(&malo->free_pages);
11250aadb33SDmitri Tikhonov    malo->iter.cur_page = &malo->page_header;
11350aadb33SDmitri Tikhonov    malo->iter.next_slot = 0;
11450aadb33SDmitri Tikhonov
11550aadb33SDmitri Tikhonov    int n_slots =   sizeof(*malo) / (1 << nbits)
11650aadb33SDmitri Tikhonov                + ((sizeof(*malo) % (1 << nbits)) > 0);
11750aadb33SDmitri Tikhonov
11850aadb33SDmitri Tikhonov    struct malo_page *const page = &malo->page_header;
11950aadb33SDmitri Tikhonov    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
12050aadb33SDmitri Tikhonov    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
12150aadb33SDmitri Tikhonov    page->malo = malo;
12250aadb33SDmitri Tikhonov    if (nbits == MALO_MIN_NBITS)
12350aadb33SDmitri Tikhonov        page->full_slot_mask = ~0ULL;
12450aadb33SDmitri Tikhonov    else
12550aadb33SDmitri Tikhonov        page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1;
12650aadb33SDmitri Tikhonov    page->slots = (1ULL << n_slots) - 1;
12750aadb33SDmitri Tikhonov    page->nbits = nbits;
12850aadb33SDmitri Tikhonov    page->initial_slot = n_slots;
12950aadb33SDmitri Tikhonov
13050aadb33SDmitri Tikhonov    return malo;
13150aadb33SDmitri Tikhonov}
13250aadb33SDmitri Tikhonov
13350aadb33SDmitri Tikhonov
13450aadb33SDmitri Tikhonovstatic struct malo_page *
13550aadb33SDmitri Tikhonovallocate_page (struct malo *malo)
13650aadb33SDmitri Tikhonov{
13750aadb33SDmitri Tikhonov    struct malo_page *page;
13850aadb33SDmitri Tikhonov    if (0 != posix_memalign((void **) &page, 0x1000, 0x1000))
13950aadb33SDmitri Tikhonov        return NULL;
14050aadb33SDmitri Tikhonov    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
14150aadb33SDmitri Tikhonov    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
14250aadb33SDmitri Tikhonov    page->slots = 1;
14350aadb33SDmitri Tikhonov    page->full_slot_mask = malo->page_header.full_slot_mask;
14450aadb33SDmitri Tikhonov    page->nbits = malo->page_header.nbits;
14550aadb33SDmitri Tikhonov    page->malo = malo;
14650aadb33SDmitri Tikhonov    page->initial_slot = 1;
14750aadb33SDmitri Tikhonov    return page;
14850aadb33SDmitri Tikhonov}
14950aadb33SDmitri Tikhonov
15050aadb33SDmitri Tikhonov
15150aadb33SDmitri Tikhonov#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0)
15250aadb33SDmitri Tikhonov
15350aadb33SDmitri Tikhonov/* Get a new object. */
15450aadb33SDmitri Tikhonovvoid *
15550aadb33SDmitri Tikhonovlsquic_malo_get (struct malo *malo)
15650aadb33SDmitri Tikhonov{
15750aadb33SDmitri Tikhonov    fiu_do_on("malo/get", FAIL_NOMEM);
15850aadb33SDmitri Tikhonov    struct malo_page *page = LIST_FIRST(&malo->free_pages);
15950aadb33SDmitri Tikhonov    if (!page)
16050aadb33SDmitri Tikhonov    {
16150aadb33SDmitri Tikhonov        page = allocate_page(malo);
16250aadb33SDmitri Tikhonov        if (!page)
16350aadb33SDmitri Tikhonov            return NULL;
16450aadb33SDmitri Tikhonov    }
16550aadb33SDmitri Tikhonov    unsigned slot = find_free_slot(page->slots);
16650aadb33SDmitri Tikhonov    page->slots |= (1ULL << slot);
16750aadb33SDmitri Tikhonov    if (page->full_slot_mask == page->slots)
16850aadb33SDmitri Tikhonov        LIST_REMOVE(page, next_free_page);
16950aadb33SDmitri Tikhonov    return (char *) page + (slot << page->nbits);
17050aadb33SDmitri Tikhonov}
17150aadb33SDmitri Tikhonov
17250aadb33SDmitri Tikhonov
17350aadb33SDmitri Tikhonov/* Return obj to the pool */
17450aadb33SDmitri Tikhonovvoid
17550aadb33SDmitri Tikhonovlsquic_malo_put (void *obj)
17650aadb33SDmitri Tikhonov{
17750aadb33SDmitri Tikhonov    uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1);
17850aadb33SDmitri Tikhonov    struct malo_page *page = (void *) page_addr;
17950aadb33SDmitri Tikhonov    unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits;
18050aadb33SDmitri Tikhonov    if (page->full_slot_mask == page->slots)
18150aadb33SDmitri Tikhonov        LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page);
18250aadb33SDmitri Tikhonov    page->slots &= ~(1ULL << slot);
18350aadb33SDmitri Tikhonov}
18450aadb33SDmitri Tikhonov
18550aadb33SDmitri Tikhonov
18650aadb33SDmitri Tikhonovvoid
18750aadb33SDmitri Tikhonovlsquic_malo_destroy (struct malo *malo)
18850aadb33SDmitri Tikhonov{
18950aadb33SDmitri Tikhonov    struct malo_page *page, *next;
19050aadb33SDmitri Tikhonov    page = SLIST_FIRST(&malo->all_pages);
19150aadb33SDmitri Tikhonov    while (page != &malo->page_header)
19250aadb33SDmitri Tikhonov    {
19350aadb33SDmitri Tikhonov        next = SLIST_NEXT(page, next_page);
194461e84d8SAmol Deshpande#ifndef WIN32
19550aadb33SDmitri Tikhonov        free(page);
196461e84d8SAmol Deshpande#else
197461e84d8SAmol Deshpande        _aligned_free(page);
198461e84d8SAmol Deshpande#endif
19950aadb33SDmitri Tikhonov        page = next;
20050aadb33SDmitri Tikhonov    }
201461e84d8SAmol Deshpande#ifndef WIN32
20250aadb33SDmitri Tikhonov    free(page);
203461e84d8SAmol Deshpande#else
204461e84d8SAmol Deshpande        _aligned_free(page);
205461e84d8SAmol Deshpande#endif
20650aadb33SDmitri Tikhonov}
20750aadb33SDmitri Tikhonov
20850aadb33SDmitri Tikhonov
20950aadb33SDmitri Tikhonov/* The iterator is built-in.  Usage:
21050aadb33SDmitri Tikhonov * void *obj;
21150aadb33SDmitri Tikhonov * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo))
21250aadb33SDmitri Tikhonov *     do_stuff(obj);
21350aadb33SDmitri Tikhonov */
21450aadb33SDmitri Tikhonovvoid *
21550aadb33SDmitri Tikhonovlsquic_malo_first (struct malo *malo)
21650aadb33SDmitri Tikhonov{
21750aadb33SDmitri Tikhonov    malo->iter.cur_page = SLIST_FIRST(&malo->all_pages);
21850aadb33SDmitri Tikhonov    malo->iter.next_slot = malo->iter.cur_page->initial_slot;
21950aadb33SDmitri Tikhonov    return lsquic_malo_next(malo);
22050aadb33SDmitri Tikhonov}
22150aadb33SDmitri Tikhonov
22250aadb33SDmitri Tikhonov
22350aadb33SDmitri Tikhonovvoid *
22450aadb33SDmitri Tikhonovlsquic_malo_next (struct malo *malo)
22550aadb33SDmitri Tikhonov{
22650aadb33SDmitri Tikhonov    struct malo_page *page;
22750aadb33SDmitri Tikhonov    unsigned max_slot, slot;
22850aadb33SDmitri Tikhonov
22950aadb33SDmitri Tikhonov    page = malo->iter.cur_page;
23050aadb33SDmitri Tikhonov    if (page)
23150aadb33SDmitri Tikhonov    {
23250aadb33SDmitri Tikhonov        max_slot = 1 << (12 - page->nbits);     /* Same for all pages */
23350aadb33SDmitri Tikhonov        slot = malo->iter.next_slot;
23450aadb33SDmitri Tikhonov        while (1)
23550aadb33SDmitri Tikhonov        {
23650aadb33SDmitri Tikhonov            for (; slot < max_slot; ++slot)
23750aadb33SDmitri Tikhonov            {
23850aadb33SDmitri Tikhonov                if (page->slots & (1ULL << slot))
23950aadb33SDmitri Tikhonov                {
24050aadb33SDmitri Tikhonov                    malo->iter.cur_page  = page;
24150aadb33SDmitri Tikhonov                    malo->iter.next_slot = slot + 1;
24250aadb33SDmitri Tikhonov                    return (char *) page + (slot << page->nbits);
24350aadb33SDmitri Tikhonov                }
24450aadb33SDmitri Tikhonov            }
24550aadb33SDmitri Tikhonov            page = SLIST_NEXT(page, next_page);
24650aadb33SDmitri Tikhonov            if (page)
24750aadb33SDmitri Tikhonov                slot = page->initial_slot;
24850aadb33SDmitri Tikhonov            else
24950aadb33SDmitri Tikhonov            {
25050aadb33SDmitri Tikhonov                malo->iter.cur_page = NULL;     /* Stop iterator */
25150aadb33SDmitri Tikhonov                return NULL;
25250aadb33SDmitri Tikhonov            }
25350aadb33SDmitri Tikhonov        }
25450aadb33SDmitri Tikhonov    }
25550aadb33SDmitri Tikhonov
25650aadb33SDmitri Tikhonov    return NULL;
25750aadb33SDmitri Tikhonov}
25850aadb33SDmitri Tikhonov
25950aadb33SDmitri Tikhonov
26050aadb33SDmitri Tikhonovstatic unsigned
26150aadb33SDmitri Tikhonovsize_in_bits (size_t sz)
26250aadb33SDmitri Tikhonov{
26350aadb33SDmitri Tikhonov#if __GNUC__
26450aadb33SDmitri Tikhonov    unsigned clz = __builtin_clz(sz - 1);
26550aadb33SDmitri Tikhonov    return 32 - clz;
2662d296031SDmitri Tikhonov#elif defined(WIN32)
2672d296031SDmitri Tikhonov    unsigned char s;
2682d296031SDmitri Tikhonov    unsigned long idx;
2692d296031SDmitri Tikhonov    s = _BitScanReverse(&idx, sz);
2702d296031SDmitri Tikhonov    assert(s);
2712d296031SDmitri Tikhonov    return (unsigned) idx + 1;
27250aadb33SDmitri Tikhonov#else
2732d296031SDmitri Tikhonov#error This function contains a bug!
27450aadb33SDmitri Tikhonov    unsigned clz;
27550aadb33SDmitri Tikhonov    size_t y;
27650aadb33SDmitri Tikhonov
27750aadb33SDmitri Tikhonov    --sz;
27850aadb33SDmitri Tikhonov    clz = 32;
27950aadb33SDmitri Tikhonov    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
28050aadb33SDmitri Tikhonov    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
28150aadb33SDmitri Tikhonov    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
28250aadb33SDmitri Tikhonov    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
28350aadb33SDmitri Tikhonov    y = sz >>  1;   if (y) return 32 - clz + 1;
28450aadb33SDmitri Tikhonov    return 32 - clz + sz;
28550aadb33SDmitri Tikhonov#endif
28650aadb33SDmitri Tikhonov}
28750aadb33SDmitri Tikhonov
28850aadb33SDmitri Tikhonov
28950aadb33SDmitri Tikhonovstatic unsigned
29050aadb33SDmitri Tikhonovfind_free_slot (uint64_t slots)
29150aadb33SDmitri Tikhonov{
29250aadb33SDmitri Tikhonov#if __GNUC__
29350aadb33SDmitri Tikhonov    return __builtin_ffsll(~slots) - 1;
29450aadb33SDmitri Tikhonov#else
29550aadb33SDmitri Tikhonov    unsigned n;
29650aadb33SDmitri Tikhonov
29750aadb33SDmitri Tikhonov    slots =~ slots;
29850aadb33SDmitri Tikhonov    n = 0;
29950aadb33SDmitri Tikhonov
30050aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; }
30150aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; }
30250aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  8) - 1))) { n +=  8; slots >>=  8; }
30350aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  4) - 1))) { n +=  4; slots >>=  4; }
30450aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  2) - 1))) { n +=  2; slots >>=  2; }
30550aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  1) - 1))) { n +=  1; slots >>=  1; }
30650aadb33SDmitri Tikhonov    return n;
30750aadb33SDmitri Tikhonov#endif
30850aadb33SDmitri Tikhonov}
309c51ce338SDmitri Tikhonov
310c51ce338SDmitri Tikhonov
311c51ce338SDmitri Tikhonovsize_t
312c51ce338SDmitri Tikhonovlsquic_malo_mem_used (const struct malo *malo)
313c51ce338SDmitri Tikhonov{
314c51ce338SDmitri Tikhonov    const struct malo_page *page;
315c51ce338SDmitri Tikhonov    size_t size;
316c51ce338SDmitri Tikhonov
317c51ce338SDmitri Tikhonov    size = 0;
318c51ce338SDmitri Tikhonov    SLIST_FOREACH(page, &malo->all_pages, next_page)
319c51ce338SDmitri Tikhonov        size += sizeof(*page);
320c51ce338SDmitri Tikhonov
321c51ce338SDmitri Tikhonov    return size;
322c51ce338SDmitri Tikhonov}
323