lsquic_malo.c revision 461e84d8
150aadb33SDmitri Tikhonov/* Copyright (c) 2017 LiteSpeed Technologies Inc.  See LICENSE. */
250aadb33SDmitri Tikhonov/*
350aadb33SDmitri Tikhonov * lsquic_malo.c -- malo allocator implementation.
450aadb33SDmitri Tikhonov *
550aadb33SDmitri Tikhonov * The malo allocator is a pool of objects of fixed size.  It tries to
650aadb33SDmitri Tikhonov * allocate and deallocate objects as fast as possible.  To do so, it
750aadb33SDmitri Tikhonov * does the following:
850aadb33SDmitri Tikhonov *
950aadb33SDmitri Tikhonov *  1. Allocations occur 4 KB at a time.
1050aadb33SDmitri Tikhonov *  2. No division or multiplication operations are performed.
1150aadb33SDmitri Tikhonov *
1250aadb33SDmitri Tikhonov * (In recent testing, malo was about 2.7 times faster than malloc for
1350aadb33SDmitri Tikhonov * 64-byte objects.)
1450aadb33SDmitri Tikhonov *
1550aadb33SDmitri Tikhonov * Besides speed, two other important characteristics distinguish it
1650aadb33SDmitri Tikhonov * from other pool allocators:
1750aadb33SDmitri Tikhonov *
1850aadb33SDmitri Tikhonov *  1. To free (put) an object, one does not need a pointer to the malo
1950aadb33SDmitri Tikhonov *     object.  This makes this allocator easy to use.
2050aadb33SDmitri Tikhonov *  2. A built-in iterator is provided to iterate over all allocated
2150aadb33SDmitri Tikhonov *     objects (with ability to safely release objects while iterator
2250aadb33SDmitri Tikhonov *     is active).  This may be useful in some circumstances.
2350aadb33SDmitri Tikhonov *
2450aadb33SDmitri Tikhonov * To gain all these advantages, there are trade-offs:
2550aadb33SDmitri Tikhonov *
2650aadb33SDmitri Tikhonov *  1. There are two memory penalties:
2750aadb33SDmitri Tikhonov *      a. Per object overhead.  To avoid division and multiplication,
2850aadb33SDmitri Tikhonov *         the object sizes is rounded up to the nearest power or two,
2950aadb33SDmitri Tikhonov *         starting with 64 bytes (minumum) and up to 2 KB (maximum).
3050aadb33SDmitri Tikhonov *         Thus, a 104-byte object will have a 24-byte overhead; a
3150aadb33SDmitri Tikhonov *         130-byte object will have 126-byte overhead.  This is
3250aadb33SDmitri Tikhonov *         something to keep in mind.
3350aadb33SDmitri Tikhonov *      b. Per page overhead.  Page links occupy some bytes in the
3450aadb33SDmitri Tikhonov *         page.  To keep things fast, at least one slot per page is
3550aadb33SDmitri Tikhonov *         always occupied, independent of object size.  Thus, for a
3650aadb33SDmitri Tikhonov *         1 KB object size, 25% of the page is used for the page
3750aadb33SDmitri Tikhonov *         header.
3850aadb33SDmitri Tikhonov *  2. 4 KB pages are not freed until the malo allocator is destroyed.
3950aadb33SDmitri Tikhonov *     This is something to keep in mind.
4050aadb33SDmitri Tikhonov *
4150aadb33SDmitri Tikhonov * P.S. In Russian, "malo" (��������) means "little" or "few".  Thus, the
4250aadb33SDmitri Tikhonov *      malo allocator aims to perform its job in as few CPU cycles as
4350aadb33SDmitri Tikhonov *      possible.
4450aadb33SDmitri Tikhonov */
4550aadb33SDmitri Tikhonov
4650aadb33SDmitri Tikhonov#include <assert.h>
4750aadb33SDmitri Tikhonov#include <errno.h>
4850aadb33SDmitri Tikhonov#include <stdint.h>
4950aadb33SDmitri Tikhonov#include <stdlib.h>
5050aadb33SDmitri Tikhonov#include <sys/queue.h>
51461e84d8SAmol Deshpande#ifdef WIN32
52461e84d8SAmol Deshpande#include <vc_compat.h>
53461e84d8SAmol Deshpande#endif
5450aadb33SDmitri Tikhonov
5550aadb33SDmitri Tikhonov#include "fiu-local.h"
5650aadb33SDmitri Tikhonov#include "lsquic_malo.h"
5750aadb33SDmitri Tikhonov
5850aadb33SDmitri Tikhonov/* 64 slots in a 4KB page means that the smallest object is 64 bytes.
5950aadb33SDmitri Tikhonov * The largest object is 2KB.
6050aadb33SDmitri Tikhonov */
6150aadb33SDmitri Tikhonov#define MALO_MIN_NBITS 6
6250aadb33SDmitri Tikhonov#define MALO_MAX_NBITS 11
6350aadb33SDmitri Tikhonov
6450aadb33SDmitri Tikhonov/* A "free page" is a page with free slots available.
6550aadb33SDmitri Tikhonov */
6650aadb33SDmitri Tikhonov
6750aadb33SDmitri Tikhonovstatic unsigned find_free_slot (uint64_t slots);
6850aadb33SDmitri Tikhonovstatic unsigned size_in_bits (size_t sz);
6950aadb33SDmitri Tikhonov
7050aadb33SDmitri Tikhonovstruct malo_page {
7150aadb33SDmitri Tikhonov    SLIST_ENTRY(malo_page)  next_page;
7250aadb33SDmitri Tikhonov    LIST_ENTRY(malo_page)   next_free_page;
7350aadb33SDmitri Tikhonov    struct malo            *malo;
7450aadb33SDmitri Tikhonov    uint64_t                slots,
7550aadb33SDmitri Tikhonov                            full_slot_mask;
7650aadb33SDmitri Tikhonov    unsigned                nbits;
7750aadb33SDmitri Tikhonov    unsigned                initial_slot;
7850aadb33SDmitri Tikhonov};
7950aadb33SDmitri Tikhonov
8050aadb33SDmitri Tikhonovtypedef char malo_header_fits_in_one_slot
81461e84d8SAmol Deshpande    [(sizeof(struct malo_page) > (1 << MALO_MAX_NBITS)) ? -1 : 1];
8250aadb33SDmitri Tikhonov
8350aadb33SDmitri Tikhonovstruct malo {
8450aadb33SDmitri Tikhonov    struct malo_page        page_header;
8550aadb33SDmitri Tikhonov    SLIST_HEAD(, malo_page) all_pages;
8650aadb33SDmitri Tikhonov    LIST_HEAD(, malo_page)  free_pages;
8750aadb33SDmitri Tikhonov    struct {
8850aadb33SDmitri Tikhonov        struct malo_page   *cur_page;
8950aadb33SDmitri Tikhonov        unsigned            next_slot;
9050aadb33SDmitri Tikhonov    }                       iter;
9150aadb33SDmitri Tikhonov};
9250aadb33SDmitri Tikhonov
9350aadb33SDmitri Tikhonovstruct malo *
9450aadb33SDmitri Tikhonovlsquic_malo_create (size_t obj_size)
9550aadb33SDmitri Tikhonov{
9650aadb33SDmitri Tikhonov    unsigned nbits = size_in_bits(obj_size);
9750aadb33SDmitri Tikhonov    if (nbits < MALO_MIN_NBITS)
9850aadb33SDmitri Tikhonov        nbits = MALO_MIN_NBITS;
9950aadb33SDmitri Tikhonov    else if (nbits > MALO_MAX_NBITS)
10050aadb33SDmitri Tikhonov    {
10150aadb33SDmitri Tikhonov        errno = EOVERFLOW;
10250aadb33SDmitri Tikhonov        return NULL;
10350aadb33SDmitri Tikhonov    }
10450aadb33SDmitri Tikhonov
10550aadb33SDmitri Tikhonov    struct malo *malo;
10650aadb33SDmitri Tikhonov    if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000))
10750aadb33SDmitri Tikhonov        return NULL;
10850aadb33SDmitri Tikhonov
10950aadb33SDmitri Tikhonov    SLIST_INIT(&malo->all_pages);
11050aadb33SDmitri Tikhonov    LIST_INIT(&malo->free_pages);
11150aadb33SDmitri Tikhonov    malo->iter.cur_page = &malo->page_header;
11250aadb33SDmitri Tikhonov    malo->iter.next_slot = 0;
11350aadb33SDmitri Tikhonov
11450aadb33SDmitri Tikhonov    int n_slots =   sizeof(*malo) / (1 << nbits)
11550aadb33SDmitri Tikhonov                + ((sizeof(*malo) % (1 << nbits)) > 0);
11650aadb33SDmitri Tikhonov
11750aadb33SDmitri Tikhonov    struct malo_page *const page = &malo->page_header;
11850aadb33SDmitri Tikhonov    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
11950aadb33SDmitri Tikhonov    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
12050aadb33SDmitri Tikhonov    page->malo = malo;
12150aadb33SDmitri Tikhonov    if (nbits == MALO_MIN_NBITS)
12250aadb33SDmitri Tikhonov        page->full_slot_mask = ~0ULL;
12350aadb33SDmitri Tikhonov    else
12450aadb33SDmitri Tikhonov        page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1;
12550aadb33SDmitri Tikhonov    page->slots = (1ULL << n_slots) - 1;
12650aadb33SDmitri Tikhonov    page->nbits = nbits;
12750aadb33SDmitri Tikhonov    page->initial_slot = n_slots;
12850aadb33SDmitri Tikhonov
12950aadb33SDmitri Tikhonov    return malo;
13050aadb33SDmitri Tikhonov}
13150aadb33SDmitri Tikhonov
13250aadb33SDmitri Tikhonov
13350aadb33SDmitri Tikhonovstatic struct malo_page *
13450aadb33SDmitri Tikhonovallocate_page (struct malo *malo)
13550aadb33SDmitri Tikhonov{
13650aadb33SDmitri Tikhonov    struct malo_page *page;
13750aadb33SDmitri Tikhonov    if (0 != posix_memalign((void **) &page, 0x1000, 0x1000))
13850aadb33SDmitri Tikhonov        return NULL;
13950aadb33SDmitri Tikhonov    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
14050aadb33SDmitri Tikhonov    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
14150aadb33SDmitri Tikhonov    page->slots = 1;
14250aadb33SDmitri Tikhonov    page->full_slot_mask = malo->page_header.full_slot_mask;
14350aadb33SDmitri Tikhonov    page->nbits = malo->page_header.nbits;
14450aadb33SDmitri Tikhonov    page->malo = malo;
14550aadb33SDmitri Tikhonov    page->initial_slot = 1;
14650aadb33SDmitri Tikhonov    return page;
14750aadb33SDmitri Tikhonov}
14850aadb33SDmitri Tikhonov
14950aadb33SDmitri Tikhonov
15050aadb33SDmitri Tikhonov#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0)
15150aadb33SDmitri Tikhonov
15250aadb33SDmitri Tikhonov/* Get a new object. */
15350aadb33SDmitri Tikhonovvoid *
15450aadb33SDmitri Tikhonovlsquic_malo_get (struct malo *malo)
15550aadb33SDmitri Tikhonov{
15650aadb33SDmitri Tikhonov    fiu_do_on("malo/get", FAIL_NOMEM);
15750aadb33SDmitri Tikhonov    struct malo_page *page = LIST_FIRST(&malo->free_pages);
15850aadb33SDmitri Tikhonov    if (!page)
15950aadb33SDmitri Tikhonov    {
16050aadb33SDmitri Tikhonov        page = allocate_page(malo);
16150aadb33SDmitri Tikhonov        if (!page)
16250aadb33SDmitri Tikhonov            return NULL;
16350aadb33SDmitri Tikhonov    }
16450aadb33SDmitri Tikhonov    unsigned slot = find_free_slot(page->slots);
16550aadb33SDmitri Tikhonov    page->slots |= (1ULL << slot);
16650aadb33SDmitri Tikhonov    if (page->full_slot_mask == page->slots)
16750aadb33SDmitri Tikhonov        LIST_REMOVE(page, next_free_page);
16850aadb33SDmitri Tikhonov    return (char *) page + (slot << page->nbits);
16950aadb33SDmitri Tikhonov}
17050aadb33SDmitri Tikhonov
17150aadb33SDmitri Tikhonov
17250aadb33SDmitri Tikhonov/* Return obj to the pool */
17350aadb33SDmitri Tikhonovvoid
17450aadb33SDmitri Tikhonovlsquic_malo_put (void *obj)
17550aadb33SDmitri Tikhonov{
17650aadb33SDmitri Tikhonov    uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1);
17750aadb33SDmitri Tikhonov    struct malo_page *page = (void *) page_addr;
17850aadb33SDmitri Tikhonov    unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits;
17950aadb33SDmitri Tikhonov    if (page->full_slot_mask == page->slots)
18050aadb33SDmitri Tikhonov        LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page);
18150aadb33SDmitri Tikhonov    page->slots &= ~(1ULL << slot);
18250aadb33SDmitri Tikhonov}
18350aadb33SDmitri Tikhonov
18450aadb33SDmitri Tikhonov
18550aadb33SDmitri Tikhonovvoid
18650aadb33SDmitri Tikhonovlsquic_malo_destroy (struct malo *malo)
18750aadb33SDmitri Tikhonov{
18850aadb33SDmitri Tikhonov    struct malo_page *page, *next;
18950aadb33SDmitri Tikhonov    page = SLIST_FIRST(&malo->all_pages);
19050aadb33SDmitri Tikhonov    while (page != &malo->page_header)
19150aadb33SDmitri Tikhonov    {
19250aadb33SDmitri Tikhonov        next = SLIST_NEXT(page, next_page);
193461e84d8SAmol Deshpande#ifndef WIN32
19450aadb33SDmitri Tikhonov        free(page);
195461e84d8SAmol Deshpande#else
196461e84d8SAmol Deshpande        _aligned_free(page);
197461e84d8SAmol Deshpande#endif
19850aadb33SDmitri Tikhonov        page = next;
19950aadb33SDmitri Tikhonov    }
200461e84d8SAmol Deshpande#ifndef WIN32
20150aadb33SDmitri Tikhonov    free(page);
202461e84d8SAmol Deshpande#else
203461e84d8SAmol Deshpande        _aligned_free(page);
204461e84d8SAmol Deshpande#endif
20550aadb33SDmitri Tikhonov}
20650aadb33SDmitri Tikhonov
20750aadb33SDmitri Tikhonov
20850aadb33SDmitri Tikhonov/* The iterator is built-in.  Usage:
20950aadb33SDmitri Tikhonov * void *obj;
21050aadb33SDmitri Tikhonov * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo))
21150aadb33SDmitri Tikhonov *     do_stuff(obj);
21250aadb33SDmitri Tikhonov */
21350aadb33SDmitri Tikhonovvoid *
21450aadb33SDmitri Tikhonovlsquic_malo_first (struct malo *malo)
21550aadb33SDmitri Tikhonov{
21650aadb33SDmitri Tikhonov    malo->iter.cur_page = SLIST_FIRST(&malo->all_pages);
21750aadb33SDmitri Tikhonov    malo->iter.next_slot = malo->iter.cur_page->initial_slot;
21850aadb33SDmitri Tikhonov    return lsquic_malo_next(malo);
21950aadb33SDmitri Tikhonov}
22050aadb33SDmitri Tikhonov
22150aadb33SDmitri Tikhonov
22250aadb33SDmitri Tikhonovvoid *
22350aadb33SDmitri Tikhonovlsquic_malo_next (struct malo *malo)
22450aadb33SDmitri Tikhonov{
22550aadb33SDmitri Tikhonov    struct malo_page *page;
22650aadb33SDmitri Tikhonov    unsigned max_slot, slot;
22750aadb33SDmitri Tikhonov
22850aadb33SDmitri Tikhonov    page = malo->iter.cur_page;
22950aadb33SDmitri Tikhonov    if (page)
23050aadb33SDmitri Tikhonov    {
23150aadb33SDmitri Tikhonov        max_slot = 1 << (12 - page->nbits);     /* Same for all pages */
23250aadb33SDmitri Tikhonov        slot = malo->iter.next_slot;
23350aadb33SDmitri Tikhonov        while (1)
23450aadb33SDmitri Tikhonov        {
23550aadb33SDmitri Tikhonov            for (; slot < max_slot; ++slot)
23650aadb33SDmitri Tikhonov            {
23750aadb33SDmitri Tikhonov                if (page->slots & (1ULL << slot))
23850aadb33SDmitri Tikhonov                {
23950aadb33SDmitri Tikhonov                    malo->iter.cur_page  = page;
24050aadb33SDmitri Tikhonov                    malo->iter.next_slot = slot + 1;
24150aadb33SDmitri Tikhonov                    return (char *) page + (slot << page->nbits);
24250aadb33SDmitri Tikhonov                }
24350aadb33SDmitri Tikhonov            }
24450aadb33SDmitri Tikhonov            page = SLIST_NEXT(page, next_page);
24550aadb33SDmitri Tikhonov            if (page)
24650aadb33SDmitri Tikhonov                slot = page->initial_slot;
24750aadb33SDmitri Tikhonov            else
24850aadb33SDmitri Tikhonov            {
24950aadb33SDmitri Tikhonov                malo->iter.cur_page = NULL;     /* Stop iterator */
25050aadb33SDmitri Tikhonov                return NULL;
25150aadb33SDmitri Tikhonov            }
25250aadb33SDmitri Tikhonov        }
25350aadb33SDmitri Tikhonov    }
25450aadb33SDmitri Tikhonov
25550aadb33SDmitri Tikhonov    return NULL;
25650aadb33SDmitri Tikhonov}
25750aadb33SDmitri Tikhonov
25850aadb33SDmitri Tikhonov
25950aadb33SDmitri Tikhonovstatic unsigned
26050aadb33SDmitri Tikhonovsize_in_bits (size_t sz)
26150aadb33SDmitri Tikhonov{
26250aadb33SDmitri Tikhonov#if __GNUC__
26350aadb33SDmitri Tikhonov    unsigned clz = __builtin_clz(sz - 1);
26450aadb33SDmitri Tikhonov    return 32 - clz;
26550aadb33SDmitri Tikhonov#else
26650aadb33SDmitri Tikhonov    unsigned clz;
26750aadb33SDmitri Tikhonov    size_t y;
26850aadb33SDmitri Tikhonov
26950aadb33SDmitri Tikhonov    --sz;
27050aadb33SDmitri Tikhonov    clz = 32;
27150aadb33SDmitri Tikhonov    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
27250aadb33SDmitri Tikhonov    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
27350aadb33SDmitri Tikhonov    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
27450aadb33SDmitri Tikhonov    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
27550aadb33SDmitri Tikhonov    y = sz >>  1;   if (y) return 32 - clz + 1;
27650aadb33SDmitri Tikhonov    return 32 - clz + sz;
27750aadb33SDmitri Tikhonov#endif
27850aadb33SDmitri Tikhonov}
27950aadb33SDmitri Tikhonov
28050aadb33SDmitri Tikhonov
28150aadb33SDmitri Tikhonovstatic unsigned
28250aadb33SDmitri Tikhonovfind_free_slot (uint64_t slots)
28350aadb33SDmitri Tikhonov{
28450aadb33SDmitri Tikhonov#if __GNUC__
28550aadb33SDmitri Tikhonov    return __builtin_ffsll(~slots) - 1;
28650aadb33SDmitri Tikhonov#else
28750aadb33SDmitri Tikhonov    unsigned n;
28850aadb33SDmitri Tikhonov
28950aadb33SDmitri Tikhonov    slots =~ slots;
29050aadb33SDmitri Tikhonov    n = 0;
29150aadb33SDmitri Tikhonov
29250aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; }
29350aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; }
29450aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  8) - 1))) { n +=  8; slots >>=  8; }
29550aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  4) - 1))) { n +=  4; slots >>=  4; }
29650aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  2) - 1))) { n +=  2; slots >>=  2; }
29750aadb33SDmitri Tikhonov    if (0 == (slots & ((1ULL <<  1) - 1))) { n +=  1; slots >>=  1; }
29850aadb33SDmitri Tikhonov    return n;
29950aadb33SDmitri Tikhonov#endif
30050aadb33SDmitri Tikhonov}
301c51ce338SDmitri Tikhonov
302c51ce338SDmitri Tikhonov
303c51ce338SDmitri Tikhonovsize_t
304c51ce338SDmitri Tikhonovlsquic_malo_mem_used (const struct malo *malo)
305c51ce338SDmitri Tikhonov{
306c51ce338SDmitri Tikhonov    const struct malo_page *page;
307c51ce338SDmitri Tikhonov    size_t size;
308c51ce338SDmitri Tikhonov
309c51ce338SDmitri Tikhonov    size = 0;
310c51ce338SDmitri Tikhonov    SLIST_FOREACH(page, &malo->all_pages, next_page)
311c51ce338SDmitri Tikhonov        size += sizeof(*page);
312c51ce338SDmitri Tikhonov
313c51ce338SDmitri Tikhonov    return size;
314c51ce338SDmitri Tikhonov}
315