lsquic_malo.c revision c51ce338
150aadb33SDmitri Tikhonov/* Copyright (c) 2017 LiteSpeed Technologies Inc. See LICENSE. */ 250aadb33SDmitri Tikhonov/* 350aadb33SDmitri Tikhonov * lsquic_malo.c -- malo allocator implementation. 450aadb33SDmitri Tikhonov * 550aadb33SDmitri Tikhonov * The malo allocator is a pool of objects of fixed size. It tries to 650aadb33SDmitri Tikhonov * allocate and deallocate objects as fast as possible. To do so, it 750aadb33SDmitri Tikhonov * does the following: 850aadb33SDmitri Tikhonov * 950aadb33SDmitri Tikhonov * 1. Allocations occur 4 KB at a time. 1050aadb33SDmitri Tikhonov * 2. No division or multiplication operations are performed. 1150aadb33SDmitri Tikhonov * 1250aadb33SDmitri Tikhonov * (In recent testing, malo was about 2.7 times faster than malloc for 1350aadb33SDmitri Tikhonov * 64-byte objects.) 1450aadb33SDmitri Tikhonov * 1550aadb33SDmitri Tikhonov * Besides speed, two other important characteristics distinguish it 1650aadb33SDmitri Tikhonov * from other pool allocators: 1750aadb33SDmitri Tikhonov * 1850aadb33SDmitri Tikhonov * 1. To free (put) an object, one does not need a pointer to the malo 1950aadb33SDmitri Tikhonov * object. This makes this allocator easy to use. 2050aadb33SDmitri Tikhonov * 2. A built-in iterator is provided to iterate over all allocated 2150aadb33SDmitri Tikhonov * objects (with ability to safely release objects while iterator 2250aadb33SDmitri Tikhonov * is active). This may be useful in some circumstances. 2350aadb33SDmitri Tikhonov * 2450aadb33SDmitri Tikhonov * To gain all these advantages, there are trade-offs: 2550aadb33SDmitri Tikhonov * 2650aadb33SDmitri Tikhonov * 1. There are two memory penalties: 2750aadb33SDmitri Tikhonov * a. Per object overhead. To avoid division and multiplication, 2850aadb33SDmitri Tikhonov * the object sizes is rounded up to the nearest power or two, 2950aadb33SDmitri Tikhonov * starting with 64 bytes (minumum) and up to 2 KB (maximum). 3050aadb33SDmitri Tikhonov * Thus, a 104-byte object will have a 24-byte overhead; a 3150aadb33SDmitri Tikhonov * 130-byte object will have 126-byte overhead. This is 3250aadb33SDmitri Tikhonov * something to keep in mind. 3350aadb33SDmitri Tikhonov * b. Per page overhead. Page links occupy some bytes in the 3450aadb33SDmitri Tikhonov * page. To keep things fast, at least one slot per page is 3550aadb33SDmitri Tikhonov * always occupied, independent of object size. Thus, for a 3650aadb33SDmitri Tikhonov * 1 KB object size, 25% of the page is used for the page 3750aadb33SDmitri Tikhonov * header. 3850aadb33SDmitri Tikhonov * 2. 4 KB pages are not freed until the malo allocator is destroyed. 3950aadb33SDmitri Tikhonov * This is something to keep in mind. 4050aadb33SDmitri Tikhonov * 4150aadb33SDmitri Tikhonov * P.S. In Russian, "malo" (��������) means "little" or "few". Thus, the 4250aadb33SDmitri Tikhonov * malo allocator aims to perform its job in as few CPU cycles as 4350aadb33SDmitri Tikhonov * possible. 4450aadb33SDmitri Tikhonov */ 4550aadb33SDmitri Tikhonov 4650aadb33SDmitri Tikhonov#include <assert.h> 4750aadb33SDmitri Tikhonov#include <errno.h> 4850aadb33SDmitri Tikhonov#include <stdint.h> 4950aadb33SDmitri Tikhonov#include <stdlib.h> 5050aadb33SDmitri Tikhonov#include <sys/queue.h> 5150aadb33SDmitri Tikhonov 5250aadb33SDmitri Tikhonov#include "fiu-local.h" 5350aadb33SDmitri Tikhonov#include "lsquic_malo.h" 5450aadb33SDmitri Tikhonov 5550aadb33SDmitri Tikhonov/* 64 slots in a 4KB page means that the smallest object is 64 bytes. 5650aadb33SDmitri Tikhonov * The largest object is 2KB. 5750aadb33SDmitri Tikhonov */ 5850aadb33SDmitri Tikhonov#define MALO_MIN_NBITS 6 5950aadb33SDmitri Tikhonov#define MALO_MAX_NBITS 11 6050aadb33SDmitri Tikhonov 6150aadb33SDmitri Tikhonov/* A "free page" is a page with free slots available. 6250aadb33SDmitri Tikhonov */ 6350aadb33SDmitri Tikhonov 6450aadb33SDmitri Tikhonovstatic unsigned find_free_slot (uint64_t slots); 6550aadb33SDmitri Tikhonovstatic unsigned size_in_bits (size_t sz); 6650aadb33SDmitri Tikhonov 6750aadb33SDmitri Tikhonovstruct malo_page { 6850aadb33SDmitri Tikhonov SLIST_ENTRY(malo_page) next_page; 6950aadb33SDmitri Tikhonov LIST_ENTRY(malo_page) next_free_page; 7050aadb33SDmitri Tikhonov struct malo *malo; 7150aadb33SDmitri Tikhonov uint64_t slots, 7250aadb33SDmitri Tikhonov full_slot_mask; 7350aadb33SDmitri Tikhonov unsigned nbits; 7450aadb33SDmitri Tikhonov unsigned initial_slot; 7550aadb33SDmitri Tikhonov}; 7650aadb33SDmitri Tikhonov 7750aadb33SDmitri Tikhonovtypedef char malo_header_fits_in_one_slot 7850aadb33SDmitri Tikhonov [0 - (sizeof(struct malo_page) > (1 << MALO_MAX_NBITS))]; 7950aadb33SDmitri Tikhonov 8050aadb33SDmitri Tikhonovstruct malo { 8150aadb33SDmitri Tikhonov struct malo_page page_header; 8250aadb33SDmitri Tikhonov SLIST_HEAD(, malo_page) all_pages; 8350aadb33SDmitri Tikhonov LIST_HEAD(, malo_page) free_pages; 8450aadb33SDmitri Tikhonov struct { 8550aadb33SDmitri Tikhonov struct malo_page *cur_page; 8650aadb33SDmitri Tikhonov unsigned next_slot; 8750aadb33SDmitri Tikhonov } iter; 8850aadb33SDmitri Tikhonov}; 8950aadb33SDmitri Tikhonov 9050aadb33SDmitri Tikhonovstruct malo * 9150aadb33SDmitri Tikhonovlsquic_malo_create (size_t obj_size) 9250aadb33SDmitri Tikhonov{ 9350aadb33SDmitri Tikhonov unsigned nbits = size_in_bits(obj_size); 9450aadb33SDmitri Tikhonov if (nbits < MALO_MIN_NBITS) 9550aadb33SDmitri Tikhonov nbits = MALO_MIN_NBITS; 9650aadb33SDmitri Tikhonov else if (nbits > MALO_MAX_NBITS) 9750aadb33SDmitri Tikhonov { 9850aadb33SDmitri Tikhonov errno = EOVERFLOW; 9950aadb33SDmitri Tikhonov return NULL; 10050aadb33SDmitri Tikhonov } 10150aadb33SDmitri Tikhonov 10250aadb33SDmitri Tikhonov struct malo *malo; 10350aadb33SDmitri Tikhonov if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000)) 10450aadb33SDmitri Tikhonov return NULL; 10550aadb33SDmitri Tikhonov 10650aadb33SDmitri Tikhonov SLIST_INIT(&malo->all_pages); 10750aadb33SDmitri Tikhonov LIST_INIT(&malo->free_pages); 10850aadb33SDmitri Tikhonov malo->iter.cur_page = &malo->page_header; 10950aadb33SDmitri Tikhonov malo->iter.next_slot = 0; 11050aadb33SDmitri Tikhonov 11150aadb33SDmitri Tikhonov int n_slots = sizeof(*malo) / (1 << nbits) 11250aadb33SDmitri Tikhonov + ((sizeof(*malo) % (1 << nbits)) > 0); 11350aadb33SDmitri Tikhonov 11450aadb33SDmitri Tikhonov struct malo_page *const page = &malo->page_header; 11550aadb33SDmitri Tikhonov SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 11650aadb33SDmitri Tikhonov LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 11750aadb33SDmitri Tikhonov page->malo = malo; 11850aadb33SDmitri Tikhonov if (nbits == MALO_MIN_NBITS) 11950aadb33SDmitri Tikhonov page->full_slot_mask = ~0ULL; 12050aadb33SDmitri Tikhonov else 12150aadb33SDmitri Tikhonov page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1; 12250aadb33SDmitri Tikhonov page->slots = (1ULL << n_slots) - 1; 12350aadb33SDmitri Tikhonov page->nbits = nbits; 12450aadb33SDmitri Tikhonov page->initial_slot = n_slots; 12550aadb33SDmitri Tikhonov 12650aadb33SDmitri Tikhonov return malo; 12750aadb33SDmitri Tikhonov} 12850aadb33SDmitri Tikhonov 12950aadb33SDmitri Tikhonov 13050aadb33SDmitri Tikhonovstatic struct malo_page * 13150aadb33SDmitri Tikhonovallocate_page (struct malo *malo) 13250aadb33SDmitri Tikhonov{ 13350aadb33SDmitri Tikhonov struct malo_page *page; 13450aadb33SDmitri Tikhonov if (0 != posix_memalign((void **) &page, 0x1000, 0x1000)) 13550aadb33SDmitri Tikhonov return NULL; 13650aadb33SDmitri Tikhonov SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 13750aadb33SDmitri Tikhonov LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 13850aadb33SDmitri Tikhonov page->slots = 1; 13950aadb33SDmitri Tikhonov page->full_slot_mask = malo->page_header.full_slot_mask; 14050aadb33SDmitri Tikhonov page->nbits = malo->page_header.nbits; 14150aadb33SDmitri Tikhonov page->malo = malo; 14250aadb33SDmitri Tikhonov page->initial_slot = 1; 14350aadb33SDmitri Tikhonov return page; 14450aadb33SDmitri Tikhonov} 14550aadb33SDmitri Tikhonov 14650aadb33SDmitri Tikhonov 14750aadb33SDmitri Tikhonov#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0) 14850aadb33SDmitri Tikhonov 14950aadb33SDmitri Tikhonov/* Get a new object. */ 15050aadb33SDmitri Tikhonovvoid * 15150aadb33SDmitri Tikhonovlsquic_malo_get (struct malo *malo) 15250aadb33SDmitri Tikhonov{ 15350aadb33SDmitri Tikhonov fiu_do_on("malo/get", FAIL_NOMEM); 15450aadb33SDmitri Tikhonov struct malo_page *page = LIST_FIRST(&malo->free_pages); 15550aadb33SDmitri Tikhonov if (!page) 15650aadb33SDmitri Tikhonov { 15750aadb33SDmitri Tikhonov page = allocate_page(malo); 15850aadb33SDmitri Tikhonov if (!page) 15950aadb33SDmitri Tikhonov return NULL; 16050aadb33SDmitri Tikhonov } 16150aadb33SDmitri Tikhonov unsigned slot = find_free_slot(page->slots); 16250aadb33SDmitri Tikhonov page->slots |= (1ULL << slot); 16350aadb33SDmitri Tikhonov if (page->full_slot_mask == page->slots) 16450aadb33SDmitri Tikhonov LIST_REMOVE(page, next_free_page); 16550aadb33SDmitri Tikhonov return (char *) page + (slot << page->nbits); 16650aadb33SDmitri Tikhonov} 16750aadb33SDmitri Tikhonov 16850aadb33SDmitri Tikhonov 16950aadb33SDmitri Tikhonov/* Return obj to the pool */ 17050aadb33SDmitri Tikhonovvoid 17150aadb33SDmitri Tikhonovlsquic_malo_put (void *obj) 17250aadb33SDmitri Tikhonov{ 17350aadb33SDmitri Tikhonov uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1); 17450aadb33SDmitri Tikhonov struct malo_page *page = (void *) page_addr; 17550aadb33SDmitri Tikhonov unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits; 17650aadb33SDmitri Tikhonov if (page->full_slot_mask == page->slots) 17750aadb33SDmitri Tikhonov LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page); 17850aadb33SDmitri Tikhonov page->slots &= ~(1ULL << slot); 17950aadb33SDmitri Tikhonov} 18050aadb33SDmitri Tikhonov 18150aadb33SDmitri Tikhonov 18250aadb33SDmitri Tikhonovvoid 18350aadb33SDmitri Tikhonovlsquic_malo_destroy (struct malo *malo) 18450aadb33SDmitri Tikhonov{ 18550aadb33SDmitri Tikhonov struct malo_page *page, *next; 18650aadb33SDmitri Tikhonov page = SLIST_FIRST(&malo->all_pages); 18750aadb33SDmitri Tikhonov while (page != &malo->page_header) 18850aadb33SDmitri Tikhonov { 18950aadb33SDmitri Tikhonov next = SLIST_NEXT(page, next_page); 19050aadb33SDmitri Tikhonov free(page); 19150aadb33SDmitri Tikhonov page = next; 19250aadb33SDmitri Tikhonov } 19350aadb33SDmitri Tikhonov free(page); 19450aadb33SDmitri Tikhonov} 19550aadb33SDmitri Tikhonov 19650aadb33SDmitri Tikhonov 19750aadb33SDmitri Tikhonov/* The iterator is built-in. Usage: 19850aadb33SDmitri Tikhonov * void *obj; 19950aadb33SDmitri Tikhonov * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo)) 20050aadb33SDmitri Tikhonov * do_stuff(obj); 20150aadb33SDmitri Tikhonov */ 20250aadb33SDmitri Tikhonovvoid * 20350aadb33SDmitri Tikhonovlsquic_malo_first (struct malo *malo) 20450aadb33SDmitri Tikhonov{ 20550aadb33SDmitri Tikhonov malo->iter.cur_page = SLIST_FIRST(&malo->all_pages); 20650aadb33SDmitri Tikhonov malo->iter.next_slot = malo->iter.cur_page->initial_slot; 20750aadb33SDmitri Tikhonov return lsquic_malo_next(malo); 20850aadb33SDmitri Tikhonov} 20950aadb33SDmitri Tikhonov 21050aadb33SDmitri Tikhonov 21150aadb33SDmitri Tikhonovvoid * 21250aadb33SDmitri Tikhonovlsquic_malo_next (struct malo *malo) 21350aadb33SDmitri Tikhonov{ 21450aadb33SDmitri Tikhonov struct malo_page *page; 21550aadb33SDmitri Tikhonov unsigned max_slot, slot; 21650aadb33SDmitri Tikhonov 21750aadb33SDmitri Tikhonov page = malo->iter.cur_page; 21850aadb33SDmitri Tikhonov if (page) 21950aadb33SDmitri Tikhonov { 22050aadb33SDmitri Tikhonov max_slot = 1 << (12 - page->nbits); /* Same for all pages */ 22150aadb33SDmitri Tikhonov slot = malo->iter.next_slot; 22250aadb33SDmitri Tikhonov while (1) 22350aadb33SDmitri Tikhonov { 22450aadb33SDmitri Tikhonov for (; slot < max_slot; ++slot) 22550aadb33SDmitri Tikhonov { 22650aadb33SDmitri Tikhonov if (page->slots & (1ULL << slot)) 22750aadb33SDmitri Tikhonov { 22850aadb33SDmitri Tikhonov malo->iter.cur_page = page; 22950aadb33SDmitri Tikhonov malo->iter.next_slot = slot + 1; 23050aadb33SDmitri Tikhonov return (char *) page + (slot << page->nbits); 23150aadb33SDmitri Tikhonov } 23250aadb33SDmitri Tikhonov } 23350aadb33SDmitri Tikhonov page = SLIST_NEXT(page, next_page); 23450aadb33SDmitri Tikhonov if (page) 23550aadb33SDmitri Tikhonov slot = page->initial_slot; 23650aadb33SDmitri Tikhonov else 23750aadb33SDmitri Tikhonov { 23850aadb33SDmitri Tikhonov malo->iter.cur_page = NULL; /* Stop iterator */ 23950aadb33SDmitri Tikhonov return NULL; 24050aadb33SDmitri Tikhonov } 24150aadb33SDmitri Tikhonov } 24250aadb33SDmitri Tikhonov } 24350aadb33SDmitri Tikhonov 24450aadb33SDmitri Tikhonov return NULL; 24550aadb33SDmitri Tikhonov} 24650aadb33SDmitri Tikhonov 24750aadb33SDmitri Tikhonov 24850aadb33SDmitri Tikhonovstatic unsigned 24950aadb33SDmitri Tikhonovsize_in_bits (size_t sz) 25050aadb33SDmitri Tikhonov{ 25150aadb33SDmitri Tikhonov#if __GNUC__ 25250aadb33SDmitri Tikhonov unsigned clz = __builtin_clz(sz - 1); 25350aadb33SDmitri Tikhonov return 32 - clz; 25450aadb33SDmitri Tikhonov#else 25550aadb33SDmitri Tikhonov unsigned clz; 25650aadb33SDmitri Tikhonov size_t y; 25750aadb33SDmitri Tikhonov 25850aadb33SDmitri Tikhonov --sz; 25950aadb33SDmitri Tikhonov clz = 32; 26050aadb33SDmitri Tikhonov y = sz >> 16; if (y) { clz -= 16; sz = y; } 26150aadb33SDmitri Tikhonov y = sz >> 8; if (y) { clz -= 8; sz = y; } 26250aadb33SDmitri Tikhonov y = sz >> 4; if (y) { clz -= 4; sz = y; } 26350aadb33SDmitri Tikhonov y = sz >> 2; if (y) { clz -= 2; sz = y; } 26450aadb33SDmitri Tikhonov y = sz >> 1; if (y) return 32 - clz + 1; 26550aadb33SDmitri Tikhonov return 32 - clz + sz; 26650aadb33SDmitri Tikhonov#endif 26750aadb33SDmitri Tikhonov} 26850aadb33SDmitri Tikhonov 26950aadb33SDmitri Tikhonov 27050aadb33SDmitri Tikhonovstatic unsigned 27150aadb33SDmitri Tikhonovfind_free_slot (uint64_t slots) 27250aadb33SDmitri Tikhonov{ 27350aadb33SDmitri Tikhonov#if __GNUC__ 27450aadb33SDmitri Tikhonov return __builtin_ffsll(~slots) - 1; 27550aadb33SDmitri Tikhonov#else 27650aadb33SDmitri Tikhonov unsigned n; 27750aadb33SDmitri Tikhonov 27850aadb33SDmitri Tikhonov slots =~ slots; 27950aadb33SDmitri Tikhonov n = 0; 28050aadb33SDmitri Tikhonov 28150aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; } 28250aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; } 28350aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 8) - 1))) { n += 8; slots >>= 8; } 28450aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 4) - 1))) { n += 4; slots >>= 4; } 28550aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 2) - 1))) { n += 2; slots >>= 2; } 28650aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 1) - 1))) { n += 1; slots >>= 1; } 28750aadb33SDmitri Tikhonov return n; 28850aadb33SDmitri Tikhonov#endif 28950aadb33SDmitri Tikhonov} 290c51ce338SDmitri Tikhonov 291c51ce338SDmitri Tikhonov 292c51ce338SDmitri Tikhonovsize_t 293c51ce338SDmitri Tikhonovlsquic_malo_mem_used (const struct malo *malo) 294c51ce338SDmitri Tikhonov{ 295c51ce338SDmitri Tikhonov const struct malo_page *page; 296c51ce338SDmitri Tikhonov size_t size; 297c51ce338SDmitri Tikhonov 298c51ce338SDmitri Tikhonov size = 0; 299c51ce338SDmitri Tikhonov SLIST_FOREACH(page, &malo->all_pages, next_page) 300c51ce338SDmitri Tikhonov size += sizeof(*page); 301c51ce338SDmitri Tikhonov 302c51ce338SDmitri Tikhonov return size; 303c51ce338SDmitri Tikhonov} 304