lsquic_malo.c revision 229fce07
1229fce07SDmitri Tikhonov/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc. See LICENSE. */ 250aadb33SDmitri Tikhonov/* 350aadb33SDmitri Tikhonov * lsquic_malo.c -- malo allocator implementation. 450aadb33SDmitri Tikhonov * 550aadb33SDmitri Tikhonov * The malo allocator is a pool of objects of fixed size. It tries to 650aadb33SDmitri Tikhonov * allocate and deallocate objects as fast as possible. To do so, it 750aadb33SDmitri Tikhonov * does the following: 850aadb33SDmitri Tikhonov * 950aadb33SDmitri Tikhonov * 1. Allocations occur 4 KB at a time. 1050aadb33SDmitri Tikhonov * 2. No division or multiplication operations are performed. 1150aadb33SDmitri Tikhonov * 1250aadb33SDmitri Tikhonov * (In recent testing, malo was about 2.7 times faster than malloc for 1350aadb33SDmitri Tikhonov * 64-byte objects.) 1450aadb33SDmitri Tikhonov * 1550aadb33SDmitri Tikhonov * Besides speed, two other important characteristics distinguish it 1650aadb33SDmitri Tikhonov * from other pool allocators: 1750aadb33SDmitri Tikhonov * 1850aadb33SDmitri Tikhonov * 1. To free (put) an object, one does not need a pointer to the malo 1950aadb33SDmitri Tikhonov * object. This makes this allocator easy to use. 2050aadb33SDmitri Tikhonov * 2. A built-in iterator is provided to iterate over all allocated 2150aadb33SDmitri Tikhonov * objects (with ability to safely release objects while iterator 2250aadb33SDmitri Tikhonov * is active). This may be useful in some circumstances. 2350aadb33SDmitri Tikhonov * 2450aadb33SDmitri Tikhonov * To gain all these advantages, there are trade-offs: 2550aadb33SDmitri Tikhonov * 2650aadb33SDmitri Tikhonov * 1. There are two memory penalties: 2750aadb33SDmitri Tikhonov * a. Per object overhead. To avoid division and multiplication, 2850aadb33SDmitri Tikhonov * the object sizes is rounded up to the nearest power or two, 2950aadb33SDmitri Tikhonov * starting with 64 bytes (minumum) and up to 2 KB (maximum). 3050aadb33SDmitri Tikhonov * Thus, a 104-byte object will have a 24-byte overhead; a 3150aadb33SDmitri Tikhonov * 130-byte object will have 126-byte overhead. This is 3250aadb33SDmitri Tikhonov * something to keep in mind. 3350aadb33SDmitri Tikhonov * b. Per page overhead. Page links occupy some bytes in the 3450aadb33SDmitri Tikhonov * page. To keep things fast, at least one slot per page is 3550aadb33SDmitri Tikhonov * always occupied, independent of object size. Thus, for a 3650aadb33SDmitri Tikhonov * 1 KB object size, 25% of the page is used for the page 3750aadb33SDmitri Tikhonov * header. 3850aadb33SDmitri Tikhonov * 2. 4 KB pages are not freed until the malo allocator is destroyed. 3950aadb33SDmitri Tikhonov * This is something to keep in mind. 4050aadb33SDmitri Tikhonov * 4150aadb33SDmitri Tikhonov * P.S. In Russian, "malo" (��������) means "little" or "few". Thus, the 4250aadb33SDmitri Tikhonov * malo allocator aims to perform its job in as few CPU cycles as 4350aadb33SDmitri Tikhonov * possible. 4450aadb33SDmitri Tikhonov */ 4550aadb33SDmitri Tikhonov 4650aadb33SDmitri Tikhonov#include <assert.h> 4750aadb33SDmitri Tikhonov#include <errno.h> 4850aadb33SDmitri Tikhonov#include <stdint.h> 4950aadb33SDmitri Tikhonov#include <stdlib.h> 5050aadb33SDmitri Tikhonov#include <sys/queue.h> 51461e84d8SAmol Deshpande#ifdef WIN32 52461e84d8SAmol Deshpande#include <vc_compat.h> 53461e84d8SAmol Deshpande#endif 5450aadb33SDmitri Tikhonov 5550aadb33SDmitri Tikhonov#include "fiu-local.h" 5650aadb33SDmitri Tikhonov#include "lsquic_malo.h" 5750aadb33SDmitri Tikhonov 5850aadb33SDmitri Tikhonov/* 64 slots in a 4KB page means that the smallest object is 64 bytes. 5950aadb33SDmitri Tikhonov * The largest object is 2KB. 6050aadb33SDmitri Tikhonov */ 6150aadb33SDmitri Tikhonov#define MALO_MIN_NBITS 6 6250aadb33SDmitri Tikhonov#define MALO_MAX_NBITS 11 6350aadb33SDmitri Tikhonov 6450aadb33SDmitri Tikhonov/* A "free page" is a page with free slots available. 6550aadb33SDmitri Tikhonov */ 6650aadb33SDmitri Tikhonov 6750aadb33SDmitri Tikhonovstatic unsigned find_free_slot (uint64_t slots); 6850aadb33SDmitri Tikhonovstatic unsigned size_in_bits (size_t sz); 6950aadb33SDmitri Tikhonov 7050aadb33SDmitri Tikhonovstruct malo_page { 7150aadb33SDmitri Tikhonov SLIST_ENTRY(malo_page) next_page; 7250aadb33SDmitri Tikhonov LIST_ENTRY(malo_page) next_free_page; 7350aadb33SDmitri Tikhonov struct malo *malo; 7450aadb33SDmitri Tikhonov uint64_t slots, 7550aadb33SDmitri Tikhonov full_slot_mask; 7650aadb33SDmitri Tikhonov unsigned nbits; 7750aadb33SDmitri Tikhonov unsigned initial_slot; 7850aadb33SDmitri Tikhonov}; 7950aadb33SDmitri Tikhonov 8050aadb33SDmitri Tikhonovtypedef char malo_header_fits_in_one_slot 81461e84d8SAmol Deshpande [(sizeof(struct malo_page) > (1 << MALO_MAX_NBITS)) ? -1 : 1]; 8250aadb33SDmitri Tikhonov 8350aadb33SDmitri Tikhonovstruct malo { 8450aadb33SDmitri Tikhonov struct malo_page page_header; 8550aadb33SDmitri Tikhonov SLIST_HEAD(, malo_page) all_pages; 8650aadb33SDmitri Tikhonov LIST_HEAD(, malo_page) free_pages; 8750aadb33SDmitri Tikhonov struct { 8850aadb33SDmitri Tikhonov struct malo_page *cur_page; 8950aadb33SDmitri Tikhonov unsigned next_slot; 9050aadb33SDmitri Tikhonov } iter; 9150aadb33SDmitri Tikhonov}; 9250aadb33SDmitri Tikhonov 9350aadb33SDmitri Tikhonovstruct malo * 9450aadb33SDmitri Tikhonovlsquic_malo_create (size_t obj_size) 9550aadb33SDmitri Tikhonov{ 9650aadb33SDmitri Tikhonov unsigned nbits = size_in_bits(obj_size); 9750aadb33SDmitri Tikhonov if (nbits < MALO_MIN_NBITS) 9850aadb33SDmitri Tikhonov nbits = MALO_MIN_NBITS; 9950aadb33SDmitri Tikhonov else if (nbits > MALO_MAX_NBITS) 10050aadb33SDmitri Tikhonov { 10150aadb33SDmitri Tikhonov errno = EOVERFLOW; 10250aadb33SDmitri Tikhonov return NULL; 10350aadb33SDmitri Tikhonov } 10450aadb33SDmitri Tikhonov 10550aadb33SDmitri Tikhonov struct malo *malo; 10650aadb33SDmitri Tikhonov if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000)) 10750aadb33SDmitri Tikhonov return NULL; 10850aadb33SDmitri Tikhonov 10950aadb33SDmitri Tikhonov SLIST_INIT(&malo->all_pages); 11050aadb33SDmitri Tikhonov LIST_INIT(&malo->free_pages); 11150aadb33SDmitri Tikhonov malo->iter.cur_page = &malo->page_header; 11250aadb33SDmitri Tikhonov malo->iter.next_slot = 0; 11350aadb33SDmitri Tikhonov 11450aadb33SDmitri Tikhonov int n_slots = sizeof(*malo) / (1 << nbits) 11550aadb33SDmitri Tikhonov + ((sizeof(*malo) % (1 << nbits)) > 0); 11650aadb33SDmitri Tikhonov 11750aadb33SDmitri Tikhonov struct malo_page *const page = &malo->page_header; 11850aadb33SDmitri Tikhonov SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 11950aadb33SDmitri Tikhonov LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 12050aadb33SDmitri Tikhonov page->malo = malo; 12150aadb33SDmitri Tikhonov if (nbits == MALO_MIN_NBITS) 12250aadb33SDmitri Tikhonov page->full_slot_mask = ~0ULL; 12350aadb33SDmitri Tikhonov else 12450aadb33SDmitri Tikhonov page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1; 12550aadb33SDmitri Tikhonov page->slots = (1ULL << n_slots) - 1; 12650aadb33SDmitri Tikhonov page->nbits = nbits; 12750aadb33SDmitri Tikhonov page->initial_slot = n_slots; 12850aadb33SDmitri Tikhonov 12950aadb33SDmitri Tikhonov return malo; 13050aadb33SDmitri Tikhonov} 13150aadb33SDmitri Tikhonov 13250aadb33SDmitri Tikhonov 13350aadb33SDmitri Tikhonovstatic struct malo_page * 13450aadb33SDmitri Tikhonovallocate_page (struct malo *malo) 13550aadb33SDmitri Tikhonov{ 13650aadb33SDmitri Tikhonov struct malo_page *page; 13750aadb33SDmitri Tikhonov if (0 != posix_memalign((void **) &page, 0x1000, 0x1000)) 13850aadb33SDmitri Tikhonov return NULL; 13950aadb33SDmitri Tikhonov SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 14050aadb33SDmitri Tikhonov LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 14150aadb33SDmitri Tikhonov page->slots = 1; 14250aadb33SDmitri Tikhonov page->full_slot_mask = malo->page_header.full_slot_mask; 14350aadb33SDmitri Tikhonov page->nbits = malo->page_header.nbits; 14450aadb33SDmitri Tikhonov page->malo = malo; 14550aadb33SDmitri Tikhonov page->initial_slot = 1; 14650aadb33SDmitri Tikhonov return page; 14750aadb33SDmitri Tikhonov} 14850aadb33SDmitri Tikhonov 14950aadb33SDmitri Tikhonov 15050aadb33SDmitri Tikhonov#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0) 15150aadb33SDmitri Tikhonov 15250aadb33SDmitri Tikhonov/* Get a new object. */ 15350aadb33SDmitri Tikhonovvoid * 15450aadb33SDmitri Tikhonovlsquic_malo_get (struct malo *malo) 15550aadb33SDmitri Tikhonov{ 15650aadb33SDmitri Tikhonov fiu_do_on("malo/get", FAIL_NOMEM); 15750aadb33SDmitri Tikhonov struct malo_page *page = LIST_FIRST(&malo->free_pages); 15850aadb33SDmitri Tikhonov if (!page) 15950aadb33SDmitri Tikhonov { 16050aadb33SDmitri Tikhonov page = allocate_page(malo); 16150aadb33SDmitri Tikhonov if (!page) 16250aadb33SDmitri Tikhonov return NULL; 16350aadb33SDmitri Tikhonov } 16450aadb33SDmitri Tikhonov unsigned slot = find_free_slot(page->slots); 16550aadb33SDmitri Tikhonov page->slots |= (1ULL << slot); 16650aadb33SDmitri Tikhonov if (page->full_slot_mask == page->slots) 16750aadb33SDmitri Tikhonov LIST_REMOVE(page, next_free_page); 16850aadb33SDmitri Tikhonov return (char *) page + (slot << page->nbits); 16950aadb33SDmitri Tikhonov} 17050aadb33SDmitri Tikhonov 17150aadb33SDmitri Tikhonov 17250aadb33SDmitri Tikhonov/* Return obj to the pool */ 17350aadb33SDmitri Tikhonovvoid 17450aadb33SDmitri Tikhonovlsquic_malo_put (void *obj) 17550aadb33SDmitri Tikhonov{ 17650aadb33SDmitri Tikhonov uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1); 17750aadb33SDmitri Tikhonov struct malo_page *page = (void *) page_addr; 17850aadb33SDmitri Tikhonov unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits; 17950aadb33SDmitri Tikhonov if (page->full_slot_mask == page->slots) 18050aadb33SDmitri Tikhonov LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page); 18150aadb33SDmitri Tikhonov page->slots &= ~(1ULL << slot); 18250aadb33SDmitri Tikhonov} 18350aadb33SDmitri Tikhonov 18450aadb33SDmitri Tikhonov 18550aadb33SDmitri Tikhonovvoid 18650aadb33SDmitri Tikhonovlsquic_malo_destroy (struct malo *malo) 18750aadb33SDmitri Tikhonov{ 18850aadb33SDmitri Tikhonov struct malo_page *page, *next; 18950aadb33SDmitri Tikhonov page = SLIST_FIRST(&malo->all_pages); 19050aadb33SDmitri Tikhonov while (page != &malo->page_header) 19150aadb33SDmitri Tikhonov { 19250aadb33SDmitri Tikhonov next = SLIST_NEXT(page, next_page); 193461e84d8SAmol Deshpande#ifndef WIN32 19450aadb33SDmitri Tikhonov free(page); 195461e84d8SAmol Deshpande#else 196461e84d8SAmol Deshpande _aligned_free(page); 197461e84d8SAmol Deshpande#endif 19850aadb33SDmitri Tikhonov page = next; 19950aadb33SDmitri Tikhonov } 200461e84d8SAmol Deshpande#ifndef WIN32 20150aadb33SDmitri Tikhonov free(page); 202461e84d8SAmol Deshpande#else 203461e84d8SAmol Deshpande _aligned_free(page); 204461e84d8SAmol Deshpande#endif 20550aadb33SDmitri Tikhonov} 20650aadb33SDmitri Tikhonov 20750aadb33SDmitri Tikhonov 20850aadb33SDmitri Tikhonov/* The iterator is built-in. Usage: 20950aadb33SDmitri Tikhonov * void *obj; 21050aadb33SDmitri Tikhonov * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo)) 21150aadb33SDmitri Tikhonov * do_stuff(obj); 21250aadb33SDmitri Tikhonov */ 21350aadb33SDmitri Tikhonovvoid * 21450aadb33SDmitri Tikhonovlsquic_malo_first (struct malo *malo) 21550aadb33SDmitri Tikhonov{ 21650aadb33SDmitri Tikhonov malo->iter.cur_page = SLIST_FIRST(&malo->all_pages); 21750aadb33SDmitri Tikhonov malo->iter.next_slot = malo->iter.cur_page->initial_slot; 21850aadb33SDmitri Tikhonov return lsquic_malo_next(malo); 21950aadb33SDmitri Tikhonov} 22050aadb33SDmitri Tikhonov 22150aadb33SDmitri Tikhonov 22250aadb33SDmitri Tikhonovvoid * 22350aadb33SDmitri Tikhonovlsquic_malo_next (struct malo *malo) 22450aadb33SDmitri Tikhonov{ 22550aadb33SDmitri Tikhonov struct malo_page *page; 22650aadb33SDmitri Tikhonov unsigned max_slot, slot; 22750aadb33SDmitri Tikhonov 22850aadb33SDmitri Tikhonov page = malo->iter.cur_page; 22950aadb33SDmitri Tikhonov if (page) 23050aadb33SDmitri Tikhonov { 23150aadb33SDmitri Tikhonov max_slot = 1 << (12 - page->nbits); /* Same for all pages */ 23250aadb33SDmitri Tikhonov slot = malo->iter.next_slot; 23350aadb33SDmitri Tikhonov while (1) 23450aadb33SDmitri Tikhonov { 23550aadb33SDmitri Tikhonov for (; slot < max_slot; ++slot) 23650aadb33SDmitri Tikhonov { 23750aadb33SDmitri Tikhonov if (page->slots & (1ULL << slot)) 23850aadb33SDmitri Tikhonov { 23950aadb33SDmitri Tikhonov malo->iter.cur_page = page; 24050aadb33SDmitri Tikhonov malo->iter.next_slot = slot + 1; 24150aadb33SDmitri Tikhonov return (char *) page + (slot << page->nbits); 24250aadb33SDmitri Tikhonov } 24350aadb33SDmitri Tikhonov } 24450aadb33SDmitri Tikhonov page = SLIST_NEXT(page, next_page); 24550aadb33SDmitri Tikhonov if (page) 24650aadb33SDmitri Tikhonov slot = page->initial_slot; 24750aadb33SDmitri Tikhonov else 24850aadb33SDmitri Tikhonov { 24950aadb33SDmitri Tikhonov malo->iter.cur_page = NULL; /* Stop iterator */ 25050aadb33SDmitri Tikhonov return NULL; 25150aadb33SDmitri Tikhonov } 25250aadb33SDmitri Tikhonov } 25350aadb33SDmitri Tikhonov } 25450aadb33SDmitri Tikhonov 25550aadb33SDmitri Tikhonov return NULL; 25650aadb33SDmitri Tikhonov} 25750aadb33SDmitri Tikhonov 25850aadb33SDmitri Tikhonov 25950aadb33SDmitri Tikhonovstatic unsigned 26050aadb33SDmitri Tikhonovsize_in_bits (size_t sz) 26150aadb33SDmitri Tikhonov{ 26250aadb33SDmitri Tikhonov#if __GNUC__ 26350aadb33SDmitri Tikhonov unsigned clz = __builtin_clz(sz - 1); 26450aadb33SDmitri Tikhonov return 32 - clz; 26550aadb33SDmitri Tikhonov#else 26650aadb33SDmitri Tikhonov unsigned clz; 26750aadb33SDmitri Tikhonov size_t y; 26850aadb33SDmitri Tikhonov 26950aadb33SDmitri Tikhonov --sz; 27050aadb33SDmitri Tikhonov clz = 32; 27150aadb33SDmitri Tikhonov y = sz >> 16; if (y) { clz -= 16; sz = y; } 27250aadb33SDmitri Tikhonov y = sz >> 8; if (y) { clz -= 8; sz = y; } 27350aadb33SDmitri Tikhonov y = sz >> 4; if (y) { clz -= 4; sz = y; } 27450aadb33SDmitri Tikhonov y = sz >> 2; if (y) { clz -= 2; sz = y; } 27550aadb33SDmitri Tikhonov y = sz >> 1; if (y) return 32 - clz + 1; 27650aadb33SDmitri Tikhonov return 32 - clz + sz; 27750aadb33SDmitri Tikhonov#endif 27850aadb33SDmitri Tikhonov} 27950aadb33SDmitri Tikhonov 28050aadb33SDmitri Tikhonov 28150aadb33SDmitri Tikhonovstatic unsigned 28250aadb33SDmitri Tikhonovfind_free_slot (uint64_t slots) 28350aadb33SDmitri Tikhonov{ 28450aadb33SDmitri Tikhonov#if __GNUC__ 28550aadb33SDmitri Tikhonov return __builtin_ffsll(~slots) - 1; 28650aadb33SDmitri Tikhonov#else 28750aadb33SDmitri Tikhonov unsigned n; 28850aadb33SDmitri Tikhonov 28950aadb33SDmitri Tikhonov slots =~ slots; 29050aadb33SDmitri Tikhonov n = 0; 29150aadb33SDmitri Tikhonov 29250aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; } 29350aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; } 29450aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 8) - 1))) { n += 8; slots >>= 8; } 29550aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 4) - 1))) { n += 4; slots >>= 4; } 29650aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 2) - 1))) { n += 2; slots >>= 2; } 29750aadb33SDmitri Tikhonov if (0 == (slots & ((1ULL << 1) - 1))) { n += 1; slots >>= 1; } 29850aadb33SDmitri Tikhonov return n; 29950aadb33SDmitri Tikhonov#endif 30050aadb33SDmitri Tikhonov} 301c51ce338SDmitri Tikhonov 302c51ce338SDmitri Tikhonov 303c51ce338SDmitri Tikhonovsize_t 304c51ce338SDmitri Tikhonovlsquic_malo_mem_used (const struct malo *malo) 305c51ce338SDmitri Tikhonov{ 306c51ce338SDmitri Tikhonov const struct malo_page *page; 307c51ce338SDmitri Tikhonov size_t size; 308c51ce338SDmitri Tikhonov 309c51ce338SDmitri Tikhonov size = 0; 310c51ce338SDmitri Tikhonov SLIST_FOREACH(page, &malo->all_pages, next_page) 311c51ce338SDmitri Tikhonov size += sizeof(*page); 312c51ce338SDmitri Tikhonov 313c51ce338SDmitri Tikhonov return size; 314c51ce338SDmitri Tikhonov} 315