lsquic_malo.c revision 2d296031
1/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_malo.c -- malo allocator implementation. 4 * 5 * The malo allocator is a pool of objects of fixed size. It tries to 6 * allocate and deallocate objects as fast as possible. To do so, it 7 * does the following: 8 * 9 * 1. Allocations occur 4 KB at a time. 10 * 2. No division or multiplication operations are performed. 11 * 12 * (In recent testing, malo was about 2.7 times faster than malloc for 13 * 64-byte objects.) 14 * 15 * Besides speed, two other important characteristics distinguish it 16 * from other pool allocators: 17 * 18 * 1. To free (put) an object, one does not need a pointer to the malo 19 * object. This makes this allocator easy to use. 20 * 2. A built-in iterator is provided to iterate over all allocated 21 * objects (with ability to safely release objects while iterator 22 * is active). This may be useful in some circumstances. 23 * 24 * To gain all these advantages, there are trade-offs: 25 * 26 * 1. There are two memory penalties: 27 * a. Per object overhead. To avoid division and multiplication, 28 * the object sizes is rounded up to the nearest power or two, 29 * starting with 64 bytes (minumum) and up to 2 KB (maximum). 30 * Thus, a 104-byte object will have a 24-byte overhead; a 31 * 130-byte object will have 126-byte overhead. This is 32 * something to keep in mind. 33 * b. Per page overhead. Page links occupy some bytes in the 34 * page. To keep things fast, at least one slot per page is 35 * always occupied, independent of object size. Thus, for a 36 * 1 KB object size, 25% of the page is used for the page 37 * header. 38 * 2. 4 KB pages are not freed until the malo allocator is destroyed. 39 * This is something to keep in mind. 40 * 41 * P.S. In Russian, "malo" (��������) means "little" or "few". Thus, the 42 * malo allocator aims to perform its job in as few CPU cycles as 43 * possible. 44 */ 45 46#include <assert.h> 47#include <errno.h> 48#include <stdint.h> 49#include <stdlib.h> 50#include <sys/queue.h> 51#ifdef WIN32 52#include <vc_compat.h> 53#include <intrin.h> 54#endif 55 56#include "fiu-local.h" 57#include "lsquic_malo.h" 58 59/* 64 slots in a 4KB page means that the smallest object is 64 bytes. 60 * The largest object is 2KB. 61 */ 62#define MALO_MIN_NBITS 6 63#define MALO_MAX_NBITS 11 64 65/* A "free page" is a page with free slots available. 66 */ 67 68static unsigned find_free_slot (uint64_t slots); 69static unsigned size_in_bits (size_t sz); 70 71struct malo_page { 72 SLIST_ENTRY(malo_page) next_page; 73 LIST_ENTRY(malo_page) next_free_page; 74 struct malo *malo; 75 uint64_t slots, 76 full_slot_mask; 77 unsigned nbits; 78 unsigned initial_slot; 79}; 80 81typedef char malo_header_fits_in_one_slot 82 [(sizeof(struct malo_page) > (1 << MALO_MAX_NBITS)) ? -1 : 1]; 83 84struct malo { 85 struct malo_page page_header; 86 SLIST_HEAD(, malo_page) all_pages; 87 LIST_HEAD(, malo_page) free_pages; 88 struct { 89 struct malo_page *cur_page; 90 unsigned next_slot; 91 } iter; 92}; 93 94struct malo * 95lsquic_malo_create (size_t obj_size) 96{ 97 unsigned nbits = size_in_bits(obj_size); 98 if (nbits < MALO_MIN_NBITS) 99 nbits = MALO_MIN_NBITS; 100 else if (nbits > MALO_MAX_NBITS) 101 { 102 errno = EOVERFLOW; 103 return NULL; 104 } 105 106 struct malo *malo; 107 if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000)) 108 return NULL; 109 110 SLIST_INIT(&malo->all_pages); 111 LIST_INIT(&malo->free_pages); 112 malo->iter.cur_page = &malo->page_header; 113 malo->iter.next_slot = 0; 114 115 int n_slots = sizeof(*malo) / (1 << nbits) 116 + ((sizeof(*malo) % (1 << nbits)) > 0); 117 118 struct malo_page *const page = &malo->page_header; 119 SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 120 LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 121 page->malo = malo; 122 if (nbits == MALO_MIN_NBITS) 123 page->full_slot_mask = ~0ULL; 124 else 125 page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1; 126 page->slots = (1ULL << n_slots) - 1; 127 page->nbits = nbits; 128 page->initial_slot = n_slots; 129 130 return malo; 131} 132 133 134static struct malo_page * 135allocate_page (struct malo *malo) 136{ 137 struct malo_page *page; 138 if (0 != posix_memalign((void **) &page, 0x1000, 0x1000)) 139 return NULL; 140 SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 141 LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 142 page->slots = 1; 143 page->full_slot_mask = malo->page_header.full_slot_mask; 144 page->nbits = malo->page_header.nbits; 145 page->malo = malo; 146 page->initial_slot = 1; 147 return page; 148} 149 150 151#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0) 152 153/* Get a new object. */ 154void * 155lsquic_malo_get (struct malo *malo) 156{ 157 fiu_do_on("malo/get", FAIL_NOMEM); 158 struct malo_page *page = LIST_FIRST(&malo->free_pages); 159 if (!page) 160 { 161 page = allocate_page(malo); 162 if (!page) 163 return NULL; 164 } 165 unsigned slot = find_free_slot(page->slots); 166 page->slots |= (1ULL << slot); 167 if (page->full_slot_mask == page->slots) 168 LIST_REMOVE(page, next_free_page); 169 return (char *) page + (slot << page->nbits); 170} 171 172 173/* Return obj to the pool */ 174void 175lsquic_malo_put (void *obj) 176{ 177 uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1); 178 struct malo_page *page = (void *) page_addr; 179 unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits; 180 if (page->full_slot_mask == page->slots) 181 LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page); 182 page->slots &= ~(1ULL << slot); 183} 184 185 186void 187lsquic_malo_destroy (struct malo *malo) 188{ 189 struct malo_page *page, *next; 190 page = SLIST_FIRST(&malo->all_pages); 191 while (page != &malo->page_header) 192 { 193 next = SLIST_NEXT(page, next_page); 194#ifndef WIN32 195 free(page); 196#else 197 _aligned_free(page); 198#endif 199 page = next; 200 } 201#ifndef WIN32 202 free(page); 203#else 204 _aligned_free(page); 205#endif 206} 207 208 209/* The iterator is built-in. Usage: 210 * void *obj; 211 * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo)) 212 * do_stuff(obj); 213 */ 214void * 215lsquic_malo_first (struct malo *malo) 216{ 217 malo->iter.cur_page = SLIST_FIRST(&malo->all_pages); 218 malo->iter.next_slot = malo->iter.cur_page->initial_slot; 219 return lsquic_malo_next(malo); 220} 221 222 223void * 224lsquic_malo_next (struct malo *malo) 225{ 226 struct malo_page *page; 227 unsigned max_slot, slot; 228 229 page = malo->iter.cur_page; 230 if (page) 231 { 232 max_slot = 1 << (12 - page->nbits); /* Same for all pages */ 233 slot = malo->iter.next_slot; 234 while (1) 235 { 236 for (; slot < max_slot; ++slot) 237 { 238 if (page->slots & (1ULL << slot)) 239 { 240 malo->iter.cur_page = page; 241 malo->iter.next_slot = slot + 1; 242 return (char *) page + (slot << page->nbits); 243 } 244 } 245 page = SLIST_NEXT(page, next_page); 246 if (page) 247 slot = page->initial_slot; 248 else 249 { 250 malo->iter.cur_page = NULL; /* Stop iterator */ 251 return NULL; 252 } 253 } 254 } 255 256 return NULL; 257} 258 259 260static unsigned 261size_in_bits (size_t sz) 262{ 263#if __GNUC__ 264 unsigned clz = __builtin_clz(sz - 1); 265 return 32 - clz; 266#elif defined(WIN32) 267 unsigned char s; 268 unsigned long idx; 269 s = _BitScanReverse(&idx, sz); 270 assert(s); 271 return (unsigned) idx + 1; 272#else 273#error This function contains a bug! 274 unsigned clz; 275 size_t y; 276 277 --sz; 278 clz = 32; 279 y = sz >> 16; if (y) { clz -= 16; sz = y; } 280 y = sz >> 8; if (y) { clz -= 8; sz = y; } 281 y = sz >> 4; if (y) { clz -= 4; sz = y; } 282 y = sz >> 2; if (y) { clz -= 2; sz = y; } 283 y = sz >> 1; if (y) return 32 - clz + 1; 284 return 32 - clz + sz; 285#endif 286} 287 288 289static unsigned 290find_free_slot (uint64_t slots) 291{ 292#if __GNUC__ 293 return __builtin_ffsll(~slots) - 1; 294#else 295 unsigned n; 296 297 slots =~ slots; 298 n = 0; 299 300 if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; } 301 if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; } 302 if (0 == (slots & ((1ULL << 8) - 1))) { n += 8; slots >>= 8; } 303 if (0 == (slots & ((1ULL << 4) - 1))) { n += 4; slots >>= 4; } 304 if (0 == (slots & ((1ULL << 2) - 1))) { n += 2; slots >>= 2; } 305 if (0 == (slots & ((1ULL << 1) - 1))) { n += 1; slots >>= 1; } 306 return n; 307#endif 308} 309 310 311size_t 312lsquic_malo_mem_used (const struct malo *malo) 313{ 314 const struct malo_page *page; 315 size_t size; 316 317 size = 0; 318 SLIST_FOREACH(page, &malo->all_pages, next_page) 319 size += sizeof(*page); 320 321 return size; 322} 323