lsquic_malo.c revision a74702c6
1/* Copyright (c) 2017 - 2022 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_malo.c -- malo allocator implementation. 4 * 5 * The malo allocator is a pool of objects of fixed size. It tries to 6 * allocate and deallocate objects as fast as possible. To do so, it 7 * does the following: 8 * 9 * 1. Allocations occur 4 KB at a time. 10 * 2. No division or multiplication operations are performed for 11 * appropriately sized objects. (More on this below.) 12 * 13 * (In recent testing, malo was about 2.7 times faster than malloc for 14 * 64-byte objects.) 15 * 16 * Besides speed, two other important characteristics distinguish it 17 * from other pool allocators: 18 * 19 * 1. To free (put) an object, one does not need a pointer to the malo 20 * object. This makes this allocator easy to use. 21 * 2. A built-in iterator is provided to iterate over all allocated 22 * objects (with ability safely to release objects while iterator 23 * is active). This may be useful in some circumstances. 24 * 25 * To gain all these advantages, there are trade-offs: 26 * 27 * 1. There are two memory penalties: 28 * a. Per object overhead. If an object is at least ROUNDUP_THRESH in 29 * size as the next power of two, the allocator uses that power of 30 * two value as the object size. This is done to avoid using 31 * division and multiplication. For example, a 104-byte object 32 * will have a 24-byte overhead. 33 * b. Per page overhead. Page links occupy some bytes in the 34 * page. To keep things fast, at least one slot per page is 35 * always occupied, independent of object size. Thus, for a 36 * 1 KB object size, 25% of the page is used for the page 37 * header. 38 * 2. 4 KB pages are not freed until the malo allocator is destroyed. 39 * This is something to keep in mind. 40 * 41 * P.S. In Russian, "malo" (��������) means "little" or "few". Thus, the 42 * malo allocator aims to perform its job in as few CPU cycles as 43 * possible. 44 */ 45 46#include <assert.h> 47#include <errno.h> 48#include <stdint.h> 49#include <stdlib.h> 50#include <sys/queue.h> 51#ifdef WIN32 52#include <vc_compat.h> 53#include <intrin.h> 54#endif 55 56#include "fiu-local.h" 57#include "lsquic_malo.h" 58 59#ifndef LSQUIC_USE_POOLS 60#define LSQUIC_USE_POOLS 1 61#endif 62 63/* 64 slots in a 4KB page means that the smallest object is 64 bytes. 64 * The largest object is 2KB. 65 */ 66#define MALO_MIN_NBITS 6 67#define MALO_MAX_NBITS 11 68 69#define ROUNDUP_THRESH 0.75f 70 71/* A "free page" is a page with free slots available. 72 */ 73 74#if LSQUIC_USE_POOLS 75static unsigned find_free_slot (uint64_t slots); 76static unsigned size_in_bits (size_t sz); 77#endif 78 79struct malo_page { 80 SLIST_ENTRY(malo_page) next_page; 81 LIST_ENTRY(malo_page) next_free_page; 82 struct malo *malo; 83 uint64_t slots, 84 full_slot_mask; 85 unsigned nbits; /* If pow is zero, stores object size */ 86 unsigned initial_slot; 87 int pow; /* True if object is power of 2 */ 88}; 89 90typedef char malo_header_fits_in_one_slot 91 [(sizeof(struct malo_page) > (1 << MALO_MIN_NBITS)) ? -1 : 1]; 92 93#if !LSQUIC_USE_POOLS 94struct nopool_elem 95{ 96 TAILQ_ENTRY(nopool_elem) next; 97 struct malo *malo; 98 unsigned char data[0]; 99}; 100#endif 101 102struct malo { 103#if LSQUIC_USE_POOLS 104 struct malo_page page_header; 105 SLIST_HEAD(, malo_page) all_pages; 106 LIST_HEAD(, malo_page) free_pages; 107 struct { 108 struct malo_page *cur_page; 109 unsigned next_slot; 110 } iter; 111#else 112 /* List of all elements: used by the iterator */ 113 TAILQ_HEAD(, nopool_elem) elems; 114 size_t obj_size; 115 struct nopool_elem *next_iter_elem; 116#endif 117}; 118 119struct malo * 120lsquic_malo_create (size_t obj_size) 121{ 122#if LSQUIC_USE_POOLS 123 int pow, n_slots; 124 unsigned nbits; 125 126 obj_size = (obj_size + 7) & -8; 127 nbits = size_in_bits(obj_size); 128 if (nbits < MALO_MIN_NBITS) 129 nbits = MALO_MIN_NBITS; 130 else if (nbits > MALO_MAX_NBITS) 131 { 132 errno = EOVERFLOW; 133 return NULL; 134 } 135 136 pow = obj_size <= (1 << MALO_MIN_NBITS) 137 || (float) obj_size / (1 << nbits) > ROUNDUP_THRESH; 138 139 struct malo *malo; 140 if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000)) 141 return NULL; 142 143 SLIST_INIT(&malo->all_pages); 144 LIST_INIT(&malo->free_pages); 145 malo->iter.cur_page = &malo->page_header; 146 malo->iter.next_slot = 0; 147 148 if (pow) 149 n_slots = sizeof(*malo) / (1 << nbits) 150 + ((sizeof(*malo) % (1 << nbits)) > 0); 151 else 152 n_slots = sizeof(*malo) / obj_size 153 + ((sizeof(*malo) % obj_size) > 0); 154 155 struct malo_page *const page = &malo->page_header; 156 SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 157 LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 158 page->malo = malo; 159 if (!pow) 160 page->full_slot_mask = (1ULL << (0x1000 / obj_size)) - 1; 161 else if (nbits == MALO_MIN_NBITS) 162 page->full_slot_mask = ~0ULL; 163 else 164 page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1; 165 page->slots = (1ULL << n_slots) - 1; 166 page->pow = pow; 167 page->nbits = pow ? nbits : obj_size; 168 page->initial_slot = n_slots; 169 170 return malo; 171#else 172 struct malo *malo; 173 174 /* Use the same sizing mechanism as in the normal version */ 175 if (obj_size < (1 << MALO_MIN_NBITS)) 176 obj_size = 1 << MALO_MIN_NBITS; 177 else 178 obj_size = (obj_size + 7) & -8; 179 180 malo = malloc(sizeof(*malo)); 181 if (malo) 182 { 183 TAILQ_INIT(&malo->elems); 184 malo->obj_size = obj_size; 185 malo->next_iter_elem = NULL; 186 return malo; 187 } 188 else 189 return NULL; 190#endif 191} 192 193 194#if LSQUIC_USE_POOLS 195static struct malo_page * 196allocate_page (struct malo *malo) 197{ 198 struct malo_page *page; 199 if (0 != posix_memalign((void **) &page, 0x1000, 0x1000)) 200 return NULL; 201 SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 202 LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 203 page->slots = 1; 204 page->full_slot_mask = malo->page_header.full_slot_mask; 205 page->nbits = malo->page_header.nbits; 206 page->pow = malo->page_header.pow; 207 page->malo = malo; 208 page->initial_slot = 1; 209 return page; 210} 211#endif 212 213 214#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0) 215 216/* Get a new object. */ 217void * 218lsquic_malo_get (struct malo *malo) 219{ 220#if LSQUIC_USE_POOLS 221 fiu_do_on("malo/get", FAIL_NOMEM); 222 struct malo_page *page = LIST_FIRST(&malo->free_pages); 223 if (!page) 224 { 225 page = allocate_page(malo); 226 if (!page) 227 return NULL; 228 } 229 unsigned slot = find_free_slot(page->slots); 230 page->slots |= (1ULL << slot); 231 if (page->full_slot_mask == page->slots) 232 LIST_REMOVE(page, next_free_page); 233 if (page->pow) 234 return (char *) page + (slot << page->nbits); 235 else 236 return (char *) page + (slot * page->nbits); 237#else 238 struct nopool_elem *el; 239 el = malloc(sizeof(*el) + malo->obj_size); 240 if (el) 241 { 242 TAILQ_INSERT_HEAD(&malo->elems, el, next); 243 el->malo = malo; 244 return el->data; 245 } 246 else 247 return NULL; 248#endif 249} 250 251 252/* Return obj to the pool */ 253void 254lsquic_malo_put (void *obj) 255{ 256#if LSQUIC_USE_POOLS 257 uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1); 258 struct malo_page *page = (void *) page_addr; 259 unsigned slot; 260 if (page->pow) 261 slot = ((uintptr_t) obj - page_addr) >> page->nbits; 262 else 263 slot = ((uintptr_t) obj - page_addr) / page->nbits; 264 if (page->full_slot_mask == page->slots) 265 LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page); 266 page->slots &= ~(1ULL << slot); 267#else 268 struct nopool_elem *el; 269 el = (struct nopool_elem *) ((char *) obj - sizeof(*el)); 270 if (el == el->malo->next_iter_elem) 271 el->malo->next_iter_elem = TAILQ_NEXT(el->malo->next_iter_elem, next); 272 TAILQ_REMOVE(&el->malo->elems, el, next); 273 free(el); 274#endif 275} 276 277 278void 279lsquic_malo_destroy (struct malo *malo) 280{ 281#if LSQUIC_USE_POOLS 282 struct malo_page *page, *next; 283 page = SLIST_FIRST(&malo->all_pages); 284 while (page != &malo->page_header) 285 { 286 next = SLIST_NEXT(page, next_page); 287#ifndef WIN32 288 free(page); 289#else 290 _aligned_free(page); 291#endif 292 page = next; 293 } 294#ifndef WIN32 295 free(page); 296#else 297 _aligned_free(page); 298#endif 299#else 300 struct nopool_elem *el, *next_el; 301 for (el = TAILQ_FIRST(&malo->elems); el; el = next_el) 302 { 303 next_el = TAILQ_NEXT(el, next); 304 free(el); 305 } 306 free(malo); 307#endif 308} 309 310 311/* The iterator is built-in. Usage: 312 * void *obj; 313 * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo)) 314 * do_stuff(obj); 315 */ 316void * 317lsquic_malo_first (struct malo *malo) 318{ 319#if LSQUIC_USE_POOLS 320 malo->iter.cur_page = SLIST_FIRST(&malo->all_pages); 321 malo->iter.next_slot = malo->iter.cur_page->initial_slot; 322#else 323 malo->next_iter_elem = TAILQ_FIRST(&malo->elems); 324#endif 325 return lsquic_malo_next(malo); 326} 327 328 329void * 330lsquic_malo_next (struct malo *malo) 331{ 332#if LSQUIC_USE_POOLS 333 struct malo_page *page; 334 unsigned max_slot, slot; 335 336 page = malo->iter.cur_page; 337 if (page) 338 { 339 if (page->pow) 340 max_slot = 1 << (12 - page->nbits); /* Same for all pages */ 341 else 342 max_slot = 0x1000 / page->nbits; 343 slot = malo->iter.next_slot; 344 while (1) 345 { 346 for (; slot < max_slot; ++slot) 347 { 348 if (page->slots & (1ULL << slot)) 349 { 350 malo->iter.cur_page = page; 351 malo->iter.next_slot = slot + 1; 352 if (page->pow) 353 return (char *) page + (slot << page->nbits); 354 else 355 { 356 assert(slot * (page->nbits + 1) < 0x1000); 357 return (char *) page + (slot * page->nbits); 358 } 359 } 360 } 361 page = SLIST_NEXT(page, next_page); 362 if (page) 363 slot = page->initial_slot; 364 else 365 { 366 malo->iter.cur_page = NULL; /* Stop iterator */ 367 return NULL; 368 } 369 } 370 } 371 372 return NULL; 373#else 374 struct nopool_elem *retval; 375 376 if (malo->next_iter_elem) 377 { 378 retval = malo->next_iter_elem; 379 malo->next_iter_elem = TAILQ_NEXT(malo->next_iter_elem, next); 380 return retval->data; 381 } 382 else 383 return NULL; 384#endif 385} 386 387 388#if LSQUIC_USE_POOLS 389static unsigned 390size_in_bits (size_t sz) 391{ 392#if __GNUC__ 393 unsigned clz = sz > 1 ? __builtin_clz(sz - 1) : 31; 394 return 32 - clz; 395#elif defined(WIN32) 396 unsigned char s; 397 unsigned long idx; 398 s = _BitScanReverse(&idx, sz); 399 assert(s); 400 return (unsigned) idx + 1; 401#else 402#error This function contains a bug! 403 unsigned clz; 404 size_t y; 405 406 --sz; 407 clz = 32; 408 y = sz >> 16; if (y) { clz -= 16; sz = y; } 409 y = sz >> 8; if (y) { clz -= 8; sz = y; } 410 y = sz >> 4; if (y) { clz -= 4; sz = y; } 411 y = sz >> 2; if (y) { clz -= 2; sz = y; } 412 y = sz >> 1; if (y) return 32 - clz + 1; 413 return 32 - clz + sz; 414#endif 415} 416 417 418static unsigned 419find_free_slot (uint64_t slots) 420{ 421#if __GNUC__ 422 return __builtin_ffsll(~slots) - 1; 423#else 424 unsigned n; 425 426 slots =~ slots; 427 n = 0; 428 429 if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; } 430 if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; } 431 if (0 == (slots & ((1ULL << 8) - 1))) { n += 8; slots >>= 8; } 432 if (0 == (slots & ((1ULL << 4) - 1))) { n += 4; slots >>= 4; } 433 if (0 == (slots & ((1ULL << 2) - 1))) { n += 2; slots >>= 2; } 434 if (0 == (slots & ((1ULL << 1) - 1))) { n += 1; slots >>= 1; } 435 return n; 436#endif 437} 438#endif 439 440 441size_t 442lsquic_malo_mem_used (const struct malo *malo) 443{ 444#if LSQUIC_USE_POOLS 445 const struct malo_page *page; 446 size_t size; 447 448 size = 0; 449 SLIST_FOREACH(page, &malo->all_pages, next_page) 450 size += sizeof(*page); 451 452 return size; 453#else 454 return 0; 455#endif 456} 457