lsquic_malo.c revision 5392f7a3
1/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_malo.c -- malo allocator implementation. 4 * 5 * The malo allocator is a pool of objects of fixed size. It tries to 6 * allocate and deallocate objects as fast as possible. To do so, it 7 * does the following: 8 * 9 * 1. Allocations occur 4 KB at a time. 10 * 2. No division or multiplication operations are performed for 11 * appropriately sized objects. (More on this below.) 12 * 13 * (In recent testing, malo was about 2.7 times faster than malloc for 14 * 64-byte objects.) 15 * 16 * Besides speed, two other important characteristics distinguish it 17 * from other pool allocators: 18 * 19 * 1. To free (put) an object, one does not need a pointer to the malo 20 * object. This makes this allocator easy to use. 21 * 2. A built-in iterator is provided to iterate over all allocated 22 * objects (with ability safely to release objects while iterator 23 * is active). This may be useful in some circumstances. 24 * 25 * To gain all these advantages, there are trade-offs: 26 * 27 * 1. There are two memory penalties: 28 * a. Per object overhead. If an object is at least ROUNDUP_THRESH in 29 * size as the next power of two, the allocator uses that power of 30 * two value as the object size. This is done to avoid using 31 * division and multiplication. For example, a 104-byte object 32 * will have a 24-byte overhead. 33 * b. Per page overhead. Page links occupy some bytes in the 34 * page. To keep things fast, at least one slot per page is 35 * always occupied, independent of object size. Thus, for a 36 * 1 KB object size, 25% of the page is used for the page 37 * header. 38 * 2. 4 KB pages are not freed until the malo allocator is destroyed. 39 * This is something to keep in mind. 40 * 41 * P.S. In Russian, "malo" (��������) means "little" or "few". Thus, the 42 * malo allocator aims to perform its job in as few CPU cycles as 43 * possible. 44 */ 45 46#include <assert.h> 47#include <errno.h> 48#include <stdint.h> 49#include <stdlib.h> 50#include <sys/queue.h> 51#ifdef WIN32 52#include <vc_compat.h> 53#include <intrin.h> 54#endif 55 56#include "fiu-local.h" 57#include "lsquic_malo.h" 58 59#ifndef LSQUIC_USE_POOLS 60#define LSQUIC_USE_POOLS 1 61#endif 62 63/* 64 slots in a 4KB page means that the smallest object is 64 bytes. 64 * The largest object is 2KB. 65 */ 66#define MALO_MIN_NBITS 6 67#define MALO_MAX_NBITS 11 68 69#define ROUNDUP_THRESH 0.75f 70 71/* A "free page" is a page with free slots available. 72 */ 73 74#if LSQUIC_USE_POOLS 75static unsigned find_free_slot (uint64_t slots); 76static unsigned size_in_bits (size_t sz); 77#endif 78 79struct malo_page { 80 SLIST_ENTRY(malo_page) next_page; 81 LIST_ENTRY(malo_page) next_free_page; 82 struct malo *malo; 83 uint64_t slots, 84 full_slot_mask; 85 unsigned nbits; /* If pow is zero, stores object size */ 86 unsigned initial_slot; 87 int pow; /* True if object is power of 2 */ 88}; 89 90typedef char malo_header_fits_in_one_slot 91 [(sizeof(struct malo_page) > (1 << MALO_MIN_NBITS)) ? -1 : 1]; 92 93#if !LSQUIC_USE_POOLS 94struct nopool_elem 95{ 96 TAILQ_ENTRY(nopool_elem) next; 97 struct malo *malo; 98 unsigned char data[0]; 99}; 100#endif 101 102struct malo { 103#if LSQUIC_USE_POOLS 104 struct malo_page page_header; 105 SLIST_HEAD(, malo_page) all_pages; 106 LIST_HEAD(, malo_page) free_pages; 107 struct { 108 struct malo_page *cur_page; 109 unsigned next_slot; 110 } iter; 111#else 112 /* List of all elements: used by the iterator */ 113 TAILQ_HEAD(, nopool_elem) elems; 114 size_t obj_size; 115 struct nopool_elem *next_iter_elem; 116#endif 117}; 118 119struct malo * 120lsquic_malo_create (size_t obj_size) 121{ 122#if LSQUIC_USE_POOLS 123 int pow, n_slots; 124 unsigned nbits; 125 126 obj_size = (obj_size + 7) & -8; 127 nbits = size_in_bits(obj_size); 128 if (nbits < MALO_MIN_NBITS) 129 nbits = MALO_MIN_NBITS; 130 else if (nbits > MALO_MAX_NBITS) 131 { 132 errno = EOVERFLOW; 133 return NULL; 134 } 135 136 pow = obj_size <= (1 << MALO_MIN_NBITS) 137 || (float) obj_size / (1 << nbits) > ROUNDUP_THRESH; 138 139 struct malo *malo; 140 if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000)) 141 return NULL; 142 143 SLIST_INIT(&malo->all_pages); 144 LIST_INIT(&malo->free_pages); 145 malo->iter.cur_page = &malo->page_header; 146 malo->iter.next_slot = 0; 147 148 if (pow) 149 n_slots = sizeof(*malo) / (1 << nbits) 150 + ((sizeof(*malo) % (1 << nbits)) > 0); 151 else 152 n_slots = sizeof(*malo) / obj_size 153 + ((sizeof(*malo) % obj_size) > 0); 154 155 struct malo_page *const page = &malo->page_header; 156 SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 157 LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 158 page->malo = malo; 159 if (!pow) 160 page->full_slot_mask = (1ULL << (0x1000 / obj_size)) - 1; 161 else if (nbits == MALO_MIN_NBITS) 162 page->full_slot_mask = ~0ULL; 163 else 164 page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1; 165 page->slots = (1ULL << n_slots) - 1; 166 page->pow = pow; 167 page->nbits = pow ? nbits : obj_size; 168 page->initial_slot = n_slots; 169 170 return malo; 171#else 172 struct malo *malo; 173 174 /* Use the same sizing mechanism as in the normal version */ 175 if (obj_size < (1 << MALO_MIN_NBITS)) 176 obj_size = 1 << MALO_MIN_NBITS; 177 else 178 obj_size = (obj_size + 7) & -8; 179 180 malo = malloc(sizeof(*malo)); 181 if (malo) 182 { 183 TAILQ_INIT(&malo->elems); 184 malo->obj_size = obj_size; 185 return malo; 186 } 187 else 188 return NULL; 189#endif 190} 191 192 193#if LSQUIC_USE_POOLS 194static struct malo_page * 195allocate_page (struct malo *malo) 196{ 197 struct malo_page *page; 198 if (0 != posix_memalign((void **) &page, 0x1000, 0x1000)) 199 return NULL; 200 SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 201 LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 202 page->slots = 1; 203 page->full_slot_mask = malo->page_header.full_slot_mask; 204 page->nbits = malo->page_header.nbits; 205 page->pow = malo->page_header.pow; 206 page->malo = malo; 207 page->initial_slot = 1; 208 return page; 209} 210#endif 211 212 213#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0) 214 215/* Get a new object. */ 216void * 217lsquic_malo_get (struct malo *malo) 218{ 219#if LSQUIC_USE_POOLS 220 fiu_do_on("malo/get", FAIL_NOMEM); 221 struct malo_page *page = LIST_FIRST(&malo->free_pages); 222 if (!page) 223 { 224 page = allocate_page(malo); 225 if (!page) 226 return NULL; 227 } 228 unsigned slot = find_free_slot(page->slots); 229 page->slots |= (1ULL << slot); 230 if (page->full_slot_mask == page->slots) 231 LIST_REMOVE(page, next_free_page); 232 if (page->pow) 233 return (char *) page + (slot << page->nbits); 234 else 235 return (char *) page + (slot * page->nbits); 236#else 237 struct nopool_elem *el; 238 el = malloc(sizeof(*el) + malo->obj_size); 239 if (el) 240 { 241 TAILQ_INSERT_HEAD(&malo->elems, el, next); 242 el->malo = malo; 243 return el->data; 244 } 245 else 246 return NULL; 247#endif 248} 249 250 251/* Return obj to the pool */ 252void 253lsquic_malo_put (void *obj) 254{ 255#if LSQUIC_USE_POOLS 256 uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1); 257 struct malo_page *page = (void *) page_addr; 258 unsigned slot; 259 if (page->pow) 260 slot = ((uintptr_t) obj - page_addr) >> page->nbits; 261 else 262 slot = ((uintptr_t) obj - page_addr) / page->nbits; 263 if (page->full_slot_mask == page->slots) 264 LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page); 265 page->slots &= ~(1ULL << slot); 266#else 267 struct nopool_elem *el; 268 el = (struct nopool_elem *) ((char *) obj - sizeof(*el)); 269 if (el == el->malo->next_iter_elem) 270 el->malo->next_iter_elem = TAILQ_NEXT(el->malo->next_iter_elem, next); 271 TAILQ_REMOVE(&el->malo->elems, el, next); 272 free(el); 273#endif 274} 275 276 277void 278lsquic_malo_destroy (struct malo *malo) 279{ 280#if LSQUIC_USE_POOLS 281 struct malo_page *page, *next; 282 page = SLIST_FIRST(&malo->all_pages); 283 while (page != &malo->page_header) 284 { 285 next = SLIST_NEXT(page, next_page); 286#ifndef WIN32 287 free(page); 288#else 289 _aligned_free(page); 290#endif 291 page = next; 292 } 293#ifndef WIN32 294 free(page); 295#else 296 _aligned_free(page); 297#endif 298#else 299 struct nopool_elem *el, *next_el; 300 for (el = TAILQ_FIRST(&malo->elems); el; el = next_el) 301 { 302 next_el = TAILQ_NEXT(el, next); 303 free(el); 304 } 305 free(malo); 306#endif 307} 308 309 310/* The iterator is built-in. Usage: 311 * void *obj; 312 * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo)) 313 * do_stuff(obj); 314 */ 315void * 316lsquic_malo_first (struct malo *malo) 317{ 318#if LSQUIC_USE_POOLS 319 malo->iter.cur_page = SLIST_FIRST(&malo->all_pages); 320 malo->iter.next_slot = malo->iter.cur_page->initial_slot; 321#else 322 malo->next_iter_elem = TAILQ_FIRST(&malo->elems); 323#endif 324 return lsquic_malo_next(malo); 325} 326 327 328void * 329lsquic_malo_next (struct malo *malo) 330{ 331#if LSQUIC_USE_POOLS 332 struct malo_page *page; 333 unsigned max_slot, slot; 334 335 page = malo->iter.cur_page; 336 if (page) 337 { 338 if (page->pow) 339 max_slot = 1 << (12 - page->nbits); /* Same for all pages */ 340 else 341 max_slot = 0x1000 / page->nbits; 342 slot = malo->iter.next_slot; 343 while (1) 344 { 345 for (; slot < max_slot; ++slot) 346 { 347 if (page->slots & (1ULL << slot)) 348 { 349 malo->iter.cur_page = page; 350 malo->iter.next_slot = slot + 1; 351 if (page->pow) 352 return (char *) page + (slot << page->nbits); 353 else 354 { 355 assert(slot * (page->nbits + 1) < 0x1000); 356 return (char *) page + (slot * page->nbits); 357 } 358 } 359 } 360 page = SLIST_NEXT(page, next_page); 361 if (page) 362 slot = page->initial_slot; 363 else 364 { 365 malo->iter.cur_page = NULL; /* Stop iterator */ 366 return NULL; 367 } 368 } 369 } 370 371 return NULL; 372#else 373 struct nopool_elem *retval; 374 375 if (malo->next_iter_elem) 376 { 377 retval = malo->next_iter_elem; 378 malo->next_iter_elem = TAILQ_NEXT(malo->next_iter_elem, next); 379 return retval->data; 380 } 381 else 382 return NULL; 383#endif 384} 385 386 387#if LSQUIC_USE_POOLS 388static unsigned 389size_in_bits (size_t sz) 390{ 391#if __GNUC__ 392 unsigned clz = sz > 1 ? __builtin_clz(sz - 1) : 31; 393 return 32 - clz; 394#elif defined(WIN32) 395 unsigned char s; 396 unsigned long idx; 397 s = _BitScanReverse(&idx, sz); 398 assert(s); 399 return (unsigned) idx + 1; 400#else 401#error This function contains a bug! 402 unsigned clz; 403 size_t y; 404 405 --sz; 406 clz = 32; 407 y = sz >> 16; if (y) { clz -= 16; sz = y; } 408 y = sz >> 8; if (y) { clz -= 8; sz = y; } 409 y = sz >> 4; if (y) { clz -= 4; sz = y; } 410 y = sz >> 2; if (y) { clz -= 2; sz = y; } 411 y = sz >> 1; if (y) return 32 - clz + 1; 412 return 32 - clz + sz; 413#endif 414} 415 416 417static unsigned 418find_free_slot (uint64_t slots) 419{ 420#if __GNUC__ 421 return __builtin_ffsll(~slots) - 1; 422#else 423 unsigned n; 424 425 slots =~ slots; 426 n = 0; 427 428 if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; } 429 if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; } 430 if (0 == (slots & ((1ULL << 8) - 1))) { n += 8; slots >>= 8; } 431 if (0 == (slots & ((1ULL << 4) - 1))) { n += 4; slots >>= 4; } 432 if (0 == (slots & ((1ULL << 2) - 1))) { n += 2; slots >>= 2; } 433 if (0 == (slots & ((1ULL << 1) - 1))) { n += 1; slots >>= 1; } 434 return n; 435#endif 436} 437#endif 438 439 440size_t 441lsquic_malo_mem_used (const struct malo *malo) 442{ 443#if LSQUIC_USE_POOLS 444 const struct malo_page *page; 445 size_t size; 446 447 size = 0; 448 SLIST_FOREACH(page, &malo->all_pages, next_page) 449 size += sizeof(*page); 450 451 return size; 452#else 453 return 0; 454#endif 455} 456