lsquic_malo.c revision 229fce07
1/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_malo.c -- malo allocator implementation. 4 * 5 * The malo allocator is a pool of objects of fixed size. It tries to 6 * allocate and deallocate objects as fast as possible. To do so, it 7 * does the following: 8 * 9 * 1. Allocations occur 4 KB at a time. 10 * 2. No division or multiplication operations are performed. 11 * 12 * (In recent testing, malo was about 2.7 times faster than malloc for 13 * 64-byte objects.) 14 * 15 * Besides speed, two other important characteristics distinguish it 16 * from other pool allocators: 17 * 18 * 1. To free (put) an object, one does not need a pointer to the malo 19 * object. This makes this allocator easy to use. 20 * 2. A built-in iterator is provided to iterate over all allocated 21 * objects (with ability to safely release objects while iterator 22 * is active). This may be useful in some circumstances. 23 * 24 * To gain all these advantages, there are trade-offs: 25 * 26 * 1. There are two memory penalties: 27 * a. Per object overhead. To avoid division and multiplication, 28 * the object sizes is rounded up to the nearest power or two, 29 * starting with 64 bytes (minumum) and up to 2 KB (maximum). 30 * Thus, a 104-byte object will have a 24-byte overhead; a 31 * 130-byte object will have 126-byte overhead. This is 32 * something to keep in mind. 33 * b. Per page overhead. Page links occupy some bytes in the 34 * page. To keep things fast, at least one slot per page is 35 * always occupied, independent of object size. Thus, for a 36 * 1 KB object size, 25% of the page is used for the page 37 * header. 38 * 2. 4 KB pages are not freed until the malo allocator is destroyed. 39 * This is something to keep in mind. 40 * 41 * P.S. In Russian, "malo" (��������) means "little" or "few". Thus, the 42 * malo allocator aims to perform its job in as few CPU cycles as 43 * possible. 44 */ 45 46#include <assert.h> 47#include <errno.h> 48#include <stdint.h> 49#include <stdlib.h> 50#include <sys/queue.h> 51#ifdef WIN32 52#include <vc_compat.h> 53#endif 54 55#include "fiu-local.h" 56#include "lsquic_malo.h" 57 58/* 64 slots in a 4KB page means that the smallest object is 64 bytes. 59 * The largest object is 2KB. 60 */ 61#define MALO_MIN_NBITS 6 62#define MALO_MAX_NBITS 11 63 64/* A "free page" is a page with free slots available. 65 */ 66 67static unsigned find_free_slot (uint64_t slots); 68static unsigned size_in_bits (size_t sz); 69 70struct malo_page { 71 SLIST_ENTRY(malo_page) next_page; 72 LIST_ENTRY(malo_page) next_free_page; 73 struct malo *malo; 74 uint64_t slots, 75 full_slot_mask; 76 unsigned nbits; 77 unsigned initial_slot; 78}; 79 80typedef char malo_header_fits_in_one_slot 81 [(sizeof(struct malo_page) > (1 << MALO_MAX_NBITS)) ? -1 : 1]; 82 83struct malo { 84 struct malo_page page_header; 85 SLIST_HEAD(, malo_page) all_pages; 86 LIST_HEAD(, malo_page) free_pages; 87 struct { 88 struct malo_page *cur_page; 89 unsigned next_slot; 90 } iter; 91}; 92 93struct malo * 94lsquic_malo_create (size_t obj_size) 95{ 96 unsigned nbits = size_in_bits(obj_size); 97 if (nbits < MALO_MIN_NBITS) 98 nbits = MALO_MIN_NBITS; 99 else if (nbits > MALO_MAX_NBITS) 100 { 101 errno = EOVERFLOW; 102 return NULL; 103 } 104 105 struct malo *malo; 106 if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000)) 107 return NULL; 108 109 SLIST_INIT(&malo->all_pages); 110 LIST_INIT(&malo->free_pages); 111 malo->iter.cur_page = &malo->page_header; 112 malo->iter.next_slot = 0; 113 114 int n_slots = sizeof(*malo) / (1 << nbits) 115 + ((sizeof(*malo) % (1 << nbits)) > 0); 116 117 struct malo_page *const page = &malo->page_header; 118 SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 119 LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 120 page->malo = malo; 121 if (nbits == MALO_MIN_NBITS) 122 page->full_slot_mask = ~0ULL; 123 else 124 page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1; 125 page->slots = (1ULL << n_slots) - 1; 126 page->nbits = nbits; 127 page->initial_slot = n_slots; 128 129 return malo; 130} 131 132 133static struct malo_page * 134allocate_page (struct malo *malo) 135{ 136 struct malo_page *page; 137 if (0 != posix_memalign((void **) &page, 0x1000, 0x1000)) 138 return NULL; 139 SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 140 LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 141 page->slots = 1; 142 page->full_slot_mask = malo->page_header.full_slot_mask; 143 page->nbits = malo->page_header.nbits; 144 page->malo = malo; 145 page->initial_slot = 1; 146 return page; 147} 148 149 150#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0) 151 152/* Get a new object. */ 153void * 154lsquic_malo_get (struct malo *malo) 155{ 156 fiu_do_on("malo/get", FAIL_NOMEM); 157 struct malo_page *page = LIST_FIRST(&malo->free_pages); 158 if (!page) 159 { 160 page = allocate_page(malo); 161 if (!page) 162 return NULL; 163 } 164 unsigned slot = find_free_slot(page->slots); 165 page->slots |= (1ULL << slot); 166 if (page->full_slot_mask == page->slots) 167 LIST_REMOVE(page, next_free_page); 168 return (char *) page + (slot << page->nbits); 169} 170 171 172/* Return obj to the pool */ 173void 174lsquic_malo_put (void *obj) 175{ 176 uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1); 177 struct malo_page *page = (void *) page_addr; 178 unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits; 179 if (page->full_slot_mask == page->slots) 180 LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page); 181 page->slots &= ~(1ULL << slot); 182} 183 184 185void 186lsquic_malo_destroy (struct malo *malo) 187{ 188 struct malo_page *page, *next; 189 page = SLIST_FIRST(&malo->all_pages); 190 while (page != &malo->page_header) 191 { 192 next = SLIST_NEXT(page, next_page); 193#ifndef WIN32 194 free(page); 195#else 196 _aligned_free(page); 197#endif 198 page = next; 199 } 200#ifndef WIN32 201 free(page); 202#else 203 _aligned_free(page); 204#endif 205} 206 207 208/* The iterator is built-in. Usage: 209 * void *obj; 210 * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo)) 211 * do_stuff(obj); 212 */ 213void * 214lsquic_malo_first (struct malo *malo) 215{ 216 malo->iter.cur_page = SLIST_FIRST(&malo->all_pages); 217 malo->iter.next_slot = malo->iter.cur_page->initial_slot; 218 return lsquic_malo_next(malo); 219} 220 221 222void * 223lsquic_malo_next (struct malo *malo) 224{ 225 struct malo_page *page; 226 unsigned max_slot, slot; 227 228 page = malo->iter.cur_page; 229 if (page) 230 { 231 max_slot = 1 << (12 - page->nbits); /* Same for all pages */ 232 slot = malo->iter.next_slot; 233 while (1) 234 { 235 for (; slot < max_slot; ++slot) 236 { 237 if (page->slots & (1ULL << slot)) 238 { 239 malo->iter.cur_page = page; 240 malo->iter.next_slot = slot + 1; 241 return (char *) page + (slot << page->nbits); 242 } 243 } 244 page = SLIST_NEXT(page, next_page); 245 if (page) 246 slot = page->initial_slot; 247 else 248 { 249 malo->iter.cur_page = NULL; /* Stop iterator */ 250 return NULL; 251 } 252 } 253 } 254 255 return NULL; 256} 257 258 259static unsigned 260size_in_bits (size_t sz) 261{ 262#if __GNUC__ 263 unsigned clz = __builtin_clz(sz - 1); 264 return 32 - clz; 265#else 266 unsigned clz; 267 size_t y; 268 269 --sz; 270 clz = 32; 271 y = sz >> 16; if (y) { clz -= 16; sz = y; } 272 y = sz >> 8; if (y) { clz -= 8; sz = y; } 273 y = sz >> 4; if (y) { clz -= 4; sz = y; } 274 y = sz >> 2; if (y) { clz -= 2; sz = y; } 275 y = sz >> 1; if (y) return 32 - clz + 1; 276 return 32 - clz + sz; 277#endif 278} 279 280 281static unsigned 282find_free_slot (uint64_t slots) 283{ 284#if __GNUC__ 285 return __builtin_ffsll(~slots) - 1; 286#else 287 unsigned n; 288 289 slots =~ slots; 290 n = 0; 291 292 if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; } 293 if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; } 294 if (0 == (slots & ((1ULL << 8) - 1))) { n += 8; slots >>= 8; } 295 if (0 == (slots & ((1ULL << 4) - 1))) { n += 4; slots >>= 4; } 296 if (0 == (slots & ((1ULL << 2) - 1))) { n += 2; slots >>= 2; } 297 if (0 == (slots & ((1ULL << 1) - 1))) { n += 1; slots >>= 1; } 298 return n; 299#endif 300} 301 302 303size_t 304lsquic_malo_mem_used (const struct malo *malo) 305{ 306 const struct malo_page *page; 307 size_t size; 308 309 size = 0; 310 SLIST_FOREACH(page, &malo->all_pages, next_page) 311 size += sizeof(*page); 312 313 return size; 314} 315