lsquic_malo.c revision 50aadb33
1/* Copyright (c) 2017 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_malo.c -- malo allocator implementation. 4 * 5 * The malo allocator is a pool of objects of fixed size. It tries to 6 * allocate and deallocate objects as fast as possible. To do so, it 7 * does the following: 8 * 9 * 1. Allocations occur 4 KB at a time. 10 * 2. No division or multiplication operations are performed. 11 * 12 * (In recent testing, malo was about 2.7 times faster than malloc for 13 * 64-byte objects.) 14 * 15 * Besides speed, two other important characteristics distinguish it 16 * from other pool allocators: 17 * 18 * 1. To free (put) an object, one does not need a pointer to the malo 19 * object. This makes this allocator easy to use. 20 * 2. A built-in iterator is provided to iterate over all allocated 21 * objects (with ability to safely release objects while iterator 22 * is active). This may be useful in some circumstances. 23 * 24 * To gain all these advantages, there are trade-offs: 25 * 26 * 1. There are two memory penalties: 27 * a. Per object overhead. To avoid division and multiplication, 28 * the object sizes is rounded up to the nearest power or two, 29 * starting with 64 bytes (minumum) and up to 2 KB (maximum). 30 * Thus, a 104-byte object will have a 24-byte overhead; a 31 * 130-byte object will have 126-byte overhead. This is 32 * something to keep in mind. 33 * b. Per page overhead. Page links occupy some bytes in the 34 * page. To keep things fast, at least one slot per page is 35 * always occupied, independent of object size. Thus, for a 36 * 1 KB object size, 25% of the page is used for the page 37 * header. 38 * 2. 4 KB pages are not freed until the malo allocator is destroyed. 39 * This is something to keep in mind. 40 * 41 * P.S. In Russian, "malo" (��������) means "little" or "few". Thus, the 42 * malo allocator aims to perform its job in as few CPU cycles as 43 * possible. 44 */ 45 46#include <assert.h> 47#include <errno.h> 48#include <stdint.h> 49#include <stdlib.h> 50#include <sys/queue.h> 51 52#include "fiu-local.h" 53#include "lsquic_malo.h" 54 55/* 64 slots in a 4KB page means that the smallest object is 64 bytes. 56 * The largest object is 2KB. 57 */ 58#define MALO_MIN_NBITS 6 59#define MALO_MAX_NBITS 11 60 61/* A "free page" is a page with free slots available. 62 */ 63 64static unsigned find_free_slot (uint64_t slots); 65static unsigned size_in_bits (size_t sz); 66 67struct malo_page { 68 SLIST_ENTRY(malo_page) next_page; 69 LIST_ENTRY(malo_page) next_free_page; 70 struct malo *malo; 71 uint64_t slots, 72 full_slot_mask; 73 unsigned nbits; 74 unsigned initial_slot; 75}; 76 77typedef char malo_header_fits_in_one_slot 78 [0 - (sizeof(struct malo_page) > (1 << MALO_MAX_NBITS))]; 79 80struct malo { 81 struct malo_page page_header; 82 SLIST_HEAD(, malo_page) all_pages; 83 LIST_HEAD(, malo_page) free_pages; 84 struct { 85 struct malo_page *cur_page; 86 unsigned next_slot; 87 } iter; 88}; 89 90struct malo * 91lsquic_malo_create (size_t obj_size) 92{ 93 unsigned nbits = size_in_bits(obj_size); 94 if (nbits < MALO_MIN_NBITS) 95 nbits = MALO_MIN_NBITS; 96 else if (nbits > MALO_MAX_NBITS) 97 { 98 errno = EOVERFLOW; 99 return NULL; 100 } 101 102 struct malo *malo; 103 if (0 != posix_memalign((void **) &malo, 0x1000, 0x1000)) 104 return NULL; 105 106 SLIST_INIT(&malo->all_pages); 107 LIST_INIT(&malo->free_pages); 108 malo->iter.cur_page = &malo->page_header; 109 malo->iter.next_slot = 0; 110 111 int n_slots = sizeof(*malo) / (1 << nbits) 112 + ((sizeof(*malo) % (1 << nbits)) > 0); 113 114 struct malo_page *const page = &malo->page_header; 115 SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 116 LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 117 page->malo = malo; 118 if (nbits == MALO_MIN_NBITS) 119 page->full_slot_mask = ~0ULL; 120 else 121 page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1; 122 page->slots = (1ULL << n_slots) - 1; 123 page->nbits = nbits; 124 page->initial_slot = n_slots; 125 126 return malo; 127} 128 129 130static struct malo_page * 131allocate_page (struct malo *malo) 132{ 133 struct malo_page *page; 134 if (0 != posix_memalign((void **) &page, 0x1000, 0x1000)) 135 return NULL; 136 SLIST_INSERT_HEAD(&malo->all_pages, page, next_page); 137 LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page); 138 page->slots = 1; 139 page->full_slot_mask = malo->page_header.full_slot_mask; 140 page->nbits = malo->page_header.nbits; 141 page->malo = malo; 142 page->initial_slot = 1; 143 return page; 144} 145 146 147#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0) 148 149/* Get a new object. */ 150void * 151lsquic_malo_get (struct malo *malo) 152{ 153 fiu_do_on("malo/get", FAIL_NOMEM); 154 struct malo_page *page = LIST_FIRST(&malo->free_pages); 155 if (!page) 156 { 157 page = allocate_page(malo); 158 if (!page) 159 return NULL; 160 } 161 unsigned slot = find_free_slot(page->slots); 162 page->slots |= (1ULL << slot); 163 if (page->full_slot_mask == page->slots) 164 LIST_REMOVE(page, next_free_page); 165 return (char *) page + (slot << page->nbits); 166} 167 168 169/* Return obj to the pool */ 170void 171lsquic_malo_put (void *obj) 172{ 173 uintptr_t page_addr = (uintptr_t) obj & ~((1 << 12) - 1); 174 struct malo_page *page = (void *) page_addr; 175 unsigned slot = ((uintptr_t) obj - page_addr) >> page->nbits; 176 if (page->full_slot_mask == page->slots) 177 LIST_INSERT_HEAD(&page->malo->free_pages, page, next_free_page); 178 page->slots &= ~(1ULL << slot); 179} 180 181 182void 183lsquic_malo_destroy (struct malo *malo) 184{ 185 struct malo_page *page, *next; 186 page = SLIST_FIRST(&malo->all_pages); 187 while (page != &malo->page_header) 188 { 189 next = SLIST_NEXT(page, next_page); 190 free(page); 191 page = next; 192 } 193 free(page); 194} 195 196 197/* The iterator is built-in. Usage: 198 * void *obj; 199 * for (obj = lsquic_malo_first(malo); obj; lsquic_malo_next(malo)) 200 * do_stuff(obj); 201 */ 202void * 203lsquic_malo_first (struct malo *malo) 204{ 205 malo->iter.cur_page = SLIST_FIRST(&malo->all_pages); 206 malo->iter.next_slot = malo->iter.cur_page->initial_slot; 207 return lsquic_malo_next(malo); 208} 209 210 211void * 212lsquic_malo_next (struct malo *malo) 213{ 214 struct malo_page *page; 215 unsigned max_slot, slot; 216 217 page = malo->iter.cur_page; 218 if (page) 219 { 220 max_slot = 1 << (12 - page->nbits); /* Same for all pages */ 221 slot = malo->iter.next_slot; 222 while (1) 223 { 224 for (; slot < max_slot; ++slot) 225 { 226 if (page->slots & (1ULL << slot)) 227 { 228 malo->iter.cur_page = page; 229 malo->iter.next_slot = slot + 1; 230 return (char *) page + (slot << page->nbits); 231 } 232 } 233 page = SLIST_NEXT(page, next_page); 234 if (page) 235 slot = page->initial_slot; 236 else 237 { 238 malo->iter.cur_page = NULL; /* Stop iterator */ 239 return NULL; 240 } 241 } 242 } 243 244 return NULL; 245} 246 247 248static unsigned 249size_in_bits (size_t sz) 250{ 251#if __GNUC__ 252 unsigned clz = __builtin_clz(sz - 1); 253 return 32 - clz; 254#else 255 unsigned clz; 256 size_t y; 257 258 --sz; 259 clz = 32; 260 y = sz >> 16; if (y) { clz -= 16; sz = y; } 261 y = sz >> 8; if (y) { clz -= 8; sz = y; } 262 y = sz >> 4; if (y) { clz -= 4; sz = y; } 263 y = sz >> 2; if (y) { clz -= 2; sz = y; } 264 y = sz >> 1; if (y) return 32 - clz + 1; 265 return 32 - clz + sz; 266#endif 267} 268 269 270static unsigned 271find_free_slot (uint64_t slots) 272{ 273#if __GNUC__ 274 return __builtin_ffsll(~slots) - 1; 275#else 276 unsigned n; 277 278 slots =~ slots; 279 n = 0; 280 281 if (0 == (slots & ((1ULL << 32) - 1))) { n += 32; slots >>= 32; } 282 if (0 == (slots & ((1ULL << 16) - 1))) { n += 16; slots >>= 16; } 283 if (0 == (slots & ((1ULL << 8) - 1))) { n += 8; slots >>= 8; } 284 if (0 == (slots & ((1ULL << 4) - 1))) { n += 4; slots >>= 4; } 285 if (0 == (slots & ((1ULL << 2) - 1))) { n += 2; slots >>= 2; } 286 if (0 == (slots & ((1ULL << 1) - 1))) { n += 1; slots >>= 1; } 287 return n; 288#endif 289} 290