lsquic_di_hash.c revision 461e84d8
1/* Copyright (c) 2017 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_di_hash.c -- Copy incoming data into a hash 4 * 5 * While this implementation copies the data, its memory use is limited, 6 * which makes it a good choice when we have a lot of stream frames 7 * coming in. 8 * 9 * Another difference is that it does not check for frame overlap, which 10 * is something that is present in Chrome, but it is not required by QUIC. 11 */ 12 13 14#include <assert.h> 15#include <stddef.h> 16#include <stdint.h> 17#include <stdlib.h> 18#include <string.h> 19#include <sys/queue.h> 20 21#include "lsquic.h" 22#include "lsquic_int_types.h" 23#include "lsquic_types.h" 24#include "lsquic_conn_flow.h" 25#include "lsquic_packet_common.h" 26#include "lsquic_packet_in.h" 27#include "lsquic_rtt.h" 28#include "lsquic_sfcw.h" 29#include "lsquic_stream.h" 30#include "lsquic_mm.h" 31#include "lsquic_malo.h" 32#include "lsquic_conn.h" 33#include "lsquic_conn_public.h" 34#include "lsquic_data_in_if.h" 35 36 37#define LSQUIC_LOGGER_MODULE LSQLM_DI 38#define LSQUIC_LOG_CONN_ID hdi->hdi_conn_pub->lconn->cn_cid 39#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id 40#include "lsquic_logger.h" 41 42 43#define N_DB_SETS 57 44 45#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \ 46 sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t)) 47 48struct data_block 49{ 50 TAILQ_ENTRY(data_block) db_next; 51 uint64_t db_off; 52 uint64_t db_set[N_DB_SETS]; /* bit for each valid byte */ 53 unsigned char db_data[DB_DATA_SIZE]; 54}; 55 56typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1]; 57typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1]; 58typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1]; 59 60 61TAILQ_HEAD(dblock_head, data_block); 62 63 64 65 66struct hash_data_in 67{ 68 struct data_in hdi_data_in; 69 struct lsquic_conn_public *hdi_conn_pub; 70 uint64_t hdi_fin_off; 71 struct dblock_head *hdi_buckets; 72 struct data_block *hdi_last_block; 73 struct data_frame hdi_data_frame; 74 uint32_t hdi_stream_id; 75 unsigned hdi_count; 76 unsigned hdi_nbits; 77 enum { 78 HDI_FIN = (1 << 0), 79 } hdi_flags; 80}; 81 82 83#define HDI_PTR(data_in) (struct hash_data_in *) \ 84 ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in)) 85 86 87#define N_BUCKETS(n_bits) (1U << (n_bits)) 88#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1)) 89 90 91static unsigned 92my_log2 /* silly name to suppress compiler warning */ (unsigned sz) 93{ 94#if __GNUC__ 95 unsigned clz = __builtin_clz(sz); 96 return 32 - clz; 97#else 98 unsigned clz; 99 size_t y; 100 clz = 32; 101 y = sz >> 16; if (y) { clz -= 16; sz = y; } 102 y = sz >> 8; if (y) { clz -= 8; sz = y; } 103 y = sz >> 4; if (y) { clz -= 4; sz = y; } 104 y = sz >> 2; if (y) { clz -= 2; sz = y; } 105 y = sz >> 1; if (y) return 32 - clz + 1; 106 return 32 - clz + sz; 107#endif 108} 109 110 111 112 113static void 114hash_di_destroy (struct data_in *data_in) 115{ 116 struct hash_data_in *const hdi = HDI_PTR(data_in); 117 struct data_block *block; 118 unsigned n; 119 120 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 121 { 122 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 123 { 124 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 125 free(block); 126 } 127 } 128 free(hdi->hdi_buckets); 129 free(hdi); 130} 131 132 133static int 134hash_grow (struct hash_data_in *hdi) 135{ 136 struct dblock_head *new_buckets, *new[2]; 137 struct data_block *block; 138 unsigned n, old_nbits; 139 int idx; 140 141 old_nbits = hdi->hdi_nbits; 142 LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1)); 143 new_buckets = malloc(sizeof(hdi->hdi_buckets[0]) 144 * N_BUCKETS(old_nbits + 1)); 145 if (!new_buckets) 146 { 147 LSQ_WARN("malloc failed: potential trouble ahead"); 148 return -1; 149 } 150 151 for (n = 0; n < N_BUCKETS(old_nbits); ++n) 152 { 153 new[0] = &new_buckets[n]; 154 new[1] = &new_buckets[n + N_BUCKETS(old_nbits)]; 155 TAILQ_INIT(new[0]); 156 TAILQ_INIT(new[1]); 157 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 158 { 159 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 160 idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1; 161 TAILQ_INSERT_TAIL(new[idx], block, db_next); 162 } 163 } 164 free(hdi->hdi_buckets); 165 hdi->hdi_nbits = old_nbits + 1; 166 hdi->hdi_buckets = new_buckets; 167 return 0; 168} 169 170 171static int 172hash_insert (struct hash_data_in *hdi, struct data_block *block) 173{ 174 unsigned buckno; 175 176 if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi)) 177 return -1; 178 179 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 180 TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next); 181 ++hdi->hdi_count; 182 return 0; 183} 184 185 186static struct data_block * 187hash_find (const struct hash_data_in *hdi, uint64_t off) 188{ 189 struct data_block *block; 190 unsigned buckno; 191 192 buckno = BUCKNO(hdi->hdi_nbits, off); 193 TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next) 194 if (off == block->db_off) 195 return block; 196 return NULL; 197} 198 199 200static void 201hash_remove (struct hash_data_in *hdi, struct data_block *block) 202{ 203 unsigned buckno; 204 205 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 206 TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next); 207 --hdi->hdi_count; 208} 209 210 211static struct data_block * 212new_block (struct hash_data_in *hdi, uint64_t off) 213{ 214 struct data_block *block; 215 216 assert(0 == off % DB_DATA_SIZE); 217 218 block = malloc(sizeof(*block)); 219 if (!block) 220 return NULL; 221 222 block->db_off = off; 223 if (0 != hash_insert(hdi, block)) 224 { 225 free(block); 226 return NULL; 227 } 228 229 memset(block->db_set, 0, sizeof(block->db_set)); 230 return block; 231} 232 233 234static unsigned 235block_write (struct data_block *block, unsigned block_off, 236 const unsigned char *data, unsigned data_sz) 237{ 238 const unsigned char *begin, *end; 239 unsigned set, bit, n_full_sets, n; 240 uint64_t mask; 241 242 if (data_sz > DB_DATA_SIZE - block_off) 243 data_sz = DB_DATA_SIZE - block_off; 244 245 begin = data; 246 end = begin + data_sz; 247 set = block_off >> 6; 248 bit = block_off & 0x3F; 249 250 assert(set <= N_DB_SETS); 251 252 if (bit) 253 { 254 n = 64 - bit; 255 if (n > data_sz) 256 n = data_sz; 257 mask = ~((1ULL << bit ) - 1) 258 & ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1)); 259 block->db_set[ set ] |= mask; 260 memcpy(block->db_data + block_off, data, n); 261 data += n; 262 block_off += n; 263 ++set; 264 } 265 266 n_full_sets = (end - data) >> 6; 267 if (n_full_sets) 268 { 269 memcpy(block->db_data + block_off, data, n_full_sets * 64); 270 data += n_full_sets * 64; 271 block_off += n_full_sets * 64; 272 memset(&block->db_set[ set ], 0xFF, n_full_sets * 8); 273 set += n_full_sets; 274 } 275 276 if (data < end) 277 { 278 assert(end - data < 64); 279 block->db_set[ set ] |= ((1ULL << (end - data)) - 1); 280 memcpy(block->db_data + block_off, data, end - data); 281 data = end; 282 } 283 284 assert(set <= N_DB_SETS); 285 286 return data - begin; 287} 288 289 290static int 291has_bytes_after (const struct data_block *block, unsigned off) 292{ 293 unsigned bit, set; 294 int has; 295 296 set = off >> 6; 297 bit = off & 0x3F; 298 299 has = 0 != (block->db_set[ set ] >> bit); 300 ++set; 301 302 for ( ; set < N_DB_SETS; ++set) 303 has += 0 != block->db_set[ set ]; 304 305 return has > 0; 306} 307 308 309enum ins_frame 310data_in_hash_insert_data_frame (struct data_in *data_in, 311 const struct data_frame *data_frame, uint64_t read_offset) 312{ 313 struct hash_data_in *const hdi = HDI_PTR(data_in); 314 struct data_block *block; 315 uint64_t key, off, diff, fin_off; 316 const unsigned char *data; 317 unsigned size, nw; 318 319 if (data_frame->df_offset + data_frame->df_size < read_offset) 320 return INS_FRAME_DUP; 321 322 if ((hdi->hdi_flags & HDI_FIN) && 323 ( 324 (data_frame->df_fin && 325 data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off) 326 || 327 data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off 328 ) 329 ) 330 { 331 return INS_FRAME_ERR; 332 } 333 334 if (data_frame->df_offset < read_offset) 335 { 336 diff = read_offset - data_frame->df_offset; 337 assert(diff <= data_frame->df_size); 338 size = data_frame->df_size - diff; 339 off = data_frame->df_offset + diff; 340 data = data_frame->df_data + diff; 341 } 342 else 343 { 344 size = data_frame->df_size; 345 off = data_frame->df_offset; 346 data = data_frame->df_data; 347 } 348 349 key = off - (off % DB_DATA_SIZE); 350 do 351 { 352 block = hash_find(hdi, key); 353 if (!block) 354 { 355 block = new_block(hdi, key); 356 if (!block) 357 return INS_FRAME_ERR; 358 } 359 nw = block_write(block, off % DB_DATA_SIZE, data, size); 360 size -= nw; 361 off += nw; 362 data += nw; 363 key += DB_DATA_SIZE; 364 } 365 while (size > 0); 366 367 if (data_frame->df_fin) 368 { 369 fin_off = data_frame->df_offset + data_frame->df_size; 370 if (has_bytes_after(block, fin_off - block->db_off) || 371 hash_find(hdi, key)) 372 { 373 return INS_FRAME_ERR; 374 } 375 hdi->hdi_flags |= HDI_FIN; 376 hdi->hdi_fin_off = fin_off; 377 } 378 379 return INS_FRAME_OK; 380} 381 382 383static enum ins_frame 384hash_di_insert_frame (struct data_in *data_in, 385 struct stream_frame *new_frame, uint64_t read_offset) 386{ 387 struct hash_data_in *const hdi = HDI_PTR(data_in); 388 const struct data_frame *const data_frame = &new_frame->data_frame; 389 enum ins_frame ins; 390 391 ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset); 392 lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in); 393 lsquic_malo_put(new_frame); 394 return ins; 395} 396 397 398#if __GNUC__ 399# define ctz __builtin_ctzll 400#else 401static unsigned 402ctz (unsigned long long x) 403{ 404 unsigned n = 0; 405 if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; } 406 if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; } 407 if (0 == (x & ((1ULL << 8) - 1))) { n += 8; x >>= 8; } 408 if (0 == (x & ((1ULL << 4) - 1))) { n += 4; x >>= 4; } 409 if (0 == (x & ((1ULL << 2) - 1))) { n += 2; x >>= 2; } 410 if (0 == (x & ((1ULL << 1) - 1))) { n += 1; x >>= 1; } 411 return n; 412} 413#endif 414 415 416static unsigned 417n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit) 418{ 419 unsigned count; 420 uint64_t part; 421 422 part = ~(block->db_set[ set ] >> bit); 423 if (part) 424 { 425 count = ctz(part); 426 if (count < 64 - bit) 427 return count; 428 } 429 else 430 count = 64; 431 ++set; 432 433 for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set) 434 count += 64; 435 436 if (set < N_DB_SETS) 437 { 438 part = ~block->db_set[ set ]; 439 if (part) 440 count += ctz(part); 441 else 442 count += 64; 443 } 444 445 return count; 446} 447 448 449/* Data block is readable if there is at least one readable byte at 450 * `read_offset' or there is FIN at that offset. 451 */ 452static int 453setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset, 454 struct data_block *block) 455{ 456 unsigned set, bit; 457 uint64_t offset; 458 459 offset = read_offset % DB_DATA_SIZE; 460 set = offset >> 6; 461 bit = offset & 0x3F; 462 463 if (block->db_set[ set ] & (1ULL << bit)) 464 { 465 hdi->hdi_last_block = block; 466 hdi->hdi_data_frame.df_data = block->db_data; 467 hdi->hdi_data_frame.df_offset = block->db_off; 468 hdi->hdi_data_frame.df_read_off = offset; 469 hdi->hdi_data_frame.df_size = offset + 470 n_avail_bytes(block, set, bit); 471 hdi->hdi_data_frame.df_fin = 472 (hdi->hdi_flags & HDI_FIN) && 473 hdi->hdi_data_frame.df_read_off + 474 hdi->hdi_data_frame.df_size == hdi->hdi_fin_off; 475 return 1; 476 } 477 else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 478 { 479 hdi->hdi_last_block = block; 480 hdi->hdi_data_frame.df_data = NULL; 481 hdi->hdi_data_frame.df_offset = block->db_off; 482 hdi->hdi_data_frame.df_read_off = offset; 483 hdi->hdi_data_frame.df_size = offset; 484 hdi->hdi_data_frame.df_fin = 1; 485 return 1; 486 } 487 else 488 return 0; 489} 490 491 492static struct data_frame * 493hash_di_get_frame (struct data_in *data_in, uint64_t read_offset) 494{ 495 struct hash_data_in *const hdi = HDI_PTR(data_in); 496 struct data_block *block; 497 uint64_t key; 498 499 key = read_offset - (read_offset % DB_DATA_SIZE); 500 block = hash_find(hdi, key); 501 if (!block) 502 { 503 if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 504 { 505 hdi->hdi_last_block = NULL; 506 hdi->hdi_data_frame.df_data = NULL; 507 hdi->hdi_data_frame.df_offset = read_offset - 508 read_offset % DB_DATA_SIZE; 509 hdi->hdi_data_frame.df_read_off = 0; 510 hdi->hdi_data_frame.df_size = 0; 511 hdi->hdi_data_frame.df_fin = 1; 512 return &hdi->hdi_data_frame; 513 } 514 else 515 return NULL; 516 } 517 518 if (setup_data_frame(hdi, read_offset, block)) 519 return &hdi->hdi_data_frame; 520 else 521 return NULL; 522} 523 524 525static void 526hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame) 527{ 528 struct hash_data_in *const hdi = HDI_PTR(data_in); 529 struct data_block *const block = hdi->hdi_last_block; 530 531 if (block) 532 { 533 if (data_frame->df_read_off == DB_DATA_SIZE || 534 !has_bytes_after(block, data_frame->df_read_off)) 535 { 536 hash_remove(hdi, block); 537 free(block); 538 if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN)) 539 { 540 LSQ_DEBUG("hash empty, want to switch"); 541 hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL; 542 } 543 } 544 } 545 else 546 assert(data_frame->df_fin && data_frame->df_size == 0); 547} 548 549 550static int 551hash_di_empty (struct data_in *data_in) 552{ 553 struct hash_data_in *const hdi = HDI_PTR(data_in); 554 return hdi->hdi_count == 0; 555} 556 557 558struct data_in * 559hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset) 560{ 561 struct hash_data_in *const hdi = HDI_PTR(data_in); 562 struct data_in *new_data_in; 563 564 assert(hdi->hdi_count == 0); 565 566 new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id); 567 data_in->di_if->di_destroy(data_in); 568 569 return new_data_in; 570} 571 572 573static size_t 574hash_di_mem_used (struct data_in *data_in) 575{ 576 struct hash_data_in *const hdi = HDI_PTR(data_in); 577 const struct data_block *block; 578 size_t size; 579 unsigned n; 580 581 size = sizeof(*data_in); 582 583 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 584 TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next) 585 size += sizeof(*block); 586 587 size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]); 588 589 return size; 590} 591 592 593static const struct data_in_iface di_if_hash = { 594 .di_destroy = hash_di_destroy, 595 .di_empty = hash_di_empty, 596 .di_frame_done = hash_di_frame_done, 597 .di_get_frame = hash_di_get_frame, 598 .di_insert_frame = hash_di_insert_frame, 599 .di_mem_used = hash_di_mem_used, 600 .di_switch_impl = hash_di_switch_impl, 601}; 602struct data_in * 603data_in_hash_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id, 604 uint64_t byteage) 605{ 606 struct hash_data_in *hdi; 607 unsigned n; 608 609 hdi = malloc(sizeof(*hdi)); 610 if (!hdi) 611 return NULL; 612 613 hdi->hdi_data_in.di_if = &di_if_hash; 614 hdi->hdi_data_in.di_flags = 0; 615 hdi->hdi_conn_pub = conn_pub; 616 hdi->hdi_stream_id = stream_id; 617 hdi->hdi_fin_off = 0; 618 hdi->hdi_flags = 0; 619 hdi->hdi_last_block = NULL; 620 if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if 621 argument is 0 */) 622 hdi->hdi_nbits = my_log2(byteage / DB_DATA_SIZE) + 2; 623 else 624 hdi->hdi_nbits = 3; 625 hdi->hdi_count = 0; 626 hdi->hdi_buckets = malloc(sizeof(hdi->hdi_buckets[0]) * 627 N_BUCKETS(hdi->hdi_nbits)); 628 if (!hdi->hdi_buckets) 629 { 630 free(hdi); 631 return NULL; 632 } 633 634 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 635 TAILQ_INIT(&hdi->hdi_buckets[n]); 636 637 return &hdi->hdi_data_in; 638} 639