lsquic_di_hash.c revision a5fa05f9
1/* Copyright (c) 2017 - 2020 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_di_hash.c -- Copy incoming data into a hash 4 * 5 * While this implementation copies the data, its memory use is limited, 6 * which makes it a good choice when we have a lot of stream frames 7 * coming in. 8 * 9 * Another difference is that it does not check for frame overlap, which 10 * is something that is present in Chrome, but it is not required by QUIC. 11 */ 12 13 14#include <assert.h> 15#include <inttypes.h> 16#include <stddef.h> 17#include <stdint.h> 18#include <stdlib.h> 19#include <string.h> 20#include <sys/queue.h> 21 22#include "lsquic.h" 23#include "lsquic_int_types.h" 24#include "lsquic_types.h" 25#include "lsquic_conn_flow.h" 26#include "lsquic_packet_common.h" 27#include "lsquic_packet_in.h" 28#include "lsquic_rtt.h" 29#include "lsquic_sfcw.h" 30#include "lsquic_varint.h" 31#include "lsquic_hq.h" 32#include "lsquic_hash.h" 33#include "lsquic_stream.h" 34#include "lsquic_mm.h" 35#include "lsquic_malo.h" 36#include "lsquic_conn.h" 37#include "lsquic_conn_public.h" 38#include "lsquic_data_in_if.h" 39 40 41#define LSQUIC_LOGGER_MODULE LSQLM_DI 42#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(hdi->hdi_conn_pub->lconn) 43#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id 44#include "lsquic_logger.h" 45 46 47#define N_DB_SETS 57 48 49#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \ 50 sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t)) 51 52struct data_block 53{ 54 TAILQ_ENTRY(data_block) db_next; 55 uint64_t db_off; 56 uint64_t db_set[N_DB_SETS]; /* bit for each valid byte */ 57 unsigned char db_data[DB_DATA_SIZE]; 58}; 59 60typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1]; 61typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1]; 62typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1]; 63 64 65TAILQ_HEAD(dblock_head, data_block); 66 67 68static const struct data_in_iface *di_if_hash_ptr; 69 70 71struct hash_data_in 72{ 73 struct data_in hdi_data_in; 74 struct lsquic_conn_public *hdi_conn_pub; 75 uint64_t hdi_fin_off; 76 struct dblock_head *hdi_buckets; 77 struct data_block *hdi_last_block; 78 struct data_frame hdi_data_frame; 79 lsquic_stream_id_t hdi_stream_id; 80 unsigned hdi_count; 81 unsigned hdi_nbits; 82 enum { 83 HDI_FIN = (1 << 0), 84 } hdi_flags; 85}; 86 87 88#define HDI_PTR(data_in) (struct hash_data_in *) \ 89 ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in)) 90 91 92#define N_BUCKETS(n_bits) (1U << (n_bits)) 93#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1)) 94 95 96static unsigned 97my_log2 /* silly name to suppress compiler warning */ (unsigned sz) 98{ 99#if __GNUC__ 100 unsigned clz = __builtin_clz(sz); 101 return 32 - clz; 102#else 103 unsigned clz; 104 size_t y; 105 clz = 32; 106 y = sz >> 16; if (y) { clz -= 16; sz = y; } 107 y = sz >> 8; if (y) { clz -= 8; sz = y; } 108 y = sz >> 4; if (y) { clz -= 4; sz = y; } 109 y = sz >> 2; if (y) { clz -= 2; sz = y; } 110 y = sz >> 1; if (y) return 32 - clz + 1; 111 return 32 - clz + sz; 112#endif 113} 114 115 116struct data_in * 117lsquic_data_in_hash_new (struct lsquic_conn_public *conn_pub, 118 lsquic_stream_id_t stream_id, uint64_t byteage) 119{ 120 struct hash_data_in *hdi; 121 unsigned n; 122 123 hdi = malloc(sizeof(*hdi)); 124 if (!hdi) 125 return NULL; 126 127 hdi->hdi_data_in.di_if = di_if_hash_ptr; 128 hdi->hdi_data_in.di_flags = 0; 129 hdi->hdi_conn_pub = conn_pub; 130 hdi->hdi_stream_id = stream_id; 131 hdi->hdi_fin_off = 0; 132 hdi->hdi_flags = 0; 133 hdi->hdi_last_block = NULL; 134 if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if 135 argument is 0 */) 136 hdi->hdi_nbits = my_log2(byteage / DB_DATA_SIZE) + 2; 137 else 138 hdi->hdi_nbits = 3; 139 hdi->hdi_count = 0; 140 hdi->hdi_buckets = malloc(sizeof(hdi->hdi_buckets[0]) * 141 N_BUCKETS(hdi->hdi_nbits)); 142 if (!hdi->hdi_buckets) 143 { 144 free(hdi); 145 return NULL; 146 } 147 148 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 149 TAILQ_INIT(&hdi->hdi_buckets[n]); 150 151 return &hdi->hdi_data_in; 152} 153 154 155static void 156hash_di_destroy (struct data_in *data_in) 157{ 158 struct hash_data_in *const hdi = HDI_PTR(data_in); 159 struct data_block *block; 160 unsigned n; 161 162 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 163 { 164 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 165 { 166 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 167 free(block); 168 } 169 } 170 free(hdi->hdi_buckets); 171 free(hdi); 172} 173 174 175static int 176hash_grow (struct hash_data_in *hdi) 177{ 178 struct dblock_head *new_buckets, *new[2]; 179 struct data_block *block; 180 unsigned n, old_nbits; 181 int idx; 182 183 old_nbits = hdi->hdi_nbits; 184 LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1)); 185 new_buckets = malloc(sizeof(hdi->hdi_buckets[0]) 186 * N_BUCKETS(old_nbits + 1)); 187 if (!new_buckets) 188 { 189 LSQ_WARN("malloc failed: potential trouble ahead"); 190 return -1; 191 } 192 193 for (n = 0; n < N_BUCKETS(old_nbits); ++n) 194 { 195 new[0] = &new_buckets[n]; 196 new[1] = &new_buckets[n + N_BUCKETS(old_nbits)]; 197 TAILQ_INIT(new[0]); 198 TAILQ_INIT(new[1]); 199 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 200 { 201 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 202 idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1; 203 TAILQ_INSERT_TAIL(new[idx], block, db_next); 204 } 205 } 206 free(hdi->hdi_buckets); 207 hdi->hdi_nbits = old_nbits + 1; 208 hdi->hdi_buckets = new_buckets; 209 return 0; 210} 211 212 213static int 214hash_insert (struct hash_data_in *hdi, struct data_block *block) 215{ 216 unsigned buckno; 217 218 if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi)) 219 return -1; 220 221 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 222 TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next); 223 ++hdi->hdi_count; 224 return 0; 225} 226 227 228static struct data_block * 229hash_find (const struct hash_data_in *hdi, uint64_t off) 230{ 231 struct data_block *block; 232 unsigned buckno; 233 234 buckno = BUCKNO(hdi->hdi_nbits, off); 235 TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next) 236 if (off == block->db_off) 237 return block; 238 return NULL; 239} 240 241 242static void 243hash_remove (struct hash_data_in *hdi, struct data_block *block) 244{ 245 unsigned buckno; 246 247 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 248 TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next); 249 --hdi->hdi_count; 250} 251 252 253static struct data_block * 254new_block (struct hash_data_in *hdi, uint64_t off) 255{ 256 struct data_block *block; 257 258 assert(0 == off % DB_DATA_SIZE); 259 260 block = malloc(sizeof(*block)); 261 if (!block) 262 return NULL; 263 264 block->db_off = off; 265 if (0 != hash_insert(hdi, block)) 266 { 267 free(block); 268 return NULL; 269 } 270 271 memset(block->db_set, 0, sizeof(block->db_set)); 272 return block; 273} 274 275 276static unsigned 277block_write (struct data_block *block, unsigned block_off, 278 const unsigned char *data, unsigned data_sz) 279{ 280 const unsigned char *begin, *end; 281 unsigned set, bit, n_full_sets, n; 282 uint64_t mask; 283 284 assert(block_off < DB_DATA_SIZE); 285 if (data_sz > DB_DATA_SIZE - block_off) 286 data_sz = DB_DATA_SIZE - block_off; 287 288 begin = data; 289 end = begin + data_sz; 290 set = block_off >> 6; 291 bit = block_off & 0x3F; 292 293 assert(set < N_DB_SETS); 294 295 if (bit) 296 { 297 n = 64 - bit; 298 if (n > data_sz) 299 n = data_sz; 300 mask = ~((1ULL << bit ) - 1) 301 & ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1)); 302 block->db_set[ set ] |= mask; 303 memcpy(block->db_data + block_off, data, n); 304 data += n; 305 block_off += n; 306 ++set; 307 } 308 309 n_full_sets = (end - data) >> 6; 310 if (n_full_sets) 311 { 312 memcpy(block->db_data + block_off, data, n_full_sets * 64); 313 data += n_full_sets * 64; 314 block_off += n_full_sets * 64; 315 memset(&block->db_set[ set ], 0xFF, n_full_sets * 8); 316 set += n_full_sets; 317 } 318 319 if (data < end) 320 { 321 assert(end - data < 64); 322 block->db_set[ set ] |= ((1ULL << (end - data)) - 1); 323 memcpy(block->db_data + block_off, data, end - data); 324 data = end; 325 } 326 327 assert(set <= N_DB_SETS); 328 329 return data - begin; 330} 331 332 333static int 334has_bytes_after (const struct data_block *block, unsigned off) 335{ 336 unsigned bit, set; 337 int has; 338 339 set = off >> 6; 340 bit = off & 0x3F; 341 342 has = 0 != (block->db_set[ set ] >> bit); 343 ++set; 344 345 for ( ; set < N_DB_SETS; ++set) 346 has += 0 != block->db_set[ set ]; 347 348 return has > 0; 349} 350 351 352enum ins_frame 353lsquic_data_in_hash_insert_data_frame (struct data_in *data_in, 354 const struct data_frame *data_frame, uint64_t read_offset) 355{ 356 struct hash_data_in *const hdi = HDI_PTR(data_in); 357 struct data_block *block; 358 uint64_t key, off, diff, fin_off; 359 const unsigned char *data; 360 unsigned size, nw; 361 362 if (data_frame->df_offset + data_frame->df_size < read_offset) 363 { 364 if (data_frame->df_fin) 365 return INS_FRAME_ERR; 366 else 367 return INS_FRAME_DUP; 368 } 369 370 if ((hdi->hdi_flags & HDI_FIN) && 371 ( 372 (data_frame->df_fin && 373 data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off) 374 || 375 data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off 376 ) 377 ) 378 { 379 return INS_FRAME_ERR; 380 } 381 382 if (data_frame->df_offset < read_offset) 383 { 384 diff = read_offset - data_frame->df_offset; 385 assert(diff <= data_frame->df_size); 386 size = data_frame->df_size - diff; 387 off = data_frame->df_offset + diff; 388 data = data_frame->df_data + diff; 389 } 390 else 391 { 392 size = data_frame->df_size; 393 off = data_frame->df_offset; 394 data = data_frame->df_data; 395 } 396 397 key = off - (off % DB_DATA_SIZE); 398 do 399 { 400 block = hash_find(hdi, key); 401 if (!block) 402 { 403 block = new_block(hdi, key); 404 if (!block) 405 return INS_FRAME_ERR; 406 } 407 nw = block_write(block, off % DB_DATA_SIZE, data, size); 408 size -= nw; 409 off += nw; 410 data += nw; 411 key += DB_DATA_SIZE; 412 } 413 while (size > 0); 414 415 if (data_frame->df_fin) 416 { 417 fin_off = data_frame->df_offset + data_frame->df_size; 418 if (has_bytes_after(block, fin_off - block->db_off) || 419 hash_find(hdi, key)) 420 { 421 return INS_FRAME_ERR; 422 } 423 hdi->hdi_flags |= HDI_FIN; 424 hdi->hdi_fin_off = fin_off; 425 } 426 427 return INS_FRAME_OK; 428} 429 430 431static enum ins_frame 432hash_di_insert_frame (struct data_in *data_in, 433 struct stream_frame *new_frame, uint64_t read_offset) 434{ 435 struct hash_data_in *const hdi = HDI_PTR(data_in); 436 const struct data_frame *const data_frame = &new_frame->data_frame; 437 enum ins_frame ins; 438 439 ins = lsquic_data_in_hash_insert_data_frame(data_in, data_frame, 440 read_offset); 441 assert(ins != INS_FRAME_OVERLAP); 442 lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in); 443 if (ins != INS_FRAME_OK) 444 lsquic_malo_put(new_frame); 445 return ins; 446} 447 448 449#if __GNUC__ 450# define ctz __builtin_ctzll 451#else 452static unsigned 453ctz (unsigned long long x) 454{ 455 unsigned n = 0; 456 if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; } 457 if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; } 458 if (0 == (x & ((1ULL << 8) - 1))) { n += 8; x >>= 8; } 459 if (0 == (x & ((1ULL << 4) - 1))) { n += 4; x >>= 4; } 460 if (0 == (x & ((1ULL << 2) - 1))) { n += 2; x >>= 2; } 461 if (0 == (x & ((1ULL << 1) - 1))) { n += 1; x >>= 1; } 462 return n; 463} 464#endif 465 466 467static unsigned 468n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit) 469{ 470 unsigned count; 471 uint64_t part; 472 473 part = ~(block->db_set[ set ] >> bit); 474 if (part) 475 { 476 count = ctz(part); 477 if (count < 64 - bit) 478 return count; 479 } 480 else 481 count = 64; 482 ++set; 483 484 for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set) 485 count += 64; 486 487 if (set < N_DB_SETS) 488 { 489 part = ~block->db_set[ set ]; 490 if (part) 491 count += ctz(part); 492 else 493 count += 64; 494 } 495 496 return count; 497} 498 499 500/* Data block is readable if there is at least one readable byte at 501 * `read_offset' or there is FIN at that offset. 502 */ 503static int 504setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset, 505 struct data_block *block) 506{ 507 unsigned set, bit; 508 uint64_t offset; 509 510 offset = read_offset % DB_DATA_SIZE; 511 set = offset >> 6; 512 bit = offset & 0x3F; 513 514 if (block->db_set[ set ] & (1ULL << bit)) 515 { 516 hdi->hdi_last_block = block; 517 hdi->hdi_data_frame.df_data = block->db_data; 518 hdi->hdi_data_frame.df_offset = block->db_off; 519 hdi->hdi_data_frame.df_read_off = offset; 520 hdi->hdi_data_frame.df_size = offset + 521 n_avail_bytes(block, set, bit); 522 hdi->hdi_data_frame.df_fin = 523 (hdi->hdi_flags & HDI_FIN) && 524 hdi->hdi_data_frame.df_read_off + 525 hdi->hdi_data_frame.df_size == hdi->hdi_fin_off; 526 return 1; 527 } 528 else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 529 { 530 hdi->hdi_last_block = block; 531 hdi->hdi_data_frame.df_data = NULL; 532 hdi->hdi_data_frame.df_offset = block->db_off; 533 hdi->hdi_data_frame.df_read_off = offset; 534 hdi->hdi_data_frame.df_size = offset; 535 hdi->hdi_data_frame.df_fin = 1; 536 return 1; 537 } 538 else 539 return 0; 540} 541 542 543static struct data_frame * 544hash_di_get_frame (struct data_in *data_in, uint64_t read_offset) 545{ 546 struct hash_data_in *const hdi = HDI_PTR(data_in); 547 struct data_block *block; 548 uint64_t key; 549 550 key = read_offset - (read_offset % DB_DATA_SIZE); 551 block = hash_find(hdi, key); 552 if (!block) 553 { 554 if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 555 { 556 hdi->hdi_last_block = NULL; 557 hdi->hdi_data_frame.df_data = NULL; 558 hdi->hdi_data_frame.df_offset = read_offset - 559 read_offset % DB_DATA_SIZE; 560 hdi->hdi_data_frame.df_read_off = 0; 561 hdi->hdi_data_frame.df_size = 0; 562 hdi->hdi_data_frame.df_fin = 1; 563 return &hdi->hdi_data_frame; 564 } 565 else 566 return NULL; 567 } 568 569 if (setup_data_frame(hdi, read_offset, block)) 570 return &hdi->hdi_data_frame; 571 else 572 return NULL; 573} 574 575 576static void 577hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame) 578{ 579 struct hash_data_in *const hdi = HDI_PTR(data_in); 580 struct data_block *const block = hdi->hdi_last_block; 581 582 if (block) 583 { 584 if (data_frame->df_read_off == DB_DATA_SIZE || 585 !has_bytes_after(block, data_frame->df_read_off)) 586 { 587 hash_remove(hdi, block); 588 free(block); 589 if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN)) 590 { 591 LSQ_DEBUG("hash empty, want to switch"); 592 hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL; 593 } 594 } 595 } 596 else 597 assert(data_frame->df_fin && data_frame->df_size == 0); 598} 599 600 601static int 602hash_di_empty (struct data_in *data_in) 603{ 604 struct hash_data_in *const hdi = HDI_PTR(data_in); 605 return hdi->hdi_count == 0; 606} 607 608 609static struct data_in * 610hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset) 611{ 612 struct hash_data_in *const hdi = HDI_PTR(data_in); 613 struct data_in *new_data_in; 614 615 assert(hdi->hdi_count == 0); 616 617 new_data_in = lsquic_data_in_nocopy_new(hdi->hdi_conn_pub, 618 hdi->hdi_stream_id); 619 data_in->di_if->di_destroy(data_in); 620 621 return new_data_in; 622} 623 624 625static size_t 626hash_di_mem_used (struct data_in *data_in) 627{ 628 struct hash_data_in *const hdi = HDI_PTR(data_in); 629 const struct data_block *block; 630 size_t size; 631 unsigned n; 632 633 size = sizeof(*data_in); 634 635 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 636 TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next) 637 size += sizeof(*block); 638 639 size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]); 640 641 return size; 642} 643 644 645static void 646hash_di_dump_state (struct data_in *data_in) 647{ 648 const struct hash_data_in *const hdi = HDI_PTR(data_in); 649 const struct data_block *block; 650 unsigned n; 651 652 LSQ_DEBUG("hash state: flags: %X; fin off: %"PRIu64"; count: %u", 653 hdi->hdi_flags, hdi->hdi_fin_off, hdi->hdi_count); 654 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 655 TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next) 656 LSQ_DEBUG("block: off: %"PRIu64, block->db_off); 657} 658 659 660static uint64_t 661hash_di_readable_bytes (struct data_in *data_in, uint64_t read_offset) 662{ 663 const struct data_frame *data_frame; 664 uint64_t starting_offset; 665 666 starting_offset = read_offset; 667 while (data_frame = hash_di_get_frame(data_in, read_offset), 668 data_frame && data_frame->df_size - data_frame->df_read_off) 669 read_offset += data_frame->df_size - data_frame->df_read_off; 670 671 return read_offset - starting_offset; 672} 673 674 675static const struct data_in_iface di_if_hash = { 676 .di_destroy = hash_di_destroy, 677 .di_dump_state = hash_di_dump_state, 678 .di_empty = hash_di_empty, 679 .di_frame_done = hash_di_frame_done, 680 .di_get_frame = hash_di_get_frame, 681 .di_insert_frame = hash_di_insert_frame, 682 .di_mem_used = hash_di_mem_used, 683 .di_own_on_ok = 0, 684 .di_readable_bytes 685 = hash_di_readable_bytes, 686 .di_switch_impl = hash_di_switch_impl, 687}; 688 689static const struct data_in_iface *di_if_hash_ptr = &di_if_hash; 690