lsquic_di_hash.c revision 5392f7a3
1/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_di_hash.c -- Copy incoming data into a hash 4 * 5 * While this implementation copies the data, its memory use is limited, 6 * which makes it a good choice when we have a lot of stream frames 7 * coming in. 8 * 9 * Another difference is that it does not check for frame overlap, which 10 * is something that is present in Chrome, but it is not required by QUIC. 11 */ 12 13 14#include <assert.h> 15#include <inttypes.h> 16#include <stddef.h> 17#include <stdint.h> 18#include <stdlib.h> 19#include <string.h> 20#include <sys/queue.h> 21 22#include "lsquic.h" 23#include "lsquic_int_types.h" 24#include "lsquic_types.h" 25#include "lsquic_conn_flow.h" 26#include "lsquic_packet_common.h" 27#include "lsquic_packet_in.h" 28#include "lsquic_rtt.h" 29#include "lsquic_sfcw.h" 30#include "lsquic_varint.h" 31#include "lsquic_hq.h" 32#include "lsquic_varint.h" 33#include "lsquic_hash.h" 34#include "lsquic_stream.h" 35#include "lsquic_mm.h" 36#include "lsquic_malo.h" 37#include "lsquic_conn.h" 38#include "lsquic_conn_public.h" 39#include "lsquic_data_in_if.h" 40 41 42#define LSQUIC_LOGGER_MODULE LSQLM_DI 43#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(hdi->hdi_conn_pub->lconn) 44#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id 45#include "lsquic_logger.h" 46 47 48#define N_DB_SETS 57 49 50#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \ 51 sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t)) 52 53struct data_block 54{ 55 TAILQ_ENTRY(data_block) db_next; 56 uint64_t db_off; 57 uint64_t db_set[N_DB_SETS]; /* bit for each valid byte */ 58 unsigned char db_data[DB_DATA_SIZE]; 59}; 60 61typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1]; 62typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1]; 63typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1]; 64 65 66TAILQ_HEAD(dblock_head, data_block); 67 68 69static const struct data_in_iface *di_if_hash_ptr; 70 71 72struct hash_data_in 73{ 74 struct data_in hdi_data_in; 75 struct lsquic_conn_public *hdi_conn_pub; 76 uint64_t hdi_fin_off; 77 struct dblock_head *hdi_buckets; 78 struct data_block *hdi_last_block; 79 struct data_frame hdi_data_frame; 80 lsquic_stream_id_t hdi_stream_id; 81 unsigned hdi_count; 82 unsigned hdi_nbits; 83 enum { 84 HDI_FIN = (1 << 0), 85 } hdi_flags; 86}; 87 88 89#define HDI_PTR(data_in) (struct hash_data_in *) \ 90 ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in)) 91 92 93#define N_BUCKETS(n_bits) (1U << (n_bits)) 94#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1)) 95 96 97static unsigned 98my_log2 /* silly name to suppress compiler warning */ (unsigned sz) 99{ 100#if __GNUC__ 101 unsigned clz = __builtin_clz(sz); 102 return 32 - clz; 103#else 104 unsigned clz; 105 size_t y; 106 clz = 32; 107 y = sz >> 16; if (y) { clz -= 16; sz = y; } 108 y = sz >> 8; if (y) { clz -= 8; sz = y; } 109 y = sz >> 4; if (y) { clz -= 4; sz = y; } 110 y = sz >> 2; if (y) { clz -= 2; sz = y; } 111 y = sz >> 1; if (y) return 32 - clz + 1; 112 return 32 - clz + sz; 113#endif 114} 115 116 117struct data_in * 118data_in_hash_new (struct lsquic_conn_public *conn_pub, 119 lsquic_stream_id_t stream_id, uint64_t byteage) 120{ 121 struct hash_data_in *hdi; 122 unsigned n; 123 124 hdi = malloc(sizeof(*hdi)); 125 if (!hdi) 126 return NULL; 127 128 hdi->hdi_data_in.di_if = di_if_hash_ptr; 129 hdi->hdi_data_in.di_flags = 0; 130 hdi->hdi_conn_pub = conn_pub; 131 hdi->hdi_stream_id = stream_id; 132 hdi->hdi_fin_off = 0; 133 hdi->hdi_flags = 0; 134 hdi->hdi_last_block = NULL; 135 if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if 136 argument is 0 */) 137 hdi->hdi_nbits = my_log2(byteage / DB_DATA_SIZE) + 2; 138 else 139 hdi->hdi_nbits = 3; 140 hdi->hdi_count = 0; 141 hdi->hdi_buckets = malloc(sizeof(hdi->hdi_buckets[0]) * 142 N_BUCKETS(hdi->hdi_nbits)); 143 if (!hdi->hdi_buckets) 144 { 145 free(hdi); 146 return NULL; 147 } 148 149 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 150 TAILQ_INIT(&hdi->hdi_buckets[n]); 151 152 return &hdi->hdi_data_in; 153} 154 155 156static void 157hash_di_destroy (struct data_in *data_in) 158{ 159 struct hash_data_in *const hdi = HDI_PTR(data_in); 160 struct data_block *block; 161 unsigned n; 162 163 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 164 { 165 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 166 { 167 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 168 free(block); 169 } 170 } 171 free(hdi->hdi_buckets); 172 free(hdi); 173} 174 175 176static int 177hash_grow (struct hash_data_in *hdi) 178{ 179 struct dblock_head *new_buckets, *new[2]; 180 struct data_block *block; 181 unsigned n, old_nbits; 182 int idx; 183 184 old_nbits = hdi->hdi_nbits; 185 LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1)); 186 new_buckets = malloc(sizeof(hdi->hdi_buckets[0]) 187 * N_BUCKETS(old_nbits + 1)); 188 if (!new_buckets) 189 { 190 LSQ_WARN("malloc failed: potential trouble ahead"); 191 return -1; 192 } 193 194 for (n = 0; n < N_BUCKETS(old_nbits); ++n) 195 { 196 new[0] = &new_buckets[n]; 197 new[1] = &new_buckets[n + N_BUCKETS(old_nbits)]; 198 TAILQ_INIT(new[0]); 199 TAILQ_INIT(new[1]); 200 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 201 { 202 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 203 idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1; 204 TAILQ_INSERT_TAIL(new[idx], block, db_next); 205 } 206 } 207 free(hdi->hdi_buckets); 208 hdi->hdi_nbits = old_nbits + 1; 209 hdi->hdi_buckets = new_buckets; 210 return 0; 211} 212 213 214static int 215hash_insert (struct hash_data_in *hdi, struct data_block *block) 216{ 217 unsigned buckno; 218 219 if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi)) 220 return -1; 221 222 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 223 TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next); 224 ++hdi->hdi_count; 225 return 0; 226} 227 228 229static struct data_block * 230hash_find (const struct hash_data_in *hdi, uint64_t off) 231{ 232 struct data_block *block; 233 unsigned buckno; 234 235 buckno = BUCKNO(hdi->hdi_nbits, off); 236 TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next) 237 if (off == block->db_off) 238 return block; 239 return NULL; 240} 241 242 243static void 244hash_remove (struct hash_data_in *hdi, struct data_block *block) 245{ 246 unsigned buckno; 247 248 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 249 TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next); 250 --hdi->hdi_count; 251} 252 253 254static struct data_block * 255new_block (struct hash_data_in *hdi, uint64_t off) 256{ 257 struct data_block *block; 258 259 assert(0 == off % DB_DATA_SIZE); 260 261 block = malloc(sizeof(*block)); 262 if (!block) 263 return NULL; 264 265 block->db_off = off; 266 if (0 != hash_insert(hdi, block)) 267 { 268 free(block); 269 return NULL; 270 } 271 272 memset(block->db_set, 0, sizeof(block->db_set)); 273 return block; 274} 275 276 277static unsigned 278block_write (struct data_block *block, unsigned block_off, 279 const unsigned char *data, unsigned data_sz) 280{ 281 const unsigned char *begin, *end; 282 unsigned set, bit, n_full_sets, n; 283 uint64_t mask; 284 285 assert(block_off < DB_DATA_SIZE); 286 if (data_sz > DB_DATA_SIZE - block_off) 287 data_sz = DB_DATA_SIZE - block_off; 288 289 begin = data; 290 end = begin + data_sz; 291 set = block_off >> 6; 292 bit = block_off & 0x3F; 293 294 assert(set < N_DB_SETS); 295 296 if (bit) 297 { 298 n = 64 - bit; 299 if (n > data_sz) 300 n = data_sz; 301 mask = ~((1ULL << bit ) - 1) 302 & ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1)); 303 block->db_set[ set ] |= mask; 304 memcpy(block->db_data + block_off, data, n); 305 data += n; 306 block_off += n; 307 ++set; 308 } 309 310 n_full_sets = (end - data) >> 6; 311 if (n_full_sets) 312 { 313 memcpy(block->db_data + block_off, data, n_full_sets * 64); 314 data += n_full_sets * 64; 315 block_off += n_full_sets * 64; 316 memset(&block->db_set[ set ], 0xFF, n_full_sets * 8); 317 set += n_full_sets; 318 } 319 320 if (data < end) 321 { 322 assert(end - data < 64); 323 block->db_set[ set ] |= ((1ULL << (end - data)) - 1); 324 memcpy(block->db_data + block_off, data, end - data); 325 data = end; 326 } 327 328 assert(set <= N_DB_SETS); 329 330 return data - begin; 331} 332 333 334static int 335has_bytes_after (const struct data_block *block, unsigned off) 336{ 337 unsigned bit, set; 338 int has; 339 340 set = off >> 6; 341 bit = off & 0x3F; 342 343 has = 0 != (block->db_set[ set ] >> bit); 344 ++set; 345 346 for ( ; set < N_DB_SETS; ++set) 347 has += 0 != block->db_set[ set ]; 348 349 return has > 0; 350} 351 352 353enum ins_frame 354data_in_hash_insert_data_frame (struct data_in *data_in, 355 const struct data_frame *data_frame, uint64_t read_offset) 356{ 357 struct hash_data_in *const hdi = HDI_PTR(data_in); 358 struct data_block *block; 359 uint64_t key, off, diff, fin_off; 360 const unsigned char *data; 361 unsigned size, nw; 362 363 if (data_frame->df_offset + data_frame->df_size < read_offset) 364 { 365 if (data_frame->df_fin) 366 return INS_FRAME_ERR; 367 else 368 return INS_FRAME_DUP; 369 } 370 371 if ((hdi->hdi_flags & HDI_FIN) && 372 ( 373 (data_frame->df_fin && 374 data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off) 375 || 376 data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off 377 ) 378 ) 379 { 380 return INS_FRAME_ERR; 381 } 382 383 if (data_frame->df_offset < read_offset) 384 { 385 diff = read_offset - data_frame->df_offset; 386 assert(diff <= data_frame->df_size); 387 size = data_frame->df_size - diff; 388 off = data_frame->df_offset + diff; 389 data = data_frame->df_data + diff; 390 } 391 else 392 { 393 size = data_frame->df_size; 394 off = data_frame->df_offset; 395 data = data_frame->df_data; 396 } 397 398 key = off - (off % DB_DATA_SIZE); 399 do 400 { 401 block = hash_find(hdi, key); 402 if (!block) 403 { 404 block = new_block(hdi, key); 405 if (!block) 406 return INS_FRAME_ERR; 407 } 408 nw = block_write(block, off % DB_DATA_SIZE, data, size); 409 size -= nw; 410 off += nw; 411 data += nw; 412 key += DB_DATA_SIZE; 413 } 414 while (size > 0); 415 416 if (data_frame->df_fin) 417 { 418 fin_off = data_frame->df_offset + data_frame->df_size; 419 if (has_bytes_after(block, fin_off - block->db_off) || 420 hash_find(hdi, key)) 421 { 422 return INS_FRAME_ERR; 423 } 424 hdi->hdi_flags |= HDI_FIN; 425 hdi->hdi_fin_off = fin_off; 426 } 427 428 return INS_FRAME_OK; 429} 430 431 432static enum ins_frame 433hash_di_insert_frame (struct data_in *data_in, 434 struct stream_frame *new_frame, uint64_t read_offset) 435{ 436 struct hash_data_in *const hdi = HDI_PTR(data_in); 437 const struct data_frame *const data_frame = &new_frame->data_frame; 438 enum ins_frame ins; 439 440 ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset); 441 assert(ins != INS_FRAME_OVERLAP); 442 lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in); 443 lsquic_malo_put(new_frame); 444 return ins; 445} 446 447 448#if __GNUC__ 449# define ctz __builtin_ctzll 450#else 451static unsigned 452ctz (unsigned long long x) 453{ 454 unsigned n = 0; 455 if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; } 456 if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; } 457 if (0 == (x & ((1ULL << 8) - 1))) { n += 8; x >>= 8; } 458 if (0 == (x & ((1ULL << 4) - 1))) { n += 4; x >>= 4; } 459 if (0 == (x & ((1ULL << 2) - 1))) { n += 2; x >>= 2; } 460 if (0 == (x & ((1ULL << 1) - 1))) { n += 1; x >>= 1; } 461 return n; 462} 463#endif 464 465 466static unsigned 467n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit) 468{ 469 unsigned count; 470 uint64_t part; 471 472 part = ~(block->db_set[ set ] >> bit); 473 if (part) 474 { 475 count = ctz(part); 476 if (count < 64 - bit) 477 return count; 478 } 479 else 480 count = 64; 481 ++set; 482 483 for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set) 484 count += 64; 485 486 if (set < N_DB_SETS) 487 { 488 part = ~block->db_set[ set ]; 489 if (part) 490 count += ctz(part); 491 else 492 count += 64; 493 } 494 495 return count; 496} 497 498 499/* Data block is readable if there is at least one readable byte at 500 * `read_offset' or there is FIN at that offset. 501 */ 502static int 503setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset, 504 struct data_block *block) 505{ 506 unsigned set, bit; 507 uint64_t offset; 508 509 offset = read_offset % DB_DATA_SIZE; 510 set = offset >> 6; 511 bit = offset & 0x3F; 512 513 if (block->db_set[ set ] & (1ULL << bit)) 514 { 515 hdi->hdi_last_block = block; 516 hdi->hdi_data_frame.df_data = block->db_data; 517 hdi->hdi_data_frame.df_offset = block->db_off; 518 hdi->hdi_data_frame.df_read_off = offset; 519 hdi->hdi_data_frame.df_size = offset + 520 n_avail_bytes(block, set, bit); 521 hdi->hdi_data_frame.df_fin = 522 (hdi->hdi_flags & HDI_FIN) && 523 hdi->hdi_data_frame.df_read_off + 524 hdi->hdi_data_frame.df_size == hdi->hdi_fin_off; 525 return 1; 526 } 527 else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 528 { 529 hdi->hdi_last_block = block; 530 hdi->hdi_data_frame.df_data = NULL; 531 hdi->hdi_data_frame.df_offset = block->db_off; 532 hdi->hdi_data_frame.df_read_off = offset; 533 hdi->hdi_data_frame.df_size = offset; 534 hdi->hdi_data_frame.df_fin = 1; 535 return 1; 536 } 537 else 538 return 0; 539} 540 541 542static struct data_frame * 543hash_di_get_frame (struct data_in *data_in, uint64_t read_offset) 544{ 545 struct hash_data_in *const hdi = HDI_PTR(data_in); 546 struct data_block *block; 547 uint64_t key; 548 549 key = read_offset - (read_offset % DB_DATA_SIZE); 550 block = hash_find(hdi, key); 551 if (!block) 552 { 553 if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 554 { 555 hdi->hdi_last_block = NULL; 556 hdi->hdi_data_frame.df_data = NULL; 557 hdi->hdi_data_frame.df_offset = read_offset - 558 read_offset % DB_DATA_SIZE; 559 hdi->hdi_data_frame.df_read_off = 0; 560 hdi->hdi_data_frame.df_size = 0; 561 hdi->hdi_data_frame.df_fin = 1; 562 return &hdi->hdi_data_frame; 563 } 564 else 565 return NULL; 566 } 567 568 if (setup_data_frame(hdi, read_offset, block)) 569 return &hdi->hdi_data_frame; 570 else 571 return NULL; 572} 573 574 575static void 576hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame) 577{ 578 struct hash_data_in *const hdi = HDI_PTR(data_in); 579 struct data_block *const block = hdi->hdi_last_block; 580 581 if (block) 582 { 583 if (data_frame->df_read_off == DB_DATA_SIZE || 584 !has_bytes_after(block, data_frame->df_read_off)) 585 { 586 hash_remove(hdi, block); 587 free(block); 588 if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN)) 589 { 590 LSQ_DEBUG("hash empty, want to switch"); 591 hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL; 592 } 593 } 594 } 595 else 596 assert(data_frame->df_fin && data_frame->df_size == 0); 597} 598 599 600static int 601hash_di_empty (struct data_in *data_in) 602{ 603 struct hash_data_in *const hdi = HDI_PTR(data_in); 604 return hdi->hdi_count == 0; 605} 606 607 608struct data_in * 609hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset) 610{ 611 struct hash_data_in *const hdi = HDI_PTR(data_in); 612 struct data_in *new_data_in; 613 614 assert(hdi->hdi_count == 0); 615 616 new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id); 617 data_in->di_if->di_destroy(data_in); 618 619 return new_data_in; 620} 621 622 623static size_t 624hash_di_mem_used (struct data_in *data_in) 625{ 626 struct hash_data_in *const hdi = HDI_PTR(data_in); 627 const struct data_block *block; 628 size_t size; 629 unsigned n; 630 631 size = sizeof(*data_in); 632 633 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 634 TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next) 635 size += sizeof(*block); 636 637 size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]); 638 639 return size; 640} 641 642 643static void 644hash_di_dump_state (struct data_in *data_in) 645{ 646 const struct hash_data_in *const hdi = HDI_PTR(data_in); 647 const struct data_block *block; 648 unsigned n; 649 650 LSQ_DEBUG("hash state: flags: %X; fin off: %"PRIu64"; count: %u", 651 hdi->hdi_flags, hdi->hdi_fin_off, hdi->hdi_count); 652 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 653 TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next) 654 LSQ_DEBUG("block: off: %"PRIu64, block->db_off); 655} 656 657 658static uint64_t 659hash_di_readable_bytes (struct data_in *data_in, uint64_t read_offset) 660{ 661 const struct data_frame *data_frame; 662 uint64_t starting_offset; 663 664 starting_offset = read_offset; 665 while (data_frame = hash_di_get_frame(data_in, read_offset), 666 data_frame && data_frame->df_size - data_frame->df_read_off) 667 read_offset += data_frame->df_size - data_frame->df_read_off; 668 669 return read_offset - starting_offset; 670} 671 672 673static const struct data_in_iface di_if_hash = { 674 .di_destroy = hash_di_destroy, 675 .di_dump_state = hash_di_dump_state, 676 .di_empty = hash_di_empty, 677 .di_frame_done = hash_di_frame_done, 678 .di_get_frame = hash_di_get_frame, 679 .di_insert_frame = hash_di_insert_frame, 680 .di_mem_used = hash_di_mem_used, 681 .di_readable_bytes 682 = hash_di_readable_bytes, 683 .di_switch_impl = hash_di_switch_impl, 684}; 685 686static const struct data_in_iface *di_if_hash_ptr = &di_if_hash; 687