lsquic_di_hash.c revision 03fb9352
1/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_di_hash.c -- Copy incoming data into a hash 4 * 5 * While this implementation copies the data, its memory use is limited, 6 * which makes it a good choice when we have a lot of stream frames 7 * coming in. 8 * 9 * Another difference is that it does not check for frame overlap, which 10 * is something that is present in Chrome, but it is not required by QUIC. 11 */ 12 13 14#include <assert.h> 15#include <inttypes.h> 16#include <stddef.h> 17#include <stdint.h> 18#include <stdlib.h> 19#include <string.h> 20#include <sys/queue.h> 21 22#include "lsquic.h" 23#include "lsquic_int_types.h" 24#include "lsquic_types.h" 25#include "lsquic_conn_flow.h" 26#include "lsquic_packet_common.h" 27#include "lsquic_packet_in.h" 28#include "lsquic_rtt.h" 29#include "lsquic_sfcw.h" 30#include "lsquic_stream.h" 31#include "lsquic_mm.h" 32#include "lsquic_malo.h" 33#include "lsquic_conn.h" 34#include "lsquic_conn_public.h" 35#include "lsquic_data_in_if.h" 36 37 38#define LSQUIC_LOGGER_MODULE LSQLM_DI 39#define LSQUIC_LOG_CONN_ID hdi->hdi_conn_pub->lconn->cn_cid 40#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id 41#include "lsquic_logger.h" 42 43 44#define N_DB_SETS 57 45 46#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \ 47 sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t)) 48 49struct data_block 50{ 51 TAILQ_ENTRY(data_block) db_next; 52 uint64_t db_off; 53 uint64_t db_set[N_DB_SETS]; /* bit for each valid byte */ 54 unsigned char db_data[DB_DATA_SIZE]; 55}; 56 57typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1]; 58typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1]; 59typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1]; 60 61 62TAILQ_HEAD(dblock_head, data_block); 63 64 65static const struct data_in_iface *di_if_hash_ptr; 66 67 68struct hash_data_in 69{ 70 struct data_in hdi_data_in; 71 struct lsquic_conn_public *hdi_conn_pub; 72 uint64_t hdi_fin_off; 73 struct dblock_head *hdi_buckets; 74 struct data_block *hdi_last_block; 75 struct data_frame hdi_data_frame; 76 uint32_t hdi_stream_id; 77 unsigned hdi_count; 78 unsigned hdi_nbits; 79 enum { 80 HDI_FIN = (1 << 0), 81 } hdi_flags; 82}; 83 84 85#define HDI_PTR(data_in) (struct hash_data_in *) \ 86 ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in)) 87 88 89#define N_BUCKETS(n_bits) (1U << (n_bits)) 90#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1)) 91 92 93static unsigned 94my_log2 /* silly name to suppress compiler warning */ (unsigned sz) 95{ 96#if __GNUC__ 97 unsigned clz = __builtin_clz(sz); 98 return 32 - clz; 99#else 100 unsigned clz; 101 size_t y; 102 clz = 32; 103 y = sz >> 16; if (y) { clz -= 16; sz = y; } 104 y = sz >> 8; if (y) { clz -= 8; sz = y; } 105 y = sz >> 4; if (y) { clz -= 4; sz = y; } 106 y = sz >> 2; if (y) { clz -= 2; sz = y; } 107 y = sz >> 1; if (y) return 32 - clz + 1; 108 return 32 - clz + sz; 109#endif 110} 111 112 113struct data_in * 114data_in_hash_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id, 115 uint64_t byteage) 116{ 117 struct hash_data_in *hdi; 118 unsigned n; 119 120 hdi = malloc(sizeof(*hdi)); 121 if (!hdi) 122 return NULL; 123 124 hdi->hdi_data_in.di_if = di_if_hash_ptr; 125 hdi->hdi_data_in.di_flags = 0; 126 hdi->hdi_conn_pub = conn_pub; 127 hdi->hdi_stream_id = stream_id; 128 hdi->hdi_fin_off = 0; 129 hdi->hdi_flags = 0; 130 hdi->hdi_last_block = NULL; 131 if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if 132 argument is 0 */) 133 hdi->hdi_nbits = my_log2(byteage / DB_DATA_SIZE) + 2; 134 else 135 hdi->hdi_nbits = 3; 136 hdi->hdi_count = 0; 137 hdi->hdi_buckets = malloc(sizeof(hdi->hdi_buckets[0]) * 138 N_BUCKETS(hdi->hdi_nbits)); 139 if (!hdi->hdi_buckets) 140 { 141 free(hdi); 142 return NULL; 143 } 144 145 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 146 TAILQ_INIT(&hdi->hdi_buckets[n]); 147 148 return &hdi->hdi_data_in; 149} 150 151 152static void 153hash_di_destroy (struct data_in *data_in) 154{ 155 struct hash_data_in *const hdi = HDI_PTR(data_in); 156 struct data_block *block; 157 unsigned n; 158 159 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 160 { 161 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 162 { 163 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 164 free(block); 165 } 166 } 167 free(hdi->hdi_buckets); 168 free(hdi); 169} 170 171 172static int 173hash_grow (struct hash_data_in *hdi) 174{ 175 struct dblock_head *new_buckets, *new[2]; 176 struct data_block *block; 177 unsigned n, old_nbits; 178 int idx; 179 180 old_nbits = hdi->hdi_nbits; 181 LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1)); 182 new_buckets = malloc(sizeof(hdi->hdi_buckets[0]) 183 * N_BUCKETS(old_nbits + 1)); 184 if (!new_buckets) 185 { 186 LSQ_WARN("malloc failed: potential trouble ahead"); 187 return -1; 188 } 189 190 for (n = 0; n < N_BUCKETS(old_nbits); ++n) 191 { 192 new[0] = &new_buckets[n]; 193 new[1] = &new_buckets[n + N_BUCKETS(old_nbits)]; 194 TAILQ_INIT(new[0]); 195 TAILQ_INIT(new[1]); 196 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 197 { 198 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 199 idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1; 200 TAILQ_INSERT_TAIL(new[idx], block, db_next); 201 } 202 } 203 free(hdi->hdi_buckets); 204 hdi->hdi_nbits = old_nbits + 1; 205 hdi->hdi_buckets = new_buckets; 206 return 0; 207} 208 209 210static int 211hash_insert (struct hash_data_in *hdi, struct data_block *block) 212{ 213 unsigned buckno; 214 215 if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi)) 216 return -1; 217 218 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 219 TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next); 220 ++hdi->hdi_count; 221 return 0; 222} 223 224 225static struct data_block * 226hash_find (const struct hash_data_in *hdi, uint64_t off) 227{ 228 struct data_block *block; 229 unsigned buckno; 230 231 buckno = BUCKNO(hdi->hdi_nbits, off); 232 TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next) 233 if (off == block->db_off) 234 return block; 235 return NULL; 236} 237 238 239static void 240hash_remove (struct hash_data_in *hdi, struct data_block *block) 241{ 242 unsigned buckno; 243 244 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 245 TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next); 246 --hdi->hdi_count; 247} 248 249 250static struct data_block * 251new_block (struct hash_data_in *hdi, uint64_t off) 252{ 253 struct data_block *block; 254 255 assert(0 == off % DB_DATA_SIZE); 256 257 block = malloc(sizeof(*block)); 258 if (!block) 259 return NULL; 260 261 block->db_off = off; 262 if (0 != hash_insert(hdi, block)) 263 { 264 free(block); 265 return NULL; 266 } 267 268 memset(block->db_set, 0, sizeof(block->db_set)); 269 return block; 270} 271 272 273static unsigned 274block_write (struct data_block *block, unsigned block_off, 275 const unsigned char *data, unsigned data_sz) 276{ 277 const unsigned char *begin, *end; 278 unsigned set, bit, n_full_sets, n; 279 uint64_t mask; 280 281 assert(block_off < DB_DATA_SIZE); 282 if (data_sz > DB_DATA_SIZE - block_off) 283 data_sz = DB_DATA_SIZE - block_off; 284 285 begin = data; 286 end = begin + data_sz; 287 set = block_off >> 6; 288 bit = block_off & 0x3F; 289 290 assert(set < N_DB_SETS); 291 292 if (bit) 293 { 294 n = 64 - bit; 295 if (n > data_sz) 296 n = data_sz; 297 mask = ~((1ULL << bit ) - 1) 298 & ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1)); 299 block->db_set[ set ] |= mask; 300 memcpy(block->db_data + block_off, data, n); 301 data += n; 302 block_off += n; 303 ++set; 304 } 305 306 n_full_sets = (end - data) >> 6; 307 if (n_full_sets) 308 { 309 memcpy(block->db_data + block_off, data, n_full_sets * 64); 310 data += n_full_sets * 64; 311 block_off += n_full_sets * 64; 312 memset(&block->db_set[ set ], 0xFF, n_full_sets * 8); 313 set += n_full_sets; 314 } 315 316 if (data < end) 317 { 318 assert(end - data < 64); 319 block->db_set[ set ] |= ((1ULL << (end - data)) - 1); 320 memcpy(block->db_data + block_off, data, end - data); 321 data = end; 322 } 323 324 assert(set <= N_DB_SETS); 325 326 return data - begin; 327} 328 329 330static int 331has_bytes_after (const struct data_block *block, unsigned off) 332{ 333 unsigned bit, set; 334 int has; 335 336 set = off >> 6; 337 bit = off & 0x3F; 338 339 has = 0 != (block->db_set[ set ] >> bit); 340 ++set; 341 342 for ( ; set < N_DB_SETS; ++set) 343 has += 0 != block->db_set[ set ]; 344 345 return has > 0; 346} 347 348 349enum ins_frame 350data_in_hash_insert_data_frame (struct data_in *data_in, 351 const struct data_frame *data_frame, uint64_t read_offset) 352{ 353 struct hash_data_in *const hdi = HDI_PTR(data_in); 354 struct data_block *block; 355 uint64_t key, off, diff, fin_off; 356 const unsigned char *data; 357 unsigned size, nw; 358 359 if (data_frame->df_offset + data_frame->df_size < read_offset) 360 { 361 if (data_frame->df_fin) 362 return INS_FRAME_ERR; 363 else 364 return INS_FRAME_DUP; 365 } 366 367 if ((hdi->hdi_flags & HDI_FIN) && 368 ( 369 (data_frame->df_fin && 370 data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off) 371 || 372 data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off 373 ) 374 ) 375 { 376 return INS_FRAME_ERR; 377 } 378 379 if (data_frame->df_offset < read_offset) 380 { 381 diff = read_offset - data_frame->df_offset; 382 assert(diff <= data_frame->df_size); 383 size = data_frame->df_size - diff; 384 off = data_frame->df_offset + diff; 385 data = data_frame->df_data + diff; 386 } 387 else 388 { 389 size = data_frame->df_size; 390 off = data_frame->df_offset; 391 data = data_frame->df_data; 392 } 393 394 key = off - (off % DB_DATA_SIZE); 395 do 396 { 397 block = hash_find(hdi, key); 398 if (!block) 399 { 400 block = new_block(hdi, key); 401 if (!block) 402 return INS_FRAME_ERR; 403 } 404 nw = block_write(block, off % DB_DATA_SIZE, data, size); 405 size -= nw; 406 off += nw; 407 data += nw; 408 key += DB_DATA_SIZE; 409 } 410 while (size > 0); 411 412 if (data_frame->df_fin) 413 { 414 fin_off = data_frame->df_offset + data_frame->df_size; 415 if (has_bytes_after(block, fin_off - block->db_off) || 416 hash_find(hdi, key)) 417 { 418 return INS_FRAME_ERR; 419 } 420 hdi->hdi_flags |= HDI_FIN; 421 hdi->hdi_fin_off = fin_off; 422 } 423 424 return INS_FRAME_OK; 425} 426 427 428static enum ins_frame 429hash_di_insert_frame (struct data_in *data_in, 430 struct stream_frame *new_frame, uint64_t read_offset) 431{ 432 struct hash_data_in *const hdi = HDI_PTR(data_in); 433 const struct data_frame *const data_frame = &new_frame->data_frame; 434 enum ins_frame ins; 435 436 ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset); 437 assert(ins != INS_FRAME_OVERLAP); 438 lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in); 439 lsquic_malo_put(new_frame); 440 return ins; 441} 442 443 444#if __GNUC__ 445# define ctz __builtin_ctzll 446#else 447static unsigned 448ctz (unsigned long long x) 449{ 450 unsigned n = 0; 451 if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; } 452 if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; } 453 if (0 == (x & ((1ULL << 8) - 1))) { n += 8; x >>= 8; } 454 if (0 == (x & ((1ULL << 4) - 1))) { n += 4; x >>= 4; } 455 if (0 == (x & ((1ULL << 2) - 1))) { n += 2; x >>= 2; } 456 if (0 == (x & ((1ULL << 1) - 1))) { n += 1; x >>= 1; } 457 return n; 458} 459 460 461#endif 462 463 464static unsigned 465n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit) 466{ 467 unsigned count; 468 uint64_t part; 469 470 part = ~(block->db_set[ set ] >> bit); 471 if (part) 472 { 473 count = ctz(part); 474 if (count < 64 - bit) 475 return count; 476 } 477 else 478 count = 64; 479 ++set; 480 481 for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set) 482 count += 64; 483 484 if (set < N_DB_SETS) 485 { 486 part = ~block->db_set[ set ]; 487 if (part) 488 count += ctz(part); 489 else 490 count += 64; 491 } 492 493 return count; 494} 495 496 497/* Data block is readable if there is at least one readable byte at 498 * `read_offset' or there is FIN at that offset. 499 */ 500static int 501setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset, 502 struct data_block *block) 503{ 504 unsigned set, bit; 505 uint64_t offset; 506 507 offset = read_offset % DB_DATA_SIZE; 508 set = offset >> 6; 509 bit = offset & 0x3F; 510 511 if (block->db_set[ set ] & (1ULL << bit)) 512 { 513 hdi->hdi_last_block = block; 514 hdi->hdi_data_frame.df_data = block->db_data; 515 hdi->hdi_data_frame.df_offset = block->db_off; 516 hdi->hdi_data_frame.df_read_off = offset; 517 hdi->hdi_data_frame.df_size = offset + 518 n_avail_bytes(block, set, bit); 519 hdi->hdi_data_frame.df_fin = 520 (hdi->hdi_flags & HDI_FIN) && 521 hdi->hdi_data_frame.df_read_off + 522 hdi->hdi_data_frame.df_size == hdi->hdi_fin_off; 523 return 1; 524 } 525 else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 526 { 527 hdi->hdi_last_block = block; 528 hdi->hdi_data_frame.df_data = NULL; 529 hdi->hdi_data_frame.df_offset = block->db_off; 530 hdi->hdi_data_frame.df_read_off = offset; 531 hdi->hdi_data_frame.df_size = offset; 532 hdi->hdi_data_frame.df_fin = 1; 533 return 1; 534 } 535 else 536 return 0; 537} 538 539 540static struct data_frame * 541hash_di_get_frame (struct data_in *data_in, uint64_t read_offset) 542{ 543 struct hash_data_in *const hdi = HDI_PTR(data_in); 544 struct data_block *block; 545 uint64_t key; 546 547 key = read_offset - (read_offset % DB_DATA_SIZE); 548 block = hash_find(hdi, key); 549 if (!block) 550 { 551 if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 552 { 553 hdi->hdi_last_block = NULL; 554 hdi->hdi_data_frame.df_data = NULL; 555 hdi->hdi_data_frame.df_offset = read_offset - 556 read_offset % DB_DATA_SIZE; 557 hdi->hdi_data_frame.df_read_off = 0; 558 hdi->hdi_data_frame.df_size = 0; 559 hdi->hdi_data_frame.df_fin = 1; 560 return &hdi->hdi_data_frame; 561 } 562 else 563 return NULL; 564 } 565 566 if (setup_data_frame(hdi, read_offset, block)) 567 return &hdi->hdi_data_frame; 568 else 569 return NULL; 570} 571 572 573static void 574hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame) 575{ 576 struct hash_data_in *const hdi = HDI_PTR(data_in); 577 struct data_block *const block = hdi->hdi_last_block; 578 579 if (block) 580 { 581 if (data_frame->df_read_off == DB_DATA_SIZE || 582 !has_bytes_after(block, data_frame->df_read_off)) 583 { 584 hash_remove(hdi, block); 585 free(block); 586 if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN)) 587 { 588 LSQ_DEBUG("hash empty, want to switch"); 589 hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL; 590 } 591 } 592 } 593 else 594 assert(data_frame->df_fin && data_frame->df_size == 0); 595} 596 597 598static int 599hash_di_empty (struct data_in *data_in) 600{ 601 struct hash_data_in *const hdi = HDI_PTR(data_in); 602 return hdi->hdi_count == 0; 603} 604 605 606struct data_in * 607hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset) 608{ 609 struct hash_data_in *const hdi = HDI_PTR(data_in); 610 struct data_in *new_data_in; 611 612 assert(hdi->hdi_count == 0); 613 614 new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id); 615 data_in->di_if->di_destroy(data_in); 616 617 return new_data_in; 618} 619 620 621static size_t 622hash_di_mem_used (struct data_in *data_in) 623{ 624 struct hash_data_in *const hdi = HDI_PTR(data_in); 625 const struct data_block *block; 626 size_t size; 627 unsigned n; 628 629 size = sizeof(*data_in); 630 631 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 632 TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next) 633 size += sizeof(*block); 634 635 size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]); 636 637 return size; 638} 639 640 641static const struct data_in_iface di_if_hash = { 642 .di_destroy = hash_di_destroy, 643 .di_empty = hash_di_empty, 644 .di_frame_done = hash_di_frame_done, 645 .di_get_frame = hash_di_get_frame, 646 .di_insert_frame = hash_di_insert_frame, 647 .di_mem_used = hash_di_mem_used, 648 .di_switch_impl = hash_di_switch_impl, 649}; 650 651static const struct data_in_iface *di_if_hash_ptr = &di_if_hash; 652