lsquic_di_hash.c revision be4cfad0
1/* Copyright (c) 2017 - 2018 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_di_hash.c -- Copy incoming data into a hash 4 * 5 * While this implementation copies the data, its memory use is limited, 6 * which makes it a good choice when we have a lot of stream frames 7 * coming in. 8 * 9 * Another difference is that it does not check for frame overlap, which 10 * is something that is present in Chrome, but it is not required by QUIC. 11 */ 12 13 14#include <assert.h> 15#include <stddef.h> 16#include <stdint.h> 17#include <stdlib.h> 18#include <string.h> 19#include <sys/queue.h> 20 21#include "lsquic.h" 22#include "lsquic_int_types.h" 23#include "lsquic_types.h" 24#include "lsquic_conn_flow.h" 25#include "lsquic_packet_common.h" 26#include "lsquic_packet_in.h" 27#include "lsquic_rtt.h" 28#include "lsquic_sfcw.h" 29#include "lsquic_stream.h" 30#include "lsquic_mm.h" 31#include "lsquic_malo.h" 32#include "lsquic_conn.h" 33#include "lsquic_conn_public.h" 34#include "lsquic_data_in_if.h" 35 36 37#define LSQUIC_LOGGER_MODULE LSQLM_DI 38#define LSQUIC_LOG_CONN_ID hdi->hdi_conn_pub->lconn->cn_cid 39#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id 40#include "lsquic_logger.h" 41 42 43#define N_DB_SETS 57 44 45#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \ 46 sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t)) 47 48struct data_block 49{ 50 TAILQ_ENTRY(data_block) db_next; 51 uint64_t db_off; 52 uint64_t db_set[N_DB_SETS]; /* bit for each valid byte */ 53 unsigned char db_data[DB_DATA_SIZE]; 54}; 55 56typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1]; 57typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1]; 58typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1]; 59 60 61TAILQ_HEAD(dblock_head, data_block); 62 63 64static const struct data_in_iface *di_if_hash_ptr; 65 66 67struct hash_data_in 68{ 69 struct data_in hdi_data_in; 70 struct lsquic_conn_public *hdi_conn_pub; 71 uint64_t hdi_fin_off; 72 struct dblock_head *hdi_buckets; 73 struct data_block *hdi_last_block; 74 struct data_frame hdi_data_frame; 75 uint32_t hdi_stream_id; 76 unsigned hdi_count; 77 unsigned hdi_nbits; 78 enum { 79 HDI_FIN = (1 << 0), 80 } hdi_flags; 81}; 82 83 84#define HDI_PTR(data_in) (struct hash_data_in *) \ 85 ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in)) 86 87 88#define N_BUCKETS(n_bits) (1U << (n_bits)) 89#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1)) 90 91 92static unsigned 93my_log2 /* silly name to suppress compiler warning */ (unsigned sz) 94{ 95#if __GNUC__ 96 unsigned clz = __builtin_clz(sz); 97 return 32 - clz; 98#else 99 unsigned clz; 100 size_t y; 101 clz = 32; 102 y = sz >> 16; if (y) { clz -= 16; sz = y; } 103 y = sz >> 8; if (y) { clz -= 8; sz = y; } 104 y = sz >> 4; if (y) { clz -= 4; sz = y; } 105 y = sz >> 2; if (y) { clz -= 2; sz = y; } 106 y = sz >> 1; if (y) return 32 - clz + 1; 107 return 32 - clz + sz; 108#endif 109} 110 111 112struct data_in * 113data_in_hash_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id, 114 uint64_t byteage) 115{ 116 struct hash_data_in *hdi; 117 unsigned n; 118 119 hdi = malloc(sizeof(*hdi)); 120 if (!hdi) 121 return NULL; 122 123 hdi->hdi_data_in.di_if = di_if_hash_ptr; 124 hdi->hdi_data_in.di_flags = 0; 125 hdi->hdi_conn_pub = conn_pub; 126 hdi->hdi_stream_id = stream_id; 127 hdi->hdi_fin_off = 0; 128 hdi->hdi_flags = 0; 129 hdi->hdi_last_block = NULL; 130 if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if 131 argument is 0 */) 132 hdi->hdi_nbits = my_log2(byteage / DB_DATA_SIZE) + 2; 133 else 134 hdi->hdi_nbits = 3; 135 hdi->hdi_count = 0; 136 hdi->hdi_buckets = malloc(sizeof(hdi->hdi_buckets[0]) * 137 N_BUCKETS(hdi->hdi_nbits)); 138 if (!hdi->hdi_buckets) 139 { 140 free(hdi); 141 return NULL; 142 } 143 144 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 145 TAILQ_INIT(&hdi->hdi_buckets[n]); 146 147 return &hdi->hdi_data_in; 148} 149 150 151static void 152hash_di_destroy (struct data_in *data_in) 153{ 154 struct hash_data_in *const hdi = HDI_PTR(data_in); 155 struct data_block *block; 156 unsigned n; 157 158 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 159 { 160 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 161 { 162 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 163 free(block); 164 } 165 } 166 free(hdi->hdi_buckets); 167 free(hdi); 168} 169 170 171static int 172hash_grow (struct hash_data_in *hdi) 173{ 174 struct dblock_head *new_buckets, *new[2]; 175 struct data_block *block; 176 unsigned n, old_nbits; 177 int idx; 178 179 old_nbits = hdi->hdi_nbits; 180 LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1)); 181 new_buckets = malloc(sizeof(hdi->hdi_buckets[0]) 182 * N_BUCKETS(old_nbits + 1)); 183 if (!new_buckets) 184 { 185 LSQ_WARN("malloc failed: potential trouble ahead"); 186 return -1; 187 } 188 189 for (n = 0; n < N_BUCKETS(old_nbits); ++n) 190 { 191 new[0] = &new_buckets[n]; 192 new[1] = &new_buckets[n + N_BUCKETS(old_nbits)]; 193 TAILQ_INIT(new[0]); 194 TAILQ_INIT(new[1]); 195 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 196 { 197 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 198 idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1; 199 TAILQ_INSERT_TAIL(new[idx], block, db_next); 200 } 201 } 202 free(hdi->hdi_buckets); 203 hdi->hdi_nbits = old_nbits + 1; 204 hdi->hdi_buckets = new_buckets; 205 return 0; 206} 207 208 209static int 210hash_insert (struct hash_data_in *hdi, struct data_block *block) 211{ 212 unsigned buckno; 213 214 if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi)) 215 return -1; 216 217 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 218 TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next); 219 ++hdi->hdi_count; 220 return 0; 221} 222 223 224static struct data_block * 225hash_find (const struct hash_data_in *hdi, uint64_t off) 226{ 227 struct data_block *block; 228 unsigned buckno; 229 230 buckno = BUCKNO(hdi->hdi_nbits, off); 231 TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next) 232 if (off == block->db_off) 233 return block; 234 return NULL; 235} 236 237 238static void 239hash_remove (struct hash_data_in *hdi, struct data_block *block) 240{ 241 unsigned buckno; 242 243 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 244 TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next); 245 --hdi->hdi_count; 246} 247 248 249static struct data_block * 250new_block (struct hash_data_in *hdi, uint64_t off) 251{ 252 struct data_block *block; 253 254 assert(0 == off % DB_DATA_SIZE); 255 256 block = malloc(sizeof(*block)); 257 if (!block) 258 return NULL; 259 260 block->db_off = off; 261 if (0 != hash_insert(hdi, block)) 262 { 263 free(block); 264 return NULL; 265 } 266 267 memset(block->db_set, 0, sizeof(block->db_set)); 268 return block; 269} 270 271 272static unsigned 273block_write (struct data_block *block, unsigned block_off, 274 const unsigned char *data, unsigned data_sz) 275{ 276 const unsigned char *begin, *end; 277 unsigned set, bit, n_full_sets, n; 278 uint64_t mask; 279 280 if (data_sz > DB_DATA_SIZE - block_off) 281 data_sz = DB_DATA_SIZE - block_off; 282 283 begin = data; 284 end = begin + data_sz; 285 set = block_off >> 6; 286 bit = block_off & 0x3F; 287 288 assert(set <= N_DB_SETS); 289 290 if (bit) 291 { 292 n = 64 - bit; 293 if (n > data_sz) 294 n = data_sz; 295 mask = ~((1ULL << bit ) - 1) 296 & ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1)); 297 block->db_set[ set ] |= mask; 298 memcpy(block->db_data + block_off, data, n); 299 data += n; 300 block_off += n; 301 ++set; 302 } 303 304 n_full_sets = (end - data) >> 6; 305 if (n_full_sets) 306 { 307 memcpy(block->db_data + block_off, data, n_full_sets * 64); 308 data += n_full_sets * 64; 309 block_off += n_full_sets * 64; 310 memset(&block->db_set[ set ], 0xFF, n_full_sets * 8); 311 set += n_full_sets; 312 } 313 314 if (data < end) 315 { 316 assert(end - data < 64); 317 block->db_set[ set ] |= ((1ULL << (end - data)) - 1); 318 memcpy(block->db_data + block_off, data, end - data); 319 data = end; 320 } 321 322 assert(set <= N_DB_SETS); 323 324 return data - begin; 325} 326 327 328static int 329has_bytes_after (const struct data_block *block, unsigned off) 330{ 331 unsigned bit, set; 332 int has; 333 334 set = off >> 6; 335 bit = off & 0x3F; 336 337 has = 0 != (block->db_set[ set ] >> bit); 338 ++set; 339 340 for ( ; set < N_DB_SETS; ++set) 341 has += 0 != block->db_set[ set ]; 342 343 return has > 0; 344} 345 346 347enum ins_frame 348data_in_hash_insert_data_frame (struct data_in *data_in, 349 const struct data_frame *data_frame, uint64_t read_offset) 350{ 351 struct hash_data_in *const hdi = HDI_PTR(data_in); 352 struct data_block *block; 353 uint64_t key, off, diff, fin_off; 354 const unsigned char *data; 355 unsigned size, nw; 356 357 if (data_frame->df_offset + data_frame->df_size < read_offset) 358 { 359 if (data_frame->df_fin) 360 return INS_FRAME_ERR; 361 else 362 return INS_FRAME_DUP; 363 } 364 365 if ((hdi->hdi_flags & HDI_FIN) && 366 ( 367 (data_frame->df_fin && 368 data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off) 369 || 370 data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off 371 ) 372 ) 373 { 374 return INS_FRAME_ERR; 375 } 376 377 if (data_frame->df_offset < read_offset) 378 { 379 diff = read_offset - data_frame->df_offset; 380 assert(diff <= data_frame->df_size); 381 size = data_frame->df_size - diff; 382 off = data_frame->df_offset + diff; 383 data = data_frame->df_data + diff; 384 } 385 else 386 { 387 size = data_frame->df_size; 388 off = data_frame->df_offset; 389 data = data_frame->df_data; 390 } 391 392 key = off - (off % DB_DATA_SIZE); 393 do 394 { 395 block = hash_find(hdi, key); 396 if (!block) 397 { 398 block = new_block(hdi, key); 399 if (!block) 400 return INS_FRAME_ERR; 401 } 402 nw = block_write(block, off % DB_DATA_SIZE, data, size); 403 size -= nw; 404 off += nw; 405 data += nw; 406 key += DB_DATA_SIZE; 407 } 408 while (size > 0); 409 410 if (data_frame->df_fin) 411 { 412 fin_off = data_frame->df_offset + data_frame->df_size; 413 if (has_bytes_after(block, fin_off - block->db_off) || 414 hash_find(hdi, key)) 415 { 416 return INS_FRAME_ERR; 417 } 418 hdi->hdi_flags |= HDI_FIN; 419 hdi->hdi_fin_off = fin_off; 420 } 421 422 return INS_FRAME_OK; 423} 424 425 426static enum ins_frame 427hash_di_insert_frame (struct data_in *data_in, 428 struct stream_frame *new_frame, uint64_t read_offset) 429{ 430 struct hash_data_in *const hdi = HDI_PTR(data_in); 431 const struct data_frame *const data_frame = &new_frame->data_frame; 432 enum ins_frame ins; 433 434 ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset); 435 assert(ins != INS_FRAME_OVERLAP); 436 lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in); 437 lsquic_malo_put(new_frame); 438 return ins; 439} 440 441 442#if __GNUC__ 443# define ctz __builtin_ctzll 444#else 445static unsigned 446ctz (unsigned long long x) 447{ 448 unsigned n = 0; 449 if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; } 450 if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; } 451 if (0 == (x & ((1ULL << 8) - 1))) { n += 8; x >>= 8; } 452 if (0 == (x & ((1ULL << 4) - 1))) { n += 4; x >>= 4; } 453 if (0 == (x & ((1ULL << 2) - 1))) { n += 2; x >>= 2; } 454 if (0 == (x & ((1ULL << 1) - 1))) { n += 1; x >>= 1; } 455 return n; 456} 457#endif 458 459 460static unsigned 461n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit) 462{ 463 unsigned count; 464 uint64_t part; 465 466 part = ~(block->db_set[ set ] >> bit); 467 if (part) 468 { 469 count = ctz(part); 470 if (count < 64 - bit) 471 return count; 472 } 473 else 474 count = 64; 475 ++set; 476 477 for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set) 478 count += 64; 479 480 if (set < N_DB_SETS) 481 { 482 part = ~block->db_set[ set ]; 483 if (part) 484 count += ctz(part); 485 else 486 count += 64; 487 } 488 489 return count; 490} 491 492 493/* Data block is readable if there is at least one readable byte at 494 * `read_offset' or there is FIN at that offset. 495 */ 496static int 497setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset, 498 struct data_block *block) 499{ 500 unsigned set, bit; 501 uint64_t offset; 502 503 offset = read_offset % DB_DATA_SIZE; 504 set = offset >> 6; 505 bit = offset & 0x3F; 506 507 if (block->db_set[ set ] & (1ULL << bit)) 508 { 509 hdi->hdi_last_block = block; 510 hdi->hdi_data_frame.df_data = block->db_data; 511 hdi->hdi_data_frame.df_offset = block->db_off; 512 hdi->hdi_data_frame.df_read_off = offset; 513 hdi->hdi_data_frame.df_size = offset + 514 n_avail_bytes(block, set, bit); 515 hdi->hdi_data_frame.df_fin = 516 (hdi->hdi_flags & HDI_FIN) && 517 hdi->hdi_data_frame.df_read_off + 518 hdi->hdi_data_frame.df_size == hdi->hdi_fin_off; 519 return 1; 520 } 521 else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 522 { 523 hdi->hdi_last_block = block; 524 hdi->hdi_data_frame.df_data = NULL; 525 hdi->hdi_data_frame.df_offset = block->db_off; 526 hdi->hdi_data_frame.df_read_off = offset; 527 hdi->hdi_data_frame.df_size = offset; 528 hdi->hdi_data_frame.df_fin = 1; 529 return 1; 530 } 531 else 532 return 0; 533} 534 535 536static struct data_frame * 537hash_di_get_frame (struct data_in *data_in, uint64_t read_offset) 538{ 539 struct hash_data_in *const hdi = HDI_PTR(data_in); 540 struct data_block *block; 541 uint64_t key; 542 543 key = read_offset - (read_offset % DB_DATA_SIZE); 544 block = hash_find(hdi, key); 545 if (!block) 546 { 547 if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 548 { 549 hdi->hdi_last_block = NULL; 550 hdi->hdi_data_frame.df_data = NULL; 551 hdi->hdi_data_frame.df_offset = read_offset - 552 read_offset % DB_DATA_SIZE; 553 hdi->hdi_data_frame.df_read_off = 0; 554 hdi->hdi_data_frame.df_size = 0; 555 hdi->hdi_data_frame.df_fin = 1; 556 return &hdi->hdi_data_frame; 557 } 558 else 559 return NULL; 560 } 561 562 if (setup_data_frame(hdi, read_offset, block)) 563 return &hdi->hdi_data_frame; 564 else 565 return NULL; 566} 567 568 569static void 570hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame) 571{ 572 struct hash_data_in *const hdi = HDI_PTR(data_in); 573 struct data_block *const block = hdi->hdi_last_block; 574 575 if (block) 576 { 577 if (data_frame->df_read_off == DB_DATA_SIZE || 578 !has_bytes_after(block, data_frame->df_read_off)) 579 { 580 hash_remove(hdi, block); 581 free(block); 582 if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN)) 583 { 584 LSQ_DEBUG("hash empty, want to switch"); 585 hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL; 586 } 587 } 588 } 589 else 590 assert(data_frame->df_fin && data_frame->df_size == 0); 591} 592 593 594static int 595hash_di_empty (struct data_in *data_in) 596{ 597 struct hash_data_in *const hdi = HDI_PTR(data_in); 598 return hdi->hdi_count == 0; 599} 600 601 602struct data_in * 603hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset) 604{ 605 struct hash_data_in *const hdi = HDI_PTR(data_in); 606 struct data_in *new_data_in; 607 608 assert(hdi->hdi_count == 0); 609 610 new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id); 611 data_in->di_if->di_destroy(data_in); 612 613 return new_data_in; 614} 615 616 617static size_t 618hash_di_mem_used (struct data_in *data_in) 619{ 620 struct hash_data_in *const hdi = HDI_PTR(data_in); 621 const struct data_block *block; 622 size_t size; 623 unsigned n; 624 625 size = sizeof(*data_in); 626 627 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 628 TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next) 629 size += sizeof(*block); 630 631 size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]); 632 633 return size; 634} 635 636 637static const struct data_in_iface di_if_hash = { 638 .di_destroy = hash_di_destroy, 639 .di_empty = hash_di_empty, 640 .di_frame_done = hash_di_frame_done, 641 .di_get_frame = hash_di_get_frame, 642 .di_insert_frame = hash_di_insert_frame, 643 .di_mem_used = hash_di_mem_used, 644 .di_switch_impl = hash_di_switch_impl, 645}; 646 647static const struct data_in_iface *di_if_hash_ptr = &di_if_hash; 648