lsquic_di_hash.c revision c51ce338
1/* Copyright (c) 2017 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_di_hash.c -- Copy incoming data into a hash 4 * 5 * While this implementation copies the data, its memory use is limited, 6 * which makes it a good choice when we have a lot of stream frames 7 * coming in. 8 * 9 * Another difference is that it does not check for frame overlap, which 10 * is something that is present in Chrome, but it is not required by QUIC. 11 */ 12 13 14#include <assert.h> 15#include <stddef.h> 16#include <stdint.h> 17#include <stdlib.h> 18#include <string.h> 19#include <sys/queue.h> 20 21#include "lsquic.h" 22#include "lsquic_int_types.h" 23#include "lsquic_types.h" 24#include "lsquic_conn_flow.h" 25#include "lsquic_packet_in.h" 26#include "lsquic_rtt.h" 27#include "lsquic_sfcw.h" 28#include "lsquic_stream.h" 29#include "lsquic_mm.h" 30#include "lsquic_malo.h" 31#include "lsquic_conn.h" 32#include "lsquic_conn_public.h" 33#include "lsquic_data_in_if.h" 34 35 36#define LSQUIC_LOGGER_MODULE LSQLM_DI 37#define LSQUIC_LOG_CONN_ID hdi->hdi_conn_pub->lconn->cn_cid 38#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id 39#include "lsquic_logger.h" 40 41 42#define N_DB_SETS 57 43 44#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \ 45 sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t)) 46 47struct data_block 48{ 49 TAILQ_ENTRY(data_block) db_next; 50 uint64_t db_off; 51 uint64_t db_set[N_DB_SETS]; /* bit for each valid byte */ 52 unsigned char db_data[DB_DATA_SIZE]; 53}; 54 55typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) - 1]; 56typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE) - 1]; 57typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) - 1]; 58 59 60TAILQ_HEAD(dblock_head, data_block); 61 62 63static const struct data_in_iface di_if_hash; 64 65 66struct hash_data_in 67{ 68 struct data_in hdi_data_in; 69 struct lsquic_conn_public *hdi_conn_pub; 70 uint64_t hdi_fin_off; 71 struct dblock_head *hdi_buckets; 72 struct data_block *hdi_last_block; 73 struct data_frame hdi_data_frame; 74 uint32_t hdi_stream_id; 75 unsigned hdi_count; 76 unsigned hdi_nbits; 77 enum { 78 HDI_FIN = (1 << 0), 79 } hdi_flags; 80}; 81 82 83#define HDI_PTR(data_in) (struct hash_data_in *) \ 84 ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in)) 85 86 87#define N_BUCKETS(n_bits) (1U << (n_bits)) 88#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1)) 89 90 91static unsigned 92my_log2 /* silly name to suppress compiler warning */ (unsigned sz) 93{ 94#if __GNUC__ 95 unsigned clz = __builtin_clz(sz); 96 return 32 - clz; 97#else 98 unsigned clz; 99 size_t y; 100 clz = 32; 101 y = sz >> 16; if (y) { clz -= 16; sz = y; } 102 y = sz >> 8; if (y) { clz -= 8; sz = y; } 103 y = sz >> 4; if (y) { clz -= 4; sz = y; } 104 y = sz >> 2; if (y) { clz -= 2; sz = y; } 105 y = sz >> 1; if (y) return 32 - clz + 1; 106 return 32 - clz + sz; 107#endif 108} 109 110 111struct data_in * 112data_in_hash_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id, 113 uint64_t byteage) 114{ 115 struct hash_data_in *hdi; 116 unsigned n; 117 118 hdi = malloc(sizeof(*hdi)); 119 if (!hdi) 120 return NULL; 121 122 hdi->hdi_data_in.di_if = &di_if_hash; 123 hdi->hdi_data_in.di_flags = 0; 124 hdi->hdi_conn_pub = conn_pub; 125 hdi->hdi_stream_id = stream_id; 126 hdi->hdi_fin_off = 0; 127 hdi->hdi_flags = 0; 128 hdi->hdi_last_block = NULL; 129 if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if 130 argument is 0 */) 131 hdi->hdi_nbits = my_log2(byteage / DB_DATA_SIZE) + 2; 132 else 133 hdi->hdi_nbits = 3; 134 hdi->hdi_count = 0; 135 hdi->hdi_buckets = malloc(sizeof(hdi->hdi_buckets[0]) * 136 N_BUCKETS(hdi->hdi_nbits)); 137 if (!hdi->hdi_buckets) 138 { 139 free(hdi); 140 return NULL; 141 } 142 143 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 144 TAILQ_INIT(&hdi->hdi_buckets[n]); 145 146 return &hdi->hdi_data_in; 147} 148 149 150static void 151hash_di_destroy (struct data_in *data_in) 152{ 153 struct hash_data_in *const hdi = HDI_PTR(data_in); 154 struct data_block *block; 155 unsigned n; 156 157 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 158 { 159 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 160 { 161 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 162 free(block); 163 } 164 } 165 free(hdi->hdi_buckets); 166 free(hdi); 167} 168 169 170static int 171hash_grow (struct hash_data_in *hdi) 172{ 173 struct dblock_head *new_buckets, *new[2]; 174 struct data_block *block; 175 unsigned n, old_nbits; 176 int idx; 177 178 old_nbits = hdi->hdi_nbits; 179 LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1)); 180 new_buckets = malloc(sizeof(hdi->hdi_buckets[0]) 181 * N_BUCKETS(old_nbits + 1)); 182 if (!new_buckets) 183 { 184 LSQ_WARN("malloc failed: potential trouble ahead"); 185 return -1; 186 } 187 188 for (n = 0; n < N_BUCKETS(old_nbits); ++n) 189 { 190 new[0] = &new_buckets[n]; 191 new[1] = &new_buckets[n + N_BUCKETS(old_nbits)]; 192 TAILQ_INIT(new[0]); 193 TAILQ_INIT(new[1]); 194 while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n]))) 195 { 196 TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next); 197 idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1; 198 TAILQ_INSERT_TAIL(new[idx], block, db_next); 199 } 200 } 201 free(hdi->hdi_buckets); 202 hdi->hdi_nbits = old_nbits + 1; 203 hdi->hdi_buckets = new_buckets; 204 return 0; 205} 206 207 208static int 209hash_insert (struct hash_data_in *hdi, struct data_block *block) 210{ 211 unsigned buckno; 212 213 if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi)) 214 return -1; 215 216 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 217 TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next); 218 ++hdi->hdi_count; 219 return 0; 220} 221 222 223static struct data_block * 224hash_find (const struct hash_data_in *hdi, uint64_t off) 225{ 226 struct data_block *block; 227 unsigned buckno; 228 229 buckno = BUCKNO(hdi->hdi_nbits, off); 230 TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next) 231 if (off == block->db_off) 232 return block; 233 return NULL; 234} 235 236 237static void 238hash_remove (struct hash_data_in *hdi, struct data_block *block) 239{ 240 unsigned buckno; 241 242 buckno = BUCKNO(hdi->hdi_nbits, block->db_off); 243 TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next); 244 --hdi->hdi_count; 245} 246 247 248static struct data_block * 249new_block (struct hash_data_in *hdi, uint64_t off) 250{ 251 struct data_block *block; 252 253 assert(0 == off % DB_DATA_SIZE); 254 255 block = malloc(sizeof(*block)); 256 if (!block) 257 return NULL; 258 259 block->db_off = off; 260 if (0 != hash_insert(hdi, block)) 261 { 262 free(block); 263 return NULL; 264 } 265 266 memset(block->db_set, 0, sizeof(block->db_set)); 267 return block; 268} 269 270 271static unsigned 272block_write (struct data_block *block, unsigned block_off, 273 const unsigned char *data, unsigned data_sz) 274{ 275 const unsigned char *begin, *end; 276 unsigned set, bit, n_full_sets, n; 277 uint64_t mask; 278 279 if (data_sz > DB_DATA_SIZE - block_off) 280 data_sz = DB_DATA_SIZE - block_off; 281 282 begin = data; 283 end = begin + data_sz; 284 set = block_off >> 6; 285 bit = block_off & 0x3F; 286 287 assert(set <= N_DB_SETS); 288 289 if (bit) 290 { 291 n = 64 - bit; 292 if (n > data_sz) 293 n = data_sz; 294 mask = ~((1ULL << bit ) - 1) 295 & ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1)); 296 block->db_set[ set ] |= mask; 297 memcpy(block->db_data + block_off, data, n); 298 data += n; 299 block_off += n; 300 ++set; 301 } 302 303 n_full_sets = (end - data) >> 6; 304 if (n_full_sets) 305 { 306 memcpy(block->db_data + block_off, data, n_full_sets * 64); 307 data += n_full_sets * 64; 308 block_off += n_full_sets * 64; 309 memset(&block->db_set[ set ], 0xFF, n_full_sets * 8); 310 set += n_full_sets; 311 } 312 313 if (data < end) 314 { 315 assert(end - data < 64); 316 block->db_set[ set ] |= ((1ULL << (end - data)) - 1); 317 memcpy(block->db_data + block_off, data, end - data); 318 data = end; 319 } 320 321 assert(set <= N_DB_SETS); 322 323 return data - begin; 324} 325 326 327static int 328has_bytes_after (const struct data_block *block, unsigned off) 329{ 330 unsigned bit, set; 331 int has; 332 333 set = off >> 6; 334 bit = off & 0x3F; 335 336 has = 0 != (block->db_set[ set ] >> bit); 337 ++set; 338 339 for ( ; set < N_DB_SETS; ++set) 340 has += 0 != block->db_set[ set ]; 341 342 return has > 0; 343} 344 345 346enum ins_frame 347data_in_hash_insert_data_frame (struct data_in *data_in, 348 const struct data_frame *data_frame, uint64_t read_offset) 349{ 350 struct hash_data_in *const hdi = HDI_PTR(data_in); 351 struct data_block *block; 352 uint64_t key, off, diff, fin_off; 353 const unsigned char *data; 354 unsigned size, nw; 355 356 if (data_frame->df_offset + data_frame->df_size < read_offset) 357 return INS_FRAME_DUP; 358 359 if ((hdi->hdi_flags & HDI_FIN) && 360 ( 361 (data_frame->df_fin && 362 data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off) 363 || 364 data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off 365 ) 366 ) 367 { 368 return INS_FRAME_ERR; 369 } 370 371 if (data_frame->df_offset < read_offset) 372 { 373 diff = read_offset - data_frame->df_offset; 374 assert(diff <= data_frame->df_size); 375 size = data_frame->df_size - diff; 376 off = data_frame->df_offset + diff; 377 data = data_frame->df_data + diff; 378 } 379 else 380 { 381 size = data_frame->df_size; 382 off = data_frame->df_offset; 383 data = data_frame->df_data; 384 } 385 386 key = off - (off % DB_DATA_SIZE); 387 do 388 { 389 block = hash_find(hdi, key); 390 if (!block) 391 { 392 block = new_block(hdi, key); 393 if (!block) 394 return INS_FRAME_ERR; 395 } 396 nw = block_write(block, off % DB_DATA_SIZE, data, size); 397 size -= nw; 398 off += nw; 399 data += nw; 400 key += DB_DATA_SIZE; 401 } 402 while (size > 0); 403 404 if (data_frame->df_fin) 405 { 406 fin_off = data_frame->df_offset + data_frame->df_size; 407 if (has_bytes_after(block, fin_off - block->db_off) || 408 hash_find(hdi, key)) 409 { 410 return INS_FRAME_ERR; 411 } 412 hdi->hdi_flags |= HDI_FIN; 413 hdi->hdi_fin_off = fin_off; 414 } 415 416 return INS_FRAME_OK; 417} 418 419 420static enum ins_frame 421hash_di_insert_frame (struct data_in *data_in, 422 struct stream_frame *new_frame, uint64_t read_offset) 423{ 424 struct hash_data_in *const hdi = HDI_PTR(data_in); 425 const struct data_frame *const data_frame = &new_frame->data_frame; 426 enum ins_frame ins; 427 428 ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset); 429 lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in); 430 lsquic_malo_put(new_frame); 431 return ins; 432} 433 434 435#if __GNUC__ 436# define ctz __builtin_ctzll 437#else 438static unsigned 439ctz (unsigned long long x) 440{ 441 unsigned n = 0; 442 if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; } 443 if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; } 444 if (0 == (x & ((1ULL << 8) - 1))) { n += 8; x >>= 8; } 445 if (0 == (x & ((1ULL << 4) - 1))) { n += 4; x >>= 4; } 446 if (0 == (x & ((1ULL << 2) - 1))) { n += 2; x >>= 2; } 447 if (0 == (x & ((1ULL << 1) - 1))) { n += 1; x >>= 1; } 448 return n; 449} 450#endif 451 452 453static unsigned 454n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit) 455{ 456 unsigned count; 457 uint64_t part; 458 459 part = ~(block->db_set[ set ] >> bit); 460 if (part) 461 { 462 count = ctz(part); 463 if (count < 64 - bit) 464 return count; 465 } 466 else 467 count = 64; 468 ++set; 469 470 for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set) 471 count += 64; 472 473 if (set < N_DB_SETS) 474 { 475 part = ~block->db_set[ set ]; 476 if (part) 477 count += ctz(part); 478 else 479 count += 64; 480 } 481 482 return count; 483} 484 485 486/* Data block is readable if there is at least one readable byte at 487 * `read_offset' or there is FIN at that offset. 488 */ 489static int 490setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset, 491 struct data_block *block) 492{ 493 unsigned set, bit; 494 uint64_t offset; 495 496 offset = read_offset % DB_DATA_SIZE; 497 set = offset >> 6; 498 bit = offset & 0x3F; 499 500 if (block->db_set[ set ] & (1ULL << bit)) 501 { 502 hdi->hdi_last_block = block; 503 hdi->hdi_data_frame.df_data = block->db_data; 504 hdi->hdi_data_frame.df_offset = block->db_off; 505 hdi->hdi_data_frame.df_read_off = offset; 506 hdi->hdi_data_frame.df_size = offset + 507 n_avail_bytes(block, set, bit); 508 hdi->hdi_data_frame.df_fin = 509 (hdi->hdi_flags & HDI_FIN) && 510 hdi->hdi_data_frame.df_read_off + 511 hdi->hdi_data_frame.df_size == hdi->hdi_fin_off; 512 return 1; 513 } 514 else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 515 { 516 hdi->hdi_last_block = block; 517 hdi->hdi_data_frame.df_data = NULL; 518 hdi->hdi_data_frame.df_offset = block->db_off; 519 hdi->hdi_data_frame.df_read_off = offset; 520 hdi->hdi_data_frame.df_size = offset; 521 hdi->hdi_data_frame.df_fin = 1; 522 return 1; 523 } 524 else 525 return 0; 526} 527 528 529static struct data_frame * 530hash_di_get_frame (struct data_in *data_in, uint64_t read_offset) 531{ 532 struct hash_data_in *const hdi = HDI_PTR(data_in); 533 struct data_block *block; 534 uint64_t key; 535 536 key = read_offset - (read_offset % DB_DATA_SIZE); 537 block = hash_find(hdi, key); 538 if (!block) 539 { 540 if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off) 541 { 542 hdi->hdi_last_block = NULL; 543 hdi->hdi_data_frame.df_data = NULL; 544 hdi->hdi_data_frame.df_offset = read_offset - 545 read_offset % DB_DATA_SIZE; 546 hdi->hdi_data_frame.df_read_off = 0; 547 hdi->hdi_data_frame.df_size = 0; 548 hdi->hdi_data_frame.df_fin = 1; 549 return &hdi->hdi_data_frame; 550 } 551 else 552 return NULL; 553 } 554 555 if (setup_data_frame(hdi, read_offset, block)) 556 return &hdi->hdi_data_frame; 557 else 558 return NULL; 559} 560 561 562static void 563hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame) 564{ 565 struct hash_data_in *const hdi = HDI_PTR(data_in); 566 struct data_block *const block = hdi->hdi_last_block; 567 568 if (block) 569 { 570 if (data_frame->df_read_off == DB_DATA_SIZE || 571 !has_bytes_after(block, data_frame->df_read_off)) 572 { 573 hash_remove(hdi, block); 574 free(block); 575 if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN)) 576 { 577 LSQ_DEBUG("hash empty, want to switch"); 578 hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL; 579 } 580 } 581 } 582 else 583 assert(data_frame->df_fin && data_frame->df_size == 0); 584} 585 586 587static int 588hash_di_empty (struct data_in *data_in) 589{ 590 struct hash_data_in *const hdi = HDI_PTR(data_in); 591 return hdi->hdi_count == 0; 592} 593 594 595struct data_in * 596hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset) 597{ 598 struct hash_data_in *const hdi = HDI_PTR(data_in); 599 struct data_in *new_data_in; 600 601 assert(hdi->hdi_count == 0); 602 603 new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id); 604 data_in->di_if->di_destroy(data_in); 605 606 return new_data_in; 607} 608 609 610static size_t 611hash_di_mem_used (struct data_in *data_in) 612{ 613 struct hash_data_in *const hdi = HDI_PTR(data_in); 614 const struct data_block *block; 615 size_t size; 616 unsigned n; 617 618 size = sizeof(*data_in); 619 620 for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n) 621 TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next) 622 size += sizeof(*block); 623 624 size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]); 625 626 return size; 627} 628 629 630static const struct data_in_iface di_if_hash = { 631 .di_destroy = hash_di_destroy, 632 .di_empty = hash_di_empty, 633 .di_frame_done = hash_di_frame_done, 634 .di_get_frame = hash_di_get_frame, 635 .di_insert_frame = hash_di_insert_frame, 636 .di_mem_used = hash_di_mem_used, 637 .di_switch_impl = hash_di_switch_impl, 638}; 639