lsquic_di_hash.c revision f4841319
1/* Copyright (c) 2017 - 2020 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_di_hash.c -- Copy incoming data into a hash
4 *
5 * While this implementation copies the data, its memory use is limited,
6 * which makes it a good choice when we have a lot of stream frames
7 * coming in.
8 *
9 * Another difference is that it does not check for frame overlap, which
10 * is something that is present in Chrome, but it is not required by QUIC.
11 */
12
13
14#include <assert.h>
15#include <inttypes.h>
16#include <stddef.h>
17#include <stdint.h>
18#include <stdlib.h>
19#include <string.h>
20#include <sys/queue.h>
21
22#include "lsquic.h"
23#include "lsquic_int_types.h"
24#include "lsquic_types.h"
25#include "lsquic_conn_flow.h"
26#include "lsquic_packet_common.h"
27#include "lsquic_packet_in.h"
28#include "lsquic_rtt.h"
29#include "lsquic_sfcw.h"
30#include "lsquic_varint.h"
31#include "lsquic_hq.h"
32#include "lsquic_hash.h"
33#include "lsquic_stream.h"
34#include "lsquic_mm.h"
35#include "lsquic_malo.h"
36#include "lsquic_conn.h"
37#include "lsquic_conn_public.h"
38#include "lsquic_data_in_if.h"
39
40
41#define LSQUIC_LOGGER_MODULE LSQLM_DI
42#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(hdi->hdi_conn_pub->lconn)
43#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id
44#include "lsquic_logger.h"
45
46
47#define N_DB_SETS 57
48
49#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \
50                                sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t))
51
52struct data_block
53{
54    TAILQ_ENTRY(data_block) db_next;
55    uint64_t                db_off;
56    uint64_t                db_set[N_DB_SETS];  /* bit for each valid byte */
57    unsigned char           db_data[DB_DATA_SIZE];
58};
59
60typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1];
61typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1];
62typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1];
63
64
65TAILQ_HEAD(dblock_head, data_block);
66
67
68static const struct data_in_iface *di_if_hash_ptr;
69
70
71struct hash_data_in
72{
73    struct data_in              hdi_data_in;
74    struct lsquic_conn_public  *hdi_conn_pub;
75    uint64_t                    hdi_fin_off;
76    struct dblock_head         *hdi_buckets;
77    struct data_block          *hdi_last_block;
78    struct data_frame           hdi_data_frame;
79    lsquic_stream_id_t          hdi_stream_id;
80    unsigned                    hdi_count;
81    unsigned                    hdi_nbits;
82    enum {
83            HDI_FIN = (1 << 0),
84    }                           hdi_flags;
85};
86
87
88#define HDI_PTR(data_in) (struct hash_data_in *) \
89    ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in))
90
91
92#define N_BUCKETS(n_bits) (1U << (n_bits))
93#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1))
94
95
96static unsigned
97my_log2 /* silly name to suppress compiler warning */ (unsigned sz)
98{
99#if __GNUC__
100    unsigned clz = __builtin_clz(sz);
101    return 32 - clz;
102#else
103    unsigned clz;
104    size_t y;
105    clz = 32;
106    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
107    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
108    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
109    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
110    y = sz >>  1;   if (y) return 32 - clz + 1;
111    return 32 - clz + sz;
112#endif
113}
114
115
116struct data_in *
117data_in_hash_new (struct lsquic_conn_public *conn_pub,
118                        lsquic_stream_id_t stream_id, uint64_t byteage)
119{
120    struct hash_data_in *hdi;
121    unsigned n;
122
123    hdi = malloc(sizeof(*hdi));
124    if (!hdi)
125        return NULL;
126
127    hdi->hdi_data_in.di_if    = di_if_hash_ptr;
128    hdi->hdi_data_in.di_flags = 0;
129    hdi->hdi_conn_pub         = conn_pub;
130    hdi->hdi_stream_id        = stream_id;
131    hdi->hdi_fin_off          = 0;
132    hdi->hdi_flags            = 0;
133    hdi->hdi_last_block       = NULL;
134    if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if
135                                   argument is 0 */)
136        hdi->hdi_nbits        = my_log2(byteage / DB_DATA_SIZE) + 2;
137    else
138        hdi->hdi_nbits        = 3;
139    hdi->hdi_count            = 0;
140    hdi->hdi_buckets          = malloc(sizeof(hdi->hdi_buckets[0]) *
141                                                    N_BUCKETS(hdi->hdi_nbits));
142    if (!hdi->hdi_buckets)
143    {
144        free(hdi);
145        return NULL;
146    }
147
148    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
149        TAILQ_INIT(&hdi->hdi_buckets[n]);
150
151    return &hdi->hdi_data_in;
152}
153
154
155static void
156hash_di_destroy (struct data_in *data_in)
157{
158    struct hash_data_in *const hdi = HDI_PTR(data_in);
159    struct data_block *block;
160    unsigned n;
161
162    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
163    {
164        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
165        {
166            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
167            free(block);
168        }
169    }
170    free(hdi->hdi_buckets);
171    free(hdi);
172}
173
174
175static int
176hash_grow (struct hash_data_in *hdi)
177{
178    struct dblock_head *new_buckets, *new[2];
179    struct data_block *block;
180    unsigned n, old_nbits;
181    int idx;
182
183    old_nbits = hdi->hdi_nbits;
184    LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1));
185    new_buckets = malloc(sizeof(hdi->hdi_buckets[0])
186                                                * N_BUCKETS(old_nbits + 1));
187    if (!new_buckets)
188    {
189        LSQ_WARN("malloc failed: potential trouble ahead");
190        return -1;
191    }
192
193    for (n = 0; n < N_BUCKETS(old_nbits); ++n)
194    {
195        new[0] = &new_buckets[n];
196        new[1] = &new_buckets[n + N_BUCKETS(old_nbits)];
197        TAILQ_INIT(new[0]);
198        TAILQ_INIT(new[1]);
199        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
200        {
201            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
202            idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1;
203            TAILQ_INSERT_TAIL(new[idx], block, db_next);
204        }
205    }
206    free(hdi->hdi_buckets);
207    hdi->hdi_nbits   = old_nbits + 1;
208    hdi->hdi_buckets = new_buckets;
209    return 0;
210}
211
212
213static int
214hash_insert (struct hash_data_in *hdi, struct data_block *block)
215{
216    unsigned buckno;
217
218    if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi))
219        return -1;
220
221    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
222    TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next);
223    ++hdi->hdi_count;
224    return 0;
225}
226
227
228static struct data_block *
229hash_find (const struct hash_data_in *hdi, uint64_t off)
230{
231    struct data_block *block;
232    unsigned buckno;
233
234    buckno = BUCKNO(hdi->hdi_nbits, off);
235    TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next)
236        if (off == block->db_off)
237            return block;
238    return NULL;
239}
240
241
242static void
243hash_remove (struct hash_data_in *hdi, struct data_block *block)
244{
245    unsigned buckno;
246
247    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
248    TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next);
249    --hdi->hdi_count;
250}
251
252
253static struct data_block *
254new_block (struct hash_data_in *hdi, uint64_t off)
255{
256    struct data_block *block;
257
258    assert(0 == off % DB_DATA_SIZE);
259
260    block = malloc(sizeof(*block));
261    if (!block)
262        return NULL;
263
264    block->db_off = off;
265    if (0 != hash_insert(hdi, block))
266    {
267        free(block);
268        return NULL;
269    }
270
271    memset(block->db_set, 0, sizeof(block->db_set));
272    return block;
273}
274
275
276static unsigned
277block_write (struct data_block *block, unsigned block_off,
278                             const unsigned char *data, unsigned data_sz)
279{
280    const unsigned char *begin, *end;
281    unsigned set, bit, n_full_sets, n;
282    uint64_t mask;
283
284    assert(block_off < DB_DATA_SIZE);
285    if (data_sz > DB_DATA_SIZE - block_off)
286        data_sz = DB_DATA_SIZE - block_off;
287
288    begin = data;
289    end = begin + data_sz;
290    set = block_off >> 6;
291    bit = block_off & 0x3F;
292
293    assert(set < N_DB_SETS);
294
295    if (bit)
296    {
297        n = 64 - bit;
298        if (n > data_sz)
299            n = data_sz;
300        mask = ~((1ULL <<  bit     ) - 1)
301             &  ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1));
302        block->db_set[ set ] |= mask;
303        memcpy(block->db_data + block_off, data, n);
304        data      += n;
305        block_off += n;
306        ++set;
307    }
308
309    n_full_sets = (end - data) >> 6;
310    if (n_full_sets)
311    {
312        memcpy(block->db_data + block_off, data, n_full_sets * 64);
313        data      += n_full_sets * 64;
314        block_off += n_full_sets * 64;
315        memset(&block->db_set[ set ], 0xFF, n_full_sets * 8);
316        set += n_full_sets;
317    }
318
319    if (data < end)
320    {
321        assert(end - data < 64);
322        block->db_set[ set ] |= ((1ULL << (end - data)) - 1);
323        memcpy(block->db_data + block_off, data, end - data);
324        data = end;
325    }
326
327    assert(set <= N_DB_SETS);
328
329    return data - begin;
330}
331
332
333static int
334has_bytes_after (const struct data_block *block, unsigned off)
335{
336    unsigned bit, set;
337    int has;
338
339    set = off >> 6;
340    bit = off & 0x3F;
341
342    has = 0 != (block->db_set[ set ] >> bit);
343    ++set;
344
345    for ( ; set < N_DB_SETS; ++set)
346        has += 0 != block->db_set[ set ];
347
348    return has > 0;
349}
350
351
352enum ins_frame
353data_in_hash_insert_data_frame (struct data_in *data_in,
354                const struct data_frame *data_frame, uint64_t read_offset)
355{
356    struct hash_data_in *const hdi = HDI_PTR(data_in);
357    struct data_block *block;
358    uint64_t key, off, diff, fin_off;
359    const unsigned char *data;
360    unsigned size, nw;
361
362    if (data_frame->df_offset + data_frame->df_size < read_offset)
363    {
364        if (data_frame->df_fin)
365            return INS_FRAME_ERR;
366        else
367            return INS_FRAME_DUP;
368    }
369
370    if ((hdi->hdi_flags & HDI_FIN) &&
371         (
372          (data_frame->df_fin &&
373             data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off)
374          ||
375          data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off
376         )
377       )
378    {
379        return INS_FRAME_ERR;
380    }
381
382    if (data_frame->df_offset < read_offset)
383    {
384        diff = read_offset - data_frame->df_offset;
385        assert(diff <= data_frame->df_size);
386        size = data_frame->df_size   - diff;
387        off  = data_frame->df_offset + diff;
388        data = data_frame->df_data   + diff;
389    }
390    else
391    {
392        size = data_frame->df_size;
393        off  = data_frame->df_offset;
394        data = data_frame->df_data;
395    }
396
397    key = off - (off % DB_DATA_SIZE);
398    do
399    {
400        block = hash_find(hdi, key);
401        if (!block)
402        {
403            block = new_block(hdi, key);
404            if (!block)
405                return INS_FRAME_ERR;
406        }
407        nw = block_write(block, off % DB_DATA_SIZE, data, size);
408        size -= nw;
409        off  += nw;
410        data += nw;
411        key  += DB_DATA_SIZE;
412    }
413    while (size > 0);
414
415    if (data_frame->df_fin)
416    {
417        fin_off = data_frame->df_offset + data_frame->df_size;
418        if (has_bytes_after(block, fin_off - block->db_off) ||
419                                                        hash_find(hdi, key))
420        {
421            return INS_FRAME_ERR;
422        }
423        hdi->hdi_flags  |= HDI_FIN;
424        hdi->hdi_fin_off = fin_off;
425    }
426
427    return INS_FRAME_OK;
428}
429
430
431static enum ins_frame
432hash_di_insert_frame (struct data_in *data_in,
433                        struct stream_frame *new_frame, uint64_t read_offset)
434{
435    struct hash_data_in *const hdi = HDI_PTR(data_in);
436    const struct data_frame *const data_frame = &new_frame->data_frame;
437    enum ins_frame ins;
438
439    ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset);
440    assert(ins != INS_FRAME_OVERLAP);
441    lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in);
442    if (ins != INS_FRAME_OK)
443        lsquic_malo_put(new_frame);
444    return ins;
445}
446
447
448#if __GNUC__
449#   define ctz __builtin_ctzll
450#else
451static unsigned
452ctz (unsigned long long x)
453{
454    unsigned n = 0;
455    if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; }
456    if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; }
457    if (0 == (x & ((1ULL <<  8) - 1))) { n +=  8; x >>=  8; }
458    if (0 == (x & ((1ULL <<  4) - 1))) { n +=  4; x >>=  4; }
459    if (0 == (x & ((1ULL <<  2) - 1))) { n +=  2; x >>=  2; }
460    if (0 == (x & ((1ULL <<  1) - 1))) { n +=  1; x >>=  1; }
461    return n;
462}
463#endif
464
465
466static unsigned
467n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit)
468{
469    unsigned count;
470    uint64_t part;
471
472    part = ~(block->db_set[ set ] >> bit);
473    if (part)
474    {
475        count = ctz(part);
476        if (count < 64 - bit)
477            return count;
478    }
479    else
480        count = 64;
481    ++set;
482
483    for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set)
484        count += 64;
485
486    if (set < N_DB_SETS)
487    {
488        part = ~block->db_set[ set ];
489        if (part)
490            count += ctz(part);
491        else
492            count += 64;
493    }
494
495    return count;
496}
497
498
499/* Data block is readable if there is at least one readable byte at
500 * `read_offset' or there is FIN at that offset.
501 */
502static int
503setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset,
504                                                    struct data_block *block)
505{
506    unsigned set, bit;
507    uint64_t offset;
508
509    offset = read_offset % DB_DATA_SIZE;
510    set = offset >> 6;
511    bit = offset & 0x3F;
512
513    if (block->db_set[ set ] & (1ULL << bit))
514    {
515        hdi->hdi_last_block             = block;
516        hdi->hdi_data_frame.df_data     = block->db_data;
517        hdi->hdi_data_frame.df_offset   = block->db_off;
518        hdi->hdi_data_frame.df_read_off = offset;
519        hdi->hdi_data_frame.df_size     = offset +
520                                                n_avail_bytes(block, set, bit);
521        hdi->hdi_data_frame.df_fin      =
522            (hdi->hdi_flags & HDI_FIN) &&
523                hdi->hdi_data_frame.df_read_off +
524                    hdi->hdi_data_frame.df_size == hdi->hdi_fin_off;
525        return 1;
526    }
527    else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
528    {
529        hdi->hdi_last_block             = block;
530        hdi->hdi_data_frame.df_data     = NULL;
531        hdi->hdi_data_frame.df_offset   = block->db_off;
532        hdi->hdi_data_frame.df_read_off = offset;
533        hdi->hdi_data_frame.df_size     = offset;
534        hdi->hdi_data_frame.df_fin      = 1;
535        return 1;
536    }
537    else
538        return 0;
539}
540
541
542static struct data_frame *
543hash_di_get_frame (struct data_in *data_in, uint64_t read_offset)
544{
545    struct hash_data_in *const hdi = HDI_PTR(data_in);
546    struct data_block *block;
547    uint64_t key;
548
549    key = read_offset - (read_offset % DB_DATA_SIZE);
550    block = hash_find(hdi, key);
551    if (!block)
552    {
553        if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
554        {
555            hdi->hdi_last_block             = NULL;
556            hdi->hdi_data_frame.df_data     = NULL;
557            hdi->hdi_data_frame.df_offset   = read_offset -
558                                                    read_offset % DB_DATA_SIZE;
559            hdi->hdi_data_frame.df_read_off = 0;
560            hdi->hdi_data_frame.df_size     = 0;
561            hdi->hdi_data_frame.df_fin      = 1;
562            return &hdi->hdi_data_frame;
563        }
564        else
565            return NULL;
566    }
567
568    if (setup_data_frame(hdi, read_offset, block))
569        return &hdi->hdi_data_frame;
570    else
571        return NULL;
572}
573
574
575static void
576hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
577{
578    struct hash_data_in *const hdi = HDI_PTR(data_in);
579    struct data_block *const block = hdi->hdi_last_block;
580
581    if (block)
582    {
583        if (data_frame->df_read_off == DB_DATA_SIZE ||
584                            !has_bytes_after(block, data_frame->df_read_off))
585        {
586            hash_remove(hdi, block);
587            free(block);
588            if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN))
589            {
590                LSQ_DEBUG("hash empty, want to switch");
591                hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL;
592            }
593        }
594    }
595    else
596        assert(data_frame->df_fin && data_frame->df_size == 0);
597}
598
599
600static int
601hash_di_empty (struct data_in *data_in)
602{
603    struct hash_data_in *const hdi = HDI_PTR(data_in);
604    return hdi->hdi_count == 0;
605}
606
607
608struct data_in *
609hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
610{
611    struct hash_data_in *const hdi = HDI_PTR(data_in);
612    struct data_in *new_data_in;
613
614    assert(hdi->hdi_count == 0);
615
616    new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id);
617    data_in->di_if->di_destroy(data_in);
618
619    return new_data_in;
620}
621
622
623static size_t
624hash_di_mem_used (struct data_in *data_in)
625{
626    struct hash_data_in *const hdi = HDI_PTR(data_in);
627    const struct data_block *block;
628    size_t size;
629    unsigned n;
630
631    size = sizeof(*data_in);
632
633    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
634        TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next)
635            size += sizeof(*block);
636
637    size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]);
638
639    return size;
640}
641
642
643static void
644hash_di_dump_state (struct data_in *data_in)
645{
646    const struct hash_data_in *const hdi = HDI_PTR(data_in);
647    const struct data_block *block;
648    unsigned n;
649
650    LSQ_DEBUG("hash state: flags: %X; fin off: %"PRIu64"; count: %u",
651        hdi->hdi_flags, hdi->hdi_fin_off, hdi->hdi_count);
652    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
653        TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next)
654            LSQ_DEBUG("block: off: %"PRIu64, block->db_off);
655}
656
657
658static uint64_t
659hash_di_readable_bytes (struct data_in *data_in, uint64_t read_offset)
660{
661    const struct data_frame *data_frame;
662    uint64_t starting_offset;
663
664    starting_offset = read_offset;
665    while (data_frame = hash_di_get_frame(data_in, read_offset),
666                data_frame && data_frame->df_size - data_frame->df_read_off)
667        read_offset += data_frame->df_size - data_frame->df_read_off;
668
669    return read_offset - starting_offset;
670}
671
672
673static const struct data_in_iface di_if_hash = {
674    .di_destroy      = hash_di_destroy,
675    .di_dump_state   = hash_di_dump_state,
676    .di_empty        = hash_di_empty,
677    .di_frame_done   = hash_di_frame_done,
678    .di_get_frame    = hash_di_get_frame,
679    .di_insert_frame = hash_di_insert_frame,
680    .di_mem_used     = hash_di_mem_used,
681    .di_own_on_ok    = 0,
682    .di_readable_bytes
683                     = hash_di_readable_bytes,
684    .di_switch_impl  = hash_di_switch_impl,
685};
686
687static const struct data_in_iface *di_if_hash_ptr = &di_if_hash;
688