lsquic_di_hash.c revision 7d09751d
1/* Copyright (c) 2017 - 2020 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_di_hash.c -- Copy incoming data into a hash
4 *
5 * While this implementation copies the data, its memory use is limited,
6 * which makes it a good choice when we have a lot of stream frames
7 * coming in.
8 *
9 * Another difference is that it does not check for frame overlap, which
10 * is something that is present in Chrome, but it is not required by QUIC.
11 */
12
13
14#include <assert.h>
15#include <inttypes.h>
16#include <stddef.h>
17#include <stdint.h>
18#include <stdlib.h>
19#include <string.h>
20#include <sys/queue.h>
21
22#include "lsquic.h"
23#include "lsquic_int_types.h"
24#include "lsquic_types.h"
25#include "lsquic_conn_flow.h"
26#include "lsquic_packet_common.h"
27#include "lsquic_packet_in.h"
28#include "lsquic_rtt.h"
29#include "lsquic_sfcw.h"
30#include "lsquic_varint.h"
31#include "lsquic_hq.h"
32#include "lsquic_varint.h"
33#include "lsquic_hash.h"
34#include "lsquic_stream.h"
35#include "lsquic_mm.h"
36#include "lsquic_malo.h"
37#include "lsquic_conn.h"
38#include "lsquic_conn_public.h"
39#include "lsquic_data_in_if.h"
40
41
42#define LSQUIC_LOGGER_MODULE LSQLM_DI
43#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(hdi->hdi_conn_pub->lconn)
44#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id
45#include "lsquic_logger.h"
46
47
48#define N_DB_SETS 57
49
50#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \
51                                sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t))
52
53struct data_block
54{
55    TAILQ_ENTRY(data_block) db_next;
56    uint64_t                db_off;
57    uint64_t                db_set[N_DB_SETS];  /* bit for each valid byte */
58    unsigned char           db_data[DB_DATA_SIZE];
59};
60
61typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1];
62typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1];
63typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1];
64
65
66TAILQ_HEAD(dblock_head, data_block);
67
68
69static const struct data_in_iface *di_if_hash_ptr;
70
71
72struct hash_data_in
73{
74    struct data_in              hdi_data_in;
75    struct lsquic_conn_public  *hdi_conn_pub;
76    uint64_t                    hdi_fin_off;
77    struct dblock_head         *hdi_buckets;
78    struct data_block          *hdi_last_block;
79    struct data_frame           hdi_data_frame;
80    lsquic_stream_id_t          hdi_stream_id;
81    unsigned                    hdi_count;
82    unsigned                    hdi_nbits;
83    enum {
84            HDI_FIN = (1 << 0),
85    }                           hdi_flags;
86};
87
88
89#define HDI_PTR(data_in) (struct hash_data_in *) \
90    ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in))
91
92
93#define N_BUCKETS(n_bits) (1U << (n_bits))
94#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1))
95
96
97static unsigned
98my_log2 /* silly name to suppress compiler warning */ (unsigned sz)
99{
100#if __GNUC__
101    unsigned clz = __builtin_clz(sz);
102    return 32 - clz;
103#else
104    unsigned clz;
105    size_t y;
106    clz = 32;
107    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
108    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
109    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
110    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
111    y = sz >>  1;   if (y) return 32 - clz + 1;
112    return 32 - clz + sz;
113#endif
114}
115
116
117struct data_in *
118data_in_hash_new (struct lsquic_conn_public *conn_pub,
119                        lsquic_stream_id_t stream_id, uint64_t byteage)
120{
121    struct hash_data_in *hdi;
122    unsigned n;
123
124    hdi = malloc(sizeof(*hdi));
125    if (!hdi)
126        return NULL;
127
128    hdi->hdi_data_in.di_if    = di_if_hash_ptr;
129    hdi->hdi_data_in.di_flags = 0;
130    hdi->hdi_conn_pub         = conn_pub;
131    hdi->hdi_stream_id        = stream_id;
132    hdi->hdi_fin_off          = 0;
133    hdi->hdi_flags            = 0;
134    hdi->hdi_last_block       = NULL;
135    if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if
136                                   argument is 0 */)
137        hdi->hdi_nbits        = my_log2(byteage / DB_DATA_SIZE) + 2;
138    else
139        hdi->hdi_nbits        = 3;
140    hdi->hdi_count            = 0;
141    hdi->hdi_buckets          = malloc(sizeof(hdi->hdi_buckets[0]) *
142                                                    N_BUCKETS(hdi->hdi_nbits));
143    if (!hdi->hdi_buckets)
144    {
145        free(hdi);
146        return NULL;
147    }
148
149    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
150        TAILQ_INIT(&hdi->hdi_buckets[n]);
151
152    return &hdi->hdi_data_in;
153}
154
155
156static void
157hash_di_destroy (struct data_in *data_in)
158{
159    struct hash_data_in *const hdi = HDI_PTR(data_in);
160    struct data_block *block;
161    unsigned n;
162
163    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
164    {
165        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
166        {
167            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
168            free(block);
169        }
170    }
171    free(hdi->hdi_buckets);
172    free(hdi);
173}
174
175
176static int
177hash_grow (struct hash_data_in *hdi)
178{
179    struct dblock_head *new_buckets, *new[2];
180    struct data_block *block;
181    unsigned n, old_nbits;
182    int idx;
183
184    old_nbits = hdi->hdi_nbits;
185    LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1));
186    new_buckets = malloc(sizeof(hdi->hdi_buckets[0])
187                                                * N_BUCKETS(old_nbits + 1));
188    if (!new_buckets)
189    {
190        LSQ_WARN("malloc failed: potential trouble ahead");
191        return -1;
192    }
193
194    for (n = 0; n < N_BUCKETS(old_nbits); ++n)
195    {
196        new[0] = &new_buckets[n];
197        new[1] = &new_buckets[n + N_BUCKETS(old_nbits)];
198        TAILQ_INIT(new[0]);
199        TAILQ_INIT(new[1]);
200        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
201        {
202            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
203            idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1;
204            TAILQ_INSERT_TAIL(new[idx], block, db_next);
205        }
206    }
207    free(hdi->hdi_buckets);
208    hdi->hdi_nbits   = old_nbits + 1;
209    hdi->hdi_buckets = new_buckets;
210    return 0;
211}
212
213
214static int
215hash_insert (struct hash_data_in *hdi, struct data_block *block)
216{
217    unsigned buckno;
218
219    if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi))
220        return -1;
221
222    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
223    TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next);
224    ++hdi->hdi_count;
225    return 0;
226}
227
228
229static struct data_block *
230hash_find (const struct hash_data_in *hdi, uint64_t off)
231{
232    struct data_block *block;
233    unsigned buckno;
234
235    buckno = BUCKNO(hdi->hdi_nbits, off);
236    TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next)
237        if (off == block->db_off)
238            return block;
239    return NULL;
240}
241
242
243static void
244hash_remove (struct hash_data_in *hdi, struct data_block *block)
245{
246    unsigned buckno;
247
248    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
249    TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next);
250    --hdi->hdi_count;
251}
252
253
254static struct data_block *
255new_block (struct hash_data_in *hdi, uint64_t off)
256{
257    struct data_block *block;
258
259    assert(0 == off % DB_DATA_SIZE);
260
261    block = malloc(sizeof(*block));
262    if (!block)
263        return NULL;
264
265    block->db_off = off;
266    if (0 != hash_insert(hdi, block))
267    {
268        free(block);
269        return NULL;
270    }
271
272    memset(block->db_set, 0, sizeof(block->db_set));
273    return block;
274}
275
276
277static unsigned
278block_write (struct data_block *block, unsigned block_off,
279                             const unsigned char *data, unsigned data_sz)
280{
281    const unsigned char *begin, *end;
282    unsigned set, bit, n_full_sets, n;
283    uint64_t mask;
284
285    assert(block_off < DB_DATA_SIZE);
286    if (data_sz > DB_DATA_SIZE - block_off)
287        data_sz = DB_DATA_SIZE - block_off;
288
289    begin = data;
290    end = begin + data_sz;
291    set = block_off >> 6;
292    bit = block_off & 0x3F;
293
294    assert(set < N_DB_SETS);
295
296    if (bit)
297    {
298        n = 64 - bit;
299        if (n > data_sz)
300            n = data_sz;
301        mask = ~((1ULL <<  bit     ) - 1)
302             &  ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1));
303        block->db_set[ set ] |= mask;
304        memcpy(block->db_data + block_off, data, n);
305        data      += n;
306        block_off += n;
307        ++set;
308    }
309
310    n_full_sets = (end - data) >> 6;
311    if (n_full_sets)
312    {
313        memcpy(block->db_data + block_off, data, n_full_sets * 64);
314        data      += n_full_sets * 64;
315        block_off += n_full_sets * 64;
316        memset(&block->db_set[ set ], 0xFF, n_full_sets * 8);
317        set += n_full_sets;
318    }
319
320    if (data < end)
321    {
322        assert(end - data < 64);
323        block->db_set[ set ] |= ((1ULL << (end - data)) - 1);
324        memcpy(block->db_data + block_off, data, end - data);
325        data = end;
326    }
327
328    assert(set <= N_DB_SETS);
329
330    return data - begin;
331}
332
333
334static int
335has_bytes_after (const struct data_block *block, unsigned off)
336{
337    unsigned bit, set;
338    int has;
339
340    set = off >> 6;
341    bit = off & 0x3F;
342
343    has = 0 != (block->db_set[ set ] >> bit);
344    ++set;
345
346    for ( ; set < N_DB_SETS; ++set)
347        has += 0 != block->db_set[ set ];
348
349    return has > 0;
350}
351
352
353enum ins_frame
354data_in_hash_insert_data_frame (struct data_in *data_in,
355                const struct data_frame *data_frame, uint64_t read_offset)
356{
357    struct hash_data_in *const hdi = HDI_PTR(data_in);
358    struct data_block *block;
359    uint64_t key, off, diff, fin_off;
360    const unsigned char *data;
361    unsigned size, nw;
362
363    if (data_frame->df_offset + data_frame->df_size < read_offset)
364    {
365        if (data_frame->df_fin)
366            return INS_FRAME_ERR;
367        else
368            return INS_FRAME_DUP;
369    }
370
371    if ((hdi->hdi_flags & HDI_FIN) &&
372         (
373          (data_frame->df_fin &&
374             data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off)
375          ||
376          data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off
377         )
378       )
379    {
380        return INS_FRAME_ERR;
381    }
382
383    if (data_frame->df_offset < read_offset)
384    {
385        diff = read_offset - data_frame->df_offset;
386        assert(diff <= data_frame->df_size);
387        size = data_frame->df_size   - diff;
388        off  = data_frame->df_offset + diff;
389        data = data_frame->df_data   + diff;
390    }
391    else
392    {
393        size = data_frame->df_size;
394        off  = data_frame->df_offset;
395        data = data_frame->df_data;
396    }
397
398    key = off - (off % DB_DATA_SIZE);
399    do
400    {
401        block = hash_find(hdi, key);
402        if (!block)
403        {
404            block = new_block(hdi, key);
405            if (!block)
406                return INS_FRAME_ERR;
407        }
408        nw = block_write(block, off % DB_DATA_SIZE, data, size);
409        size -= nw;
410        off  += nw;
411        data += nw;
412        key  += DB_DATA_SIZE;
413    }
414    while (size > 0);
415
416    if (data_frame->df_fin)
417    {
418        fin_off = data_frame->df_offset + data_frame->df_size;
419        if (has_bytes_after(block, fin_off - block->db_off) ||
420                                                        hash_find(hdi, key))
421        {
422            return INS_FRAME_ERR;
423        }
424        hdi->hdi_flags  |= HDI_FIN;
425        hdi->hdi_fin_off = fin_off;
426    }
427
428    return INS_FRAME_OK;
429}
430
431
432static enum ins_frame
433hash_di_insert_frame (struct data_in *data_in,
434                        struct stream_frame *new_frame, uint64_t read_offset)
435{
436    struct hash_data_in *const hdi = HDI_PTR(data_in);
437    const struct data_frame *const data_frame = &new_frame->data_frame;
438    enum ins_frame ins;
439
440    ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset);
441    assert(ins != INS_FRAME_OVERLAP);
442    lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in);
443    if (ins != INS_FRAME_OK)
444        lsquic_malo_put(new_frame);
445    return ins;
446}
447
448
449#if __GNUC__
450#   define ctz __builtin_ctzll
451#else
452static unsigned
453ctz (unsigned long long x)
454{
455    unsigned n = 0;
456    if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; }
457    if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; }
458    if (0 == (x & ((1ULL <<  8) - 1))) { n +=  8; x >>=  8; }
459    if (0 == (x & ((1ULL <<  4) - 1))) { n +=  4; x >>=  4; }
460    if (0 == (x & ((1ULL <<  2) - 1))) { n +=  2; x >>=  2; }
461    if (0 == (x & ((1ULL <<  1) - 1))) { n +=  1; x >>=  1; }
462    return n;
463}
464#endif
465
466
467static unsigned
468n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit)
469{
470    unsigned count;
471    uint64_t part;
472
473    part = ~(block->db_set[ set ] >> bit);
474    if (part)
475    {
476        count = ctz(part);
477        if (count < 64 - bit)
478            return count;
479    }
480    else
481        count = 64;
482    ++set;
483
484    for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set)
485        count += 64;
486
487    if (set < N_DB_SETS)
488    {
489        part = ~block->db_set[ set ];
490        if (part)
491            count += ctz(part);
492        else
493            count += 64;
494    }
495
496    return count;
497}
498
499
500/* Data block is readable if there is at least one readable byte at
501 * `read_offset' or there is FIN at that offset.
502 */
503static int
504setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset,
505                                                    struct data_block *block)
506{
507    unsigned set, bit;
508    uint64_t offset;
509
510    offset = read_offset % DB_DATA_SIZE;
511    set = offset >> 6;
512    bit = offset & 0x3F;
513
514    if (block->db_set[ set ] & (1ULL << bit))
515    {
516        hdi->hdi_last_block             = block;
517        hdi->hdi_data_frame.df_data     = block->db_data;
518        hdi->hdi_data_frame.df_offset   = block->db_off;
519        hdi->hdi_data_frame.df_read_off = offset;
520        hdi->hdi_data_frame.df_size     = offset +
521                                                n_avail_bytes(block, set, bit);
522        hdi->hdi_data_frame.df_fin      =
523            (hdi->hdi_flags & HDI_FIN) &&
524                hdi->hdi_data_frame.df_read_off +
525                    hdi->hdi_data_frame.df_size == hdi->hdi_fin_off;
526        return 1;
527    }
528    else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
529    {
530        hdi->hdi_last_block             = block;
531        hdi->hdi_data_frame.df_data     = NULL;
532        hdi->hdi_data_frame.df_offset   = block->db_off;
533        hdi->hdi_data_frame.df_read_off = offset;
534        hdi->hdi_data_frame.df_size     = offset;
535        hdi->hdi_data_frame.df_fin      = 1;
536        return 1;
537    }
538    else
539        return 0;
540}
541
542
543static struct data_frame *
544hash_di_get_frame (struct data_in *data_in, uint64_t read_offset)
545{
546    struct hash_data_in *const hdi = HDI_PTR(data_in);
547    struct data_block *block;
548    uint64_t key;
549
550    key = read_offset - (read_offset % DB_DATA_SIZE);
551    block = hash_find(hdi, key);
552    if (!block)
553    {
554        if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
555        {
556            hdi->hdi_last_block             = NULL;
557            hdi->hdi_data_frame.df_data     = NULL;
558            hdi->hdi_data_frame.df_offset   = read_offset -
559                                                    read_offset % DB_DATA_SIZE;
560            hdi->hdi_data_frame.df_read_off = 0;
561            hdi->hdi_data_frame.df_size     = 0;
562            hdi->hdi_data_frame.df_fin      = 1;
563            return &hdi->hdi_data_frame;
564        }
565        else
566            return NULL;
567    }
568
569    if (setup_data_frame(hdi, read_offset, block))
570        return &hdi->hdi_data_frame;
571    else
572        return NULL;
573}
574
575
576static void
577hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
578{
579    struct hash_data_in *const hdi = HDI_PTR(data_in);
580    struct data_block *const block = hdi->hdi_last_block;
581
582    if (block)
583    {
584        if (data_frame->df_read_off == DB_DATA_SIZE ||
585                            !has_bytes_after(block, data_frame->df_read_off))
586        {
587            hash_remove(hdi, block);
588            free(block);
589            if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN))
590            {
591                LSQ_DEBUG("hash empty, want to switch");
592                hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL;
593            }
594        }
595    }
596    else
597        assert(data_frame->df_fin && data_frame->df_size == 0);
598}
599
600
601static int
602hash_di_empty (struct data_in *data_in)
603{
604    struct hash_data_in *const hdi = HDI_PTR(data_in);
605    return hdi->hdi_count == 0;
606}
607
608
609struct data_in *
610hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
611{
612    struct hash_data_in *const hdi = HDI_PTR(data_in);
613    struct data_in *new_data_in;
614
615    assert(hdi->hdi_count == 0);
616
617    new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id);
618    data_in->di_if->di_destroy(data_in);
619
620    return new_data_in;
621}
622
623
624static size_t
625hash_di_mem_used (struct data_in *data_in)
626{
627    struct hash_data_in *const hdi = HDI_PTR(data_in);
628    const struct data_block *block;
629    size_t size;
630    unsigned n;
631
632    size = sizeof(*data_in);
633
634    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
635        TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next)
636            size += sizeof(*block);
637
638    size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]);
639
640    return size;
641}
642
643
644static void
645hash_di_dump_state (struct data_in *data_in)
646{
647    const struct hash_data_in *const hdi = HDI_PTR(data_in);
648    const struct data_block *block;
649    unsigned n;
650
651    LSQ_DEBUG("hash state: flags: %X; fin off: %"PRIu64"; count: %u",
652        hdi->hdi_flags, hdi->hdi_fin_off, hdi->hdi_count);
653    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
654        TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next)
655            LSQ_DEBUG("block: off: %"PRIu64, block->db_off);
656}
657
658
659static uint64_t
660hash_di_readable_bytes (struct data_in *data_in, uint64_t read_offset)
661{
662    const struct data_frame *data_frame;
663    uint64_t starting_offset;
664
665    starting_offset = read_offset;
666    while (data_frame = hash_di_get_frame(data_in, read_offset),
667                data_frame && data_frame->df_size - data_frame->df_read_off)
668        read_offset += data_frame->df_size - data_frame->df_read_off;
669
670    return read_offset - starting_offset;
671}
672
673
674static const struct data_in_iface di_if_hash = {
675    .di_destroy      = hash_di_destroy,
676    .di_dump_state   = hash_di_dump_state,
677    .di_empty        = hash_di_empty,
678    .di_frame_done   = hash_di_frame_done,
679    .di_get_frame    = hash_di_get_frame,
680    .di_insert_frame = hash_di_insert_frame,
681    .di_mem_used     = hash_di_mem_used,
682    .di_own_on_ok    = 0,
683    .di_readable_bytes
684                     = hash_di_readable_bytes,
685    .di_switch_impl  = hash_di_switch_impl,
686};
687
688static const struct data_in_iface *di_if_hash_ptr = &di_if_hash;
689