lsquic_di_hash.c revision 03fb9352
1/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_di_hash.c -- Copy incoming data into a hash
4 *
5 * While this implementation copies the data, its memory use is limited,
6 * which makes it a good choice when we have a lot of stream frames
7 * coming in.
8 *
9 * Another difference is that it does not check for frame overlap, which
10 * is something that is present in Chrome, but it is not required by QUIC.
11 */
12
13
14#include <assert.h>
15#include <inttypes.h>
16#include <stddef.h>
17#include <stdint.h>
18#include <stdlib.h>
19#include <string.h>
20#include <sys/queue.h>
21
22#include "lsquic.h"
23#include "lsquic_int_types.h"
24#include "lsquic_types.h"
25#include "lsquic_conn_flow.h"
26#include "lsquic_packet_common.h"
27#include "lsquic_packet_in.h"
28#include "lsquic_rtt.h"
29#include "lsquic_sfcw.h"
30#include "lsquic_stream.h"
31#include "lsquic_mm.h"
32#include "lsquic_malo.h"
33#include "lsquic_conn.h"
34#include "lsquic_conn_public.h"
35#include "lsquic_data_in_if.h"
36
37
38#define LSQUIC_LOGGER_MODULE LSQLM_DI
39#define LSQUIC_LOG_CONN_ID hdi->hdi_conn_pub->lconn->cn_cid
40#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id
41#include "lsquic_logger.h"
42
43
44#define N_DB_SETS 57
45
46#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \
47                                sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t))
48
49struct data_block
50{
51    TAILQ_ENTRY(data_block) db_next;
52    uint64_t                db_off;
53    uint64_t                db_set[N_DB_SETS];  /* bit for each valid byte */
54    unsigned char           db_data[DB_DATA_SIZE];
55};
56
57typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1];
58typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1];
59typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1];
60
61
62TAILQ_HEAD(dblock_head, data_block);
63
64
65static const struct data_in_iface *di_if_hash_ptr;
66
67
68struct hash_data_in
69{
70    struct data_in              hdi_data_in;
71    struct lsquic_conn_public  *hdi_conn_pub;
72    uint64_t                    hdi_fin_off;
73    struct dblock_head         *hdi_buckets;
74    struct data_block          *hdi_last_block;
75    struct data_frame           hdi_data_frame;
76    uint32_t                    hdi_stream_id;
77    unsigned                    hdi_count;
78    unsigned                    hdi_nbits;
79    enum {
80            HDI_FIN = (1 << 0),
81    }                           hdi_flags;
82};
83
84
85#define HDI_PTR(data_in) (struct hash_data_in *) \
86    ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in))
87
88
89#define N_BUCKETS(n_bits) (1U << (n_bits))
90#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1))
91
92
93static unsigned
94my_log2 /* silly name to suppress compiler warning */ (unsigned sz)
95{
96#if __GNUC__
97    unsigned clz = __builtin_clz(sz);
98    return 32 - clz;
99#else
100    unsigned clz;
101    size_t y;
102    clz = 32;
103    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
104    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
105    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
106    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
107    y = sz >>  1;   if (y) return 32 - clz + 1;
108    return 32 - clz + sz;
109#endif
110}
111
112
113struct data_in *
114data_in_hash_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id,
115                  uint64_t byteage)
116{
117    struct hash_data_in *hdi;
118    unsigned n;
119
120    hdi = malloc(sizeof(*hdi));
121    if (!hdi)
122        return NULL;
123
124    hdi->hdi_data_in.di_if    = di_if_hash_ptr;
125    hdi->hdi_data_in.di_flags = 0;
126    hdi->hdi_conn_pub         = conn_pub;
127    hdi->hdi_stream_id        = stream_id;
128    hdi->hdi_fin_off          = 0;
129    hdi->hdi_flags            = 0;
130    hdi->hdi_last_block       = NULL;
131    if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if
132                                   argument is 0 */)
133        hdi->hdi_nbits        = my_log2(byteage / DB_DATA_SIZE) + 2;
134    else
135        hdi->hdi_nbits        = 3;
136    hdi->hdi_count            = 0;
137    hdi->hdi_buckets          = malloc(sizeof(hdi->hdi_buckets[0]) *
138                                                    N_BUCKETS(hdi->hdi_nbits));
139    if (!hdi->hdi_buckets)
140    {
141        free(hdi);
142        return NULL;
143    }
144
145    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
146        TAILQ_INIT(&hdi->hdi_buckets[n]);
147
148    return &hdi->hdi_data_in;
149}
150
151
152static void
153hash_di_destroy (struct data_in *data_in)
154{
155    struct hash_data_in *const hdi = HDI_PTR(data_in);
156    struct data_block *block;
157    unsigned n;
158
159    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
160    {
161        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
162        {
163            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
164            free(block);
165        }
166    }
167    free(hdi->hdi_buckets);
168    free(hdi);
169}
170
171
172static int
173hash_grow (struct hash_data_in *hdi)
174{
175    struct dblock_head *new_buckets, *new[2];
176    struct data_block *block;
177    unsigned n, old_nbits;
178    int idx;
179
180    old_nbits = hdi->hdi_nbits;
181    LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1));
182    new_buckets = malloc(sizeof(hdi->hdi_buckets[0])
183                                                * N_BUCKETS(old_nbits + 1));
184    if (!new_buckets)
185    {
186        LSQ_WARN("malloc failed: potential trouble ahead");
187        return -1;
188    }
189
190    for (n = 0; n < N_BUCKETS(old_nbits); ++n)
191    {
192        new[0] = &new_buckets[n];
193        new[1] = &new_buckets[n + N_BUCKETS(old_nbits)];
194        TAILQ_INIT(new[0]);
195        TAILQ_INIT(new[1]);
196        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
197        {
198            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
199            idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1;
200            TAILQ_INSERT_TAIL(new[idx], block, db_next);
201        }
202    }
203    free(hdi->hdi_buckets);
204    hdi->hdi_nbits   = old_nbits + 1;
205    hdi->hdi_buckets = new_buckets;
206    return 0;
207}
208
209
210static int
211hash_insert (struct hash_data_in *hdi, struct data_block *block)
212{
213    unsigned buckno;
214
215    if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi))
216        return -1;
217
218    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
219    TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next);
220    ++hdi->hdi_count;
221    return 0;
222}
223
224
225static struct data_block *
226hash_find (const struct hash_data_in *hdi, uint64_t off)
227{
228    struct data_block *block;
229    unsigned buckno;
230
231    buckno = BUCKNO(hdi->hdi_nbits, off);
232    TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next)
233        if (off == block->db_off)
234            return block;
235    return NULL;
236}
237
238
239static void
240hash_remove (struct hash_data_in *hdi, struct data_block *block)
241{
242    unsigned buckno;
243
244    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
245    TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next);
246    --hdi->hdi_count;
247}
248
249
250static struct data_block *
251new_block (struct hash_data_in *hdi, uint64_t off)
252{
253    struct data_block *block;
254
255    assert(0 == off % DB_DATA_SIZE);
256
257    block = malloc(sizeof(*block));
258    if (!block)
259        return NULL;
260
261    block->db_off = off;
262    if (0 != hash_insert(hdi, block))
263    {
264        free(block);
265        return NULL;
266    }
267
268    memset(block->db_set, 0, sizeof(block->db_set));
269    return block;
270}
271
272
273static unsigned
274block_write (struct data_block *block, unsigned block_off,
275                             const unsigned char *data, unsigned data_sz)
276{
277    const unsigned char *begin, *end;
278    unsigned set, bit, n_full_sets, n;
279    uint64_t mask;
280
281    assert(block_off < DB_DATA_SIZE);
282    if (data_sz > DB_DATA_SIZE - block_off)
283        data_sz = DB_DATA_SIZE - block_off;
284
285    begin = data;
286    end = begin + data_sz;
287    set = block_off >> 6;
288    bit = block_off & 0x3F;
289
290    assert(set < N_DB_SETS);
291
292    if (bit)
293    {
294        n = 64 - bit;
295        if (n > data_sz)
296            n = data_sz;
297        mask = ~((1ULL <<  bit     ) - 1)
298             &  ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1));
299        block->db_set[ set ] |= mask;
300        memcpy(block->db_data + block_off, data, n);
301        data      += n;
302        block_off += n;
303        ++set;
304    }
305
306    n_full_sets = (end - data) >> 6;
307    if (n_full_sets)
308    {
309        memcpy(block->db_data + block_off, data, n_full_sets * 64);
310        data      += n_full_sets * 64;
311        block_off += n_full_sets * 64;
312        memset(&block->db_set[ set ], 0xFF, n_full_sets * 8);
313        set += n_full_sets;
314    }
315
316    if (data < end)
317    {
318        assert(end - data < 64);
319        block->db_set[ set ] |= ((1ULL << (end - data)) - 1);
320        memcpy(block->db_data + block_off, data, end - data);
321        data = end;
322    }
323
324    assert(set <= N_DB_SETS);
325
326    return data - begin;
327}
328
329
330static int
331has_bytes_after (const struct data_block *block, unsigned off)
332{
333    unsigned bit, set;
334    int has;
335
336    set = off >> 6;
337    bit = off & 0x3F;
338
339    has = 0 != (block->db_set[ set ] >> bit);
340    ++set;
341
342    for ( ; set < N_DB_SETS; ++set)
343        has += 0 != block->db_set[ set ];
344
345    return has > 0;
346}
347
348
349enum ins_frame
350data_in_hash_insert_data_frame (struct data_in *data_in,
351                const struct data_frame *data_frame, uint64_t read_offset)
352{
353    struct hash_data_in *const hdi = HDI_PTR(data_in);
354    struct data_block *block;
355    uint64_t key, off, diff, fin_off;
356    const unsigned char *data;
357    unsigned size, nw;
358
359    if (data_frame->df_offset + data_frame->df_size < read_offset)
360    {
361        if (data_frame->df_fin)
362            return INS_FRAME_ERR;
363        else
364            return INS_FRAME_DUP;
365    }
366
367    if ((hdi->hdi_flags & HDI_FIN) &&
368         (
369          (data_frame->df_fin &&
370             data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off)
371          ||
372          data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off
373         )
374       )
375    {
376        return INS_FRAME_ERR;
377    }
378
379    if (data_frame->df_offset < read_offset)
380    {
381        diff = read_offset - data_frame->df_offset;
382        assert(diff <= data_frame->df_size);
383        size = data_frame->df_size   - diff;
384        off  = data_frame->df_offset + diff;
385        data = data_frame->df_data   + diff;
386    }
387    else
388    {
389        size = data_frame->df_size;
390        off  = data_frame->df_offset;
391        data = data_frame->df_data;
392    }
393
394    key = off - (off % DB_DATA_SIZE);
395    do
396    {
397        block = hash_find(hdi, key);
398        if (!block)
399        {
400            block = new_block(hdi, key);
401            if (!block)
402                return INS_FRAME_ERR;
403        }
404        nw = block_write(block, off % DB_DATA_SIZE, data, size);
405        size -= nw;
406        off  += nw;
407        data += nw;
408        key  += DB_DATA_SIZE;
409    }
410    while (size > 0);
411
412    if (data_frame->df_fin)
413    {
414        fin_off = data_frame->df_offset + data_frame->df_size;
415        if (has_bytes_after(block, fin_off - block->db_off) ||
416                                                        hash_find(hdi, key))
417        {
418            return INS_FRAME_ERR;
419        }
420        hdi->hdi_flags  |= HDI_FIN;
421        hdi->hdi_fin_off = fin_off;
422    }
423
424    return INS_FRAME_OK;
425}
426
427
428static enum ins_frame
429hash_di_insert_frame (struct data_in *data_in,
430                        struct stream_frame *new_frame, uint64_t read_offset)
431{
432    struct hash_data_in *const hdi = HDI_PTR(data_in);
433    const struct data_frame *const data_frame = &new_frame->data_frame;
434    enum ins_frame ins;
435
436    ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset);
437    assert(ins != INS_FRAME_OVERLAP);
438    lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in);
439    lsquic_malo_put(new_frame);
440    return ins;
441}
442
443
444#if __GNUC__
445#   define ctz __builtin_ctzll
446#else
447static unsigned
448ctz (unsigned long long x)
449{
450    unsigned n = 0;
451    if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; }
452    if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; }
453    if (0 == (x & ((1ULL <<  8) - 1))) { n +=  8; x >>=  8; }
454    if (0 == (x & ((1ULL <<  4) - 1))) { n +=  4; x >>=  4; }
455    if (0 == (x & ((1ULL <<  2) - 1))) { n +=  2; x >>=  2; }
456    if (0 == (x & ((1ULL <<  1) - 1))) { n +=  1; x >>=  1; }
457    return n;
458}
459
460
461#endif
462
463
464static unsigned
465n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit)
466{
467    unsigned count;
468    uint64_t part;
469
470    part = ~(block->db_set[ set ] >> bit);
471    if (part)
472    {
473        count = ctz(part);
474        if (count < 64 - bit)
475            return count;
476    }
477    else
478        count = 64;
479    ++set;
480
481    for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set)
482        count += 64;
483
484    if (set < N_DB_SETS)
485    {
486        part = ~block->db_set[ set ];
487        if (part)
488            count += ctz(part);
489        else
490            count += 64;
491    }
492
493    return count;
494}
495
496
497/* Data block is readable if there is at least one readable byte at
498 * `read_offset' or there is FIN at that offset.
499 */
500static int
501setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset,
502                                                    struct data_block *block)
503{
504    unsigned set, bit;
505    uint64_t offset;
506
507    offset = read_offset % DB_DATA_SIZE;
508    set = offset >> 6;
509    bit = offset & 0x3F;
510
511    if (block->db_set[ set ] & (1ULL << bit))
512    {
513        hdi->hdi_last_block             = block;
514        hdi->hdi_data_frame.df_data     = block->db_data;
515        hdi->hdi_data_frame.df_offset   = block->db_off;
516        hdi->hdi_data_frame.df_read_off = offset;
517        hdi->hdi_data_frame.df_size     = offset +
518                                                n_avail_bytes(block, set, bit);
519        hdi->hdi_data_frame.df_fin      =
520            (hdi->hdi_flags & HDI_FIN) &&
521                hdi->hdi_data_frame.df_read_off +
522                    hdi->hdi_data_frame.df_size == hdi->hdi_fin_off;
523        return 1;
524    }
525    else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
526    {
527        hdi->hdi_last_block             = block;
528        hdi->hdi_data_frame.df_data     = NULL;
529        hdi->hdi_data_frame.df_offset   = block->db_off;
530        hdi->hdi_data_frame.df_read_off = offset;
531        hdi->hdi_data_frame.df_size     = offset;
532        hdi->hdi_data_frame.df_fin      = 1;
533        return 1;
534    }
535    else
536        return 0;
537}
538
539
540static struct data_frame *
541hash_di_get_frame (struct data_in *data_in, uint64_t read_offset)
542{
543    struct hash_data_in *const hdi = HDI_PTR(data_in);
544    struct data_block *block;
545    uint64_t key;
546
547    key = read_offset - (read_offset % DB_DATA_SIZE);
548    block = hash_find(hdi, key);
549    if (!block)
550    {
551        if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
552        {
553            hdi->hdi_last_block             = NULL;
554            hdi->hdi_data_frame.df_data     = NULL;
555            hdi->hdi_data_frame.df_offset   = read_offset -
556                                                    read_offset % DB_DATA_SIZE;
557            hdi->hdi_data_frame.df_read_off = 0;
558            hdi->hdi_data_frame.df_size     = 0;
559            hdi->hdi_data_frame.df_fin      = 1;
560            return &hdi->hdi_data_frame;
561        }
562        else
563            return NULL;
564    }
565
566    if (setup_data_frame(hdi, read_offset, block))
567        return &hdi->hdi_data_frame;
568    else
569        return NULL;
570}
571
572
573static void
574hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
575{
576    struct hash_data_in *const hdi = HDI_PTR(data_in);
577    struct data_block *const block = hdi->hdi_last_block;
578
579    if (block)
580    {
581        if (data_frame->df_read_off == DB_DATA_SIZE ||
582                            !has_bytes_after(block, data_frame->df_read_off))
583        {
584            hash_remove(hdi, block);
585            free(block);
586            if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN))
587            {
588                LSQ_DEBUG("hash empty, want to switch");
589                hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL;
590            }
591        }
592    }
593    else
594        assert(data_frame->df_fin && data_frame->df_size == 0);
595}
596
597
598static int
599hash_di_empty (struct data_in *data_in)
600{
601    struct hash_data_in *const hdi = HDI_PTR(data_in);
602    return hdi->hdi_count == 0;
603}
604
605
606struct data_in *
607hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
608{
609    struct hash_data_in *const hdi = HDI_PTR(data_in);
610    struct data_in *new_data_in;
611
612    assert(hdi->hdi_count == 0);
613
614    new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id);
615    data_in->di_if->di_destroy(data_in);
616
617    return new_data_in;
618}
619
620
621static size_t
622hash_di_mem_used (struct data_in *data_in)
623{
624    struct hash_data_in *const hdi = HDI_PTR(data_in);
625    const struct data_block *block;
626    size_t size;
627    unsigned n;
628
629    size = sizeof(*data_in);
630
631    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
632        TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next)
633            size += sizeof(*block);
634
635    size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]);
636
637    return size;
638}
639
640
641static const struct data_in_iface di_if_hash = {
642    .di_destroy      = hash_di_destroy,
643    .di_empty        = hash_di_empty,
644    .di_frame_done   = hash_di_frame_done,
645    .di_get_frame    = hash_di_get_frame,
646    .di_insert_frame = hash_di_insert_frame,
647    .di_mem_used     = hash_di_mem_used,
648    .di_switch_impl  = hash_di_switch_impl,
649};
650
651static const struct data_in_iface *di_if_hash_ptr = &di_if_hash;
652