lsquic_di_hash.c revision be4cfad0
1/* Copyright (c) 2017 - 2018 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_di_hash.c -- Copy incoming data into a hash
4 *
5 * While this implementation copies the data, its memory use is limited,
6 * which makes it a good choice when we have a lot of stream frames
7 * coming in.
8 *
9 * Another difference is that it does not check for frame overlap, which
10 * is something that is present in Chrome, but it is not required by QUIC.
11 */
12
13
14#include <assert.h>
15#include <stddef.h>
16#include <stdint.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/queue.h>
20
21#include "lsquic.h"
22#include "lsquic_int_types.h"
23#include "lsquic_types.h"
24#include "lsquic_conn_flow.h"
25#include "lsquic_packet_common.h"
26#include "lsquic_packet_in.h"
27#include "lsquic_rtt.h"
28#include "lsquic_sfcw.h"
29#include "lsquic_stream.h"
30#include "lsquic_mm.h"
31#include "lsquic_malo.h"
32#include "lsquic_conn.h"
33#include "lsquic_conn_public.h"
34#include "lsquic_data_in_if.h"
35
36
37#define LSQUIC_LOGGER_MODULE LSQLM_DI
38#define LSQUIC_LOG_CONN_ID hdi->hdi_conn_pub->lconn->cn_cid
39#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id
40#include "lsquic_logger.h"
41
42
43#define N_DB_SETS 57
44
45#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \
46                                sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t))
47
48struct data_block
49{
50    TAILQ_ENTRY(data_block) db_next;
51    uint64_t                db_off;
52    uint64_t                db_set[N_DB_SETS];  /* bit for each valid byte */
53    unsigned char           db_data[DB_DATA_SIZE];
54};
55
56typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1];
57typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1];
58typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1];
59
60
61TAILQ_HEAD(dblock_head, data_block);
62
63
64static const struct data_in_iface *di_if_hash_ptr;
65
66
67struct hash_data_in
68{
69    struct data_in              hdi_data_in;
70    struct lsquic_conn_public  *hdi_conn_pub;
71    uint64_t                    hdi_fin_off;
72    struct dblock_head         *hdi_buckets;
73    struct data_block          *hdi_last_block;
74    struct data_frame           hdi_data_frame;
75    uint32_t                    hdi_stream_id;
76    unsigned                    hdi_count;
77    unsigned                    hdi_nbits;
78    enum {
79            HDI_FIN = (1 << 0),
80    }                           hdi_flags;
81};
82
83
84#define HDI_PTR(data_in) (struct hash_data_in *) \
85    ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in))
86
87
88#define N_BUCKETS(n_bits) (1U << (n_bits))
89#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1))
90
91
92static unsigned
93my_log2 /* silly name to suppress compiler warning */ (unsigned sz)
94{
95#if __GNUC__
96    unsigned clz = __builtin_clz(sz);
97    return 32 - clz;
98#else
99    unsigned clz;
100    size_t y;
101    clz = 32;
102    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
103    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
104    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
105    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
106    y = sz >>  1;   if (y) return 32 - clz + 1;
107    return 32 - clz + sz;
108#endif
109}
110
111
112struct data_in *
113data_in_hash_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id,
114                  uint64_t byteage)
115{
116    struct hash_data_in *hdi;
117    unsigned n;
118
119    hdi = malloc(sizeof(*hdi));
120    if (!hdi)
121        return NULL;
122
123    hdi->hdi_data_in.di_if    = di_if_hash_ptr;
124    hdi->hdi_data_in.di_flags = 0;
125    hdi->hdi_conn_pub         = conn_pub;
126    hdi->hdi_stream_id        = stream_id;
127    hdi->hdi_fin_off          = 0;
128    hdi->hdi_flags            = 0;
129    hdi->hdi_last_block       = NULL;
130    if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if
131                                   argument is 0 */)
132        hdi->hdi_nbits        = my_log2(byteage / DB_DATA_SIZE) + 2;
133    else
134        hdi->hdi_nbits        = 3;
135    hdi->hdi_count            = 0;
136    hdi->hdi_buckets          = malloc(sizeof(hdi->hdi_buckets[0]) *
137                                                    N_BUCKETS(hdi->hdi_nbits));
138    if (!hdi->hdi_buckets)
139    {
140        free(hdi);
141        return NULL;
142    }
143
144    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
145        TAILQ_INIT(&hdi->hdi_buckets[n]);
146
147    return &hdi->hdi_data_in;
148}
149
150
151static void
152hash_di_destroy (struct data_in *data_in)
153{
154    struct hash_data_in *const hdi = HDI_PTR(data_in);
155    struct data_block *block;
156    unsigned n;
157
158    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
159    {
160        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
161        {
162            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
163            free(block);
164        }
165    }
166    free(hdi->hdi_buckets);
167    free(hdi);
168}
169
170
171static int
172hash_grow (struct hash_data_in *hdi)
173{
174    struct dblock_head *new_buckets, *new[2];
175    struct data_block *block;
176    unsigned n, old_nbits;
177    int idx;
178
179    old_nbits = hdi->hdi_nbits;
180    LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1));
181    new_buckets = malloc(sizeof(hdi->hdi_buckets[0])
182                                                * N_BUCKETS(old_nbits + 1));
183    if (!new_buckets)
184    {
185        LSQ_WARN("malloc failed: potential trouble ahead");
186        return -1;
187    }
188
189    for (n = 0; n < N_BUCKETS(old_nbits); ++n)
190    {
191        new[0] = &new_buckets[n];
192        new[1] = &new_buckets[n + N_BUCKETS(old_nbits)];
193        TAILQ_INIT(new[0]);
194        TAILQ_INIT(new[1]);
195        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
196        {
197            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
198            idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1;
199            TAILQ_INSERT_TAIL(new[idx], block, db_next);
200        }
201    }
202    free(hdi->hdi_buckets);
203    hdi->hdi_nbits   = old_nbits + 1;
204    hdi->hdi_buckets = new_buckets;
205    return 0;
206}
207
208
209static int
210hash_insert (struct hash_data_in *hdi, struct data_block *block)
211{
212    unsigned buckno;
213
214    if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi))
215        return -1;
216
217    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
218    TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next);
219    ++hdi->hdi_count;
220    return 0;
221}
222
223
224static struct data_block *
225hash_find (const struct hash_data_in *hdi, uint64_t off)
226{
227    struct data_block *block;
228    unsigned buckno;
229
230    buckno = BUCKNO(hdi->hdi_nbits, off);
231    TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next)
232        if (off == block->db_off)
233            return block;
234    return NULL;
235}
236
237
238static void
239hash_remove (struct hash_data_in *hdi, struct data_block *block)
240{
241    unsigned buckno;
242
243    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
244    TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next);
245    --hdi->hdi_count;
246}
247
248
249static struct data_block *
250new_block (struct hash_data_in *hdi, uint64_t off)
251{
252    struct data_block *block;
253
254    assert(0 == off % DB_DATA_SIZE);
255
256    block = malloc(sizeof(*block));
257    if (!block)
258        return NULL;
259
260    block->db_off = off;
261    if (0 != hash_insert(hdi, block))
262    {
263        free(block);
264        return NULL;
265    }
266
267    memset(block->db_set, 0, sizeof(block->db_set));
268    return block;
269}
270
271
272static unsigned
273block_write (struct data_block *block, unsigned block_off,
274                             const unsigned char *data, unsigned data_sz)
275{
276    const unsigned char *begin, *end;
277    unsigned set, bit, n_full_sets, n;
278    uint64_t mask;
279
280    if (data_sz > DB_DATA_SIZE - block_off)
281        data_sz = DB_DATA_SIZE - block_off;
282
283    begin = data;
284    end = begin + data_sz;
285    set = block_off >> 6;
286    bit = block_off & 0x3F;
287
288    assert(set <= N_DB_SETS);
289
290    if (bit)
291    {
292        n = 64 - bit;
293        if (n > data_sz)
294            n = data_sz;
295        mask = ~((1ULL <<  bit     ) - 1)
296             &  ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1));
297        block->db_set[ set ] |= mask;
298        memcpy(block->db_data + block_off, data, n);
299        data      += n;
300        block_off += n;
301        ++set;
302    }
303
304    n_full_sets = (end - data) >> 6;
305    if (n_full_sets)
306    {
307        memcpy(block->db_data + block_off, data, n_full_sets * 64);
308        data      += n_full_sets * 64;
309        block_off += n_full_sets * 64;
310        memset(&block->db_set[ set ], 0xFF, n_full_sets * 8);
311        set += n_full_sets;
312    }
313
314    if (data < end)
315    {
316        assert(end - data < 64);
317        block->db_set[ set ] |= ((1ULL << (end - data)) - 1);
318        memcpy(block->db_data + block_off, data, end - data);
319        data = end;
320    }
321
322    assert(set <= N_DB_SETS);
323
324    return data - begin;
325}
326
327
328static int
329has_bytes_after (const struct data_block *block, unsigned off)
330{
331    unsigned bit, set;
332    int has;
333
334    set = off >> 6;
335    bit = off & 0x3F;
336
337    has = 0 != (block->db_set[ set ] >> bit);
338    ++set;
339
340    for ( ; set < N_DB_SETS; ++set)
341        has += 0 != block->db_set[ set ];
342
343    return has > 0;
344}
345
346
347enum ins_frame
348data_in_hash_insert_data_frame (struct data_in *data_in,
349                const struct data_frame *data_frame, uint64_t read_offset)
350{
351    struct hash_data_in *const hdi = HDI_PTR(data_in);
352    struct data_block *block;
353    uint64_t key, off, diff, fin_off;
354    const unsigned char *data;
355    unsigned size, nw;
356
357    if (data_frame->df_offset + data_frame->df_size < read_offset)
358    {
359        if (data_frame->df_fin)
360            return INS_FRAME_ERR;
361        else
362            return INS_FRAME_DUP;
363    }
364
365    if ((hdi->hdi_flags & HDI_FIN) &&
366         (
367          (data_frame->df_fin &&
368             data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off)
369          ||
370          data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off
371         )
372       )
373    {
374        return INS_FRAME_ERR;
375    }
376
377    if (data_frame->df_offset < read_offset)
378    {
379        diff = read_offset - data_frame->df_offset;
380        assert(diff <= data_frame->df_size);
381        size = data_frame->df_size   - diff;
382        off  = data_frame->df_offset + diff;
383        data = data_frame->df_data   + diff;
384    }
385    else
386    {
387        size = data_frame->df_size;
388        off  = data_frame->df_offset;
389        data = data_frame->df_data;
390    }
391
392    key = off - (off % DB_DATA_SIZE);
393    do
394    {
395        block = hash_find(hdi, key);
396        if (!block)
397        {
398            block = new_block(hdi, key);
399            if (!block)
400                return INS_FRAME_ERR;
401        }
402        nw = block_write(block, off % DB_DATA_SIZE, data, size);
403        size -= nw;
404        off  += nw;
405        data += nw;
406        key  += DB_DATA_SIZE;
407    }
408    while (size > 0);
409
410    if (data_frame->df_fin)
411    {
412        fin_off = data_frame->df_offset + data_frame->df_size;
413        if (has_bytes_after(block, fin_off - block->db_off) ||
414                                                        hash_find(hdi, key))
415        {
416            return INS_FRAME_ERR;
417        }
418        hdi->hdi_flags  |= HDI_FIN;
419        hdi->hdi_fin_off = fin_off;
420    }
421
422    return INS_FRAME_OK;
423}
424
425
426static enum ins_frame
427hash_di_insert_frame (struct data_in *data_in,
428                        struct stream_frame *new_frame, uint64_t read_offset)
429{
430    struct hash_data_in *const hdi = HDI_PTR(data_in);
431    const struct data_frame *const data_frame = &new_frame->data_frame;
432    enum ins_frame ins;
433
434    ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset);
435    assert(ins != INS_FRAME_OVERLAP);
436    lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in);
437    lsquic_malo_put(new_frame);
438    return ins;
439}
440
441
442#if __GNUC__
443#   define ctz __builtin_ctzll
444#else
445static unsigned
446ctz (unsigned long long x)
447{
448    unsigned n = 0;
449    if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; }
450    if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; }
451    if (0 == (x & ((1ULL <<  8) - 1))) { n +=  8; x >>=  8; }
452    if (0 == (x & ((1ULL <<  4) - 1))) { n +=  4; x >>=  4; }
453    if (0 == (x & ((1ULL <<  2) - 1))) { n +=  2; x >>=  2; }
454    if (0 == (x & ((1ULL <<  1) - 1))) { n +=  1; x >>=  1; }
455    return n;
456}
457#endif
458
459
460static unsigned
461n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit)
462{
463    unsigned count;
464    uint64_t part;
465
466    part = ~(block->db_set[ set ] >> bit);
467    if (part)
468    {
469        count = ctz(part);
470        if (count < 64 - bit)
471            return count;
472    }
473    else
474        count = 64;
475    ++set;
476
477    for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set)
478        count += 64;
479
480    if (set < N_DB_SETS)
481    {
482        part = ~block->db_set[ set ];
483        if (part)
484            count += ctz(part);
485        else
486            count += 64;
487    }
488
489    return count;
490}
491
492
493/* Data block is readable if there is at least one readable byte at
494 * `read_offset' or there is FIN at that offset.
495 */
496static int
497setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset,
498                                                    struct data_block *block)
499{
500    unsigned set, bit;
501    uint64_t offset;
502
503    offset = read_offset % DB_DATA_SIZE;
504    set = offset >> 6;
505    bit = offset & 0x3F;
506
507    if (block->db_set[ set ] & (1ULL << bit))
508    {
509        hdi->hdi_last_block             = block;
510        hdi->hdi_data_frame.df_data     = block->db_data;
511        hdi->hdi_data_frame.df_offset   = block->db_off;
512        hdi->hdi_data_frame.df_read_off = offset;
513        hdi->hdi_data_frame.df_size     = offset +
514                                                n_avail_bytes(block, set, bit);
515        hdi->hdi_data_frame.df_fin      =
516            (hdi->hdi_flags & HDI_FIN) &&
517                hdi->hdi_data_frame.df_read_off +
518                    hdi->hdi_data_frame.df_size == hdi->hdi_fin_off;
519        return 1;
520    }
521    else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
522    {
523        hdi->hdi_last_block             = block;
524        hdi->hdi_data_frame.df_data     = NULL;
525        hdi->hdi_data_frame.df_offset   = block->db_off;
526        hdi->hdi_data_frame.df_read_off = offset;
527        hdi->hdi_data_frame.df_size     = offset;
528        hdi->hdi_data_frame.df_fin      = 1;
529        return 1;
530    }
531    else
532        return 0;
533}
534
535
536static struct data_frame *
537hash_di_get_frame (struct data_in *data_in, uint64_t read_offset)
538{
539    struct hash_data_in *const hdi = HDI_PTR(data_in);
540    struct data_block *block;
541    uint64_t key;
542
543    key = read_offset - (read_offset % DB_DATA_SIZE);
544    block = hash_find(hdi, key);
545    if (!block)
546    {
547        if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
548        {
549            hdi->hdi_last_block             = NULL;
550            hdi->hdi_data_frame.df_data     = NULL;
551            hdi->hdi_data_frame.df_offset   = read_offset -
552                                                    read_offset % DB_DATA_SIZE;
553            hdi->hdi_data_frame.df_read_off = 0;
554            hdi->hdi_data_frame.df_size     = 0;
555            hdi->hdi_data_frame.df_fin      = 1;
556            return &hdi->hdi_data_frame;
557        }
558        else
559            return NULL;
560    }
561
562    if (setup_data_frame(hdi, read_offset, block))
563        return &hdi->hdi_data_frame;
564    else
565        return NULL;
566}
567
568
569static void
570hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
571{
572    struct hash_data_in *const hdi = HDI_PTR(data_in);
573    struct data_block *const block = hdi->hdi_last_block;
574
575    if (block)
576    {
577        if (data_frame->df_read_off == DB_DATA_SIZE ||
578                            !has_bytes_after(block, data_frame->df_read_off))
579        {
580            hash_remove(hdi, block);
581            free(block);
582            if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN))
583            {
584                LSQ_DEBUG("hash empty, want to switch");
585                hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL;
586            }
587        }
588    }
589    else
590        assert(data_frame->df_fin && data_frame->df_size == 0);
591}
592
593
594static int
595hash_di_empty (struct data_in *data_in)
596{
597    struct hash_data_in *const hdi = HDI_PTR(data_in);
598    return hdi->hdi_count == 0;
599}
600
601
602struct data_in *
603hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
604{
605    struct hash_data_in *const hdi = HDI_PTR(data_in);
606    struct data_in *new_data_in;
607
608    assert(hdi->hdi_count == 0);
609
610    new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id);
611    data_in->di_if->di_destroy(data_in);
612
613    return new_data_in;
614}
615
616
617static size_t
618hash_di_mem_used (struct data_in *data_in)
619{
620    struct hash_data_in *const hdi = HDI_PTR(data_in);
621    const struct data_block *block;
622    size_t size;
623    unsigned n;
624
625    size = sizeof(*data_in);
626
627    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
628        TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next)
629            size += sizeof(*block);
630
631    size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]);
632
633    return size;
634}
635
636
637static const struct data_in_iface di_if_hash = {
638    .di_destroy      = hash_di_destroy,
639    .di_empty        = hash_di_empty,
640    .di_frame_done   = hash_di_frame_done,
641    .di_get_frame    = hash_di_get_frame,
642    .di_insert_frame = hash_di_insert_frame,
643    .di_mem_used     = hash_di_mem_used,
644    .di_switch_impl  = hash_di_switch_impl,
645};
646
647static const struct data_in_iface *di_if_hash_ptr = &di_if_hash;
648