lsquic_di_hash.c revision c51ce338
1/* Copyright (c) 2017 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_di_hash.c -- Copy incoming data into a hash
4 *
5 * While this implementation copies the data, its memory use is limited,
6 * which makes it a good choice when we have a lot of stream frames
7 * coming in.
8 *
9 * Another difference is that it does not check for frame overlap, which
10 * is something that is present in Chrome, but it is not required by QUIC.
11 */
12
13
14#include <assert.h>
15#include <stddef.h>
16#include <stdint.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/queue.h>
20
21#include "lsquic.h"
22#include "lsquic_int_types.h"
23#include "lsquic_types.h"
24#include "lsquic_conn_flow.h"
25#include "lsquic_packet_in.h"
26#include "lsquic_rtt.h"
27#include "lsquic_sfcw.h"
28#include "lsquic_stream.h"
29#include "lsquic_mm.h"
30#include "lsquic_malo.h"
31#include "lsquic_conn.h"
32#include "lsquic_conn_public.h"
33#include "lsquic_data_in_if.h"
34
35
36#define LSQUIC_LOGGER_MODULE LSQLM_DI
37#define LSQUIC_LOG_CONN_ID hdi->hdi_conn_pub->lconn->cn_cid
38#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id
39#include "lsquic_logger.h"
40
41
42#define N_DB_SETS 57
43
44#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \
45                                sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t))
46
47struct data_block
48{
49    TAILQ_ENTRY(data_block) db_next;
50    uint64_t                db_off;
51    uint64_t                db_set[N_DB_SETS];  /* bit for each valid byte */
52    unsigned char           db_data[DB_DATA_SIZE];
53};
54
55typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) - 1];
56typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE) - 1];
57typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) - 1];
58
59
60TAILQ_HEAD(dblock_head, data_block);
61
62
63static const struct data_in_iface di_if_hash;
64
65
66struct hash_data_in
67{
68    struct data_in              hdi_data_in;
69    struct lsquic_conn_public  *hdi_conn_pub;
70    uint64_t                    hdi_fin_off;
71    struct dblock_head         *hdi_buckets;
72    struct data_block          *hdi_last_block;
73    struct data_frame           hdi_data_frame;
74    uint32_t                    hdi_stream_id;
75    unsigned                    hdi_count;
76    unsigned                    hdi_nbits;
77    enum {
78            HDI_FIN = (1 << 0),
79    }                           hdi_flags;
80};
81
82
83#define HDI_PTR(data_in) (struct hash_data_in *) \
84    ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in))
85
86
87#define N_BUCKETS(n_bits) (1U << (n_bits))
88#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1))
89
90
91static unsigned
92my_log2 /* silly name to suppress compiler warning */ (unsigned sz)
93{
94#if __GNUC__
95    unsigned clz = __builtin_clz(sz);
96    return 32 - clz;
97#else
98    unsigned clz;
99    size_t y;
100    clz = 32;
101    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
102    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
103    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
104    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
105    y = sz >>  1;   if (y) return 32 - clz + 1;
106    return 32 - clz + sz;
107#endif
108}
109
110
111struct data_in *
112data_in_hash_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id,
113                  uint64_t byteage)
114{
115    struct hash_data_in *hdi;
116    unsigned n;
117
118    hdi = malloc(sizeof(*hdi));
119    if (!hdi)
120        return NULL;
121
122    hdi->hdi_data_in.di_if    = &di_if_hash;
123    hdi->hdi_data_in.di_flags = 0;
124    hdi->hdi_conn_pub         = conn_pub;
125    hdi->hdi_stream_id        = stream_id;
126    hdi->hdi_fin_off          = 0;
127    hdi->hdi_flags            = 0;
128    hdi->hdi_last_block       = NULL;
129    if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if
130                                   argument is 0 */)
131        hdi->hdi_nbits        = my_log2(byteage / DB_DATA_SIZE) + 2;
132    else
133        hdi->hdi_nbits        = 3;
134    hdi->hdi_count            = 0;
135    hdi->hdi_buckets          = malloc(sizeof(hdi->hdi_buckets[0]) *
136                                                    N_BUCKETS(hdi->hdi_nbits));
137    if (!hdi->hdi_buckets)
138    {
139        free(hdi);
140        return NULL;
141    }
142
143    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
144        TAILQ_INIT(&hdi->hdi_buckets[n]);
145
146    return &hdi->hdi_data_in;
147}
148
149
150static void
151hash_di_destroy (struct data_in *data_in)
152{
153    struct hash_data_in *const hdi = HDI_PTR(data_in);
154    struct data_block *block;
155    unsigned n;
156
157    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
158    {
159        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
160        {
161            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
162            free(block);
163        }
164    }
165    free(hdi->hdi_buckets);
166    free(hdi);
167}
168
169
170static int
171hash_grow (struct hash_data_in *hdi)
172{
173    struct dblock_head *new_buckets, *new[2];
174    struct data_block *block;
175    unsigned n, old_nbits;
176    int idx;
177
178    old_nbits = hdi->hdi_nbits;
179    LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1));
180    new_buckets = malloc(sizeof(hdi->hdi_buckets[0])
181                                                * N_BUCKETS(old_nbits + 1));
182    if (!new_buckets)
183    {
184        LSQ_WARN("malloc failed: potential trouble ahead");
185        return -1;
186    }
187
188    for (n = 0; n < N_BUCKETS(old_nbits); ++n)
189    {
190        new[0] = &new_buckets[n];
191        new[1] = &new_buckets[n + N_BUCKETS(old_nbits)];
192        TAILQ_INIT(new[0]);
193        TAILQ_INIT(new[1]);
194        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
195        {
196            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
197            idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1;
198            TAILQ_INSERT_TAIL(new[idx], block, db_next);
199        }
200    }
201    free(hdi->hdi_buckets);
202    hdi->hdi_nbits   = old_nbits + 1;
203    hdi->hdi_buckets = new_buckets;
204    return 0;
205}
206
207
208static int
209hash_insert (struct hash_data_in *hdi, struct data_block *block)
210{
211    unsigned buckno;
212
213    if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi))
214        return -1;
215
216    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
217    TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next);
218    ++hdi->hdi_count;
219    return 0;
220}
221
222
223static struct data_block *
224hash_find (const struct hash_data_in *hdi, uint64_t off)
225{
226    struct data_block *block;
227    unsigned buckno;
228
229    buckno = BUCKNO(hdi->hdi_nbits, off);
230    TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next)
231        if (off == block->db_off)
232            return block;
233    return NULL;
234}
235
236
237static void
238hash_remove (struct hash_data_in *hdi, struct data_block *block)
239{
240    unsigned buckno;
241
242    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
243    TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next);
244    --hdi->hdi_count;
245}
246
247
248static struct data_block *
249new_block (struct hash_data_in *hdi, uint64_t off)
250{
251    struct data_block *block;
252
253    assert(0 == off % DB_DATA_SIZE);
254
255    block = malloc(sizeof(*block));
256    if (!block)
257        return NULL;
258
259    block->db_off = off;
260    if (0 != hash_insert(hdi, block))
261    {
262        free(block);
263        return NULL;
264    }
265
266    memset(block->db_set, 0, sizeof(block->db_set));
267    return block;
268}
269
270
271static unsigned
272block_write (struct data_block *block, unsigned block_off,
273                             const unsigned char *data, unsigned data_sz)
274{
275    const unsigned char *begin, *end;
276    unsigned set, bit, n_full_sets, n;
277    uint64_t mask;
278
279    if (data_sz > DB_DATA_SIZE - block_off)
280        data_sz = DB_DATA_SIZE - block_off;
281
282    begin = data;
283    end = begin + data_sz;
284    set = block_off >> 6;
285    bit = block_off & 0x3F;
286
287    assert(set <= N_DB_SETS);
288
289    if (bit)
290    {
291        n = 64 - bit;
292        if (n > data_sz)
293            n = data_sz;
294        mask = ~((1ULL <<  bit     ) - 1)
295             &  ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1));
296        block->db_set[ set ] |= mask;
297        memcpy(block->db_data + block_off, data, n);
298        data      += n;
299        block_off += n;
300        ++set;
301    }
302
303    n_full_sets = (end - data) >> 6;
304    if (n_full_sets)
305    {
306        memcpy(block->db_data + block_off, data, n_full_sets * 64);
307        data      += n_full_sets * 64;
308        block_off += n_full_sets * 64;
309        memset(&block->db_set[ set ], 0xFF, n_full_sets * 8);
310        set += n_full_sets;
311    }
312
313    if (data < end)
314    {
315        assert(end - data < 64);
316        block->db_set[ set ] |= ((1ULL << (end - data)) - 1);
317        memcpy(block->db_data + block_off, data, end - data);
318        data = end;
319    }
320
321    assert(set <= N_DB_SETS);
322
323    return data - begin;
324}
325
326
327static int
328has_bytes_after (const struct data_block *block, unsigned off)
329{
330    unsigned bit, set;
331    int has;
332
333    set = off >> 6;
334    bit = off & 0x3F;
335
336    has = 0 != (block->db_set[ set ] >> bit);
337    ++set;
338
339    for ( ; set < N_DB_SETS; ++set)
340        has += 0 != block->db_set[ set ];
341
342    return has > 0;
343}
344
345
346enum ins_frame
347data_in_hash_insert_data_frame (struct data_in *data_in,
348                const struct data_frame *data_frame, uint64_t read_offset)
349{
350    struct hash_data_in *const hdi = HDI_PTR(data_in);
351    struct data_block *block;
352    uint64_t key, off, diff, fin_off;
353    const unsigned char *data;
354    unsigned size, nw;
355
356    if (data_frame->df_offset + data_frame->df_size < read_offset)
357        return INS_FRAME_DUP;
358
359    if ((hdi->hdi_flags & HDI_FIN) &&
360         (
361          (data_frame->df_fin &&
362             data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off)
363          ||
364          data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off
365         )
366       )
367    {
368        return INS_FRAME_ERR;
369    }
370
371    if (data_frame->df_offset < read_offset)
372    {
373        diff = read_offset - data_frame->df_offset;
374        assert(diff <= data_frame->df_size);
375        size = data_frame->df_size   - diff;
376        off  = data_frame->df_offset + diff;
377        data = data_frame->df_data   + diff;
378    }
379    else
380    {
381        size = data_frame->df_size;
382        off  = data_frame->df_offset;
383        data = data_frame->df_data;
384    }
385
386    key = off - (off % DB_DATA_SIZE);
387    do
388    {
389        block = hash_find(hdi, key);
390        if (!block)
391        {
392            block = new_block(hdi, key);
393            if (!block)
394                return INS_FRAME_ERR;
395        }
396        nw = block_write(block, off % DB_DATA_SIZE, data, size);
397        size -= nw;
398        off  += nw;
399        data += nw;
400        key  += DB_DATA_SIZE;
401    }
402    while (size > 0);
403
404    if (data_frame->df_fin)
405    {
406        fin_off = data_frame->df_offset + data_frame->df_size;
407        if (has_bytes_after(block, fin_off - block->db_off) ||
408                                                        hash_find(hdi, key))
409        {
410            return INS_FRAME_ERR;
411        }
412        hdi->hdi_flags  |= HDI_FIN;
413        hdi->hdi_fin_off = fin_off;
414    }
415
416    return INS_FRAME_OK;
417}
418
419
420static enum ins_frame
421hash_di_insert_frame (struct data_in *data_in,
422                        struct stream_frame *new_frame, uint64_t read_offset)
423{
424    struct hash_data_in *const hdi = HDI_PTR(data_in);
425    const struct data_frame *const data_frame = &new_frame->data_frame;
426    enum ins_frame ins;
427
428    ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset);
429    lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in);
430    lsquic_malo_put(new_frame);
431    return ins;
432}
433
434
435#if __GNUC__
436#   define ctz __builtin_ctzll
437#else
438static unsigned
439ctz (unsigned long long x)
440{
441    unsigned n = 0;
442    if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; }
443    if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; }
444    if (0 == (x & ((1ULL <<  8) - 1))) { n +=  8; x >>=  8; }
445    if (0 == (x & ((1ULL <<  4) - 1))) { n +=  4; x >>=  4; }
446    if (0 == (x & ((1ULL <<  2) - 1))) { n +=  2; x >>=  2; }
447    if (0 == (x & ((1ULL <<  1) - 1))) { n +=  1; x >>=  1; }
448    return n;
449}
450#endif
451
452
453static unsigned
454n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit)
455{
456    unsigned count;
457    uint64_t part;
458
459    part = ~(block->db_set[ set ] >> bit);
460    if (part)
461    {
462        count = ctz(part);
463        if (count < 64 - bit)
464            return count;
465    }
466    else
467        count = 64;
468    ++set;
469
470    for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set)
471        count += 64;
472
473    if (set < N_DB_SETS)
474    {
475        part = ~block->db_set[ set ];
476        if (part)
477            count += ctz(part);
478        else
479            count += 64;
480    }
481
482    return count;
483}
484
485
486/* Data block is readable if there is at least one readable byte at
487 * `read_offset' or there is FIN at that offset.
488 */
489static int
490setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset,
491                                                    struct data_block *block)
492{
493    unsigned set, bit;
494    uint64_t offset;
495
496    offset = read_offset % DB_DATA_SIZE;
497    set = offset >> 6;
498    bit = offset & 0x3F;
499
500    if (block->db_set[ set ] & (1ULL << bit))
501    {
502        hdi->hdi_last_block             = block;
503        hdi->hdi_data_frame.df_data     = block->db_data;
504        hdi->hdi_data_frame.df_offset   = block->db_off;
505        hdi->hdi_data_frame.df_read_off = offset;
506        hdi->hdi_data_frame.df_size     = offset +
507                                                n_avail_bytes(block, set, bit);
508        hdi->hdi_data_frame.df_fin      =
509            (hdi->hdi_flags & HDI_FIN) &&
510                hdi->hdi_data_frame.df_read_off +
511                    hdi->hdi_data_frame.df_size == hdi->hdi_fin_off;
512        return 1;
513    }
514    else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
515    {
516        hdi->hdi_last_block             = block;
517        hdi->hdi_data_frame.df_data     = NULL;
518        hdi->hdi_data_frame.df_offset   = block->db_off;
519        hdi->hdi_data_frame.df_read_off = offset;
520        hdi->hdi_data_frame.df_size     = offset;
521        hdi->hdi_data_frame.df_fin      = 1;
522        return 1;
523    }
524    else
525        return 0;
526}
527
528
529static struct data_frame *
530hash_di_get_frame (struct data_in *data_in, uint64_t read_offset)
531{
532    struct hash_data_in *const hdi = HDI_PTR(data_in);
533    struct data_block *block;
534    uint64_t key;
535
536    key = read_offset - (read_offset % DB_DATA_SIZE);
537    block = hash_find(hdi, key);
538    if (!block)
539    {
540        if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
541        {
542            hdi->hdi_last_block             = NULL;
543            hdi->hdi_data_frame.df_data     = NULL;
544            hdi->hdi_data_frame.df_offset   = read_offset -
545                                                    read_offset % DB_DATA_SIZE;
546            hdi->hdi_data_frame.df_read_off = 0;
547            hdi->hdi_data_frame.df_size     = 0;
548            hdi->hdi_data_frame.df_fin      = 1;
549            return &hdi->hdi_data_frame;
550        }
551        else
552            return NULL;
553    }
554
555    if (setup_data_frame(hdi, read_offset, block))
556        return &hdi->hdi_data_frame;
557    else
558        return NULL;
559}
560
561
562static void
563hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
564{
565    struct hash_data_in *const hdi = HDI_PTR(data_in);
566    struct data_block *const block = hdi->hdi_last_block;
567
568    if (block)
569    {
570        if (data_frame->df_read_off == DB_DATA_SIZE ||
571                            !has_bytes_after(block, data_frame->df_read_off))
572        {
573            hash_remove(hdi, block);
574            free(block);
575            if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN))
576            {
577                LSQ_DEBUG("hash empty, want to switch");
578                hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL;
579            }
580        }
581    }
582    else
583        assert(data_frame->df_fin && data_frame->df_size == 0);
584}
585
586
587static int
588hash_di_empty (struct data_in *data_in)
589{
590    struct hash_data_in *const hdi = HDI_PTR(data_in);
591    return hdi->hdi_count == 0;
592}
593
594
595struct data_in *
596hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
597{
598    struct hash_data_in *const hdi = HDI_PTR(data_in);
599    struct data_in *new_data_in;
600
601    assert(hdi->hdi_count == 0);
602
603    new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id);
604    data_in->di_if->di_destroy(data_in);
605
606    return new_data_in;
607}
608
609
610static size_t
611hash_di_mem_used (struct data_in *data_in)
612{
613    struct hash_data_in *const hdi = HDI_PTR(data_in);
614    const struct data_block *block;
615    size_t size;
616    unsigned n;
617
618    size = sizeof(*data_in);
619
620    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
621        TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next)
622            size += sizeof(*block);
623
624    size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]);
625
626    return size;
627}
628
629
630static const struct data_in_iface di_if_hash = {
631    .di_destroy      = hash_di_destroy,
632    .di_empty        = hash_di_empty,
633    .di_frame_done   = hash_di_frame_done,
634    .di_get_frame    = hash_di_get_frame,
635    .di_insert_frame = hash_di_insert_frame,
636    .di_mem_used     = hash_di_mem_used,
637    .di_switch_impl  = hash_di_switch_impl,
638};
639