lsquic_di_hash.c revision 461e84d8
1/* Copyright (c) 2017 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_di_hash.c -- Copy incoming data into a hash
4 *
5 * While this implementation copies the data, its memory use is limited,
6 * which makes it a good choice when we have a lot of stream frames
7 * coming in.
8 *
9 * Another difference is that it does not check for frame overlap, which
10 * is something that is present in Chrome, but it is not required by QUIC.
11 */
12
13
14#include <assert.h>
15#include <stddef.h>
16#include <stdint.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/queue.h>
20
21#include "lsquic.h"
22#include "lsquic_int_types.h"
23#include "lsquic_types.h"
24#include "lsquic_conn_flow.h"
25#include "lsquic_packet_common.h"
26#include "lsquic_packet_in.h"
27#include "lsquic_rtt.h"
28#include "lsquic_sfcw.h"
29#include "lsquic_stream.h"
30#include "lsquic_mm.h"
31#include "lsquic_malo.h"
32#include "lsquic_conn.h"
33#include "lsquic_conn_public.h"
34#include "lsquic_data_in_if.h"
35
36
37#define LSQUIC_LOGGER_MODULE LSQLM_DI
38#define LSQUIC_LOG_CONN_ID hdi->hdi_conn_pub->lconn->cn_cid
39#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id
40#include "lsquic_logger.h"
41
42
43#define N_DB_SETS 57
44
45#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \
46                                sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t))
47
48struct data_block
49{
50    TAILQ_ENTRY(data_block) db_next;
51    uint64_t                db_off;
52    uint64_t                db_set[N_DB_SETS];  /* bit for each valid byte */
53    unsigned char           db_data[DB_DATA_SIZE];
54};
55
56typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1];
57typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1];
58typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1];
59
60
61TAILQ_HEAD(dblock_head, data_block);
62
63
64
65
66struct hash_data_in
67{
68    struct data_in              hdi_data_in;
69    struct lsquic_conn_public  *hdi_conn_pub;
70    uint64_t                    hdi_fin_off;
71    struct dblock_head         *hdi_buckets;
72    struct data_block          *hdi_last_block;
73    struct data_frame           hdi_data_frame;
74    uint32_t                    hdi_stream_id;
75    unsigned                    hdi_count;
76    unsigned                    hdi_nbits;
77    enum {
78            HDI_FIN = (1 << 0),
79    }                           hdi_flags;
80};
81
82
83#define HDI_PTR(data_in) (struct hash_data_in *) \
84    ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in))
85
86
87#define N_BUCKETS(n_bits) (1U << (n_bits))
88#define BUCKNO(n_bits, off) ((off / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1))
89
90
91static unsigned
92my_log2 /* silly name to suppress compiler warning */ (unsigned sz)
93{
94#if __GNUC__
95    unsigned clz = __builtin_clz(sz);
96    return 32 - clz;
97#else
98    unsigned clz;
99    size_t y;
100    clz = 32;
101    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
102    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
103    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
104    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
105    y = sz >>  1;   if (y) return 32 - clz + 1;
106    return 32 - clz + sz;
107#endif
108}
109
110
111
112
113static void
114hash_di_destroy (struct data_in *data_in)
115{
116    struct hash_data_in *const hdi = HDI_PTR(data_in);
117    struct data_block *block;
118    unsigned n;
119
120    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
121    {
122        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
123        {
124            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
125            free(block);
126        }
127    }
128    free(hdi->hdi_buckets);
129    free(hdi);
130}
131
132
133static int
134hash_grow (struct hash_data_in *hdi)
135{
136    struct dblock_head *new_buckets, *new[2];
137    struct data_block *block;
138    unsigned n, old_nbits;
139    int idx;
140
141    old_nbits = hdi->hdi_nbits;
142    LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1));
143    new_buckets = malloc(sizeof(hdi->hdi_buckets[0])
144                                                * N_BUCKETS(old_nbits + 1));
145    if (!new_buckets)
146    {
147        LSQ_WARN("malloc failed: potential trouble ahead");
148        return -1;
149    }
150
151    for (n = 0; n < N_BUCKETS(old_nbits); ++n)
152    {
153        new[0] = &new_buckets[n];
154        new[1] = &new_buckets[n + N_BUCKETS(old_nbits)];
155        TAILQ_INIT(new[0]);
156        TAILQ_INIT(new[1]);
157        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
158        {
159            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
160            idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1;
161            TAILQ_INSERT_TAIL(new[idx], block, db_next);
162        }
163    }
164    free(hdi->hdi_buckets);
165    hdi->hdi_nbits   = old_nbits + 1;
166    hdi->hdi_buckets = new_buckets;
167    return 0;
168}
169
170
171static int
172hash_insert (struct hash_data_in *hdi, struct data_block *block)
173{
174    unsigned buckno;
175
176    if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi))
177        return -1;
178
179    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
180    TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next);
181    ++hdi->hdi_count;
182    return 0;
183}
184
185
186static struct data_block *
187hash_find (const struct hash_data_in *hdi, uint64_t off)
188{
189    struct data_block *block;
190    unsigned buckno;
191
192    buckno = BUCKNO(hdi->hdi_nbits, off);
193    TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next)
194        if (off == block->db_off)
195            return block;
196    return NULL;
197}
198
199
200static void
201hash_remove (struct hash_data_in *hdi, struct data_block *block)
202{
203    unsigned buckno;
204
205    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
206    TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next);
207    --hdi->hdi_count;
208}
209
210
211static struct data_block *
212new_block (struct hash_data_in *hdi, uint64_t off)
213{
214    struct data_block *block;
215
216    assert(0 == off % DB_DATA_SIZE);
217
218    block = malloc(sizeof(*block));
219    if (!block)
220        return NULL;
221
222    block->db_off = off;
223    if (0 != hash_insert(hdi, block))
224    {
225        free(block);
226        return NULL;
227    }
228
229    memset(block->db_set, 0, sizeof(block->db_set));
230    return block;
231}
232
233
234static unsigned
235block_write (struct data_block *block, unsigned block_off,
236                             const unsigned char *data, unsigned data_sz)
237{
238    const unsigned char *begin, *end;
239    unsigned set, bit, n_full_sets, n;
240    uint64_t mask;
241
242    if (data_sz > DB_DATA_SIZE - block_off)
243        data_sz = DB_DATA_SIZE - block_off;
244
245    begin = data;
246    end = begin + data_sz;
247    set = block_off >> 6;
248    bit = block_off & 0x3F;
249
250    assert(set <= N_DB_SETS);
251
252    if (bit)
253    {
254        n = 64 - bit;
255        if (n > data_sz)
256            n = data_sz;
257        mask = ~((1ULL <<  bit     ) - 1)
258             &  ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1));
259        block->db_set[ set ] |= mask;
260        memcpy(block->db_data + block_off, data, n);
261        data      += n;
262        block_off += n;
263        ++set;
264    }
265
266    n_full_sets = (end - data) >> 6;
267    if (n_full_sets)
268    {
269        memcpy(block->db_data + block_off, data, n_full_sets * 64);
270        data      += n_full_sets * 64;
271        block_off += n_full_sets * 64;
272        memset(&block->db_set[ set ], 0xFF, n_full_sets * 8);
273        set += n_full_sets;
274    }
275
276    if (data < end)
277    {
278        assert(end - data < 64);
279        block->db_set[ set ] |= ((1ULL << (end - data)) - 1);
280        memcpy(block->db_data + block_off, data, end - data);
281        data = end;
282    }
283
284    assert(set <= N_DB_SETS);
285
286    return data - begin;
287}
288
289
290static int
291has_bytes_after (const struct data_block *block, unsigned off)
292{
293    unsigned bit, set;
294    int has;
295
296    set = off >> 6;
297    bit = off & 0x3F;
298
299    has = 0 != (block->db_set[ set ] >> bit);
300    ++set;
301
302    for ( ; set < N_DB_SETS; ++set)
303        has += 0 != block->db_set[ set ];
304
305    return has > 0;
306}
307
308
309enum ins_frame
310data_in_hash_insert_data_frame (struct data_in *data_in,
311                const struct data_frame *data_frame, uint64_t read_offset)
312{
313    struct hash_data_in *const hdi = HDI_PTR(data_in);
314    struct data_block *block;
315    uint64_t key, off, diff, fin_off;
316    const unsigned char *data;
317    unsigned size, nw;
318
319    if (data_frame->df_offset + data_frame->df_size < read_offset)
320        return INS_FRAME_DUP;
321
322    if ((hdi->hdi_flags & HDI_FIN) &&
323         (
324          (data_frame->df_fin &&
325             data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off)
326          ||
327          data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off
328         )
329       )
330    {
331        return INS_FRAME_ERR;
332    }
333
334    if (data_frame->df_offset < read_offset)
335    {
336        diff = read_offset - data_frame->df_offset;
337        assert(diff <= data_frame->df_size);
338        size = data_frame->df_size   - diff;
339        off  = data_frame->df_offset + diff;
340        data = data_frame->df_data   + diff;
341    }
342    else
343    {
344        size = data_frame->df_size;
345        off  = data_frame->df_offset;
346        data = data_frame->df_data;
347    }
348
349    key = off - (off % DB_DATA_SIZE);
350    do
351    {
352        block = hash_find(hdi, key);
353        if (!block)
354        {
355            block = new_block(hdi, key);
356            if (!block)
357                return INS_FRAME_ERR;
358        }
359        nw = block_write(block, off % DB_DATA_SIZE, data, size);
360        size -= nw;
361        off  += nw;
362        data += nw;
363        key  += DB_DATA_SIZE;
364    }
365    while (size > 0);
366
367    if (data_frame->df_fin)
368    {
369        fin_off = data_frame->df_offset + data_frame->df_size;
370        if (has_bytes_after(block, fin_off - block->db_off) ||
371                                                        hash_find(hdi, key))
372        {
373            return INS_FRAME_ERR;
374        }
375        hdi->hdi_flags  |= HDI_FIN;
376        hdi->hdi_fin_off = fin_off;
377    }
378
379    return INS_FRAME_OK;
380}
381
382
383static enum ins_frame
384hash_di_insert_frame (struct data_in *data_in,
385                        struct stream_frame *new_frame, uint64_t read_offset)
386{
387    struct hash_data_in *const hdi = HDI_PTR(data_in);
388    const struct data_frame *const data_frame = &new_frame->data_frame;
389    enum ins_frame ins;
390
391    ins = data_in_hash_insert_data_frame(data_in, data_frame, read_offset);
392    lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in);
393    lsquic_malo_put(new_frame);
394    return ins;
395}
396
397
398#if __GNUC__
399#   define ctz __builtin_ctzll
400#else
401static unsigned
402ctz (unsigned long long x)
403{
404    unsigned n = 0;
405    if (0 == (x & ((1ULL << 32) - 1))) { n += 32; x >>= 32; }
406    if (0 == (x & ((1ULL << 16) - 1))) { n += 16; x >>= 16; }
407    if (0 == (x & ((1ULL <<  8) - 1))) { n +=  8; x >>=  8; }
408    if (0 == (x & ((1ULL <<  4) - 1))) { n +=  4; x >>=  4; }
409    if (0 == (x & ((1ULL <<  2) - 1))) { n +=  2; x >>=  2; }
410    if (0 == (x & ((1ULL <<  1) - 1))) { n +=  1; x >>=  1; }
411    return n;
412}
413#endif
414
415
416static unsigned
417n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit)
418{
419    unsigned count;
420    uint64_t part;
421
422    part = ~(block->db_set[ set ] >> bit);
423    if (part)
424    {
425        count = ctz(part);
426        if (count < 64 - bit)
427            return count;
428    }
429    else
430        count = 64;
431    ++set;
432
433    for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set)
434        count += 64;
435
436    if (set < N_DB_SETS)
437    {
438        part = ~block->db_set[ set ];
439        if (part)
440            count += ctz(part);
441        else
442            count += 64;
443    }
444
445    return count;
446}
447
448
449/* Data block is readable if there is at least one readable byte at
450 * `read_offset' or there is FIN at that offset.
451 */
452static int
453setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset,
454                                                    struct data_block *block)
455{
456    unsigned set, bit;
457    uint64_t offset;
458
459    offset = read_offset % DB_DATA_SIZE;
460    set = offset >> 6;
461    bit = offset & 0x3F;
462
463    if (block->db_set[ set ] & (1ULL << bit))
464    {
465        hdi->hdi_last_block             = block;
466        hdi->hdi_data_frame.df_data     = block->db_data;
467        hdi->hdi_data_frame.df_offset   = block->db_off;
468        hdi->hdi_data_frame.df_read_off = offset;
469        hdi->hdi_data_frame.df_size     = offset +
470                                                n_avail_bytes(block, set, bit);
471        hdi->hdi_data_frame.df_fin      =
472            (hdi->hdi_flags & HDI_FIN) &&
473                hdi->hdi_data_frame.df_read_off +
474                    hdi->hdi_data_frame.df_size == hdi->hdi_fin_off;
475        return 1;
476    }
477    else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
478    {
479        hdi->hdi_last_block             = block;
480        hdi->hdi_data_frame.df_data     = NULL;
481        hdi->hdi_data_frame.df_offset   = block->db_off;
482        hdi->hdi_data_frame.df_read_off = offset;
483        hdi->hdi_data_frame.df_size     = offset;
484        hdi->hdi_data_frame.df_fin      = 1;
485        return 1;
486    }
487    else
488        return 0;
489}
490
491
492static struct data_frame *
493hash_di_get_frame (struct data_in *data_in, uint64_t read_offset)
494{
495    struct hash_data_in *const hdi = HDI_PTR(data_in);
496    struct data_block *block;
497    uint64_t key;
498
499    key = read_offset - (read_offset % DB_DATA_SIZE);
500    block = hash_find(hdi, key);
501    if (!block)
502    {
503        if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
504        {
505            hdi->hdi_last_block             = NULL;
506            hdi->hdi_data_frame.df_data     = NULL;
507            hdi->hdi_data_frame.df_offset   = read_offset -
508                                                    read_offset % DB_DATA_SIZE;
509            hdi->hdi_data_frame.df_read_off = 0;
510            hdi->hdi_data_frame.df_size     = 0;
511            hdi->hdi_data_frame.df_fin      = 1;
512            return &hdi->hdi_data_frame;
513        }
514        else
515            return NULL;
516    }
517
518    if (setup_data_frame(hdi, read_offset, block))
519        return &hdi->hdi_data_frame;
520    else
521        return NULL;
522}
523
524
525static void
526hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
527{
528    struct hash_data_in *const hdi = HDI_PTR(data_in);
529    struct data_block *const block = hdi->hdi_last_block;
530
531    if (block)
532    {
533        if (data_frame->df_read_off == DB_DATA_SIZE ||
534                            !has_bytes_after(block, data_frame->df_read_off))
535        {
536            hash_remove(hdi, block);
537            free(block);
538            if (0 == hdi->hdi_count && 0 == (hdi->hdi_flags & HDI_FIN))
539            {
540                LSQ_DEBUG("hash empty, want to switch");
541                hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL;
542            }
543        }
544    }
545    else
546        assert(data_frame->df_fin && data_frame->df_size == 0);
547}
548
549
550static int
551hash_di_empty (struct data_in *data_in)
552{
553    struct hash_data_in *const hdi = HDI_PTR(data_in);
554    return hdi->hdi_count == 0;
555}
556
557
558struct data_in *
559hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
560{
561    struct hash_data_in *const hdi = HDI_PTR(data_in);
562    struct data_in *new_data_in;
563
564    assert(hdi->hdi_count == 0);
565
566    new_data_in = data_in_nocopy_new(hdi->hdi_conn_pub, hdi->hdi_stream_id);
567    data_in->di_if->di_destroy(data_in);
568
569    return new_data_in;
570}
571
572
573static size_t
574hash_di_mem_used (struct data_in *data_in)
575{
576    struct hash_data_in *const hdi = HDI_PTR(data_in);
577    const struct data_block *block;
578    size_t size;
579    unsigned n;
580
581    size = sizeof(*data_in);
582
583    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
584        TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next)
585            size += sizeof(*block);
586
587    size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]);
588
589    return size;
590}
591
592
593static const struct data_in_iface di_if_hash = {
594    .di_destroy      = hash_di_destroy,
595    .di_empty        = hash_di_empty,
596    .di_frame_done   = hash_di_frame_done,
597    .di_get_frame    = hash_di_get_frame,
598    .di_insert_frame = hash_di_insert_frame,
599    .di_mem_used     = hash_di_mem_used,
600    .di_switch_impl  = hash_di_switch_impl,
601};
602struct data_in *
603data_in_hash_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id,
604                  uint64_t byteage)
605{
606    struct hash_data_in *hdi;
607    unsigned n;
608
609    hdi = malloc(sizeof(*hdi));
610    if (!hdi)
611        return NULL;
612
613    hdi->hdi_data_in.di_if    = &di_if_hash;
614    hdi->hdi_data_in.di_flags = 0;
615    hdi->hdi_conn_pub         = conn_pub;
616    hdi->hdi_stream_id        = stream_id;
617    hdi->hdi_fin_off          = 0;
618    hdi->hdi_flags            = 0;
619    hdi->hdi_last_block       = NULL;
620    if (byteage >= DB_DATA_SIZE /* __builtin_clz is undefined if
621                                   argument is 0 */)
622        hdi->hdi_nbits        = my_log2(byteage / DB_DATA_SIZE) + 2;
623    else
624        hdi->hdi_nbits        = 3;
625    hdi->hdi_count            = 0;
626    hdi->hdi_buckets          = malloc(sizeof(hdi->hdi_buckets[0]) *
627                                                    N_BUCKETS(hdi->hdi_nbits));
628    if (!hdi->hdi_buckets)
629    {
630        free(hdi);
631        return NULL;
632    }
633
634    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
635        TAILQ_INIT(&hdi->hdi_buckets[n]);
636
637    return &hdi->hdi_data_in;
638}
639