lsquic_di_nocopy.c revision 461e84d8
1/* Copyright (c) 2017 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_di_nocopy.c -- The "no-copy" data in stream.
4 *
5 * Data from packets is not copied: the packets are referenced by stream
6 * frames.  When all data from stream frame is read, the frame is released
7 * and packet reference count is decremented, which possibly results in
8 * packet being released as well.
9 *
10 * This approach works well in regular circumstances; there are two scenarios
11 * when it does not:
12 *
13 *  A.  If we have many out-of-order frames, insertion into the list becomes
14 *      expensive.  In the degenerate case, we'd have to traverse the whole
15 *      list to find appropriate position.
16 *
17 *  B.  Having many frames ties up resources, as each frame keeps a reference
18 *      to the packet that contains it.  This is a possible attack vector:
19 *      send many one-byte packets; a single hole at the beginning will stop
20 *      the server from being able to read the stream, thus tying up resources.
21 *
22 * If we detect that either (A) or (B) is true, we request that the stream
23 * switch to a more robust incoming stream frame handler by setting
24 * DI_SWITCH_IMPL flag.
25 *
26 * For a small number of elements, (A) and (B) do not matter and the checks
27 * are not performed.  This number is defined by EFF_CHECK_THRESH_LOW.  On
28 * the other side of the spectrum, if the number of frames grows very high,
29 * we want to switch to a more memory-efficient implementation even if (A)
30 * and (B) are not true.  EFF_CHECK_THRESH_HIGH defines this threshold.
31 *
32 * Between the low and high thresholds, we detect efficiency problems as
33 * follows.
34 *
35 * To detect (A), we count how many elements we have to traverse during
36 * insertion.  If we have to traverse at least half the list
37 * EFF_FAR_TRAVERSE_COUNT in a row, DI_SWITCH_IMPL is issued.
38 *
39 * If average stream frame size is smaller than EFF_TINY_FRAME_SZ bytes,
40 * (B) condition is true.  In addition, if there are more than EFF_MAX_HOLES
41 * in the stream, this is also indicative of (B).
42 */
43
44
45#include <assert.h>
46#include <inttypes.h>
47#include <stddef.h>
48#include <stdint.h>
49#include <stdlib.h>
50#include <sys/queue.h>
51
52#include "lsquic.h"
53#include "lsquic_types.h"
54#include "lsquic_int_types.h"
55#include "lsquic_conn_flow.h"
56#include "lsquic_packet_common.h"
57#include "lsquic_packet_in.h"
58#include "lsquic_rtt.h"
59#include "lsquic_sfcw.h"
60#include "lsquic_stream.h"
61#include "lsquic_mm.h"
62#include "lsquic_malo.h"
63#include "lsquic_conn.h"
64#include "lsquic_conn_public.h"
65#include "lsquic_data_in_if.h"
66
67
68#define LSQUIC_LOGGER_MODULE LSQLM_DI
69#define LSQUIC_LOG_CONN_ID ncdi->ncdi_conn_pub->lconn->cn_cid
70#define LSQUIC_LOG_STREAM_ID ncdi->ncdi_stream_id
71#include "lsquic_logger.h"
72
73
74/* If number of elements is at or below this number, we do not bother to check
75 * efficiency conditions.
76 */
77#define EFF_CHECK_THRESH_LOW    10
78
79/* If number of elements is higher than this number, efficiency alert
80 * is issued unconditionally.
81 */
82#define EFF_CHECK_THRESH_HIGH   1000
83
84/* Maximum number of consecutive far traversals */
85#define EFF_FAR_TRAVERSE_COUNT  4
86
87/* Maximum number of holes that is not deemed suspicious */
88#define EFF_MAX_HOLES           5
89
90/* What is deemed a tiny frame, in bytes.  If it is a power of two, calculation
91 * is cheaper.
92 */
93#define EFF_TINY_FRAME_SZ       64
94
95
96TAILQ_HEAD(stream_frames_tailq, stream_frame);
97
98
99struct nocopy_data_in
100{
101    struct stream_frames_tailq  ncdi_frames_in;
102    struct data_in              ncdi_data_in;
103    struct lsquic_conn_public  *ncdi_conn_pub;
104    uint64_t                    ncdi_byteage;
105    uint32_t                    ncdi_stream_id;
106    unsigned                    ncdi_n_frames;
107    unsigned                    ncdi_n_holes;
108    unsigned                    ncdi_cons_far;
109};
110
111
112#define NCDI_PTR(data_in) (struct nocopy_data_in *) \
113    ((unsigned char *) (data_in) - offsetof(struct nocopy_data_in, ncdi_data_in))
114
115#define STREAM_FRAME_PTR(data_frame) (struct stream_frame *) \
116    ((unsigned char *) (data_frame) - offsetof(struct stream_frame, data_frame))
117
118
119
120
121
122static void
123nocopy_di_destroy (struct data_in *data_in)
124{
125    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
126    stream_frame_t *frame;
127    while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in)))
128    {
129        TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
130        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
131        lsquic_malo_put(frame);
132    }
133    free(ncdi);
134}
135
136
137#if 1
138#define CHECK_ORDER(ncdi)
139#else
140static int
141ordered (const struct nocopy_data_in *ncdi)
142{
143    const stream_frame_t *frame;
144    uint64_t off = 0;
145    int ordered = 1;
146    TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame)
147    {
148        ordered &= off <= frame->data_frame.df_offset;
149        off = frame->data_frame.df_offset;
150    }
151    return ordered;
152}
153#define CHECK_ORDER(ncdi) assert(ordered(ncdi))
154#endif
155
156
157/* To reduce the number of conditionals, logical operators have been replaced
158 * with arithmetic operators.  Return value is an integer in range [0, 3].
159 * Bit 0 is set due to FIN in previous frame.  If bit 1 is set, it means that
160 * it's a dup.
161 */
162static int
163insert_frame (struct nocopy_data_in *ncdi, struct stream_frame *new_frame,
164                                    uint64_t read_offset, unsigned *p_n_frames)
165{
166    int ins;
167    unsigned count;
168    stream_frame_t *prev_frame, *next_frame;
169
170    /* Find position in the list, going backwards.  We go backwards because
171     * that is the most likely scenario.
172     */
173    next_frame = TAILQ_LAST(&ncdi->ncdi_frames_in, stream_frames_tailq);
174    if (next_frame && new_frame->data_frame.df_offset < next_frame->data_frame.df_offset)
175    {
176        count = 1;
177        prev_frame = TAILQ_PREV(next_frame, stream_frames_tailq, next_frame);
178        for ( ; prev_frame &&
179                    new_frame->data_frame.df_offset < next_frame->data_frame.df_offset;
180                next_frame = prev_frame,
181                    prev_frame = TAILQ_PREV(prev_frame, stream_frames_tailq, next_frame))
182        {
183            if (new_frame->data_frame.df_offset >= prev_frame->data_frame.df_offset)
184                break;
185            ++count;
186        }
187    }
188    else
189    {
190        count = 0;
191        prev_frame = NULL;
192    }
193
194    if (!prev_frame && next_frame && new_frame->data_frame.df_offset >=
195                                            next_frame->data_frame.df_offset)
196    {
197        prev_frame = next_frame;
198        next_frame = TAILQ_NEXT(next_frame, next_frame);
199    }
200
201    /* Perform checks */
202    if (prev_frame)
203        ins =
204          (((prev_frame->data_frame.df_offset == new_frame->data_frame.df_offset) &
205            (prev_frame->data_frame.df_size   == new_frame->data_frame.df_size)   &
206            (prev_frame->data_frame.df_fin    == new_frame->data_frame.df_fin)) << 1)   /* Duplicate */
207          | prev_frame->data_frame.df_fin                                               /* FIN in the middle or dup */
208          | (prev_frame->data_frame.df_offset + prev_frame->data_frame.df_size
209                                            > new_frame->data_frame.df_offset)          /* Overlap */
210        ;
211    else
212        ins = 0;
213
214    if (next_frame)
215        ins |=
216          (((next_frame->data_frame.df_offset == new_frame->data_frame.df_offset) &
217            (next_frame->data_frame.df_size   == new_frame->data_frame.df_size)   &
218            (next_frame->data_frame.df_fin    == new_frame->data_frame.df_fin)) << 1)   /* Duplicate */
219          | (new_frame->data_frame.df_offset < read_offset) << 1                        /* Duplicate */
220          | new_frame->data_frame.df_fin                                                /* FIN in the middle or dup */
221          | (new_frame->data_frame.df_offset + new_frame->data_frame.df_size
222                                            > next_frame->data_frame.df_offset)         /* Overlap */
223        ;
224    else
225        ins |=
226            (new_frame->data_frame.df_offset < read_offset) << 1                        /* Duplicate */
227        ;
228
229    if (ins)
230        return ins;
231
232    if (prev_frame)
233    {
234        TAILQ_INSERT_AFTER(&ncdi->ncdi_frames_in, prev_frame, new_frame, next_frame);
235        ncdi->ncdi_n_holes += prev_frame->data_frame.df_offset +
236                    prev_frame->data_frame.df_size != new_frame->data_frame.df_offset;
237        if (next_frame)
238        {
239            ncdi->ncdi_n_holes += new_frame->data_frame.df_offset +
240                    new_frame->data_frame.df_size != next_frame->data_frame.df_offset;
241            --ncdi->ncdi_n_holes;
242        }
243    }
244    else
245    {
246        ncdi->ncdi_n_holes += next_frame && new_frame->data_frame.df_offset +
247                    new_frame->data_frame.df_size != next_frame->data_frame.df_offset;
248        TAILQ_INSERT_HEAD(&ncdi->ncdi_frames_in, new_frame, next_frame);
249    }
250    CHECK_ORDER(ncdi);
251
252    ++ncdi->ncdi_n_frames;
253    ncdi->ncdi_byteage += new_frame->data_frame.df_size;
254    *p_n_frames = count;
255
256    return 0;
257}
258
259
260static int
261check_efficiency (struct nocopy_data_in *ncdi, unsigned count)
262{
263    if (ncdi->ncdi_n_frames <= EFF_CHECK_THRESH_LOW)
264    {
265        ncdi->ncdi_cons_far = 0;
266        return 0;
267    }
268    if (ncdi->ncdi_n_frames > EFF_CHECK_THRESH_HIGH)
269        return 1;
270    if (count >= ncdi->ncdi_n_frames / 2)
271    {
272        ++ncdi->ncdi_cons_far;
273        if (ncdi->ncdi_cons_far > EFF_FAR_TRAVERSE_COUNT)
274            return 1;
275    }
276    else
277        ncdi->ncdi_cons_far = 0;
278    if (ncdi->ncdi_n_holes > EFF_MAX_HOLES)
279        return 1;
280    if (ncdi->ncdi_byteage / EFF_TINY_FRAME_SZ < ncdi->ncdi_n_frames)
281        return 1;
282    return 0;
283}
284
285
286static void
287set_eff_alert (struct nocopy_data_in *ncdi)
288{
289    LSQ_DEBUG("low efficiency: n_frames: %u; n_holes: %u; cons_far: %u; "
290              "byteage: %"PRIu64, ncdi->ncdi_n_frames, ncdi->ncdi_n_holes,
291              ncdi->ncdi_cons_far, ncdi->ncdi_byteage);
292    ncdi->ncdi_data_in.di_flags |= DI_SWITCH_IMPL;
293}
294
295
296static enum ins_frame
297nocopy_di_insert_frame (struct data_in *data_in,
298                        struct stream_frame *new_frame, uint64_t read_offset)
299{
300    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
301    unsigned count;
302    int ins;
303
304    assert(0 == (new_frame->data_frame.df_fin & ~1));
305    ins = insert_frame(ncdi, new_frame, read_offset, &count);
306    switch (ins)
307    {
308    case 0:
309        if (check_efficiency(ncdi, count))
310            set_eff_alert(ncdi);
311        return INS_FRAME_OK;
312    case 2:
313    case 3:
314        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in);
315        lsquic_malo_put(new_frame);
316        return INS_FRAME_DUP;
317    default:
318        assert(1 == ins);
319        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in);
320        lsquic_malo_put(new_frame);
321        return INS_FRAME_ERR;
322    }
323}
324
325
326static struct data_frame *
327nocopy_di_get_frame (struct data_in *data_in, uint64_t read_offset)
328{
329    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
330    struct stream_frame *frame = TAILQ_FIRST(&ncdi->ncdi_frames_in);
331    if (frame && frame->data_frame.df_offset +
332                                frame->data_frame.df_read_off == read_offset)
333        return &frame->data_frame;
334    else
335        return NULL;
336}
337
338
339static void
340nocopy_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
341{
342    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
343    struct stream_frame *const frame = STREAM_FRAME_PTR(data_frame), *first;
344    assert(data_frame->df_read_off == data_frame->df_size);
345    TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
346    first = TAILQ_FIRST(&ncdi->ncdi_frames_in);
347    ncdi->ncdi_n_holes -= first && frame->data_frame.df_offset +
348                    frame->data_frame.df_size != first->data_frame.df_offset;
349    --ncdi->ncdi_n_frames;
350    ncdi->ncdi_byteage -= frame->data_frame.df_size;
351    lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
352    lsquic_malo_put(frame);
353}
354
355
356static int
357nocopy_di_empty (struct data_in *data_in)
358{
359    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
360    return TAILQ_EMPTY(&ncdi->ncdi_frames_in);
361}
362
363
364struct data_in *
365nocopy_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
366{
367    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
368    struct data_in *new_data_in;
369    stream_frame_t *frame;
370    enum ins_frame ins;
371
372    new_data_in = data_in_hash_new(ncdi->ncdi_conn_pub, ncdi->ncdi_stream_id,
373                                                        ncdi->ncdi_byteage);
374    if (!new_data_in)
375        goto end;
376
377    while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in)))
378    {
379        TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
380        ins = data_in_hash_insert_data_frame(new_data_in, &frame->data_frame,
381                                                                  read_offset);
382        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
383        lsquic_malo_put(frame);
384        if (INS_FRAME_ERR == ins)
385        {
386            new_data_in->di_if->di_destroy(new_data_in);
387            new_data_in = NULL;
388            goto end;
389        }
390    }
391
392  end:
393    data_in->di_if->di_destroy(data_in);
394    return new_data_in;
395}
396
397
398/* This function overestimates amount of memory because some packets are
399 * referenced by more than one stream.  In the usual case, however, I
400 * expect the error not to be large.
401 */
402static size_t
403nocopy_di_mem_used (struct data_in *data_in)
404{
405    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
406    const stream_frame_t *frame;
407    size_t size;
408
409    size = sizeof(*data_in);
410    TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame)
411        size += lsquic_packet_in_mem_used(frame->packet_in);
412
413    return size;
414}
415
416static const struct data_in_iface di_if_nocopy = {
417    .di_destroy      = nocopy_di_destroy,
418    .di_empty        = nocopy_di_empty,
419    .di_frame_done   = nocopy_di_frame_done,
420    .di_get_frame    = nocopy_di_get_frame,
421    .di_insert_frame = nocopy_di_insert_frame,
422    .di_mem_used     = nocopy_di_mem_used,
423    .di_switch_impl  = nocopy_di_switch_impl,
424};
425struct data_in *
426data_in_nocopy_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id)
427{
428    struct nocopy_data_in *ncdi;
429
430    ncdi = malloc(sizeof(*ncdi));
431    if (!ncdi)
432        return NULL;
433
434    TAILQ_INIT(&ncdi->ncdi_frames_in);
435    ncdi->ncdi_data_in.di_if    = &di_if_nocopy;
436    ncdi->ncdi_data_in.di_flags = 0;
437    ncdi->ncdi_conn_pub         = conn_pub;
438    ncdi->ncdi_stream_id        = stream_id;
439    ncdi->ncdi_byteage          = 0;
440    ncdi->ncdi_n_frames         = 0;
441    ncdi->ncdi_n_holes          = 0;
442    ncdi->ncdi_cons_far         = 0;
443    return &ncdi->ncdi_data_in;
444}
445
446