lsquic_di_nocopy.c revision c51ce338
1/* Copyright (c) 2017 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_di_nocopy.c -- The "no-copy" data in stream.
4 *
5 * Data from packets is not copied: the packets are referenced by stream
6 * frames.  When all data from stream frame is read, the frame is released
7 * and packet reference count is decremented, which possibly results in
8 * packet being released as well.
9 *
10 * This approach works well in regular circumstances; there are two scenarios
11 * when it does not:
12 *
13 *  A.  If we have many out-of-order frames, insertion into the list becomes
14 *      expensive.  In the degenerate case, we'd have to traverse the whole
15 *      list to find appropriate position.
16 *
17 *  B.  Having many frames ties up resources, as each frame keeps a reference
18 *      to the packet that contains it.  This is a possible attack vector:
19 *      send many one-byte packets; a single hole at the beginning will stop
20 *      the server from being able to read the stream, thus tying up resources.
21 *
22 * If we detect that either (A) or (B) is true, we request that the stream
23 * switch to a more robust incoming stream frame handler by setting
24 * DI_SWITCH_IMPL flag.
25 *
26 * For a small number of elements, (A) and (B) do not matter and the checks
27 * are not performed.  This number is defined by EFF_CHECK_THRESH_LOW.  On
28 * the other side of the spectrum, if the number of frames grows very high,
29 * we want to switch to a more memory-efficient implementation even if (A)
30 * and (B) are not true.  EFF_CHECK_THRESH_HIGH defines this threshold.
31 *
32 * Between the low and high thresholds, we detect efficiency problems as
33 * follows.
34 *
35 * To detect (A), we count how many elements we have to traverse during
36 * insertion.  If we have to traverse at least half the list
37 * EFF_FAR_TRAVERSE_COUNT in a row, DI_SWITCH_IMPL is issued.
38 *
39 * If average stream frame size is smaller than EFF_TINY_FRAME_SZ bytes,
40 * (B) condition is true.  In addition, if there are more than EFF_MAX_HOLES
41 * in the stream, this is also indicative of (B).
42 */
43
44
45#include <assert.h>
46#include <inttypes.h>
47#include <stddef.h>
48#include <stdint.h>
49#include <stdlib.h>
50#include <sys/queue.h>
51
52#include "lsquic.h"
53#include "lsquic_types.h"
54#include "lsquic_int_types.h"
55#include "lsquic_conn_flow.h"
56#include "lsquic_packet_in.h"
57#include "lsquic_rtt.h"
58#include "lsquic_sfcw.h"
59#include "lsquic_stream.h"
60#include "lsquic_mm.h"
61#include "lsquic_malo.h"
62#include "lsquic_conn.h"
63#include "lsquic_conn_public.h"
64#include "lsquic_data_in_if.h"
65
66
67#define LSQUIC_LOGGER_MODULE LSQLM_DI
68#define LSQUIC_LOG_CONN_ID ncdi->ncdi_conn_pub->lconn->cn_cid
69#define LSQUIC_LOG_STREAM_ID ncdi->ncdi_stream_id
70#include "lsquic_logger.h"
71
72
73/* If number of elements is at or below this number, we do not bother to check
74 * efficiency conditions.
75 */
76#define EFF_CHECK_THRESH_LOW    10
77
78/* If number of elements is higher than this number, efficiency alert
79 * is issued unconditionally.
80 */
81#define EFF_CHECK_THRESH_HIGH   1000
82
83/* Maximum number of consecutive far traversals */
84#define EFF_FAR_TRAVERSE_COUNT  4
85
86/* Maximum number of holes that is not deemed suspicious */
87#define EFF_MAX_HOLES           5
88
89/* What is deemed a tiny frame, in bytes.  If it is a power of two, calculation
90 * is cheaper.
91 */
92#define EFF_TINY_FRAME_SZ       64
93
94
95TAILQ_HEAD(stream_frames_tailq, stream_frame);
96
97
98struct nocopy_data_in
99{
100    struct stream_frames_tailq  ncdi_frames_in;
101    struct data_in              ncdi_data_in;
102    struct lsquic_conn_public  *ncdi_conn_pub;
103    uint64_t                    ncdi_byteage;
104    uint32_t                    ncdi_stream_id;
105    unsigned                    ncdi_n_frames;
106    unsigned                    ncdi_n_holes;
107    unsigned                    ncdi_cons_far;
108};
109
110
111#define NCDI_PTR(data_in) (struct nocopy_data_in *) \
112    ((unsigned char *) (data_in) - offsetof(struct nocopy_data_in, ncdi_data_in))
113
114#define STREAM_FRAME_PTR(data_frame) (struct stream_frame *) \
115    ((unsigned char *) (data_frame) - offsetof(struct stream_frame, data_frame))
116
117
118static const struct data_in_iface di_if_nocopy;
119
120
121struct data_in *
122data_in_nocopy_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id)
123{
124    struct nocopy_data_in *ncdi;
125
126    ncdi = malloc(sizeof(*ncdi));
127    if (!ncdi)
128        return NULL;
129
130    TAILQ_INIT(&ncdi->ncdi_frames_in);
131    ncdi->ncdi_data_in.di_if    = &di_if_nocopy;
132    ncdi->ncdi_data_in.di_flags = 0;
133    ncdi->ncdi_conn_pub         = conn_pub;
134    ncdi->ncdi_stream_id        = stream_id;
135    ncdi->ncdi_byteage          = 0;
136    ncdi->ncdi_n_frames         = 0;
137    ncdi->ncdi_n_holes          = 0;
138    ncdi->ncdi_cons_far         = 0;
139    return &ncdi->ncdi_data_in;
140}
141
142
143static void
144nocopy_di_destroy (struct data_in *data_in)
145{
146    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
147    stream_frame_t *frame;
148    while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in)))
149    {
150        TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
151        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
152        lsquic_malo_put(frame);
153    }
154    free(ncdi);
155}
156
157
158#if 1
159#define CHECK_ORDER(ncdi)
160#else
161static int
162ordered (const struct nocopy_data_in *ncdi)
163{
164    const stream_frame_t *frame;
165    uint64_t off = 0;
166    int ordered = 1;
167    TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame)
168    {
169        ordered &= off <= frame->data_frame.df_offset;
170        off = frame->data_frame.df_offset;
171    }
172    return ordered;
173}
174#define CHECK_ORDER(ncdi) assert(ordered(ncdi))
175#endif
176
177
178/* To reduce the number of conditionals, logical operators have been replaced
179 * with arithmetic operators.  Return value is an integer in range [0, 3].
180 * Bit 0 is set due to FIN in previous frame.  If bit 1 is set, it means that
181 * it's a dup.
182 */
183static int
184insert_frame (struct nocopy_data_in *ncdi, struct stream_frame *new_frame,
185                                    uint64_t read_offset, unsigned *p_n_frames)
186{
187    int ins;
188    unsigned count;
189    stream_frame_t *prev_frame, *next_frame;
190
191    /* Find position in the list, going backwards.  We go backwards because
192     * that is the most likely scenario.
193     */
194    next_frame = TAILQ_LAST(&ncdi->ncdi_frames_in, stream_frames_tailq);
195    if (next_frame && new_frame->data_frame.df_offset < next_frame->data_frame.df_offset)
196    {
197        count = 1;
198        prev_frame = TAILQ_PREV(next_frame, stream_frames_tailq, next_frame);
199        for ( ; prev_frame &&
200                    new_frame->data_frame.df_offset < next_frame->data_frame.df_offset;
201                next_frame = prev_frame,
202                    prev_frame = TAILQ_PREV(prev_frame, stream_frames_tailq, next_frame))
203        {
204            if (new_frame->data_frame.df_offset >= prev_frame->data_frame.df_offset)
205                break;
206            ++count;
207        }
208    }
209    else
210    {
211        count = 0;
212        prev_frame = NULL;
213    }
214
215    if (!prev_frame && next_frame && new_frame->data_frame.df_offset >=
216                                            next_frame->data_frame.df_offset)
217    {
218        prev_frame = next_frame;
219        next_frame = TAILQ_NEXT(next_frame, next_frame);
220    }
221
222    /* Perform checks */
223    if (prev_frame)
224        ins =
225          (((prev_frame->data_frame.df_offset == new_frame->data_frame.df_offset) &
226            (prev_frame->data_frame.df_size   == new_frame->data_frame.df_size)   &
227            (prev_frame->data_frame.df_fin    == new_frame->data_frame.df_fin)) << 1)   /* Duplicate */
228          | prev_frame->data_frame.df_fin                                               /* FIN in the middle or dup */
229          | (prev_frame->data_frame.df_offset + prev_frame->data_frame.df_size
230                                            > new_frame->data_frame.df_offset)          /* Overlap */
231        ;
232    else
233        ins = 0;
234
235    if (next_frame)
236        ins |=
237          (((next_frame->data_frame.df_offset == new_frame->data_frame.df_offset) &
238            (next_frame->data_frame.df_size   == new_frame->data_frame.df_size)   &
239            (next_frame->data_frame.df_fin    == new_frame->data_frame.df_fin)) << 1)   /* Duplicate */
240          | (new_frame->data_frame.df_offset < read_offset) << 1                        /* Duplicate */
241          | new_frame->data_frame.df_fin                                                /* FIN in the middle or dup */
242          | (new_frame->data_frame.df_offset + new_frame->data_frame.df_size
243                                            > next_frame->data_frame.df_offset)         /* Overlap */
244        ;
245    else
246        ins |=
247            (new_frame->data_frame.df_offset < read_offset) << 1                        /* Duplicate */
248        ;
249
250    if (ins)
251        return ins;
252
253    if (prev_frame)
254    {
255        TAILQ_INSERT_AFTER(&ncdi->ncdi_frames_in, prev_frame, new_frame, next_frame);
256        ncdi->ncdi_n_holes += prev_frame->data_frame.df_offset +
257                    prev_frame->data_frame.df_size != new_frame->data_frame.df_offset;
258        if (next_frame)
259        {
260            ncdi->ncdi_n_holes += new_frame->data_frame.df_offset +
261                    new_frame->data_frame.df_size != next_frame->data_frame.df_offset;
262            --ncdi->ncdi_n_holes;
263        }
264    }
265    else
266    {
267        ncdi->ncdi_n_holes += next_frame && new_frame->data_frame.df_offset +
268                    new_frame->data_frame.df_size != next_frame->data_frame.df_offset;
269        TAILQ_INSERT_HEAD(&ncdi->ncdi_frames_in, new_frame, next_frame);
270    }
271    CHECK_ORDER(ncdi);
272
273    ++ncdi->ncdi_n_frames;
274    ncdi->ncdi_byteage += new_frame->data_frame.df_size;
275    *p_n_frames = count;
276
277    return 0;
278}
279
280
281static int
282check_efficiency (struct nocopy_data_in *ncdi, unsigned count)
283{
284    if (ncdi->ncdi_n_frames <= EFF_CHECK_THRESH_LOW)
285    {
286        ncdi->ncdi_cons_far = 0;
287        return 0;
288    }
289    if (ncdi->ncdi_n_frames > EFF_CHECK_THRESH_HIGH)
290        return 1;
291    if (count >= ncdi->ncdi_n_frames / 2)
292    {
293        ++ncdi->ncdi_cons_far;
294        if (ncdi->ncdi_cons_far > EFF_FAR_TRAVERSE_COUNT)
295            return 1;
296    }
297    else
298        ncdi->ncdi_cons_far = 0;
299    if (ncdi->ncdi_n_holes > EFF_MAX_HOLES)
300        return 1;
301    if (ncdi->ncdi_byteage / EFF_TINY_FRAME_SZ < ncdi->ncdi_n_frames)
302        return 1;
303    return 0;
304}
305
306
307static void
308set_eff_alert (struct nocopy_data_in *ncdi)
309{
310    LSQ_DEBUG("low efficiency: n_frames: %u; n_holes: %u; cons_far: %u; "
311              "byteage: %"PRIu64, ncdi->ncdi_n_frames, ncdi->ncdi_n_holes,
312              ncdi->ncdi_cons_far, ncdi->ncdi_byteage);
313    ncdi->ncdi_data_in.di_flags |= DI_SWITCH_IMPL;
314}
315
316
317static enum ins_frame
318nocopy_di_insert_frame (struct data_in *data_in,
319                        struct stream_frame *new_frame, uint64_t read_offset)
320{
321    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
322    unsigned count;
323    int ins;
324
325    assert(0 == (new_frame->data_frame.df_fin & ~1));
326    ins = insert_frame(ncdi, new_frame, read_offset, &count);
327    switch (ins)
328    {
329    case 0:
330        if (check_efficiency(ncdi, count))
331            set_eff_alert(ncdi);
332        return INS_FRAME_OK;
333    case 2:
334    case 3:
335        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in);
336        lsquic_malo_put(new_frame);
337        return INS_FRAME_DUP;
338    default:
339        assert(1 == ins);
340        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in);
341        lsquic_malo_put(new_frame);
342        return INS_FRAME_ERR;
343    }
344}
345
346
347static struct data_frame *
348nocopy_di_get_frame (struct data_in *data_in, uint64_t read_offset)
349{
350    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
351    struct stream_frame *frame = TAILQ_FIRST(&ncdi->ncdi_frames_in);
352    if (frame && frame->data_frame.df_offset +
353                                frame->data_frame.df_read_off == read_offset)
354        return &frame->data_frame;
355    else
356        return NULL;
357}
358
359
360static void
361nocopy_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
362{
363    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
364    struct stream_frame *const frame = STREAM_FRAME_PTR(data_frame), *first;
365    assert(data_frame->df_read_off == data_frame->df_size);
366    TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
367    first = TAILQ_FIRST(&ncdi->ncdi_frames_in);
368    ncdi->ncdi_n_holes -= first && frame->data_frame.df_offset +
369                    frame->data_frame.df_size != first->data_frame.df_offset;
370    --ncdi->ncdi_n_frames;
371    ncdi->ncdi_byteage -= frame->data_frame.df_size;
372    lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
373    lsquic_malo_put(frame);
374}
375
376
377static int
378nocopy_di_empty (struct data_in *data_in)
379{
380    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
381    return TAILQ_EMPTY(&ncdi->ncdi_frames_in);
382}
383
384
385struct data_in *
386nocopy_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
387{
388    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
389    struct data_in *new_data_in;
390    stream_frame_t *frame;
391    enum ins_frame ins;
392
393    new_data_in = data_in_hash_new(ncdi->ncdi_conn_pub, ncdi->ncdi_stream_id,
394                                                        ncdi->ncdi_byteage);
395    if (!new_data_in)
396        goto end;
397
398    while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in)))
399    {
400        TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
401        ins = data_in_hash_insert_data_frame(new_data_in, &frame->data_frame,
402                                                                  read_offset);
403        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
404        lsquic_malo_put(frame);
405        if (INS_FRAME_ERR == ins)
406        {
407            new_data_in->di_if->di_destroy(new_data_in);
408            new_data_in = NULL;
409            goto end;
410        }
411    }
412
413  end:
414    data_in->di_if->di_destroy(data_in);
415    return new_data_in;
416}
417
418
419/* This function overestimates amount of memory because some packets are
420 * referenced by more than one stream.  In the usual case, however, I
421 * expect the error not to be large.
422 */
423static size_t
424nocopy_di_mem_used (struct data_in *data_in)
425{
426    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
427    const stream_frame_t *frame;
428    size_t size;
429
430    size = sizeof(*data_in);
431    TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame)
432        size += lsquic_packet_in_mem_used(frame->packet_in);
433
434    return size;
435}
436
437static const struct data_in_iface di_if_nocopy = {
438    .di_destroy      = nocopy_di_destroy,
439    .di_empty        = nocopy_di_empty,
440    .di_frame_done   = nocopy_di_frame_done,
441    .di_get_frame    = nocopy_di_get_frame,
442    .di_insert_frame = nocopy_di_insert_frame,
443    .di_mem_used     = nocopy_di_mem_used,
444    .di_switch_impl  = nocopy_di_switch_impl,
445};
446