lsquic_di_nocopy.c revision b93f59be
1/* Copyright (c) 2017 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_di_nocopy.c -- The "no-copy" data in stream.
4 *
5 * Data from packets is not copied: the packets are referenced by stream
6 * frames.  When all data from stream frame is read, the frame is released
7 * and packet reference count is decremented, which possibly results in
8 * packet being released as well.
9 *
10 * This approach works well in regular circumstances; there are two scenarios
11 * when it does not:
12 *
13 *  A.  If we have many out-of-order frames, insertion into the list becomes
14 *      expensive.  In the degenerate case, we'd have to traverse the whole
15 *      list to find appropriate position.
16 *
17 *  B.  Having many frames ties up resources, as each frame keeps a reference
18 *      to the packet that contains it.  This is a possible attack vector:
19 *      send many one-byte packets; a single hole at the beginning will stop
20 *      the server from being able to read the stream, thus tying up resources.
21 *
22 * If we detect that either (A) or (B) is true, we request that the stream
23 * switch to a more robust incoming stream frame handler by setting
24 * DI_SWITCH_IMPL flag.
25 *
26 * For a small number of elements, (A) and (B) do not matter and the checks
27 * are not performed.  This number is defined by EFF_CHECK_THRESH_LOW.  On
28 * the other side of the spectrum, if the number of frames grows very high,
29 * we want to switch to a more memory-efficient implementation even if (A)
30 * and (B) are not true.  EFF_CHECK_THRESH_HIGH defines this threshold.
31 *
32 * Between the low and high thresholds, we detect efficiency problems as
33 * follows.
34 *
35 * To detect (A), we count how many elements we have to traverse during
36 * insertion.  If we have to traverse at least half the list
37 * EFF_FAR_TRAVERSE_COUNT in a row, DI_SWITCH_IMPL is issued.
38 *
39 * If average stream frame size is smaller than EFF_TINY_FRAME_SZ bytes,
40 * (B) condition is true.  In addition, if there are more than EFF_MAX_HOLES
41 * in the stream, this is also indicative of (B).
42 */
43
44
45#include <assert.h>
46#include <inttypes.h>
47#include <stddef.h>
48#include <stdint.h>
49#include <stdlib.h>
50#include <sys/queue.h>
51
52#include "lsquic.h"
53#include "lsquic_types.h"
54#include "lsquic_int_types.h"
55#include "lsquic_conn_flow.h"
56#include "lsquic_packet_common.h"
57#include "lsquic_packet_in.h"
58#include "lsquic_rtt.h"
59#include "lsquic_sfcw.h"
60#include "lsquic_stream.h"
61#include "lsquic_mm.h"
62#include "lsquic_malo.h"
63#include "lsquic_conn.h"
64#include "lsquic_conn_public.h"
65#include "lsquic_data_in_if.h"
66
67
68#define LSQUIC_LOGGER_MODULE LSQLM_DI
69#define LSQUIC_LOG_CONN_ID ncdi->ncdi_conn_pub->lconn->cn_cid
70#define LSQUIC_LOG_STREAM_ID ncdi->ncdi_stream_id
71#include "lsquic_logger.h"
72
73
74/* If number of elements is at or below this number, we do not bother to check
75 * efficiency conditions.
76 */
77#define EFF_CHECK_THRESH_LOW    10
78
79/* If number of elements is higher than this number, efficiency alert
80 * is issued unconditionally.
81 */
82#define EFF_CHECK_THRESH_HIGH   1000
83
84/* Maximum number of consecutive far traversals */
85#define EFF_FAR_TRAVERSE_COUNT  4
86
87/* Maximum number of holes that is not deemed suspicious */
88#define EFF_MAX_HOLES           5
89
90/* What is deemed a tiny frame, in bytes.  If it is a power of two, calculation
91 * is cheaper.
92 */
93#define EFF_TINY_FRAME_SZ       64
94
95
96TAILQ_HEAD(stream_frames_tailq, stream_frame);
97
98
99struct nocopy_data_in
100{
101    struct stream_frames_tailq  ncdi_frames_in;
102    struct data_in              ncdi_data_in;
103    struct lsquic_conn_public  *ncdi_conn_pub;
104    uint64_t                    ncdi_byteage;
105    uint32_t                    ncdi_stream_id;
106    unsigned                    ncdi_n_frames;
107    unsigned                    ncdi_n_holes;
108    unsigned                    ncdi_cons_far;
109};
110
111
112#define NCDI_PTR(data_in) (struct nocopy_data_in *) \
113    ((unsigned char *) (data_in) - offsetof(struct nocopy_data_in, ncdi_data_in))
114
115#define STREAM_FRAME_PTR(data_frame) (struct stream_frame *) \
116    ((unsigned char *) (data_frame) - offsetof(struct stream_frame, data_frame))
117
118
119static const struct data_in_iface *di_if_nocopy_ptr;
120
121
122struct data_in *
123data_in_nocopy_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id)
124{
125    struct nocopy_data_in *ncdi;
126
127    ncdi = malloc(sizeof(*ncdi));
128    if (!ncdi)
129        return NULL;
130
131    TAILQ_INIT(&ncdi->ncdi_frames_in);
132    ncdi->ncdi_data_in.di_if    = di_if_nocopy_ptr;
133    ncdi->ncdi_data_in.di_flags = 0;
134    ncdi->ncdi_conn_pub         = conn_pub;
135    ncdi->ncdi_stream_id        = stream_id;
136    ncdi->ncdi_byteage          = 0;
137    ncdi->ncdi_n_frames         = 0;
138    ncdi->ncdi_n_holes          = 0;
139    ncdi->ncdi_cons_far         = 0;
140    return &ncdi->ncdi_data_in;
141}
142
143
144static void
145nocopy_di_destroy (struct data_in *data_in)
146{
147    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
148    stream_frame_t *frame;
149    while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in)))
150    {
151        TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
152        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
153        lsquic_malo_put(frame);
154    }
155    free(ncdi);
156}
157
158
159#if 1
160#define CHECK_ORDER(ncdi)
161#else
162static int
163ordered (const struct nocopy_data_in *ncdi)
164{
165    const stream_frame_t *frame;
166    uint64_t off = 0;
167    int ordered = 1;
168    TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame)
169    {
170        ordered &= off <= frame->data_frame.df_offset;
171        off = frame->data_frame.df_offset;
172    }
173    return ordered;
174}
175#define CHECK_ORDER(ncdi) assert(ordered(ncdi))
176#endif
177
178
179/* To reduce the number of conditionals, logical operators have been replaced
180 * with arithmetic operators.  Return value is an integer in range [0, 3].
181 * Bit 0 is set due to FIN in previous frame.  If bit 1 is set, it means that
182 * it's a dup.
183 */
184static int
185insert_frame (struct nocopy_data_in *ncdi, struct stream_frame *new_frame,
186                                    uint64_t read_offset, unsigned *p_n_frames)
187{
188    int ins;
189    unsigned count;
190    stream_frame_t *prev_frame, *next_frame;
191
192    /* Find position in the list, going backwards.  We go backwards because
193     * that is the most likely scenario.
194     */
195    next_frame = TAILQ_LAST(&ncdi->ncdi_frames_in, stream_frames_tailq);
196    if (next_frame && new_frame->data_frame.df_offset < next_frame->data_frame.df_offset)
197    {
198        count = 1;
199        prev_frame = TAILQ_PREV(next_frame, stream_frames_tailq, next_frame);
200        for ( ; prev_frame &&
201                    new_frame->data_frame.df_offset < next_frame->data_frame.df_offset;
202                next_frame = prev_frame,
203                    prev_frame = TAILQ_PREV(prev_frame, stream_frames_tailq, next_frame))
204        {
205            if (new_frame->data_frame.df_offset >= prev_frame->data_frame.df_offset)
206                break;
207            ++count;
208        }
209    }
210    else
211    {
212        count = 0;
213        prev_frame = NULL;
214    }
215
216    if (!prev_frame && next_frame && new_frame->data_frame.df_offset >=
217                                            next_frame->data_frame.df_offset)
218    {
219        prev_frame = next_frame;
220        next_frame = TAILQ_NEXT(next_frame, next_frame);
221    }
222
223    /* Perform checks */
224    if (prev_frame)
225        ins =
226          (((prev_frame->data_frame.df_offset == new_frame->data_frame.df_offset) &
227            (prev_frame->data_frame.df_size   == new_frame->data_frame.df_size)   &
228            (prev_frame->data_frame.df_fin    == new_frame->data_frame.df_fin)) << 1)   /* Duplicate */
229          | prev_frame->data_frame.df_fin                                               /* FIN in the middle or dup */
230          | (prev_frame->data_frame.df_offset + prev_frame->data_frame.df_size
231                                            > new_frame->data_frame.df_offset)          /* Overlap */
232        ;
233    else
234        ins = 0;
235
236    if (next_frame)
237        ins |=
238          (((next_frame->data_frame.df_offset == new_frame->data_frame.df_offset) &
239            (next_frame->data_frame.df_size   == new_frame->data_frame.df_size)   &
240            (next_frame->data_frame.df_fin    == new_frame->data_frame.df_fin)) << 1)   /* Duplicate */
241          | (new_frame->data_frame.df_offset < read_offset) << 1                        /* Duplicate */
242          | new_frame->data_frame.df_fin                                                /* FIN in the middle or dup */
243          | (new_frame->data_frame.df_offset + new_frame->data_frame.df_size
244                                            > next_frame->data_frame.df_offset)         /* Overlap */
245        ;
246    else
247        ins |=
248            (new_frame->data_frame.df_offset < read_offset) << 1                        /* Duplicate */
249        ;
250
251    if (ins)
252        return ins;
253
254    if (prev_frame)
255    {
256        TAILQ_INSERT_AFTER(&ncdi->ncdi_frames_in, prev_frame, new_frame, next_frame);
257        ncdi->ncdi_n_holes += prev_frame->data_frame.df_offset +
258                    prev_frame->data_frame.df_size != new_frame->data_frame.df_offset;
259        if (next_frame)
260        {
261            ncdi->ncdi_n_holes += new_frame->data_frame.df_offset +
262                    new_frame->data_frame.df_size != next_frame->data_frame.df_offset;
263            --ncdi->ncdi_n_holes;
264        }
265    }
266    else
267    {
268        ncdi->ncdi_n_holes += next_frame && new_frame->data_frame.df_offset +
269                    new_frame->data_frame.df_size != next_frame->data_frame.df_offset;
270        TAILQ_INSERT_HEAD(&ncdi->ncdi_frames_in, new_frame, next_frame);
271    }
272    CHECK_ORDER(ncdi);
273
274    ++ncdi->ncdi_n_frames;
275    ncdi->ncdi_byteage += new_frame->data_frame.df_size;
276    *p_n_frames = count;
277
278    return 0;
279}
280
281
282static int
283check_efficiency (struct nocopy_data_in *ncdi, unsigned count)
284{
285    if (ncdi->ncdi_n_frames <= EFF_CHECK_THRESH_LOW)
286    {
287        ncdi->ncdi_cons_far = 0;
288        return 0;
289    }
290    if (ncdi->ncdi_n_frames > EFF_CHECK_THRESH_HIGH)
291        return 1;
292    if (count >= ncdi->ncdi_n_frames / 2)
293    {
294        ++ncdi->ncdi_cons_far;
295        if (ncdi->ncdi_cons_far > EFF_FAR_TRAVERSE_COUNT)
296            return 1;
297    }
298    else
299        ncdi->ncdi_cons_far = 0;
300    if (ncdi->ncdi_n_holes > EFF_MAX_HOLES)
301        return 1;
302    if (ncdi->ncdi_byteage / EFF_TINY_FRAME_SZ < ncdi->ncdi_n_frames)
303        return 1;
304    return 0;
305}
306
307
308static void
309set_eff_alert (struct nocopy_data_in *ncdi)
310{
311    LSQ_DEBUG("low efficiency: n_frames: %u; n_holes: %u; cons_far: %u; "
312              "byteage: %"PRIu64, ncdi->ncdi_n_frames, ncdi->ncdi_n_holes,
313              ncdi->ncdi_cons_far, ncdi->ncdi_byteage);
314    ncdi->ncdi_data_in.di_flags |= DI_SWITCH_IMPL;
315}
316
317
318static enum ins_frame
319nocopy_di_insert_frame (struct data_in *data_in,
320                        struct stream_frame *new_frame, uint64_t read_offset)
321{
322    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
323    unsigned count;
324    int ins;
325
326    assert(0 == (new_frame->data_frame.df_fin & ~1));
327    ins = insert_frame(ncdi, new_frame, read_offset, &count);
328    switch (ins)
329    {
330    case 0:
331        if (check_efficiency(ncdi, count))
332            set_eff_alert(ncdi);
333        return INS_FRAME_OK;
334    case 2:
335    case 3:
336        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in);
337        lsquic_malo_put(new_frame);
338        return INS_FRAME_DUP;
339    default:
340        assert(1 == ins);
341        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in);
342        lsquic_malo_put(new_frame);
343        return INS_FRAME_ERR;
344    }
345}
346
347
348static struct data_frame *
349nocopy_di_get_frame (struct data_in *data_in, uint64_t read_offset)
350{
351    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
352    struct stream_frame *frame = TAILQ_FIRST(&ncdi->ncdi_frames_in);
353    if (frame && frame->data_frame.df_offset +
354                                frame->data_frame.df_read_off == read_offset)
355        return &frame->data_frame;
356    else
357        return NULL;
358}
359
360
361static void
362nocopy_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
363{
364    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
365    struct stream_frame *const frame = STREAM_FRAME_PTR(data_frame), *first;
366    assert(data_frame->df_read_off == data_frame->df_size);
367    TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
368    first = TAILQ_FIRST(&ncdi->ncdi_frames_in);
369    ncdi->ncdi_n_holes -= first && frame->data_frame.df_offset +
370                    frame->data_frame.df_size != first->data_frame.df_offset;
371    --ncdi->ncdi_n_frames;
372    ncdi->ncdi_byteage -= frame->data_frame.df_size;
373    lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
374    lsquic_malo_put(frame);
375}
376
377
378static int
379nocopy_di_empty (struct data_in *data_in)
380{
381    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
382    return TAILQ_EMPTY(&ncdi->ncdi_frames_in);
383}
384
385
386struct data_in *
387nocopy_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
388{
389    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
390    struct data_in *new_data_in;
391    stream_frame_t *frame;
392    enum ins_frame ins;
393
394    new_data_in = data_in_hash_new(ncdi->ncdi_conn_pub, ncdi->ncdi_stream_id,
395                                                        ncdi->ncdi_byteage);
396    if (!new_data_in)
397        goto end;
398
399    while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in)))
400    {
401        TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
402        ins = data_in_hash_insert_data_frame(new_data_in, &frame->data_frame,
403                                                                  read_offset);
404        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
405        lsquic_malo_put(frame);
406        if (INS_FRAME_ERR == ins)
407        {
408            new_data_in->di_if->di_destroy(new_data_in);
409            new_data_in = NULL;
410            goto end;
411        }
412    }
413
414  end:
415    data_in->di_if->di_destroy(data_in);
416    return new_data_in;
417}
418
419
420/* This function overestimates amount of memory because some packets are
421 * referenced by more than one stream.  In the usual case, however, I
422 * expect the error not to be large.
423 */
424static size_t
425nocopy_di_mem_used (struct data_in *data_in)
426{
427    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
428    const stream_frame_t *frame;
429    size_t size;
430
431    size = sizeof(*data_in);
432    TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame)
433        size += lsquic_packet_in_mem_used(frame->packet_in);
434
435    return size;
436}
437
438static const struct data_in_iface di_if_nocopy = {
439    .di_destroy      = nocopy_di_destroy,
440    .di_empty        = nocopy_di_empty,
441    .di_frame_done   = nocopy_di_frame_done,
442    .di_get_frame    = nocopy_di_get_frame,
443    .di_insert_frame = nocopy_di_insert_frame,
444    .di_mem_used     = nocopy_di_mem_used,
445    .di_switch_impl  = nocopy_di_switch_impl,
446};
447
448static const struct data_in_iface *di_if_nocopy_ptr = &di_if_nocopy;
449