lsquic_di_nocopy.c revision be4cfad0
1/* Copyright (c) 2017 - 2018 LiteSpeed Technologies Inc.  See LICENSE. */
2/*
3 * lsquic_di_nocopy.c -- The "no-copy" data in stream.
4 *
5 * Data from packets is not copied: the packets are referenced by stream
6 * frames.  When all data from stream frame is read, the frame is released
7 * and packet reference count is decremented, which possibly results in
8 * packet being released as well.
9 *
10 * This approach works well in regular circumstances; there are two scenarios
11 * when it does not:
12 *
13 *  A.  If we have many out-of-order frames, insertion into the list becomes
14 *      expensive.  In the degenerate case, we'd have to traverse the whole
15 *      list to find appropriate position.
16 *
17 *  B.  Having many frames ties up resources, as each frame keeps a reference
18 *      to the packet that contains it.  This is a possible attack vector:
19 *      send many one-byte packets; a single hole at the beginning will stop
20 *      the server from being able to read the stream, thus tying up resources.
21 *
22 * If we detect that either (A) or (B) is true, we request that the stream
23 * switch to a more robust incoming stream frame handler by setting
24 * DI_SWITCH_IMPL flag.
25 *
26 * For a small number of elements, (A) and (B) do not matter and the checks
27 * are not performed.  This number is defined by EFF_CHECK_THRESH_LOW.  On
28 * the other side of the spectrum, if the number of frames grows very high,
29 * we want to switch to a more memory-efficient implementation even if (A)
30 * and (B) are not true.  EFF_CHECK_THRESH_HIGH defines this threshold.
31 *
32 * Between the low and high thresholds, we detect efficiency problems as
33 * follows.
34 *
35 * To detect (A), we count how many elements we have to traverse during
36 * insertion.  If we have to traverse at least half the list
37 * EFF_FAR_TRAVERSE_COUNT in a row, DI_SWITCH_IMPL is issued.
38 *
39 * If average stream frame size is smaller than EFF_TINY_FRAME_SZ bytes,
40 * (B) condition is true.  In addition, if there are more than EFF_MAX_HOLES
41 * in the stream, this is also indicative of (B).
42 */
43
44
45#include <assert.h>
46#include <inttypes.h>
47#include <stddef.h>
48#include <stdint.h>
49#include <stdlib.h>
50#include <sys/queue.h>
51
52#include "lsquic.h"
53#include "lsquic_types.h"
54#include "lsquic_int_types.h"
55#include "lsquic_conn_flow.h"
56#include "lsquic_packet_common.h"
57#include "lsquic_packet_in.h"
58#include "lsquic_rtt.h"
59#include "lsquic_sfcw.h"
60#include "lsquic_stream.h"
61#include "lsquic_mm.h"
62#include "lsquic_malo.h"
63#include "lsquic_conn.h"
64#include "lsquic_conn_public.h"
65#include "lsquic_data_in_if.h"
66
67
68#define LSQUIC_LOGGER_MODULE LSQLM_DI
69#define LSQUIC_LOG_CONN_ID ncdi->ncdi_conn_pub->lconn->cn_cid
70#define LSQUIC_LOG_STREAM_ID ncdi->ncdi_stream_id
71#include "lsquic_logger.h"
72
73
74/* If number of elements is at or below this number, we do not bother to check
75 * efficiency conditions.
76 */
77#define EFF_CHECK_THRESH_LOW    10
78
79/* If number of elements is higher than this number, efficiency alert
80 * is issued unconditionally.
81 */
82#define EFF_CHECK_THRESH_HIGH   1000
83
84/* Maximum number of consecutive far traversals */
85#define EFF_FAR_TRAVERSE_COUNT  4
86
87/* Maximum number of holes that is not deemed suspicious */
88#define EFF_MAX_HOLES           5
89
90/* What is deemed a tiny frame, in bytes.  If it is a power of two, calculation
91 * is cheaper.
92 */
93#define EFF_TINY_FRAME_SZ       64
94
95
96TAILQ_HEAD(stream_frames_tailq, stream_frame);
97
98
99struct nocopy_data_in
100{
101    struct stream_frames_tailq  ncdi_frames_in;
102    struct data_in              ncdi_data_in;
103    struct lsquic_conn_public  *ncdi_conn_pub;
104    uint64_t                    ncdi_byteage;
105    uint32_t                    ncdi_stream_id;
106    unsigned                    ncdi_n_frames;
107    unsigned                    ncdi_n_holes;
108    unsigned                    ncdi_cons_far;
109};
110
111
112#define NCDI_PTR(data_in) (struct nocopy_data_in *) \
113    ((unsigned char *) (data_in) - offsetof(struct nocopy_data_in, ncdi_data_in))
114
115#define STREAM_FRAME_PTR(data_frame) (struct stream_frame *) \
116    ((unsigned char *) (data_frame) - offsetof(struct stream_frame, data_frame))
117
118
119static const struct data_in_iface *di_if_nocopy_ptr;
120
121
122struct data_in *
123data_in_nocopy_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id)
124{
125    struct nocopy_data_in *ncdi;
126
127    ncdi = malloc(sizeof(*ncdi));
128    if (!ncdi)
129        return NULL;
130
131    TAILQ_INIT(&ncdi->ncdi_frames_in);
132    ncdi->ncdi_data_in.di_if    = di_if_nocopy_ptr;
133    ncdi->ncdi_data_in.di_flags = 0;
134    ncdi->ncdi_conn_pub         = conn_pub;
135    ncdi->ncdi_stream_id        = stream_id;
136    ncdi->ncdi_byteage          = 0;
137    ncdi->ncdi_n_frames         = 0;
138    ncdi->ncdi_n_holes          = 0;
139    ncdi->ncdi_cons_far         = 0;
140    return &ncdi->ncdi_data_in;
141}
142
143
144static void
145nocopy_di_destroy (struct data_in *data_in)
146{
147    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
148    stream_frame_t *frame;
149    while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in)))
150    {
151        TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
152        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
153        lsquic_malo_put(frame);
154    }
155    free(ncdi);
156}
157
158#define DF_OFF(frame) (frame)->data_frame.df_offset
159#define DF_FIN(frame) (frame)->data_frame.df_fin
160#define DF_SIZE(frame) (frame)->data_frame.df_size
161#define DF_END(frame) (DF_OFF(frame) + DF_SIZE(frame))
162
163
164#if LSQUIC_EXTRA_CHECKS
165static int
166frame_list_is_sane (const struct nocopy_data_in *ncdi)
167{
168    const stream_frame_t *frame;
169    uint64_t prev_off = 0, prev_end = 0;
170    int ordered = 1, overlaps = 0;
171    TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame)
172    {
173        ordered &= prev_off <= DF_OFF(frame);
174        overlaps |= prev_end > DF_OFF(frame);
175        prev_off = DF_OFF(frame);
176        prev_end = DF_END(frame);
177    }
178    return ordered && !overlaps;
179}
180#define CHECK_ORDER(ncdi) assert(frame_list_is_sane(ncdi))
181#else
182#define CHECK_ORDER(ncdi)
183#endif
184
185
186/* When inserting a new frame into the frame list, there are four cases to
187 * consider:
188 *
189 *  I.   New frame is the only frame in the list;
190 *  II.  New frame only has a neighbor to its left (previous frame);
191 *  III. New frame only has a neighbor to its right (next frame); and
192 *  IV.  New frame has both left and right neighbors.
193 *
194 * I. New frame is the only frame in the list.
195 *
196 *      A)  If the read offset is larger than the end of the new frame and
197 *          the new frame has a FIN, it is an ERROR.
198 *
199 *      B)  If the read offset is larger than the end of the new frame and
200 *          the new frame does not have a FIN, it is a DUP.
201 *
202 *      C)  If the read offset is equal to the end of the new frame and
203 *          the new frame has a FIN, it is an OVERLAP.
204 *
205 *      D)  If the read offset is equal to the end of the new frame and
206 *          the new frame does not have a FIN, it is a DUP.
207 *
208 * II. New frame only has a neighbor to its left.
209 *
210 *      - (A) and (B) apply.
211 *
212 *      E)  New frame could be the same as the previous frame: DUP.
213 *
214 *      F)  New frame has the same offset and size as previous frame, but
215 *          previous frame has FIN and the new frame does not: DUP.
216 *
217 *      G)  New frame has the same offset and size as previous frame, but
218 *          previous frame does not have a FIN and the new one does: OVERLAP.
219 *
220 *      H)  New frame could start inside previous frame: OVERLAP.
221 *
222 *  III. New frame only has a neighbor to its right.
223 *
224 *      - (A) and (B) apply.
225 *
226 *      I) Right neighbor could start inside new frame: OVERLAP.
227 *
228 *  IV.  New frame has both left and right neighbors.
229 *
230 *      - (A), (B), (E), (F), (G), (H), and (I) apply.
231 */
232
233
234static enum ins_frame
235insert_frame (struct nocopy_data_in *ncdi, struct stream_frame *new_frame,
236                                    uint64_t read_offset, unsigned *p_n_frames)
237{
238    stream_frame_t *prev_frame, *next_frame;
239    unsigned count;
240
241    if (read_offset > DF_END(new_frame))
242    {
243        if (DF_FIN(new_frame))
244            return INS_FRAME_ERR;               /* Case (A) */
245        else
246            return INS_FRAME_DUP;               /* Case (B) */
247    }
248
249    /* Find position in the list, going backwards.  We go backwards because
250     * that is the most likely scenario.
251     */
252    next_frame = TAILQ_LAST(&ncdi->ncdi_frames_in, stream_frames_tailq);
253    if (next_frame && DF_OFF(new_frame) < DF_OFF(next_frame))
254    {
255        count = 1;
256        prev_frame = TAILQ_PREV(next_frame, stream_frames_tailq, next_frame);
257        for ( ; prev_frame && DF_OFF(new_frame) < DF_OFF(next_frame);
258                next_frame = prev_frame,
259                    prev_frame = TAILQ_PREV(prev_frame, stream_frames_tailq, next_frame))
260        {
261            if (DF_OFF(new_frame) >= DF_OFF(prev_frame))
262                break;
263            ++count;
264        }
265    }
266    else
267    {
268        count = 0;
269        prev_frame = NULL;
270    }
271
272    if (!prev_frame && next_frame && DF_OFF(new_frame) >= DF_OFF(next_frame))
273    {
274        prev_frame = next_frame;
275        next_frame = TAILQ_NEXT(next_frame, next_frame);
276    }
277
278    const int select = !!prev_frame << 1 | !!next_frame;
279    switch (select)
280    {
281    default:    /* No neighbors */
282        if (read_offset == DF_END(new_frame) && DF_SIZE(new_frame))
283        {
284            if (DF_FIN(new_frame))
285                return INS_FRAME_OVERLAP;       /* Case (C) */
286            else
287                return INS_FRAME_DUP;           /* Case (D) */
288        }
289        goto list_was_empty;
290    case 3:     /* Both left and right neighbors */
291    case 2:     /* Only left neighbor (prev_frame) */
292        if (DF_OFF(prev_frame) == DF_OFF(new_frame)
293            && DF_SIZE(prev_frame) == DF_SIZE(new_frame))
294        {
295            if (!DF_FIN(prev_frame) && DF_FIN(new_frame))
296                return INS_FRAME_OVERLAP;       /* Case (G) */
297            else
298                return INS_FRAME_DUP;           /* Cases (E) and (F) */
299        }
300        if (DF_END(prev_frame) > DF_OFF(new_frame))
301            return INS_FRAME_OVERLAP;           /* Case (H) */
302        if (select == 2)
303        {
304            if (DF_FIN(prev_frame))
305                return INS_FRAME_ERR;
306            goto have_prev;
307        }
308        /* Fall-through */
309    case 1:     /* Only right neighbor (next_frame) */
310        if (DF_END(new_frame) > DF_OFF(next_frame))
311            return INS_FRAME_OVERLAP;           /* Case (I) */
312        break;
313    }
314
315    if (prev_frame)
316    {
317  have_prev:
318        TAILQ_INSERT_AFTER(&ncdi->ncdi_frames_in, prev_frame, new_frame, next_frame);
319        ncdi->ncdi_n_holes += DF_END(prev_frame) != DF_OFF(new_frame);
320        if (next_frame)
321        {
322            ncdi->ncdi_n_holes += DF_END(new_frame) != DF_OFF(next_frame);
323            --ncdi->ncdi_n_holes;
324        }
325    }
326    else
327    {
328        ncdi->ncdi_n_holes += next_frame
329                           && DF_END(new_frame) != DF_OFF(next_frame);
330  list_was_empty:
331        TAILQ_INSERT_HEAD(&ncdi->ncdi_frames_in, new_frame, next_frame);
332    }
333    CHECK_ORDER(ncdi);
334
335    ++ncdi->ncdi_n_frames;
336    ncdi->ncdi_byteage += DF_SIZE(new_frame);
337    *p_n_frames = count;
338
339    return INS_FRAME_OK;
340}
341
342
343static int
344check_efficiency (struct nocopy_data_in *ncdi, unsigned count)
345{
346    if (ncdi->ncdi_n_frames <= EFF_CHECK_THRESH_LOW)
347    {
348        ncdi->ncdi_cons_far = 0;
349        return 0;
350    }
351    if (ncdi->ncdi_n_frames > EFF_CHECK_THRESH_HIGH)
352        return 1;
353    if (count >= ncdi->ncdi_n_frames / 2)
354    {
355        ++ncdi->ncdi_cons_far;
356        if (ncdi->ncdi_cons_far > EFF_FAR_TRAVERSE_COUNT)
357            return 1;
358    }
359    else
360        ncdi->ncdi_cons_far = 0;
361    if (ncdi->ncdi_n_holes > EFF_MAX_HOLES)
362        return 1;
363    if (ncdi->ncdi_byteage / EFF_TINY_FRAME_SZ < ncdi->ncdi_n_frames)
364        return 1;
365    return 0;
366}
367
368
369static void
370set_eff_alert (struct nocopy_data_in *ncdi)
371{
372    LSQ_DEBUG("low efficiency: n_frames: %u; n_holes: %u; cons_far: %u; "
373              "byteage: %"PRIu64, ncdi->ncdi_n_frames, ncdi->ncdi_n_holes,
374              ncdi->ncdi_cons_far, ncdi->ncdi_byteage);
375    ncdi->ncdi_data_in.di_flags |= DI_SWITCH_IMPL;
376}
377
378
379static enum ins_frame
380nocopy_di_insert_frame (struct data_in *data_in,
381                        struct stream_frame *new_frame, uint64_t read_offset)
382{
383    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
384    unsigned count;
385    enum ins_frame ins;
386
387    assert(0 == (new_frame->data_frame.df_fin & ~1));
388    ins = insert_frame(ncdi, new_frame, read_offset, &count);
389    switch (ins)
390    {
391    case INS_FRAME_OK:
392        if (check_efficiency(ncdi, count))
393            set_eff_alert(ncdi);
394        break;
395    case INS_FRAME_DUP:
396    case INS_FRAME_ERR:
397        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in);
398        lsquic_malo_put(new_frame);
399        break;
400    default:
401        break;
402    }
403
404    return ins;
405}
406
407
408static struct data_frame *
409nocopy_di_get_frame (struct data_in *data_in, uint64_t read_offset)
410{
411    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
412    struct stream_frame *frame = TAILQ_FIRST(&ncdi->ncdi_frames_in);
413    if (frame && frame->data_frame.df_offset +
414                                frame->data_frame.df_read_off == read_offset)
415        return &frame->data_frame;
416    else
417        return NULL;
418}
419
420
421static void
422nocopy_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
423{
424    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
425    struct stream_frame *const frame = STREAM_FRAME_PTR(data_frame), *first;
426    assert(data_frame->df_read_off == data_frame->df_size);
427    TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
428    first = TAILQ_FIRST(&ncdi->ncdi_frames_in);
429    ncdi->ncdi_n_holes -= first && frame->data_frame.df_offset +
430                    frame->data_frame.df_size != first->data_frame.df_offset;
431    --ncdi->ncdi_n_frames;
432    ncdi->ncdi_byteage -= frame->data_frame.df_size;
433    lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
434    lsquic_malo_put(frame);
435}
436
437
438static int
439nocopy_di_empty (struct data_in *data_in)
440{
441    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
442    return TAILQ_EMPTY(&ncdi->ncdi_frames_in);
443}
444
445
446struct data_in *
447nocopy_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
448{
449    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
450    struct data_in *new_data_in;
451    stream_frame_t *frame;
452    enum ins_frame ins;
453
454    new_data_in = data_in_hash_new(ncdi->ncdi_conn_pub, ncdi->ncdi_stream_id,
455                                                        ncdi->ncdi_byteage);
456    if (!new_data_in)
457        goto end;
458
459    while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in)))
460    {
461        TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
462        ins = data_in_hash_insert_data_frame(new_data_in, &frame->data_frame,
463                                                                  read_offset);
464        lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
465        lsquic_malo_put(frame);
466        if (INS_FRAME_ERR == ins)
467        {
468            new_data_in->di_if->di_destroy(new_data_in);
469            new_data_in = NULL;
470            goto end;
471        }
472    }
473
474  end:
475    data_in->di_if->di_destroy(data_in);
476    return new_data_in;
477}
478
479
480/* This function overestimates amount of memory because some packets are
481 * referenced by more than one stream.  In the usual case, however, I
482 * expect the error not to be large.
483 */
484static size_t
485nocopy_di_mem_used (struct data_in *data_in)
486{
487    struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
488    const stream_frame_t *frame;
489    size_t size;
490
491    size = sizeof(*data_in);
492    TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame)
493        size += lsquic_packet_in_mem_used(frame->packet_in);
494
495    return size;
496}
497
498static const struct data_in_iface di_if_nocopy = {
499    .di_destroy      = nocopy_di_destroy,
500    .di_empty        = nocopy_di_empty,
501    .di_frame_done   = nocopy_di_frame_done,
502    .di_get_frame    = nocopy_di_get_frame,
503    .di_insert_frame = nocopy_di_insert_frame,
504    .di_mem_used     = nocopy_di_mem_used,
505    .di_switch_impl  = nocopy_di_switch_impl,
506};
507
508static const struct data_in_iface *di_if_nocopy_ptr = &di_if_nocopy;
509