lsquic_di_nocopy.c revision c51ce338
1/* Copyright (c) 2017 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_di_nocopy.c -- The "no-copy" data in stream. 4 * 5 * Data from packets is not copied: the packets are referenced by stream 6 * frames. When all data from stream frame is read, the frame is released 7 * and packet reference count is decremented, which possibly results in 8 * packet being released as well. 9 * 10 * This approach works well in regular circumstances; there are two scenarios 11 * when it does not: 12 * 13 * A. If we have many out-of-order frames, insertion into the list becomes 14 * expensive. In the degenerate case, we'd have to traverse the whole 15 * list to find appropriate position. 16 * 17 * B. Having many frames ties up resources, as each frame keeps a reference 18 * to the packet that contains it. This is a possible attack vector: 19 * send many one-byte packets; a single hole at the beginning will stop 20 * the server from being able to read the stream, thus tying up resources. 21 * 22 * If we detect that either (A) or (B) is true, we request that the stream 23 * switch to a more robust incoming stream frame handler by setting 24 * DI_SWITCH_IMPL flag. 25 * 26 * For a small number of elements, (A) and (B) do not matter and the checks 27 * are not performed. This number is defined by EFF_CHECK_THRESH_LOW. On 28 * the other side of the spectrum, if the number of frames grows very high, 29 * we want to switch to a more memory-efficient implementation even if (A) 30 * and (B) are not true. EFF_CHECK_THRESH_HIGH defines this threshold. 31 * 32 * Between the low and high thresholds, we detect efficiency problems as 33 * follows. 34 * 35 * To detect (A), we count how many elements we have to traverse during 36 * insertion. If we have to traverse at least half the list 37 * EFF_FAR_TRAVERSE_COUNT in a row, DI_SWITCH_IMPL is issued. 38 * 39 * If average stream frame size is smaller than EFF_TINY_FRAME_SZ bytes, 40 * (B) condition is true. In addition, if there are more than EFF_MAX_HOLES 41 * in the stream, this is also indicative of (B). 42 */ 43 44 45#include <assert.h> 46#include <inttypes.h> 47#include <stddef.h> 48#include <stdint.h> 49#include <stdlib.h> 50#include <sys/queue.h> 51 52#include "lsquic.h" 53#include "lsquic_types.h" 54#include "lsquic_int_types.h" 55#include "lsquic_conn_flow.h" 56#include "lsquic_packet_in.h" 57#include "lsquic_rtt.h" 58#include "lsquic_sfcw.h" 59#include "lsquic_stream.h" 60#include "lsquic_mm.h" 61#include "lsquic_malo.h" 62#include "lsquic_conn.h" 63#include "lsquic_conn_public.h" 64#include "lsquic_data_in_if.h" 65 66 67#define LSQUIC_LOGGER_MODULE LSQLM_DI 68#define LSQUIC_LOG_CONN_ID ncdi->ncdi_conn_pub->lconn->cn_cid 69#define LSQUIC_LOG_STREAM_ID ncdi->ncdi_stream_id 70#include "lsquic_logger.h" 71 72 73/* If number of elements is at or below this number, we do not bother to check 74 * efficiency conditions. 75 */ 76#define EFF_CHECK_THRESH_LOW 10 77 78/* If number of elements is higher than this number, efficiency alert 79 * is issued unconditionally. 80 */ 81#define EFF_CHECK_THRESH_HIGH 1000 82 83/* Maximum number of consecutive far traversals */ 84#define EFF_FAR_TRAVERSE_COUNT 4 85 86/* Maximum number of holes that is not deemed suspicious */ 87#define EFF_MAX_HOLES 5 88 89/* What is deemed a tiny frame, in bytes. If it is a power of two, calculation 90 * is cheaper. 91 */ 92#define EFF_TINY_FRAME_SZ 64 93 94 95TAILQ_HEAD(stream_frames_tailq, stream_frame); 96 97 98struct nocopy_data_in 99{ 100 struct stream_frames_tailq ncdi_frames_in; 101 struct data_in ncdi_data_in; 102 struct lsquic_conn_public *ncdi_conn_pub; 103 uint64_t ncdi_byteage; 104 uint32_t ncdi_stream_id; 105 unsigned ncdi_n_frames; 106 unsigned ncdi_n_holes; 107 unsigned ncdi_cons_far; 108}; 109 110 111#define NCDI_PTR(data_in) (struct nocopy_data_in *) \ 112 ((unsigned char *) (data_in) - offsetof(struct nocopy_data_in, ncdi_data_in)) 113 114#define STREAM_FRAME_PTR(data_frame) (struct stream_frame *) \ 115 ((unsigned char *) (data_frame) - offsetof(struct stream_frame, data_frame)) 116 117 118static const struct data_in_iface di_if_nocopy; 119 120 121struct data_in * 122data_in_nocopy_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id) 123{ 124 struct nocopy_data_in *ncdi; 125 126 ncdi = malloc(sizeof(*ncdi)); 127 if (!ncdi) 128 return NULL; 129 130 TAILQ_INIT(&ncdi->ncdi_frames_in); 131 ncdi->ncdi_data_in.di_if = &di_if_nocopy; 132 ncdi->ncdi_data_in.di_flags = 0; 133 ncdi->ncdi_conn_pub = conn_pub; 134 ncdi->ncdi_stream_id = stream_id; 135 ncdi->ncdi_byteage = 0; 136 ncdi->ncdi_n_frames = 0; 137 ncdi->ncdi_n_holes = 0; 138 ncdi->ncdi_cons_far = 0; 139 return &ncdi->ncdi_data_in; 140} 141 142 143static void 144nocopy_di_destroy (struct data_in *data_in) 145{ 146 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 147 stream_frame_t *frame; 148 while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in))) 149 { 150 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 151 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 152 lsquic_malo_put(frame); 153 } 154 free(ncdi); 155} 156 157 158#if 1 159#define CHECK_ORDER(ncdi) 160#else 161static int 162ordered (const struct nocopy_data_in *ncdi) 163{ 164 const stream_frame_t *frame; 165 uint64_t off = 0; 166 int ordered = 1; 167 TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame) 168 { 169 ordered &= off <= frame->data_frame.df_offset; 170 off = frame->data_frame.df_offset; 171 } 172 return ordered; 173} 174#define CHECK_ORDER(ncdi) assert(ordered(ncdi)) 175#endif 176 177 178/* To reduce the number of conditionals, logical operators have been replaced 179 * with arithmetic operators. Return value is an integer in range [0, 3]. 180 * Bit 0 is set due to FIN in previous frame. If bit 1 is set, it means that 181 * it's a dup. 182 */ 183static int 184insert_frame (struct nocopy_data_in *ncdi, struct stream_frame *new_frame, 185 uint64_t read_offset, unsigned *p_n_frames) 186{ 187 int ins; 188 unsigned count; 189 stream_frame_t *prev_frame, *next_frame; 190 191 /* Find position in the list, going backwards. We go backwards because 192 * that is the most likely scenario. 193 */ 194 next_frame = TAILQ_LAST(&ncdi->ncdi_frames_in, stream_frames_tailq); 195 if (next_frame && new_frame->data_frame.df_offset < next_frame->data_frame.df_offset) 196 { 197 count = 1; 198 prev_frame = TAILQ_PREV(next_frame, stream_frames_tailq, next_frame); 199 for ( ; prev_frame && 200 new_frame->data_frame.df_offset < next_frame->data_frame.df_offset; 201 next_frame = prev_frame, 202 prev_frame = TAILQ_PREV(prev_frame, stream_frames_tailq, next_frame)) 203 { 204 if (new_frame->data_frame.df_offset >= prev_frame->data_frame.df_offset) 205 break; 206 ++count; 207 } 208 } 209 else 210 { 211 count = 0; 212 prev_frame = NULL; 213 } 214 215 if (!prev_frame && next_frame && new_frame->data_frame.df_offset >= 216 next_frame->data_frame.df_offset) 217 { 218 prev_frame = next_frame; 219 next_frame = TAILQ_NEXT(next_frame, next_frame); 220 } 221 222 /* Perform checks */ 223 if (prev_frame) 224 ins = 225 (((prev_frame->data_frame.df_offset == new_frame->data_frame.df_offset) & 226 (prev_frame->data_frame.df_size == new_frame->data_frame.df_size) & 227 (prev_frame->data_frame.df_fin == new_frame->data_frame.df_fin)) << 1) /* Duplicate */ 228 | prev_frame->data_frame.df_fin /* FIN in the middle or dup */ 229 | (prev_frame->data_frame.df_offset + prev_frame->data_frame.df_size 230 > new_frame->data_frame.df_offset) /* Overlap */ 231 ; 232 else 233 ins = 0; 234 235 if (next_frame) 236 ins |= 237 (((next_frame->data_frame.df_offset == new_frame->data_frame.df_offset) & 238 (next_frame->data_frame.df_size == new_frame->data_frame.df_size) & 239 (next_frame->data_frame.df_fin == new_frame->data_frame.df_fin)) << 1) /* Duplicate */ 240 | (new_frame->data_frame.df_offset < read_offset) << 1 /* Duplicate */ 241 | new_frame->data_frame.df_fin /* FIN in the middle or dup */ 242 | (new_frame->data_frame.df_offset + new_frame->data_frame.df_size 243 > next_frame->data_frame.df_offset) /* Overlap */ 244 ; 245 else 246 ins |= 247 (new_frame->data_frame.df_offset < read_offset) << 1 /* Duplicate */ 248 ; 249 250 if (ins) 251 return ins; 252 253 if (prev_frame) 254 { 255 TAILQ_INSERT_AFTER(&ncdi->ncdi_frames_in, prev_frame, new_frame, next_frame); 256 ncdi->ncdi_n_holes += prev_frame->data_frame.df_offset + 257 prev_frame->data_frame.df_size != new_frame->data_frame.df_offset; 258 if (next_frame) 259 { 260 ncdi->ncdi_n_holes += new_frame->data_frame.df_offset + 261 new_frame->data_frame.df_size != next_frame->data_frame.df_offset; 262 --ncdi->ncdi_n_holes; 263 } 264 } 265 else 266 { 267 ncdi->ncdi_n_holes += next_frame && new_frame->data_frame.df_offset + 268 new_frame->data_frame.df_size != next_frame->data_frame.df_offset; 269 TAILQ_INSERT_HEAD(&ncdi->ncdi_frames_in, new_frame, next_frame); 270 } 271 CHECK_ORDER(ncdi); 272 273 ++ncdi->ncdi_n_frames; 274 ncdi->ncdi_byteage += new_frame->data_frame.df_size; 275 *p_n_frames = count; 276 277 return 0; 278} 279 280 281static int 282check_efficiency (struct nocopy_data_in *ncdi, unsigned count) 283{ 284 if (ncdi->ncdi_n_frames <= EFF_CHECK_THRESH_LOW) 285 { 286 ncdi->ncdi_cons_far = 0; 287 return 0; 288 } 289 if (ncdi->ncdi_n_frames > EFF_CHECK_THRESH_HIGH) 290 return 1; 291 if (count >= ncdi->ncdi_n_frames / 2) 292 { 293 ++ncdi->ncdi_cons_far; 294 if (ncdi->ncdi_cons_far > EFF_FAR_TRAVERSE_COUNT) 295 return 1; 296 } 297 else 298 ncdi->ncdi_cons_far = 0; 299 if (ncdi->ncdi_n_holes > EFF_MAX_HOLES) 300 return 1; 301 if (ncdi->ncdi_byteage / EFF_TINY_FRAME_SZ < ncdi->ncdi_n_frames) 302 return 1; 303 return 0; 304} 305 306 307static void 308set_eff_alert (struct nocopy_data_in *ncdi) 309{ 310 LSQ_DEBUG("low efficiency: n_frames: %u; n_holes: %u; cons_far: %u; " 311 "byteage: %"PRIu64, ncdi->ncdi_n_frames, ncdi->ncdi_n_holes, 312 ncdi->ncdi_cons_far, ncdi->ncdi_byteage); 313 ncdi->ncdi_data_in.di_flags |= DI_SWITCH_IMPL; 314} 315 316 317static enum ins_frame 318nocopy_di_insert_frame (struct data_in *data_in, 319 struct stream_frame *new_frame, uint64_t read_offset) 320{ 321 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 322 unsigned count; 323 int ins; 324 325 assert(0 == (new_frame->data_frame.df_fin & ~1)); 326 ins = insert_frame(ncdi, new_frame, read_offset, &count); 327 switch (ins) 328 { 329 case 0: 330 if (check_efficiency(ncdi, count)) 331 set_eff_alert(ncdi); 332 return INS_FRAME_OK; 333 case 2: 334 case 3: 335 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in); 336 lsquic_malo_put(new_frame); 337 return INS_FRAME_DUP; 338 default: 339 assert(1 == ins); 340 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in); 341 lsquic_malo_put(new_frame); 342 return INS_FRAME_ERR; 343 } 344} 345 346 347static struct data_frame * 348nocopy_di_get_frame (struct data_in *data_in, uint64_t read_offset) 349{ 350 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 351 struct stream_frame *frame = TAILQ_FIRST(&ncdi->ncdi_frames_in); 352 if (frame && frame->data_frame.df_offset + 353 frame->data_frame.df_read_off == read_offset) 354 return &frame->data_frame; 355 else 356 return NULL; 357} 358 359 360static void 361nocopy_di_frame_done (struct data_in *data_in, struct data_frame *data_frame) 362{ 363 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 364 struct stream_frame *const frame = STREAM_FRAME_PTR(data_frame), *first; 365 assert(data_frame->df_read_off == data_frame->df_size); 366 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 367 first = TAILQ_FIRST(&ncdi->ncdi_frames_in); 368 ncdi->ncdi_n_holes -= first && frame->data_frame.df_offset + 369 frame->data_frame.df_size != first->data_frame.df_offset; 370 --ncdi->ncdi_n_frames; 371 ncdi->ncdi_byteage -= frame->data_frame.df_size; 372 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 373 lsquic_malo_put(frame); 374} 375 376 377static int 378nocopy_di_empty (struct data_in *data_in) 379{ 380 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 381 return TAILQ_EMPTY(&ncdi->ncdi_frames_in); 382} 383 384 385struct data_in * 386nocopy_di_switch_impl (struct data_in *data_in, uint64_t read_offset) 387{ 388 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 389 struct data_in *new_data_in; 390 stream_frame_t *frame; 391 enum ins_frame ins; 392 393 new_data_in = data_in_hash_new(ncdi->ncdi_conn_pub, ncdi->ncdi_stream_id, 394 ncdi->ncdi_byteage); 395 if (!new_data_in) 396 goto end; 397 398 while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in))) 399 { 400 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 401 ins = data_in_hash_insert_data_frame(new_data_in, &frame->data_frame, 402 read_offset); 403 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 404 lsquic_malo_put(frame); 405 if (INS_FRAME_ERR == ins) 406 { 407 new_data_in->di_if->di_destroy(new_data_in); 408 new_data_in = NULL; 409 goto end; 410 } 411 } 412 413 end: 414 data_in->di_if->di_destroy(data_in); 415 return new_data_in; 416} 417 418 419/* This function overestimates amount of memory because some packets are 420 * referenced by more than one stream. In the usual case, however, I 421 * expect the error not to be large. 422 */ 423static size_t 424nocopy_di_mem_used (struct data_in *data_in) 425{ 426 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 427 const stream_frame_t *frame; 428 size_t size; 429 430 size = sizeof(*data_in); 431 TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame) 432 size += lsquic_packet_in_mem_used(frame->packet_in); 433 434 return size; 435} 436 437static const struct data_in_iface di_if_nocopy = { 438 .di_destroy = nocopy_di_destroy, 439 .di_empty = nocopy_di_empty, 440 .di_frame_done = nocopy_di_frame_done, 441 .di_get_frame = nocopy_di_get_frame, 442 .di_insert_frame = nocopy_di_insert_frame, 443 .di_mem_used = nocopy_di_mem_used, 444 .di_switch_impl = nocopy_di_switch_impl, 445}; 446