lsquic_di_nocopy.c revision be4cfad0
1/* Copyright (c) 2017 - 2018 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_di_nocopy.c -- The "no-copy" data in stream. 4 * 5 * Data from packets is not copied: the packets are referenced by stream 6 * frames. When all data from stream frame is read, the frame is released 7 * and packet reference count is decremented, which possibly results in 8 * packet being released as well. 9 * 10 * This approach works well in regular circumstances; there are two scenarios 11 * when it does not: 12 * 13 * A. If we have many out-of-order frames, insertion into the list becomes 14 * expensive. In the degenerate case, we'd have to traverse the whole 15 * list to find appropriate position. 16 * 17 * B. Having many frames ties up resources, as each frame keeps a reference 18 * to the packet that contains it. This is a possible attack vector: 19 * send many one-byte packets; a single hole at the beginning will stop 20 * the server from being able to read the stream, thus tying up resources. 21 * 22 * If we detect that either (A) or (B) is true, we request that the stream 23 * switch to a more robust incoming stream frame handler by setting 24 * DI_SWITCH_IMPL flag. 25 * 26 * For a small number of elements, (A) and (B) do not matter and the checks 27 * are not performed. This number is defined by EFF_CHECK_THRESH_LOW. On 28 * the other side of the spectrum, if the number of frames grows very high, 29 * we want to switch to a more memory-efficient implementation even if (A) 30 * and (B) are not true. EFF_CHECK_THRESH_HIGH defines this threshold. 31 * 32 * Between the low and high thresholds, we detect efficiency problems as 33 * follows. 34 * 35 * To detect (A), we count how many elements we have to traverse during 36 * insertion. If we have to traverse at least half the list 37 * EFF_FAR_TRAVERSE_COUNT in a row, DI_SWITCH_IMPL is issued. 38 * 39 * If average stream frame size is smaller than EFF_TINY_FRAME_SZ bytes, 40 * (B) condition is true. In addition, if there are more than EFF_MAX_HOLES 41 * in the stream, this is also indicative of (B). 42 */ 43 44 45#include <assert.h> 46#include <inttypes.h> 47#include <stddef.h> 48#include <stdint.h> 49#include <stdlib.h> 50#include <sys/queue.h> 51 52#include "lsquic.h" 53#include "lsquic_types.h" 54#include "lsquic_int_types.h" 55#include "lsquic_conn_flow.h" 56#include "lsquic_packet_common.h" 57#include "lsquic_packet_in.h" 58#include "lsquic_rtt.h" 59#include "lsquic_sfcw.h" 60#include "lsquic_stream.h" 61#include "lsquic_mm.h" 62#include "lsquic_malo.h" 63#include "lsquic_conn.h" 64#include "lsquic_conn_public.h" 65#include "lsquic_data_in_if.h" 66 67 68#define LSQUIC_LOGGER_MODULE LSQLM_DI 69#define LSQUIC_LOG_CONN_ID ncdi->ncdi_conn_pub->lconn->cn_cid 70#define LSQUIC_LOG_STREAM_ID ncdi->ncdi_stream_id 71#include "lsquic_logger.h" 72 73 74/* If number of elements is at or below this number, we do not bother to check 75 * efficiency conditions. 76 */ 77#define EFF_CHECK_THRESH_LOW 10 78 79/* If number of elements is higher than this number, efficiency alert 80 * is issued unconditionally. 81 */ 82#define EFF_CHECK_THRESH_HIGH 1000 83 84/* Maximum number of consecutive far traversals */ 85#define EFF_FAR_TRAVERSE_COUNT 4 86 87/* Maximum number of holes that is not deemed suspicious */ 88#define EFF_MAX_HOLES 5 89 90/* What is deemed a tiny frame, in bytes. If it is a power of two, calculation 91 * is cheaper. 92 */ 93#define EFF_TINY_FRAME_SZ 64 94 95 96TAILQ_HEAD(stream_frames_tailq, stream_frame); 97 98 99struct nocopy_data_in 100{ 101 struct stream_frames_tailq ncdi_frames_in; 102 struct data_in ncdi_data_in; 103 struct lsquic_conn_public *ncdi_conn_pub; 104 uint64_t ncdi_byteage; 105 uint32_t ncdi_stream_id; 106 unsigned ncdi_n_frames; 107 unsigned ncdi_n_holes; 108 unsigned ncdi_cons_far; 109}; 110 111 112#define NCDI_PTR(data_in) (struct nocopy_data_in *) \ 113 ((unsigned char *) (data_in) - offsetof(struct nocopy_data_in, ncdi_data_in)) 114 115#define STREAM_FRAME_PTR(data_frame) (struct stream_frame *) \ 116 ((unsigned char *) (data_frame) - offsetof(struct stream_frame, data_frame)) 117 118 119static const struct data_in_iface *di_if_nocopy_ptr; 120 121 122struct data_in * 123data_in_nocopy_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id) 124{ 125 struct nocopy_data_in *ncdi; 126 127 ncdi = malloc(sizeof(*ncdi)); 128 if (!ncdi) 129 return NULL; 130 131 TAILQ_INIT(&ncdi->ncdi_frames_in); 132 ncdi->ncdi_data_in.di_if = di_if_nocopy_ptr; 133 ncdi->ncdi_data_in.di_flags = 0; 134 ncdi->ncdi_conn_pub = conn_pub; 135 ncdi->ncdi_stream_id = stream_id; 136 ncdi->ncdi_byteage = 0; 137 ncdi->ncdi_n_frames = 0; 138 ncdi->ncdi_n_holes = 0; 139 ncdi->ncdi_cons_far = 0; 140 return &ncdi->ncdi_data_in; 141} 142 143 144static void 145nocopy_di_destroy (struct data_in *data_in) 146{ 147 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 148 stream_frame_t *frame; 149 while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in))) 150 { 151 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 152 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 153 lsquic_malo_put(frame); 154 } 155 free(ncdi); 156} 157 158#define DF_OFF(frame) (frame)->data_frame.df_offset 159#define DF_FIN(frame) (frame)->data_frame.df_fin 160#define DF_SIZE(frame) (frame)->data_frame.df_size 161#define DF_END(frame) (DF_OFF(frame) + DF_SIZE(frame)) 162 163 164#if LSQUIC_EXTRA_CHECKS 165static int 166frame_list_is_sane (const struct nocopy_data_in *ncdi) 167{ 168 const stream_frame_t *frame; 169 uint64_t prev_off = 0, prev_end = 0; 170 int ordered = 1, overlaps = 0; 171 TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame) 172 { 173 ordered &= prev_off <= DF_OFF(frame); 174 overlaps |= prev_end > DF_OFF(frame); 175 prev_off = DF_OFF(frame); 176 prev_end = DF_END(frame); 177 } 178 return ordered && !overlaps; 179} 180#define CHECK_ORDER(ncdi) assert(frame_list_is_sane(ncdi)) 181#else 182#define CHECK_ORDER(ncdi) 183#endif 184 185 186/* When inserting a new frame into the frame list, there are four cases to 187 * consider: 188 * 189 * I. New frame is the only frame in the list; 190 * II. New frame only has a neighbor to its left (previous frame); 191 * III. New frame only has a neighbor to its right (next frame); and 192 * IV. New frame has both left and right neighbors. 193 * 194 * I. New frame is the only frame in the list. 195 * 196 * A) If the read offset is larger than the end of the new frame and 197 * the new frame has a FIN, it is an ERROR. 198 * 199 * B) If the read offset is larger than the end of the new frame and 200 * the new frame does not have a FIN, it is a DUP. 201 * 202 * C) If the read offset is equal to the end of the new frame and 203 * the new frame has a FIN, it is an OVERLAP. 204 * 205 * D) If the read offset is equal to the end of the new frame and 206 * the new frame does not have a FIN, it is a DUP. 207 * 208 * II. New frame only has a neighbor to its left. 209 * 210 * - (A) and (B) apply. 211 * 212 * E) New frame could be the same as the previous frame: DUP. 213 * 214 * F) New frame has the same offset and size as previous frame, but 215 * previous frame has FIN and the new frame does not: DUP. 216 * 217 * G) New frame has the same offset and size as previous frame, but 218 * previous frame does not have a FIN and the new one does: OVERLAP. 219 * 220 * H) New frame could start inside previous frame: OVERLAP. 221 * 222 * III. New frame only has a neighbor to its right. 223 * 224 * - (A) and (B) apply. 225 * 226 * I) Right neighbor could start inside new frame: OVERLAP. 227 * 228 * IV. New frame has both left and right neighbors. 229 * 230 * - (A), (B), (E), (F), (G), (H), and (I) apply. 231 */ 232 233 234static enum ins_frame 235insert_frame (struct nocopy_data_in *ncdi, struct stream_frame *new_frame, 236 uint64_t read_offset, unsigned *p_n_frames) 237{ 238 stream_frame_t *prev_frame, *next_frame; 239 unsigned count; 240 241 if (read_offset > DF_END(new_frame)) 242 { 243 if (DF_FIN(new_frame)) 244 return INS_FRAME_ERR; /* Case (A) */ 245 else 246 return INS_FRAME_DUP; /* Case (B) */ 247 } 248 249 /* Find position in the list, going backwards. We go backwards because 250 * that is the most likely scenario. 251 */ 252 next_frame = TAILQ_LAST(&ncdi->ncdi_frames_in, stream_frames_tailq); 253 if (next_frame && DF_OFF(new_frame) < DF_OFF(next_frame)) 254 { 255 count = 1; 256 prev_frame = TAILQ_PREV(next_frame, stream_frames_tailq, next_frame); 257 for ( ; prev_frame && DF_OFF(new_frame) < DF_OFF(next_frame); 258 next_frame = prev_frame, 259 prev_frame = TAILQ_PREV(prev_frame, stream_frames_tailq, next_frame)) 260 { 261 if (DF_OFF(new_frame) >= DF_OFF(prev_frame)) 262 break; 263 ++count; 264 } 265 } 266 else 267 { 268 count = 0; 269 prev_frame = NULL; 270 } 271 272 if (!prev_frame && next_frame && DF_OFF(new_frame) >= DF_OFF(next_frame)) 273 { 274 prev_frame = next_frame; 275 next_frame = TAILQ_NEXT(next_frame, next_frame); 276 } 277 278 const int select = !!prev_frame << 1 | !!next_frame; 279 switch (select) 280 { 281 default: /* No neighbors */ 282 if (read_offset == DF_END(new_frame) && DF_SIZE(new_frame)) 283 { 284 if (DF_FIN(new_frame)) 285 return INS_FRAME_OVERLAP; /* Case (C) */ 286 else 287 return INS_FRAME_DUP; /* Case (D) */ 288 } 289 goto list_was_empty; 290 case 3: /* Both left and right neighbors */ 291 case 2: /* Only left neighbor (prev_frame) */ 292 if (DF_OFF(prev_frame) == DF_OFF(new_frame) 293 && DF_SIZE(prev_frame) == DF_SIZE(new_frame)) 294 { 295 if (!DF_FIN(prev_frame) && DF_FIN(new_frame)) 296 return INS_FRAME_OVERLAP; /* Case (G) */ 297 else 298 return INS_FRAME_DUP; /* Cases (E) and (F) */ 299 } 300 if (DF_END(prev_frame) > DF_OFF(new_frame)) 301 return INS_FRAME_OVERLAP; /* Case (H) */ 302 if (select == 2) 303 { 304 if (DF_FIN(prev_frame)) 305 return INS_FRAME_ERR; 306 goto have_prev; 307 } 308 /* Fall-through */ 309 case 1: /* Only right neighbor (next_frame) */ 310 if (DF_END(new_frame) > DF_OFF(next_frame)) 311 return INS_FRAME_OVERLAP; /* Case (I) */ 312 break; 313 } 314 315 if (prev_frame) 316 { 317 have_prev: 318 TAILQ_INSERT_AFTER(&ncdi->ncdi_frames_in, prev_frame, new_frame, next_frame); 319 ncdi->ncdi_n_holes += DF_END(prev_frame) != DF_OFF(new_frame); 320 if (next_frame) 321 { 322 ncdi->ncdi_n_holes += DF_END(new_frame) != DF_OFF(next_frame); 323 --ncdi->ncdi_n_holes; 324 } 325 } 326 else 327 { 328 ncdi->ncdi_n_holes += next_frame 329 && DF_END(new_frame) != DF_OFF(next_frame); 330 list_was_empty: 331 TAILQ_INSERT_HEAD(&ncdi->ncdi_frames_in, new_frame, next_frame); 332 } 333 CHECK_ORDER(ncdi); 334 335 ++ncdi->ncdi_n_frames; 336 ncdi->ncdi_byteage += DF_SIZE(new_frame); 337 *p_n_frames = count; 338 339 return INS_FRAME_OK; 340} 341 342 343static int 344check_efficiency (struct nocopy_data_in *ncdi, unsigned count) 345{ 346 if (ncdi->ncdi_n_frames <= EFF_CHECK_THRESH_LOW) 347 { 348 ncdi->ncdi_cons_far = 0; 349 return 0; 350 } 351 if (ncdi->ncdi_n_frames > EFF_CHECK_THRESH_HIGH) 352 return 1; 353 if (count >= ncdi->ncdi_n_frames / 2) 354 { 355 ++ncdi->ncdi_cons_far; 356 if (ncdi->ncdi_cons_far > EFF_FAR_TRAVERSE_COUNT) 357 return 1; 358 } 359 else 360 ncdi->ncdi_cons_far = 0; 361 if (ncdi->ncdi_n_holes > EFF_MAX_HOLES) 362 return 1; 363 if (ncdi->ncdi_byteage / EFF_TINY_FRAME_SZ < ncdi->ncdi_n_frames) 364 return 1; 365 return 0; 366} 367 368 369static void 370set_eff_alert (struct nocopy_data_in *ncdi) 371{ 372 LSQ_DEBUG("low efficiency: n_frames: %u; n_holes: %u; cons_far: %u; " 373 "byteage: %"PRIu64, ncdi->ncdi_n_frames, ncdi->ncdi_n_holes, 374 ncdi->ncdi_cons_far, ncdi->ncdi_byteage); 375 ncdi->ncdi_data_in.di_flags |= DI_SWITCH_IMPL; 376} 377 378 379static enum ins_frame 380nocopy_di_insert_frame (struct data_in *data_in, 381 struct stream_frame *new_frame, uint64_t read_offset) 382{ 383 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 384 unsigned count; 385 enum ins_frame ins; 386 387 assert(0 == (new_frame->data_frame.df_fin & ~1)); 388 ins = insert_frame(ncdi, new_frame, read_offset, &count); 389 switch (ins) 390 { 391 case INS_FRAME_OK: 392 if (check_efficiency(ncdi, count)) 393 set_eff_alert(ncdi); 394 break; 395 case INS_FRAME_DUP: 396 case INS_FRAME_ERR: 397 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in); 398 lsquic_malo_put(new_frame); 399 break; 400 default: 401 break; 402 } 403 404 return ins; 405} 406 407 408static struct data_frame * 409nocopy_di_get_frame (struct data_in *data_in, uint64_t read_offset) 410{ 411 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 412 struct stream_frame *frame = TAILQ_FIRST(&ncdi->ncdi_frames_in); 413 if (frame && frame->data_frame.df_offset + 414 frame->data_frame.df_read_off == read_offset) 415 return &frame->data_frame; 416 else 417 return NULL; 418} 419 420 421static void 422nocopy_di_frame_done (struct data_in *data_in, struct data_frame *data_frame) 423{ 424 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 425 struct stream_frame *const frame = STREAM_FRAME_PTR(data_frame), *first; 426 assert(data_frame->df_read_off == data_frame->df_size); 427 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 428 first = TAILQ_FIRST(&ncdi->ncdi_frames_in); 429 ncdi->ncdi_n_holes -= first && frame->data_frame.df_offset + 430 frame->data_frame.df_size != first->data_frame.df_offset; 431 --ncdi->ncdi_n_frames; 432 ncdi->ncdi_byteage -= frame->data_frame.df_size; 433 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 434 lsquic_malo_put(frame); 435} 436 437 438static int 439nocopy_di_empty (struct data_in *data_in) 440{ 441 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 442 return TAILQ_EMPTY(&ncdi->ncdi_frames_in); 443} 444 445 446struct data_in * 447nocopy_di_switch_impl (struct data_in *data_in, uint64_t read_offset) 448{ 449 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 450 struct data_in *new_data_in; 451 stream_frame_t *frame; 452 enum ins_frame ins; 453 454 new_data_in = data_in_hash_new(ncdi->ncdi_conn_pub, ncdi->ncdi_stream_id, 455 ncdi->ncdi_byteage); 456 if (!new_data_in) 457 goto end; 458 459 while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in))) 460 { 461 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 462 ins = data_in_hash_insert_data_frame(new_data_in, &frame->data_frame, 463 read_offset); 464 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 465 lsquic_malo_put(frame); 466 if (INS_FRAME_ERR == ins) 467 { 468 new_data_in->di_if->di_destroy(new_data_in); 469 new_data_in = NULL; 470 goto end; 471 } 472 } 473 474 end: 475 data_in->di_if->di_destroy(data_in); 476 return new_data_in; 477} 478 479 480/* This function overestimates amount of memory because some packets are 481 * referenced by more than one stream. In the usual case, however, I 482 * expect the error not to be large. 483 */ 484static size_t 485nocopy_di_mem_used (struct data_in *data_in) 486{ 487 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 488 const stream_frame_t *frame; 489 size_t size; 490 491 size = sizeof(*data_in); 492 TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame) 493 size += lsquic_packet_in_mem_used(frame->packet_in); 494 495 return size; 496} 497 498static const struct data_in_iface di_if_nocopy = { 499 .di_destroy = nocopy_di_destroy, 500 .di_empty = nocopy_di_empty, 501 .di_frame_done = nocopy_di_frame_done, 502 .di_get_frame = nocopy_di_get_frame, 503 .di_insert_frame = nocopy_di_insert_frame, 504 .di_mem_used = nocopy_di_mem_used, 505 .di_switch_impl = nocopy_di_switch_impl, 506}; 507 508static const struct data_in_iface *di_if_nocopy_ptr = &di_if_nocopy; 509