lsquic_di_nocopy.c revision 461e84d8
1/* Copyright (c) 2017 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_di_nocopy.c -- The "no-copy" data in stream. 4 * 5 * Data from packets is not copied: the packets are referenced by stream 6 * frames. When all data from stream frame is read, the frame is released 7 * and packet reference count is decremented, which possibly results in 8 * packet being released as well. 9 * 10 * This approach works well in regular circumstances; there are two scenarios 11 * when it does not: 12 * 13 * A. If we have many out-of-order frames, insertion into the list becomes 14 * expensive. In the degenerate case, we'd have to traverse the whole 15 * list to find appropriate position. 16 * 17 * B. Having many frames ties up resources, as each frame keeps a reference 18 * to the packet that contains it. This is a possible attack vector: 19 * send many one-byte packets; a single hole at the beginning will stop 20 * the server from being able to read the stream, thus tying up resources. 21 * 22 * If we detect that either (A) or (B) is true, we request that the stream 23 * switch to a more robust incoming stream frame handler by setting 24 * DI_SWITCH_IMPL flag. 25 * 26 * For a small number of elements, (A) and (B) do not matter and the checks 27 * are not performed. This number is defined by EFF_CHECK_THRESH_LOW. On 28 * the other side of the spectrum, if the number of frames grows very high, 29 * we want to switch to a more memory-efficient implementation even if (A) 30 * and (B) are not true. EFF_CHECK_THRESH_HIGH defines this threshold. 31 * 32 * Between the low and high thresholds, we detect efficiency problems as 33 * follows. 34 * 35 * To detect (A), we count how many elements we have to traverse during 36 * insertion. If we have to traverse at least half the list 37 * EFF_FAR_TRAVERSE_COUNT in a row, DI_SWITCH_IMPL is issued. 38 * 39 * If average stream frame size is smaller than EFF_TINY_FRAME_SZ bytes, 40 * (B) condition is true. In addition, if there are more than EFF_MAX_HOLES 41 * in the stream, this is also indicative of (B). 42 */ 43 44 45#include <assert.h> 46#include <inttypes.h> 47#include <stddef.h> 48#include <stdint.h> 49#include <stdlib.h> 50#include <sys/queue.h> 51 52#include "lsquic.h" 53#include "lsquic_types.h" 54#include "lsquic_int_types.h" 55#include "lsquic_conn_flow.h" 56#include "lsquic_packet_common.h" 57#include "lsquic_packet_in.h" 58#include "lsquic_rtt.h" 59#include "lsquic_sfcw.h" 60#include "lsquic_stream.h" 61#include "lsquic_mm.h" 62#include "lsquic_malo.h" 63#include "lsquic_conn.h" 64#include "lsquic_conn_public.h" 65#include "lsquic_data_in_if.h" 66 67 68#define LSQUIC_LOGGER_MODULE LSQLM_DI 69#define LSQUIC_LOG_CONN_ID ncdi->ncdi_conn_pub->lconn->cn_cid 70#define LSQUIC_LOG_STREAM_ID ncdi->ncdi_stream_id 71#include "lsquic_logger.h" 72 73 74/* If number of elements is at or below this number, we do not bother to check 75 * efficiency conditions. 76 */ 77#define EFF_CHECK_THRESH_LOW 10 78 79/* If number of elements is higher than this number, efficiency alert 80 * is issued unconditionally. 81 */ 82#define EFF_CHECK_THRESH_HIGH 1000 83 84/* Maximum number of consecutive far traversals */ 85#define EFF_FAR_TRAVERSE_COUNT 4 86 87/* Maximum number of holes that is not deemed suspicious */ 88#define EFF_MAX_HOLES 5 89 90/* What is deemed a tiny frame, in bytes. If it is a power of two, calculation 91 * is cheaper. 92 */ 93#define EFF_TINY_FRAME_SZ 64 94 95 96TAILQ_HEAD(stream_frames_tailq, stream_frame); 97 98 99struct nocopy_data_in 100{ 101 struct stream_frames_tailq ncdi_frames_in; 102 struct data_in ncdi_data_in; 103 struct lsquic_conn_public *ncdi_conn_pub; 104 uint64_t ncdi_byteage; 105 uint32_t ncdi_stream_id; 106 unsigned ncdi_n_frames; 107 unsigned ncdi_n_holes; 108 unsigned ncdi_cons_far; 109}; 110 111 112#define NCDI_PTR(data_in) (struct nocopy_data_in *) \ 113 ((unsigned char *) (data_in) - offsetof(struct nocopy_data_in, ncdi_data_in)) 114 115#define STREAM_FRAME_PTR(data_frame) (struct stream_frame *) \ 116 ((unsigned char *) (data_frame) - offsetof(struct stream_frame, data_frame)) 117 118 119 120 121 122static void 123nocopy_di_destroy (struct data_in *data_in) 124{ 125 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 126 stream_frame_t *frame; 127 while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in))) 128 { 129 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 130 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 131 lsquic_malo_put(frame); 132 } 133 free(ncdi); 134} 135 136 137#if 1 138#define CHECK_ORDER(ncdi) 139#else 140static int 141ordered (const struct nocopy_data_in *ncdi) 142{ 143 const stream_frame_t *frame; 144 uint64_t off = 0; 145 int ordered = 1; 146 TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame) 147 { 148 ordered &= off <= frame->data_frame.df_offset; 149 off = frame->data_frame.df_offset; 150 } 151 return ordered; 152} 153#define CHECK_ORDER(ncdi) assert(ordered(ncdi)) 154#endif 155 156 157/* To reduce the number of conditionals, logical operators have been replaced 158 * with arithmetic operators. Return value is an integer in range [0, 3]. 159 * Bit 0 is set due to FIN in previous frame. If bit 1 is set, it means that 160 * it's a dup. 161 */ 162static int 163insert_frame (struct nocopy_data_in *ncdi, struct stream_frame *new_frame, 164 uint64_t read_offset, unsigned *p_n_frames) 165{ 166 int ins; 167 unsigned count; 168 stream_frame_t *prev_frame, *next_frame; 169 170 /* Find position in the list, going backwards. We go backwards because 171 * that is the most likely scenario. 172 */ 173 next_frame = TAILQ_LAST(&ncdi->ncdi_frames_in, stream_frames_tailq); 174 if (next_frame && new_frame->data_frame.df_offset < next_frame->data_frame.df_offset) 175 { 176 count = 1; 177 prev_frame = TAILQ_PREV(next_frame, stream_frames_tailq, next_frame); 178 for ( ; prev_frame && 179 new_frame->data_frame.df_offset < next_frame->data_frame.df_offset; 180 next_frame = prev_frame, 181 prev_frame = TAILQ_PREV(prev_frame, stream_frames_tailq, next_frame)) 182 { 183 if (new_frame->data_frame.df_offset >= prev_frame->data_frame.df_offset) 184 break; 185 ++count; 186 } 187 } 188 else 189 { 190 count = 0; 191 prev_frame = NULL; 192 } 193 194 if (!prev_frame && next_frame && new_frame->data_frame.df_offset >= 195 next_frame->data_frame.df_offset) 196 { 197 prev_frame = next_frame; 198 next_frame = TAILQ_NEXT(next_frame, next_frame); 199 } 200 201 /* Perform checks */ 202 if (prev_frame) 203 ins = 204 (((prev_frame->data_frame.df_offset == new_frame->data_frame.df_offset) & 205 (prev_frame->data_frame.df_size == new_frame->data_frame.df_size) & 206 (prev_frame->data_frame.df_fin == new_frame->data_frame.df_fin)) << 1) /* Duplicate */ 207 | prev_frame->data_frame.df_fin /* FIN in the middle or dup */ 208 | (prev_frame->data_frame.df_offset + prev_frame->data_frame.df_size 209 > new_frame->data_frame.df_offset) /* Overlap */ 210 ; 211 else 212 ins = 0; 213 214 if (next_frame) 215 ins |= 216 (((next_frame->data_frame.df_offset == new_frame->data_frame.df_offset) & 217 (next_frame->data_frame.df_size == new_frame->data_frame.df_size) & 218 (next_frame->data_frame.df_fin == new_frame->data_frame.df_fin)) << 1) /* Duplicate */ 219 | (new_frame->data_frame.df_offset < read_offset) << 1 /* Duplicate */ 220 | new_frame->data_frame.df_fin /* FIN in the middle or dup */ 221 | (new_frame->data_frame.df_offset + new_frame->data_frame.df_size 222 > next_frame->data_frame.df_offset) /* Overlap */ 223 ; 224 else 225 ins |= 226 (new_frame->data_frame.df_offset < read_offset) << 1 /* Duplicate */ 227 ; 228 229 if (ins) 230 return ins; 231 232 if (prev_frame) 233 { 234 TAILQ_INSERT_AFTER(&ncdi->ncdi_frames_in, prev_frame, new_frame, next_frame); 235 ncdi->ncdi_n_holes += prev_frame->data_frame.df_offset + 236 prev_frame->data_frame.df_size != new_frame->data_frame.df_offset; 237 if (next_frame) 238 { 239 ncdi->ncdi_n_holes += new_frame->data_frame.df_offset + 240 new_frame->data_frame.df_size != next_frame->data_frame.df_offset; 241 --ncdi->ncdi_n_holes; 242 } 243 } 244 else 245 { 246 ncdi->ncdi_n_holes += next_frame && new_frame->data_frame.df_offset + 247 new_frame->data_frame.df_size != next_frame->data_frame.df_offset; 248 TAILQ_INSERT_HEAD(&ncdi->ncdi_frames_in, new_frame, next_frame); 249 } 250 CHECK_ORDER(ncdi); 251 252 ++ncdi->ncdi_n_frames; 253 ncdi->ncdi_byteage += new_frame->data_frame.df_size; 254 *p_n_frames = count; 255 256 return 0; 257} 258 259 260static int 261check_efficiency (struct nocopy_data_in *ncdi, unsigned count) 262{ 263 if (ncdi->ncdi_n_frames <= EFF_CHECK_THRESH_LOW) 264 { 265 ncdi->ncdi_cons_far = 0; 266 return 0; 267 } 268 if (ncdi->ncdi_n_frames > EFF_CHECK_THRESH_HIGH) 269 return 1; 270 if (count >= ncdi->ncdi_n_frames / 2) 271 { 272 ++ncdi->ncdi_cons_far; 273 if (ncdi->ncdi_cons_far > EFF_FAR_TRAVERSE_COUNT) 274 return 1; 275 } 276 else 277 ncdi->ncdi_cons_far = 0; 278 if (ncdi->ncdi_n_holes > EFF_MAX_HOLES) 279 return 1; 280 if (ncdi->ncdi_byteage / EFF_TINY_FRAME_SZ < ncdi->ncdi_n_frames) 281 return 1; 282 return 0; 283} 284 285 286static void 287set_eff_alert (struct nocopy_data_in *ncdi) 288{ 289 LSQ_DEBUG("low efficiency: n_frames: %u; n_holes: %u; cons_far: %u; " 290 "byteage: %"PRIu64, ncdi->ncdi_n_frames, ncdi->ncdi_n_holes, 291 ncdi->ncdi_cons_far, ncdi->ncdi_byteage); 292 ncdi->ncdi_data_in.di_flags |= DI_SWITCH_IMPL; 293} 294 295 296static enum ins_frame 297nocopy_di_insert_frame (struct data_in *data_in, 298 struct stream_frame *new_frame, uint64_t read_offset) 299{ 300 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 301 unsigned count; 302 int ins; 303 304 assert(0 == (new_frame->data_frame.df_fin & ~1)); 305 ins = insert_frame(ncdi, new_frame, read_offset, &count); 306 switch (ins) 307 { 308 case 0: 309 if (check_efficiency(ncdi, count)) 310 set_eff_alert(ncdi); 311 return INS_FRAME_OK; 312 case 2: 313 case 3: 314 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in); 315 lsquic_malo_put(new_frame); 316 return INS_FRAME_DUP; 317 default: 318 assert(1 == ins); 319 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in); 320 lsquic_malo_put(new_frame); 321 return INS_FRAME_ERR; 322 } 323} 324 325 326static struct data_frame * 327nocopy_di_get_frame (struct data_in *data_in, uint64_t read_offset) 328{ 329 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 330 struct stream_frame *frame = TAILQ_FIRST(&ncdi->ncdi_frames_in); 331 if (frame && frame->data_frame.df_offset + 332 frame->data_frame.df_read_off == read_offset) 333 return &frame->data_frame; 334 else 335 return NULL; 336} 337 338 339static void 340nocopy_di_frame_done (struct data_in *data_in, struct data_frame *data_frame) 341{ 342 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 343 struct stream_frame *const frame = STREAM_FRAME_PTR(data_frame), *first; 344 assert(data_frame->df_read_off == data_frame->df_size); 345 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 346 first = TAILQ_FIRST(&ncdi->ncdi_frames_in); 347 ncdi->ncdi_n_holes -= first && frame->data_frame.df_offset + 348 frame->data_frame.df_size != first->data_frame.df_offset; 349 --ncdi->ncdi_n_frames; 350 ncdi->ncdi_byteage -= frame->data_frame.df_size; 351 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 352 lsquic_malo_put(frame); 353} 354 355 356static int 357nocopy_di_empty (struct data_in *data_in) 358{ 359 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 360 return TAILQ_EMPTY(&ncdi->ncdi_frames_in); 361} 362 363 364struct data_in * 365nocopy_di_switch_impl (struct data_in *data_in, uint64_t read_offset) 366{ 367 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 368 struct data_in *new_data_in; 369 stream_frame_t *frame; 370 enum ins_frame ins; 371 372 new_data_in = data_in_hash_new(ncdi->ncdi_conn_pub, ncdi->ncdi_stream_id, 373 ncdi->ncdi_byteage); 374 if (!new_data_in) 375 goto end; 376 377 while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in))) 378 { 379 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 380 ins = data_in_hash_insert_data_frame(new_data_in, &frame->data_frame, 381 read_offset); 382 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 383 lsquic_malo_put(frame); 384 if (INS_FRAME_ERR == ins) 385 { 386 new_data_in->di_if->di_destroy(new_data_in); 387 new_data_in = NULL; 388 goto end; 389 } 390 } 391 392 end: 393 data_in->di_if->di_destroy(data_in); 394 return new_data_in; 395} 396 397 398/* This function overestimates amount of memory because some packets are 399 * referenced by more than one stream. In the usual case, however, I 400 * expect the error not to be large. 401 */ 402static size_t 403nocopy_di_mem_used (struct data_in *data_in) 404{ 405 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 406 const stream_frame_t *frame; 407 size_t size; 408 409 size = sizeof(*data_in); 410 TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame) 411 size += lsquic_packet_in_mem_used(frame->packet_in); 412 413 return size; 414} 415 416static const struct data_in_iface di_if_nocopy = { 417 .di_destroy = nocopy_di_destroy, 418 .di_empty = nocopy_di_empty, 419 .di_frame_done = nocopy_di_frame_done, 420 .di_get_frame = nocopy_di_get_frame, 421 .di_insert_frame = nocopy_di_insert_frame, 422 .di_mem_used = nocopy_di_mem_used, 423 .di_switch_impl = nocopy_di_switch_impl, 424}; 425struct data_in * 426data_in_nocopy_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id) 427{ 428 struct nocopy_data_in *ncdi; 429 430 ncdi = malloc(sizeof(*ncdi)); 431 if (!ncdi) 432 return NULL; 433 434 TAILQ_INIT(&ncdi->ncdi_frames_in); 435 ncdi->ncdi_data_in.di_if = &di_if_nocopy; 436 ncdi->ncdi_data_in.di_flags = 0; 437 ncdi->ncdi_conn_pub = conn_pub; 438 ncdi->ncdi_stream_id = stream_id; 439 ncdi->ncdi_byteage = 0; 440 ncdi->ncdi_n_frames = 0; 441 ncdi->ncdi_n_holes = 0; 442 ncdi->ncdi_cons_far = 0; 443 return &ncdi->ncdi_data_in; 444} 445 446