lsquic_di_nocopy.c revision b93f59be
1/* Copyright (c) 2017 LiteSpeed Technologies Inc. See LICENSE. */ 2/* 3 * lsquic_di_nocopy.c -- The "no-copy" data in stream. 4 * 5 * Data from packets is not copied: the packets are referenced by stream 6 * frames. When all data from stream frame is read, the frame is released 7 * and packet reference count is decremented, which possibly results in 8 * packet being released as well. 9 * 10 * This approach works well in regular circumstances; there are two scenarios 11 * when it does not: 12 * 13 * A. If we have many out-of-order frames, insertion into the list becomes 14 * expensive. In the degenerate case, we'd have to traverse the whole 15 * list to find appropriate position. 16 * 17 * B. Having many frames ties up resources, as each frame keeps a reference 18 * to the packet that contains it. This is a possible attack vector: 19 * send many one-byte packets; a single hole at the beginning will stop 20 * the server from being able to read the stream, thus tying up resources. 21 * 22 * If we detect that either (A) or (B) is true, we request that the stream 23 * switch to a more robust incoming stream frame handler by setting 24 * DI_SWITCH_IMPL flag. 25 * 26 * For a small number of elements, (A) and (B) do not matter and the checks 27 * are not performed. This number is defined by EFF_CHECK_THRESH_LOW. On 28 * the other side of the spectrum, if the number of frames grows very high, 29 * we want to switch to a more memory-efficient implementation even if (A) 30 * and (B) are not true. EFF_CHECK_THRESH_HIGH defines this threshold. 31 * 32 * Between the low and high thresholds, we detect efficiency problems as 33 * follows. 34 * 35 * To detect (A), we count how many elements we have to traverse during 36 * insertion. If we have to traverse at least half the list 37 * EFF_FAR_TRAVERSE_COUNT in a row, DI_SWITCH_IMPL is issued. 38 * 39 * If average stream frame size is smaller than EFF_TINY_FRAME_SZ bytes, 40 * (B) condition is true. In addition, if there are more than EFF_MAX_HOLES 41 * in the stream, this is also indicative of (B). 42 */ 43 44 45#include <assert.h> 46#include <inttypes.h> 47#include <stddef.h> 48#include <stdint.h> 49#include <stdlib.h> 50#include <sys/queue.h> 51 52#include "lsquic.h" 53#include "lsquic_types.h" 54#include "lsquic_int_types.h" 55#include "lsquic_conn_flow.h" 56#include "lsquic_packet_common.h" 57#include "lsquic_packet_in.h" 58#include "lsquic_rtt.h" 59#include "lsquic_sfcw.h" 60#include "lsquic_stream.h" 61#include "lsquic_mm.h" 62#include "lsquic_malo.h" 63#include "lsquic_conn.h" 64#include "lsquic_conn_public.h" 65#include "lsquic_data_in_if.h" 66 67 68#define LSQUIC_LOGGER_MODULE LSQLM_DI 69#define LSQUIC_LOG_CONN_ID ncdi->ncdi_conn_pub->lconn->cn_cid 70#define LSQUIC_LOG_STREAM_ID ncdi->ncdi_stream_id 71#include "lsquic_logger.h" 72 73 74/* If number of elements is at or below this number, we do not bother to check 75 * efficiency conditions. 76 */ 77#define EFF_CHECK_THRESH_LOW 10 78 79/* If number of elements is higher than this number, efficiency alert 80 * is issued unconditionally. 81 */ 82#define EFF_CHECK_THRESH_HIGH 1000 83 84/* Maximum number of consecutive far traversals */ 85#define EFF_FAR_TRAVERSE_COUNT 4 86 87/* Maximum number of holes that is not deemed suspicious */ 88#define EFF_MAX_HOLES 5 89 90/* What is deemed a tiny frame, in bytes. If it is a power of two, calculation 91 * is cheaper. 92 */ 93#define EFF_TINY_FRAME_SZ 64 94 95 96TAILQ_HEAD(stream_frames_tailq, stream_frame); 97 98 99struct nocopy_data_in 100{ 101 struct stream_frames_tailq ncdi_frames_in; 102 struct data_in ncdi_data_in; 103 struct lsquic_conn_public *ncdi_conn_pub; 104 uint64_t ncdi_byteage; 105 uint32_t ncdi_stream_id; 106 unsigned ncdi_n_frames; 107 unsigned ncdi_n_holes; 108 unsigned ncdi_cons_far; 109}; 110 111 112#define NCDI_PTR(data_in) (struct nocopy_data_in *) \ 113 ((unsigned char *) (data_in) - offsetof(struct nocopy_data_in, ncdi_data_in)) 114 115#define STREAM_FRAME_PTR(data_frame) (struct stream_frame *) \ 116 ((unsigned char *) (data_frame) - offsetof(struct stream_frame, data_frame)) 117 118 119static const struct data_in_iface *di_if_nocopy_ptr; 120 121 122struct data_in * 123data_in_nocopy_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id) 124{ 125 struct nocopy_data_in *ncdi; 126 127 ncdi = malloc(sizeof(*ncdi)); 128 if (!ncdi) 129 return NULL; 130 131 TAILQ_INIT(&ncdi->ncdi_frames_in); 132 ncdi->ncdi_data_in.di_if = di_if_nocopy_ptr; 133 ncdi->ncdi_data_in.di_flags = 0; 134 ncdi->ncdi_conn_pub = conn_pub; 135 ncdi->ncdi_stream_id = stream_id; 136 ncdi->ncdi_byteage = 0; 137 ncdi->ncdi_n_frames = 0; 138 ncdi->ncdi_n_holes = 0; 139 ncdi->ncdi_cons_far = 0; 140 return &ncdi->ncdi_data_in; 141} 142 143 144static void 145nocopy_di_destroy (struct data_in *data_in) 146{ 147 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 148 stream_frame_t *frame; 149 while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in))) 150 { 151 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 152 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 153 lsquic_malo_put(frame); 154 } 155 free(ncdi); 156} 157 158 159#if 1 160#define CHECK_ORDER(ncdi) 161#else 162static int 163ordered (const struct nocopy_data_in *ncdi) 164{ 165 const stream_frame_t *frame; 166 uint64_t off = 0; 167 int ordered = 1; 168 TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame) 169 { 170 ordered &= off <= frame->data_frame.df_offset; 171 off = frame->data_frame.df_offset; 172 } 173 return ordered; 174} 175#define CHECK_ORDER(ncdi) assert(ordered(ncdi)) 176#endif 177 178 179/* To reduce the number of conditionals, logical operators have been replaced 180 * with arithmetic operators. Return value is an integer in range [0, 3]. 181 * Bit 0 is set due to FIN in previous frame. If bit 1 is set, it means that 182 * it's a dup. 183 */ 184static int 185insert_frame (struct nocopy_data_in *ncdi, struct stream_frame *new_frame, 186 uint64_t read_offset, unsigned *p_n_frames) 187{ 188 int ins; 189 unsigned count; 190 stream_frame_t *prev_frame, *next_frame; 191 192 /* Find position in the list, going backwards. We go backwards because 193 * that is the most likely scenario. 194 */ 195 next_frame = TAILQ_LAST(&ncdi->ncdi_frames_in, stream_frames_tailq); 196 if (next_frame && new_frame->data_frame.df_offset < next_frame->data_frame.df_offset) 197 { 198 count = 1; 199 prev_frame = TAILQ_PREV(next_frame, stream_frames_tailq, next_frame); 200 for ( ; prev_frame && 201 new_frame->data_frame.df_offset < next_frame->data_frame.df_offset; 202 next_frame = prev_frame, 203 prev_frame = TAILQ_PREV(prev_frame, stream_frames_tailq, next_frame)) 204 { 205 if (new_frame->data_frame.df_offset >= prev_frame->data_frame.df_offset) 206 break; 207 ++count; 208 } 209 } 210 else 211 { 212 count = 0; 213 prev_frame = NULL; 214 } 215 216 if (!prev_frame && next_frame && new_frame->data_frame.df_offset >= 217 next_frame->data_frame.df_offset) 218 { 219 prev_frame = next_frame; 220 next_frame = TAILQ_NEXT(next_frame, next_frame); 221 } 222 223 /* Perform checks */ 224 if (prev_frame) 225 ins = 226 (((prev_frame->data_frame.df_offset == new_frame->data_frame.df_offset) & 227 (prev_frame->data_frame.df_size == new_frame->data_frame.df_size) & 228 (prev_frame->data_frame.df_fin == new_frame->data_frame.df_fin)) << 1) /* Duplicate */ 229 | prev_frame->data_frame.df_fin /* FIN in the middle or dup */ 230 | (prev_frame->data_frame.df_offset + prev_frame->data_frame.df_size 231 > new_frame->data_frame.df_offset) /* Overlap */ 232 ; 233 else 234 ins = 0; 235 236 if (next_frame) 237 ins |= 238 (((next_frame->data_frame.df_offset == new_frame->data_frame.df_offset) & 239 (next_frame->data_frame.df_size == new_frame->data_frame.df_size) & 240 (next_frame->data_frame.df_fin == new_frame->data_frame.df_fin)) << 1) /* Duplicate */ 241 | (new_frame->data_frame.df_offset < read_offset) << 1 /* Duplicate */ 242 | new_frame->data_frame.df_fin /* FIN in the middle or dup */ 243 | (new_frame->data_frame.df_offset + new_frame->data_frame.df_size 244 > next_frame->data_frame.df_offset) /* Overlap */ 245 ; 246 else 247 ins |= 248 (new_frame->data_frame.df_offset < read_offset) << 1 /* Duplicate */ 249 ; 250 251 if (ins) 252 return ins; 253 254 if (prev_frame) 255 { 256 TAILQ_INSERT_AFTER(&ncdi->ncdi_frames_in, prev_frame, new_frame, next_frame); 257 ncdi->ncdi_n_holes += prev_frame->data_frame.df_offset + 258 prev_frame->data_frame.df_size != new_frame->data_frame.df_offset; 259 if (next_frame) 260 { 261 ncdi->ncdi_n_holes += new_frame->data_frame.df_offset + 262 new_frame->data_frame.df_size != next_frame->data_frame.df_offset; 263 --ncdi->ncdi_n_holes; 264 } 265 } 266 else 267 { 268 ncdi->ncdi_n_holes += next_frame && new_frame->data_frame.df_offset + 269 new_frame->data_frame.df_size != next_frame->data_frame.df_offset; 270 TAILQ_INSERT_HEAD(&ncdi->ncdi_frames_in, new_frame, next_frame); 271 } 272 CHECK_ORDER(ncdi); 273 274 ++ncdi->ncdi_n_frames; 275 ncdi->ncdi_byteage += new_frame->data_frame.df_size; 276 *p_n_frames = count; 277 278 return 0; 279} 280 281 282static int 283check_efficiency (struct nocopy_data_in *ncdi, unsigned count) 284{ 285 if (ncdi->ncdi_n_frames <= EFF_CHECK_THRESH_LOW) 286 { 287 ncdi->ncdi_cons_far = 0; 288 return 0; 289 } 290 if (ncdi->ncdi_n_frames > EFF_CHECK_THRESH_HIGH) 291 return 1; 292 if (count >= ncdi->ncdi_n_frames / 2) 293 { 294 ++ncdi->ncdi_cons_far; 295 if (ncdi->ncdi_cons_far > EFF_FAR_TRAVERSE_COUNT) 296 return 1; 297 } 298 else 299 ncdi->ncdi_cons_far = 0; 300 if (ncdi->ncdi_n_holes > EFF_MAX_HOLES) 301 return 1; 302 if (ncdi->ncdi_byteage / EFF_TINY_FRAME_SZ < ncdi->ncdi_n_frames) 303 return 1; 304 return 0; 305} 306 307 308static void 309set_eff_alert (struct nocopy_data_in *ncdi) 310{ 311 LSQ_DEBUG("low efficiency: n_frames: %u; n_holes: %u; cons_far: %u; " 312 "byteage: %"PRIu64, ncdi->ncdi_n_frames, ncdi->ncdi_n_holes, 313 ncdi->ncdi_cons_far, ncdi->ncdi_byteage); 314 ncdi->ncdi_data_in.di_flags |= DI_SWITCH_IMPL; 315} 316 317 318static enum ins_frame 319nocopy_di_insert_frame (struct data_in *data_in, 320 struct stream_frame *new_frame, uint64_t read_offset) 321{ 322 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 323 unsigned count; 324 int ins; 325 326 assert(0 == (new_frame->data_frame.df_fin & ~1)); 327 ins = insert_frame(ncdi, new_frame, read_offset, &count); 328 switch (ins) 329 { 330 case 0: 331 if (check_efficiency(ncdi, count)) 332 set_eff_alert(ncdi); 333 return INS_FRAME_OK; 334 case 2: 335 case 3: 336 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in); 337 lsquic_malo_put(new_frame); 338 return INS_FRAME_DUP; 339 default: 340 assert(1 == ins); 341 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in); 342 lsquic_malo_put(new_frame); 343 return INS_FRAME_ERR; 344 } 345} 346 347 348static struct data_frame * 349nocopy_di_get_frame (struct data_in *data_in, uint64_t read_offset) 350{ 351 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 352 struct stream_frame *frame = TAILQ_FIRST(&ncdi->ncdi_frames_in); 353 if (frame && frame->data_frame.df_offset + 354 frame->data_frame.df_read_off == read_offset) 355 return &frame->data_frame; 356 else 357 return NULL; 358} 359 360 361static void 362nocopy_di_frame_done (struct data_in *data_in, struct data_frame *data_frame) 363{ 364 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 365 struct stream_frame *const frame = STREAM_FRAME_PTR(data_frame), *first; 366 assert(data_frame->df_read_off == data_frame->df_size); 367 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 368 first = TAILQ_FIRST(&ncdi->ncdi_frames_in); 369 ncdi->ncdi_n_holes -= first && frame->data_frame.df_offset + 370 frame->data_frame.df_size != first->data_frame.df_offset; 371 --ncdi->ncdi_n_frames; 372 ncdi->ncdi_byteage -= frame->data_frame.df_size; 373 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 374 lsquic_malo_put(frame); 375} 376 377 378static int 379nocopy_di_empty (struct data_in *data_in) 380{ 381 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 382 return TAILQ_EMPTY(&ncdi->ncdi_frames_in); 383} 384 385 386struct data_in * 387nocopy_di_switch_impl (struct data_in *data_in, uint64_t read_offset) 388{ 389 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 390 struct data_in *new_data_in; 391 stream_frame_t *frame; 392 enum ins_frame ins; 393 394 new_data_in = data_in_hash_new(ncdi->ncdi_conn_pub, ncdi->ncdi_stream_id, 395 ncdi->ncdi_byteage); 396 if (!new_data_in) 397 goto end; 398 399 while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in))) 400 { 401 TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame); 402 ins = data_in_hash_insert_data_frame(new_data_in, &frame->data_frame, 403 read_offset); 404 lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in); 405 lsquic_malo_put(frame); 406 if (INS_FRAME_ERR == ins) 407 { 408 new_data_in->di_if->di_destroy(new_data_in); 409 new_data_in = NULL; 410 goto end; 411 } 412 } 413 414 end: 415 data_in->di_if->di_destroy(data_in); 416 return new_data_in; 417} 418 419 420/* This function overestimates amount of memory because some packets are 421 * referenced by more than one stream. In the usual case, however, I 422 * expect the error not to be large. 423 */ 424static size_t 425nocopy_di_mem_used (struct data_in *data_in) 426{ 427 struct nocopy_data_in *const ncdi = NCDI_PTR(data_in); 428 const stream_frame_t *frame; 429 size_t size; 430 431 size = sizeof(*data_in); 432 TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame) 433 size += lsquic_packet_in_mem_used(frame->packet_in); 434 435 return size; 436} 437 438static const struct data_in_iface di_if_nocopy = { 439 .di_destroy = nocopy_di_destroy, 440 .di_empty = nocopy_di_empty, 441 .di_frame_done = nocopy_di_frame_done, 442 .di_get_frame = nocopy_di_get_frame, 443 .di_insert_frame = nocopy_di_insert_frame, 444 .di_mem_used = nocopy_di_mem_used, 445 .di_switch_impl = nocopy_di_switch_impl, 446}; 447 448static const struct data_in_iface *di_if_nocopy_ptr = &di_if_nocopy; 449