/* lsquic_bbr.c -- revision 5392f7a3 */
/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc.  See LICENSE. */
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE.chrome file.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

#include "lsquic.h"
#include "lsquic_int_types.h"
#include "lsquic_cong_ctl.h"
#include "lsquic_minmax.h"
#include "lsquic_packet_common.h"
#include "lsquic_packet_out.h"
#include "lsquic_bw_sampler.h"
#include "lsquic_bbr.h"
#include "lsquic_hash.h"
#include "lsquic_conn.h"
#include "lsquic_sfcw.h"
#include "lsquic_conn_flow.h"
#include "lsquic_varint.h"
#include "lsquic_hq.h"
#include "lsquic_stream.h"
#include "lsquic_rtt.h"
#include "lsquic_conn_public.h"
#include "lsquic_util.h"
#include "lsquic_malo.h"

#define LSQUIC_LOGGER_MODULE LSQLM_BBR
#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(bbr->bbr_conn)
#include "lsquic_logger.h"

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Convert milliseconds/seconds to the microsecond units used by
 * lsquic_time_t.
 */
#define ms(val_) ((val_) * 1000)
#define sec(val_) ((val_) * 1000 * 1000)

// Default maximum packet size used in the Linux TCP implementation.
// Used in QUIC for congestion window computations in bytes.
#define kDefaultTCPMSS 1460
#define kMaxSegmentSize kDefaultTCPMSS

// Constants based on TCP defaults.
// The minimum CWND to ensure delayed acks don't reduce bandwidth measurements.
// Does not inflate the pacing rate.
#define kDefaultMinimumCongestionWindow (4 * kDefaultTCPMSS)

// The gain used for the STARTUP, equal to 2/ln(2).
#define kDefaultHighGain 2.885f

// The newly derived gain for STARTUP, equal to 4 * ln(2)
#define kDerivedHighGain 2.773f

// The newly derived CWND gain for STARTUP, 2.
#define kDerivedHighCWNDGain 2.0f

// The gain used in STARTUP after loss has been detected.
// 1.5 is enough to allow for 25% exogenous loss and still observe a 25% growth
// in measured bandwidth.
#define kStartupAfterLossGain 1.5f

// We match SPDY's use of 32 (since we'd compete with SPDY).
#define kInitialCongestionWindow 32

/* Taken from send_algorithm_interface.h */
#define kDefaultMaxCongestionWindowPackets 2000

// The time after which the current min_rtt value expires.
#define kMinRttExpiry sec(10)

// Coefficient to determine if a new RTT is sufficiently similar to min_rtt that
// we don't need to enter PROBE_RTT.
#define kSimilarMinRttThreshold 1.125f

// If the bandwidth does not increase by the factor of |kStartupGrowthTarget|
// within |kRoundTripsWithoutGrowthBeforeExitingStartup| rounds, the connection
// will exit the STARTUP mode.
#define kStartupGrowthTarget 1.25

#define kRoundTripsWithoutGrowthBeforeExitingStartup 3

/* Disabled: when non-zero, STARTUP pacing is slowed proportionally to
 * bytes lost (see calculate_pacing_rate()).
 */
#define startup_rate_reduction_multiplier_ 0

// The cycle of gains used during the PROBE_BW stage.
static const float kPacingGain[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1};

// The length of the gain cycle.
static const size_t kGainCycleLength = sizeof(kPacingGain)
                                                / sizeof(kPacingGain[0]);

// Coefficient of target congestion window to use when basing PROBE_RTT on BDP.
#define kModerateProbeRttMultiplier 0.75

// The maximum packet size of any QUIC packet over IPv6, based on ethernet's max
// size, minus the IP and UDP headers. IPv6 has a 40 byte header, UDP adds an
// additional 8 bytes. This is a total overhead of 48 bytes. Ethernet's
// max packet size is 1500 bytes, 1500 - 48 = 1452.
#define kMaxV6PacketSize 1452
// The maximum packet size of any QUIC packet over IPv4.
// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472.
#define kMaxV4PacketSize 1472
// The maximum incoming packet size allowed.
108#define kMaxIncomingPacketSize kMaxV4PacketSize 109// The maximum outgoing packet size allowed. 110#define kMaxOutgoingPacketSize kMaxV6PacketSize 111 112// The minimum time the connection can spend in PROBE_RTT mode. 113#define kProbeRttTime ms(200) 114 115/* FLAG* are from net/quic/quic_flags_list.h */ 116 117// When in STARTUP and recovery, do not add bytes_acked to QUIC BBR's CWND in 118// CalculateCongestionWindow() 119#define FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery 0 120 121// When true, ensure BBR allows at least one MSS to be sent in response to an 122// ACK in packet conservation. 123#define FLAG_quic_bbr_one_mss_conservation 0 124 125/* From net/quic/quic_flags_list.h */ 126#define kCwndGain 2.0 127 128 129static uint64_t lsquic_bbr_get_cwnd (void *); 130 131 132static const char *const mode2str[] = 133{ 134 [BBR_MODE_STARTUP] = "STARTUP", 135 [BBR_MODE_DRAIN] = "DRAIN", 136 [BBR_MODE_PROBE_BW] = "PROBE_BW", 137 [BBR_MODE_PROBE_RTT] = "PROBE_RTT", 138}; 139 140 141static void 142set_mode (struct lsquic_bbr *bbr, enum bbr_mode mode) 143{ 144 if (bbr->bbr_mode != mode) 145 { 146 LSQ_DEBUG("mode change %s -> %s", mode2str[bbr->bbr_mode], 147 mode2str[mode]); 148 bbr->bbr_mode = mode; 149 } 150 else 151 LSQ_DEBUG("mode remains %s", mode2str[mode]); 152} 153 154 155static void 156set_startup_values (struct lsquic_bbr *bbr) 157{ 158 bbr->bbr_pacing_gain = bbr->bbr_high_gain; 159 bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain; 160} 161 162 163static void 164lsquic_bbr_init (void *cong_ctl, const struct lsquic_conn_public *conn_pub, 165 enum quic_ft_bit retx_frames) 166{ 167 struct lsquic_bbr *const bbr = cong_ctl; 168 bbr->bbr_conn = conn_pub->lconn; 169 lsquic_bw_sampler_init(&bbr->bbr_bw_sampler, bbr->bbr_conn, retx_frames); 170 171 bbr->bbr_rtt_stats = &conn_pub->rtt_stats; 172 bbr->bbr_mode = BBR_MODE_STARTUP; 173 bbr->bbr_round_count = 0; 174 minmax_init(&bbr->bbr_max_bandwidth, 10); 175 minmax_init(&bbr->bbr_max_ack_height, 10); 176 
bbr->bbr_aggregation_epoch_bytes = 0; 177 bbr->bbr_aggregation_epoch_start_time = 0; 178 bbr->bbr_min_rtt = 0; 179 bbr->bbr_min_rtt_timestamp = 0; 180 bbr->bbr_init_cwnd = kInitialCongestionWindow * kDefaultTCPMSS; 181 bbr->bbr_cwnd = kInitialCongestionWindow * kDefaultTCPMSS; 182 bbr->bbr_max_cwnd = kDefaultMaxCongestionWindowPackets * kDefaultTCPMSS; 183 bbr->bbr_min_cwnd = kDefaultMinimumCongestionWindow; 184 bbr->bbr_high_gain = kDefaultHighGain; 185 bbr->bbr_high_cwnd_gain = kDefaultHighGain; 186 bbr->bbr_drain_gain = 1.0 / kDefaultHighGain; 187 bbr->bbr_pacing_rate = BW_ZERO(); 188 bbr->bbr_pacing_gain = 1.0; 189 bbr->bbr_cwnd_gain = 1.0; 190 bbr->bbr_num_startup_rtts = kRoundTripsWithoutGrowthBeforeExitingStartup; 191 bbr->bbr_flags &= ~BBR_FLAG_EXIT_STARTUP_ON_LOSS; 192 bbr->bbr_cycle_current_offset = 0; 193 bbr->bbr_last_cycle_start = 0; 194 bbr->bbr_flags &= ~BBR_FLAG_IS_AT_FULL_BANDWIDTH; 195 bbr->bbr_round_wo_bw_gain = 0; 196 bbr->bbr_bw_at_last_round = BW_ZERO(); 197 bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE; 198 bbr->bbr_exit_probe_rtt_at = 0; 199 bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED; 200 bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 201 bbr->bbr_flags &= ~BBR_FLAG_HAS_NON_APP_LIMITED; 202 bbr->bbr_flags &= ~BBR_FLAG_FLEXIBLE_APP_LIMITED; 203 204 set_startup_values(bbr); 205 206 LSQ_DEBUG("initialized"); 207} 208 209 210static lsquic_time_t 211get_min_rtt (const struct lsquic_bbr *bbr) 212{ 213 lsquic_time_t min_rtt; 214 215 if (bbr->bbr_min_rtt) 216 return bbr->bbr_min_rtt; 217 else 218 { 219 min_rtt = lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats); 220 if (min_rtt == 0) 221 min_rtt = 25000; 222 return min_rtt; 223 } 224} 225 226 227static uint64_t 228lsquic_bbr_pacing_rate (void *cong_ctl, int in_recovery) 229{ 230 struct lsquic_bbr *const bbr = cong_ctl; 231 lsquic_time_t min_rtt; 232 struct bandwidth bw; 233 234 if (!BW_IS_ZERO(&bbr->bbr_pacing_rate)) 235 bw = bbr->bbr_pacing_rate; 236 else 237 { 238 min_rtt = 
get_min_rtt(bbr); 239 bw = BW_FROM_BYTES_AND_DELTA(bbr->bbr_init_cwnd, min_rtt); 240 bw = BW_TIMES(&bw, bbr->bbr_high_cwnd_gain); 241 } 242 243 return BW_TO_BYTES_PER_SEC(&bw); 244} 245 246 247/* BbrSender::GetTargetCongestionWindow */ 248static uint64_t 249get_target_cwnd (const struct lsquic_bbr *bbr, float gain) 250{ 251 struct bandwidth bw; 252 uint64_t bdp, cwnd; 253 254 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 255 bdp = get_min_rtt(bbr) * BW_TO_BYTES_PER_SEC(&bw) / 1000000; 256 cwnd = gain * bdp; 257 258 // BDP estimate will be zero if no bandwidth samples are available yet. 259 if (cwnd == 0) 260 cwnd = gain * bbr->bbr_init_cwnd; 261 262 return MAX(cwnd, bbr->bbr_min_cwnd); 263} 264 265 266/* See BbrSender::IsPipeSufficientlyFull */ 267static int 268is_pipe_sufficiently_full (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 269{ 270 // See if we need more bytes in flight to see more bandwidth. 271 if (bbr->bbr_mode == BBR_MODE_STARTUP) 272 // STARTUP exits if it doesn't observe a 25% bandwidth increase, so 273 // the CWND must be more than 25% above the target. 274 return bytes_in_flight >= get_target_cwnd(bbr, 1.5); 275 else if (bbr->bbr_pacing_gain > 1) 276 // Super-unity PROBE_BW doesn't exit until 1.25 * BDP is achieved. 277 return bytes_in_flight >= get_target_cwnd(bbr, bbr->bbr_pacing_gain); 278 else 279 // If bytes_in_flight are above the target congestion window, it should 280 // be possible to observe the same or more bandwidth if it's available. 
281 return bytes_in_flight >= get_target_cwnd(bbr, 1.1); 282} 283 284 285/* See BbrSender::OnApplicationLimited */ 286static void 287lsquic_bbr_was_quiet (void *cong_ctl, lsquic_time_t now, 288 uint64_t bytes_in_flight) 289{ 290 struct lsquic_bbr *const bbr = cong_ctl; 291 uint64_t cwnd; 292 293 cwnd = lsquic_bbr_get_cwnd(cong_ctl); 294 if (bytes_in_flight >= cwnd) 295 return; 296 if ((bbr->bbr_flags & BBR_FLAG_FLEXIBLE_APP_LIMITED) 297 && is_pipe_sufficiently_full(bbr, bytes_in_flight)) 298 return; 299 300 bbr->bbr_flags |= BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT; 301 lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler); 302 LSQ_DEBUG("becoming application-limited. Last sent packet: %"PRIu64"; " 303 "CWND: %"PRIu64, bbr->bbr_last_sent_packno, cwnd); 304} 305 306 307static void 308lsquic_bbr_ack (void *cong_ctl, struct lsquic_packet_out *packet_out, 309 unsigned packet_sz, lsquic_time_t now_time, int app_limited) 310{ 311 struct lsquic_bbr *const bbr = cong_ctl; 312 struct bw_sample *sample; 313 314 assert(bbr->bbr_flags & BBR_FLAG_IN_ACK); 315 316 sample = lsquic_bw_sampler_packet_acked(&bbr->bbr_bw_sampler, packet_out, 317 bbr->bbr_ack_state.ack_time); 318 if (sample) 319 TAILQ_INSERT_TAIL(&bbr->bbr_ack_state.samples, sample, next); 320 321 if (is_valid_packno(bbr->bbr_ack_state.max_packno)) 322 /* We assume packet numbers are ordered */ 323 assert(packet_out->po_packno > bbr->bbr_ack_state.max_packno); 324 bbr->bbr_ack_state.max_packno = packet_out->po_packno; 325 bbr->bbr_ack_state.acked_bytes += packet_sz; 326} 327 328 329static void 330lsquic_bbr_sent (void *cong_ctl, struct lsquic_packet_out *packet_out, 331 uint64_t in_flight) 332{ 333 struct lsquic_bbr *const bbr = cong_ctl; 334 335 if (!(packet_out->po_flags & PO_MINI)) 336 lsquic_bw_sampler_packet_sent(&bbr->bbr_bw_sampler, packet_out, 337 in_flight); 338 339 /* Obviously we make an assumption that sent packet number are always 340 * increasing. 
341 */ 342 bbr->bbr_last_sent_packno = packet_out->po_packno; 343} 344 345 346static void 347lsquic_bbr_lost (void *cong_ctl, struct lsquic_packet_out *packet_out, 348 unsigned packet_sz) 349{ 350 struct lsquic_bbr *const bbr = cong_ctl; 351 352 lsquic_bw_sampler_packet_lost(&bbr->bbr_bw_sampler, packet_out); 353 bbr->bbr_ack_state.has_losses = 1; 354 bbr->bbr_ack_state.lost_bytes += packet_sz; 355} 356 357 358static void 359lsquic_bbr_begin_ack (void *cong_ctl, lsquic_time_t ack_time, uint64_t in_flight) 360{ 361 struct lsquic_bbr *const bbr = cong_ctl; 362 363 assert(!(bbr->bbr_flags & BBR_FLAG_IN_ACK)); 364 bbr->bbr_flags |= BBR_FLAG_IN_ACK; 365 memset(&bbr->bbr_ack_state, 0, sizeof(bbr->bbr_ack_state)); 366 TAILQ_INIT(&bbr->bbr_ack_state.samples); 367 bbr->bbr_ack_state.ack_time = ack_time; 368 bbr->bbr_ack_state.max_packno = UINT64_MAX; 369 bbr->bbr_ack_state.in_flight = in_flight; 370 bbr->bbr_ack_state.total_bytes_acked_before 371 = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler); 372} 373 374 375/* Based on BbrSender::ShouldExtendMinRttExpiry() */ 376static int 377should_extend_min_rtt_expiry (const struct lsquic_bbr *bbr) 378{ 379 int increased_since_last_probe; 380 381 if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 382 |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED)) 383 == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 384 |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED)) 385 // Extend the current min_rtt if we've been app limited recently. 
386 return 1; 387 388 increased_since_last_probe = bbr->bbr_min_rtt_since_last_probe 389 > bbr->bbr_min_rtt * kSimilarMinRttThreshold; 390 if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 391 |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT)) 392 == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 393 |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT) 394 && !increased_since_last_probe) 395 // Extend the current min_rtt if we've been app limited recently and an 396 // rtt has been measured in that time that's less than 12.5% more than 397 // the current min_rtt. 398 return 1; 399 400 return 0; 401} 402 403 404/* Based on BbrSender::UpdateBandwidthAndMinRtt */ 405/* Returns true if min RTT expired, false otherwise */ 406static int 407update_bandwidth_and_min_rtt (struct lsquic_bbr *bbr) 408{ 409 struct bw_sample *sample, *next_sample; 410 uint64_t sample_min_rtt; 411 int min_rtt_expired; 412 413 sample_min_rtt = UINT64_MAX; 414 for (sample = TAILQ_FIRST(&bbr->bbr_ack_state.samples); sample; 415 sample = next_sample) 416 { 417 next_sample = TAILQ_NEXT(sample, next); 418 419 if (sample->is_app_limited) 420 bbr->bbr_flags |= BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 421 else 422 { 423 bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 424 bbr->bbr_flags |= BBR_FLAG_HAS_NON_APP_LIMITED; 425 } 426 427 if (sample_min_rtt == UINT64_MAX || sample->rtt < sample_min_rtt) 428 sample_min_rtt = sample->rtt; 429 430 if (!sample->is_app_limited 431 || BW_VALUE(&sample->bandwidth) 432 > minmax_get(&bbr->bbr_max_bandwidth)) 433 minmax_upmax(&bbr->bbr_max_bandwidth, bbr->bbr_round_count, 434 BW_VALUE(&sample->bandwidth)); 435 436 lsquic_malo_put(sample); 437 } 438 439 if (sample_min_rtt == UINT64_MAX) 440 return 0; 441 442 bbr->bbr_min_rtt_since_last_probe 443 = MIN(bbr->bbr_min_rtt_since_last_probe, sample_min_rtt); 444 445 min_rtt_expired = bbr->bbr_min_rtt != 0 && (bbr->bbr_ack_state.ack_time 446 > bbr->bbr_min_rtt_timestamp + kMinRttExpiry); 447 if (min_rtt_expired || sample_min_rtt < 
bbr->bbr_min_rtt 448 || 0 == bbr->bbr_min_rtt) 449 { 450 if (min_rtt_expired && should_extend_min_rtt_expiry(bbr)) 451 { 452 LSQ_DEBUG("min rtt expiration extended, stay at: %"PRIu64, 453 bbr->bbr_min_rtt); 454 min_rtt_expired = 0; 455 } 456 else 457 { 458 LSQ_DEBUG("min rtt updated: %"PRIu64" -> %"PRIu64, 459 bbr->bbr_min_rtt, sample_min_rtt); 460 bbr->bbr_min_rtt = sample_min_rtt; 461 } 462 bbr->bbr_min_rtt_timestamp = bbr->bbr_ack_state.ack_time; 463 bbr->bbr_min_rtt_since_last_probe = UINT64_MAX; 464 bbr->bbr_flags &= ~BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT; 465 } 466 467 return min_rtt_expired; 468} 469 470 471/* Based on BbrSender::UpdateRecoveryState() */ 472static void 473update_recovery_state (struct lsquic_bbr *bbr, int is_round_start) 474{ 475 // Exit recovery when there are no losses for a round. 476 if (bbr->bbr_ack_state.has_losses) 477 bbr->bbr_end_recovery_at = bbr->bbr_last_sent_packno; 478 479 switch (bbr->bbr_recovery_state) 480 { 481 case BBR_RS_NOT_IN_RECOVERY: 482 // Enter conservation on the first loss. 483 if (bbr->bbr_ack_state.has_losses) 484 { 485 bbr->bbr_recovery_state = BBR_RS_CONSERVATION; 486 // This will cause the |bbr_recovery_window| to be set to the 487 // correct value in CalculateRecoveryWindow(). 488 bbr->bbr_recovery_window = 0; 489 // Since the conservation phase is meant to be lasting for a whole 490 // round, extend the current round as if it were started right now. 491 bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno; 492 } 493 break; 494 case BBR_RS_CONSERVATION: 495 if (is_round_start) 496 bbr->bbr_recovery_state = BBR_RS_GROWTH; 497 /* Fall-through */ 498 case BBR_RS_GROWTH: 499 // Exit recovery if appropriate. 
500 if (!bbr->bbr_ack_state.has_losses 501 && bbr->bbr_ack_state.max_packno > bbr->bbr_end_recovery_at) 502 bbr->bbr_recovery_state = BBR_RS_NOT_IN_RECOVERY; 503 break; 504 } 505} 506 507 508static uint64_t 509update_ack_aggregation_bytes (struct lsquic_bbr *bbr, 510 uint64_t newly_acked_bytes) 511{ 512 const lsquic_time_t ack_time = bbr->bbr_ack_state.ack_time; 513 uint64_t expected_bytes_acked, diff; 514 515 // Compute how many bytes are expected to be delivered, assuming max 516 // bandwidth is correct. 517 expected_bytes_acked = minmax_get(&bbr->bbr_max_bandwidth) 518 * (ack_time - bbr->bbr_aggregation_epoch_start_time); 519 520 // Reset the current aggregation epoch as soon as the ack arrival rate is 521 // less than or equal to the max bandwidth. 522 if (bbr->bbr_aggregation_epoch_bytes <= expected_bytes_acked) 523 { 524 // Reset to start measuring a new aggregation epoch. 525 bbr->bbr_aggregation_epoch_bytes = newly_acked_bytes; 526 bbr->bbr_aggregation_epoch_start_time = ack_time; 527 return 0; 528 } 529 530 // Compute how many extra bytes were delivered vs max bandwidth. 531 // Include the bytes most recently acknowledged to account for stretch acks. 532 bbr->bbr_aggregation_epoch_bytes += newly_acked_bytes; 533 diff = bbr->bbr_aggregation_epoch_bytes - expected_bytes_acked; 534 minmax_upmax(&bbr->bbr_max_ack_height, bbr->bbr_round_count, diff); 535 return diff; 536} 537 538 539/* See BbrSender::UpdateGainCyclePhase() */ 540static void 541update_gain_cycle_phase (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 542{ 543 const uint64_t prior_in_flight = bbr->bbr_ack_state.in_flight; 544 const lsquic_time_t now = bbr->bbr_ack_state.ack_time; 545 // In most cases, the cycle is advanced after an RTT passes. 
546 int should_advance_gain_cycling 547 = now - bbr->bbr_last_cycle_start > get_min_rtt(bbr); 548 549 // If the pacing gain is above 1.0, the connection is trying to probe the 550 // bandwidth by increasing the number of bytes in flight to at least 551 // pacing_gain * BDP. Make sure that it actually reaches the target, as 552 // long as there are no losses suggesting that the buffers are not able to 553 // hold that much. 554 if (bbr->bbr_pacing_gain > 1.0 555 && !bbr->bbr_ack_state.has_losses 556 && prior_in_flight < get_target_cwnd(bbr, bbr->bbr_pacing_gain)) 557 should_advance_gain_cycling = 0; 558 559 /* Several optimizations are possible here: "else if" instead of "if", as 560 * well as not calling get_target_cwnd() if `should_advance_gain_cycling' 561 * is already set to the target value. 562 */ 563 564 // If pacing gain is below 1.0, the connection is trying to drain the extra 565 // queue which could have been incurred by probing prior to it. If the 566 // number of bytes in flight falls down to the estimated BDP value earlier, 567 // conclude that the queue has been successfully drained and exit this cycle 568 // early. 569 if (bbr->bbr_pacing_gain < 1.0 570 && bytes_in_flight <= get_target_cwnd(bbr, 1)) 571 should_advance_gain_cycling = 1; 572 573 if (should_advance_gain_cycling) 574 { 575 bbr->bbr_cycle_current_offset = 576 (bbr->bbr_cycle_current_offset + 1) % kGainCycleLength; 577 bbr->bbr_last_cycle_start = now; 578 // Stay in low gain mode until the target BDP is hit. Low gain mode 579 // will be exited immediately when the target BDP is achieved. 
580 if ((bbr->bbr_flags & BBR_FLAG_DRAIN_TO_TARGET) 581 && bbr->bbr_pacing_gain < 1 582 && kPacingGain[bbr->bbr_cycle_current_offset] == 1 583 && bytes_in_flight > get_target_cwnd(bbr, 1)) 584 return; 585 bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset]; 586 LSQ_DEBUG("advanced gain cycle, pacing gain set to %.2f", 587 bbr->bbr_pacing_gain); 588 } 589} 590 591 592/* BbrSender::InRecovery() */ 593static int 594in_recovery (const struct lsquic_bbr *bbr) 595{ 596 return bbr->bbr_recovery_state != BBR_RS_NOT_IN_RECOVERY; 597} 598 599 600/* See BbrSender::CheckIfFullBandwidthReached() */ 601static void 602check_if_full_bw_reached (struct lsquic_bbr *bbr) 603{ 604 struct bandwidth target, bw; 605 606 if (bbr->bbr_flags & BBR_FLAG_LAST_SAMPLE_APP_LIMITED) 607 { 608 LSQ_DEBUG("last sample app limited: full BW not reached"); 609 return; 610 } 611 612 target = BW_TIMES(&bbr->bbr_bw_at_last_round, kStartupGrowthTarget); 613 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 614 if (BW_VALUE(&bw) >= BW_VALUE(&target)) 615 { 616 bbr->bbr_bw_at_last_round = bw; 617 bbr->bbr_round_wo_bw_gain = 0; 618 if (bbr->bbr_flags & BBR_FLAG_EXPIRE_ACK_AGG_IN_STARTUP) 619 // Expire old excess delivery measurements now that bandwidth 620 // increased. 
621 minmax_reset(&bbr->bbr_max_ack_height, 622 ((struct minmax_sample) { bbr->bbr_round_count, 0, })); 623 LSQ_DEBUG("BW estimate %"PRIu64"bps greater than or equal to target " 624 "%"PRIu64"bps: full BW not reached", 625 BW_VALUE(&bw), BW_VALUE(&target)); 626 return; 627 } 628 629 ++bbr->bbr_round_wo_bw_gain; 630 if ((bbr->bbr_round_wo_bw_gain >= bbr->bbr_num_startup_rtts) 631 || ((bbr->bbr_flags & BBR_FLAG_EXIT_STARTUP_ON_LOSS) 632 && in_recovery(bbr))) 633 { 634 assert(bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED); /* DCHECK */ 635 bbr->bbr_flags |= BBR_FLAG_IS_AT_FULL_BANDWIDTH; 636 LSQ_DEBUG("reached full BW"); 637 } 638 else 639 LSQ_DEBUG("rounds w/o gain: %u, full BW not reached", 640 bbr->bbr_round_wo_bw_gain); 641} 642 643 644/* See BbrSender::OnExitStartup */ 645static void 646on_exit_startup (struct lsquic_bbr *bbr, lsquic_time_t now) 647{ 648 assert(bbr->bbr_mode == BBR_MODE_STARTUP); 649 /* Apparently this method is just to update stats, something that we 650 * don't do yet. 651 */ 652} 653 654 655/* See BbrSender::EnterProbeBandwidthMode */ 656static void 657enter_probe_bw_mode (struct lsquic_bbr *bbr, lsquic_time_t now) 658{ 659 set_mode(bbr, BBR_MODE_PROBE_BW); 660 bbr->bbr_cwnd_gain = kCwndGain; 661 662 // Pick a random offset for the gain cycle out of {0, 2..7} range. 1 is 663 // excluded because in that case increased gain and decreased gain would not 664 // follow each other. 
665 bbr->bbr_cycle_current_offset = rand() % (kGainCycleLength - 1); 666 if (bbr->bbr_cycle_current_offset >= 1) 667 ++bbr->bbr_cycle_current_offset; 668 669 bbr->bbr_last_cycle_start = now; 670 bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset]; 671} 672 673 674/* See BbrSender::EnterStartupMode */ 675static void 676enter_startup_mode (struct lsquic_bbr *bbr, lsquic_time_t now) 677{ 678 set_mode(bbr, BBR_MODE_STARTUP); 679 set_startup_values(bbr); 680} 681 682 683/* See BbrSender::MaybeExitStartupOrDrain() */ 684static void 685maybe_exit_startup_or_drain (struct lsquic_bbr *bbr, lsquic_time_t now, 686 uint64_t bytes_in_flight) 687{ 688 uint64_t target_cwnd; 689 690 if (bbr->bbr_mode == BBR_MODE_STARTUP 691 && (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 692 { 693 on_exit_startup(bbr, now); 694 set_mode(bbr, BBR_MODE_DRAIN); 695 bbr->bbr_pacing_gain = bbr->bbr_drain_gain; 696 bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain; 697 } 698 699 if (bbr->bbr_mode == BBR_MODE_DRAIN) 700 { 701 target_cwnd = get_target_cwnd(bbr, 1); 702 LSQ_DEBUG("%s: bytes in flight: %"PRIu64"; target cwnd: %"PRIu64, 703 __func__, bytes_in_flight, target_cwnd); 704 if (bytes_in_flight <= target_cwnd) 705 enter_probe_bw_mode(bbr, now); 706 } 707} 708 709 710static int 711in_slow_start (const struct lsquic_bbr *bbr) 712{ 713 return bbr->bbr_mode == BBR_MODE_STARTUP; 714} 715 716 717/* See QuicByteCount BbrSender::ProbeRttCongestionWindow() */ 718static uint64_t 719get_probe_rtt_cwnd (const struct lsquic_bbr *bbr) 720{ 721 if (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_BASED_ON_BDP) 722 return get_target_cwnd(bbr, kModerateProbeRttMultiplier); 723 else 724 return bbr->bbr_min_cwnd; 725} 726 727 728static uint64_t 729lsquic_bbr_get_cwnd (void *cong_ctl) 730{ 731 struct lsquic_bbr *const bbr = cong_ctl; 732 uint64_t cwnd; 733 734 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 735 cwnd = get_probe_rtt_cwnd(bbr); 736 else if (in_recovery(bbr) && 737 !((bbr->bbr_flags & 
BBR_FLAG_RATE_BASED_STARTUP) 738 && bbr->bbr_mode == BBR_MODE_STARTUP)) 739 cwnd = MIN(bbr->bbr_cwnd, bbr->bbr_recovery_window); 740 else 741 cwnd = bbr->bbr_cwnd; 742 743 return cwnd; 744} 745 746 747/* See BbrSender::MaybeEnterOrExitProbeRtt */ 748static void 749maybe_enter_or_exit_probe_rtt (struct lsquic_bbr *bbr, lsquic_time_t now, 750 int is_round_start, int min_rtt_expired, uint64_t bytes_in_flight) 751{ 752 if (min_rtt_expired 753 && !(bbr->bbr_flags & BBR_FLAG_EXITING_QUIESCENCE) 754 && bbr->bbr_mode != BBR_MODE_PROBE_RTT) 755 { 756 if (in_slow_start(bbr)) 757 on_exit_startup(bbr, now); 758 set_mode(bbr, BBR_MODE_PROBE_RTT); 759 bbr->bbr_pacing_gain = 1; 760 // Do not decide on the time to exit PROBE_RTT until the 761 // |bytes_in_flight| is at the target small value. 762 bbr->bbr_exit_probe_rtt_at = 0; 763 } 764 765 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 766 { 767 lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler); 768 LSQ_DEBUG("%s: exit probe at: %"PRIu64"; now: %"PRIu64 769 "; round start: %d; round passed: %d; rtt: %"PRIu64" usec", 770 __func__, bbr->bbr_exit_probe_rtt_at, now, is_round_start, 771 !!(bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED), 772 lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)); 773 if (bbr->bbr_exit_probe_rtt_at == 0) 774 { 775 // If the window has reached the appropriate size, schedule exiting 776 // PROBE_RTT. The CWND during PROBE_RTT is 777 // kMinimumCongestionWindow, but we allow an extra packet since QUIC 778 // checks CWND before sending a packet. 
779 if (bytes_in_flight 780 < get_probe_rtt_cwnd(bbr) + kMaxOutgoingPacketSize) 781 { 782 bbr->bbr_exit_probe_rtt_at = now + kProbeRttTime; 783 bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED; 784 } 785 } 786 else 787 { 788 if (is_round_start) 789 bbr->bbr_flags |= BBR_FLAG_PROBE_RTT_ROUND_PASSED; 790 if (now >= bbr->bbr_exit_probe_rtt_at 791 && (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED)) 792 { 793 bbr->bbr_min_rtt_timestamp = now; 794 if (!(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 795 enter_startup_mode(bbr, now); 796 else 797 enter_probe_bw_mode(bbr, now); 798 } 799 } 800 } 801 802 bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE; 803} 804 805 806/* See BbrSender::CalculatePacingRate */ 807static void 808calculate_pacing_rate (struct lsquic_bbr *bbr) 809{ 810 struct bandwidth bw, target_rate; 811 812 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 813 if (BW_IS_ZERO(&bw)) 814 return; 815 816 LSQ_DEBUG("BW estimate: %"PRIu64, BW_VALUE(&bw)); 817 818 target_rate = BW_TIMES(&bw, bbr->bbr_pacing_gain); 819 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 820 { 821 bbr->bbr_pacing_rate = target_rate; 822 return; 823 } 824 825 // Pace at the rate of initial_window / RTT as soon as RTT measurements are 826 // available. 827 if (BW_IS_ZERO(&bbr->bbr_pacing_rate) 828 && 0 != lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)) 829 { 830 bbr->bbr_pacing_rate = BW_FROM_BYTES_AND_DELTA( 831 bbr->bbr_init_cwnd, 832 lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)); 833 return; 834 } 835 836 // Slow the pacing rate in STARTUP once loss has ever been detected. 837 const int has_ever_detected_loss = bbr->bbr_end_recovery_at != 0; 838 if (has_ever_detected_loss 839 && (bbr->bbr_flags & (BBR_FLAG_SLOWER_STARTUP 840 |BBR_FLAG_HAS_NON_APP_LIMITED)) 841 == (BBR_FLAG_SLOWER_STARTUP|BBR_FLAG_HAS_NON_APP_LIMITED)) 842 { 843 bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupAfterLossGain); 844 return; 845 } 846 847 // Slow the pacing rate in STARTUP by the bytes_lost / CWND. 
848 if (startup_rate_reduction_multiplier_ != 0 849 && has_ever_detected_loss 850 && (bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED)) 851 { 852 bbr->bbr_pacing_rate = BW_TIMES(&target_rate, 853 (1 - (bbr->bbr_startup_bytes_lost 854 * startup_rate_reduction_multiplier_ * 1.0f 855 / bbr->bbr_cwnd_gain))); 856 // Ensure the pacing rate doesn't drop below the startup growth target 857 // times the bandwidth estimate. 858 if (BW_VALUE(&bbr->bbr_pacing_rate) 859 < BW_VALUE(&bw) * kStartupGrowthTarget) 860 bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupGrowthTarget); 861 return; 862 } 863 864 // Do not decrease the pacing rate during startup. 865 if (BW_VALUE(&bbr->bbr_pacing_rate) < BW_VALUE(&target_rate)) 866 bbr->bbr_pacing_rate = target_rate; 867} 868 869 870/* See BbrSender::CalculateCongestionWindow */ 871static void 872calculate_cwnd (struct lsquic_bbr *bbr, uint64_t bytes_acked, 873 uint64_t excess_acked) 874{ 875 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 876 return; 877 878 uint64_t target_window = get_target_cwnd(bbr, bbr->bbr_cwnd_gain); 879 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 880 // Add the max recently measured ack aggregation to CWND. 881 target_window += minmax_get(&bbr->bbr_max_ack_height); 882 else if (bbr->bbr_flags & BBR_FLAG_ENABLE_ACK_AGG_IN_STARTUP) 883 // Add the most recent excess acked. Because CWND never decreases in 884 // STARTUP, this will automatically create a very localized max filter. 885 target_window += excess_acked; 886 887 // Instead of immediately setting the target CWND as the new one, BBR grows 888 // the CWND towards |target_window| by only increasing it |bytes_acked| at a 889 // time. 
890 const int add_bytes_acked = 891 !FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery || !in_recovery(bbr); 892 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 893 bbr->bbr_cwnd = MIN(target_window, bbr->bbr_cwnd + bytes_acked); 894 else if (add_bytes_acked && 895 (bbr->bbr_cwnd_gain < target_window || 896 lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler) 897 < bbr->bbr_init_cwnd)) 898 // If the connection is not yet out of startup phase, do not decrease 899 // the window. 900 bbr->bbr_cwnd += bytes_acked; 901 902 // Enforce the limits on the congestion window. 903 if (bbr->bbr_cwnd < bbr->bbr_min_cwnd) 904 bbr->bbr_cwnd = bbr->bbr_min_cwnd; 905 else if (bbr->bbr_cwnd > bbr->bbr_max_cwnd) 906 { 907 LSQ_DEBUG("exceed max cwnd"); 908 bbr->bbr_cwnd = bbr->bbr_max_cwnd; 909 } 910} 911 912 913/* See BbrSender::CalculateRecoveryWindow */ 914static void 915calculate_recovery_window (struct lsquic_bbr *bbr, uint64_t bytes_acked, 916 uint64_t bytes_lost, uint64_t bytes_in_flight) 917{ 918 if ((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP) 919 && bbr->bbr_mode == BBR_MODE_STARTUP) 920 return; 921 922 if (bbr->bbr_recovery_state == BBR_RS_NOT_IN_RECOVERY) 923 return; 924 925 // Set up the initial recovery window. 926 if (bbr->bbr_recovery_window == 0) 927 { 928 bbr->bbr_recovery_window = bytes_in_flight + bytes_acked; 929 bbr->bbr_recovery_window = MAX(bbr->bbr_min_cwnd, 930 bbr->bbr_recovery_window); 931 return; 932 } 933 934 // Remove losses from the recovery window, while accounting for a potential 935 // integer underflow. 936 if (bbr->bbr_recovery_window >= bytes_lost) 937 bbr->bbr_recovery_window -= bytes_lost; 938 else 939 bbr->bbr_recovery_window = kMaxSegmentSize; 940 941 // In CONSERVATION mode, just subtracting losses is sufficient. In GROWTH, 942 // release additional |bytes_acked| to achieve a slow-start-like behavior. 943 if (bbr->bbr_recovery_state == BBR_RS_GROWTH) 944 bbr->bbr_recovery_window += bytes_acked; 945 946 // Sanity checks. 
Ensure that we always allow to send at least an MSS or 947 // |bytes_acked| in response, whichever is larger. 948 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, 949 bytes_in_flight + bytes_acked); 950 if (FLAG_quic_bbr_one_mss_conservation) 951 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, 952 bytes_in_flight + kMaxSegmentSize); 953 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, bbr->bbr_min_cwnd); 954} 955 956 957static void 958lsquic_bbr_end_ack (void *cong_ctl, uint64_t in_flight) 959{ 960 struct lsquic_bbr *const bbr = cong_ctl; 961 int is_round_start, min_rtt_expired; 962 uint64_t bytes_acked, excess_acked, bytes_lost; 963 964 assert(bbr->bbr_flags & BBR_FLAG_IN_ACK); 965 bbr->bbr_flags &= ~BBR_FLAG_IN_ACK; 966 967 LSQ_DEBUG("end_ack; mode: %s; in_flight: %"PRIu64, mode2str[bbr->bbr_mode], 968 in_flight); 969 970 bytes_acked = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler) 971 - bbr->bbr_ack_state.total_bytes_acked_before; 972 if (bbr->bbr_ack_state.acked_bytes) 973 { 974 is_round_start = bbr->bbr_ack_state.max_packno 975 > bbr->bbr_current_round_trip_end 976 || !is_valid_packno(bbr->bbr_current_round_trip_end); 977 if (is_round_start) 978 { 979 ++bbr->bbr_round_count; 980 bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno; 981 LSQ_DEBUG("up round count to %"PRIu64"; new rt end: %"PRIu64, 982 bbr->bbr_round_count, bbr->bbr_current_round_trip_end); 983 } 984 min_rtt_expired = update_bandwidth_and_min_rtt(bbr); 985 update_recovery_state(bbr, is_round_start); 986 excess_acked = update_ack_aggregation_bytes(bbr, bytes_acked); 987 } 988 else 989 { 990 is_round_start = 0; 991 min_rtt_expired = 0; 992 excess_acked = 0; 993 } 994 995 if (bbr->bbr_mode == BBR_MODE_PROBE_BW) 996 update_gain_cycle_phase(bbr, in_flight); 997 998 if (is_round_start && !(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 999 check_if_full_bw_reached(bbr); 1000 1001 maybe_exit_startup_or_drain(bbr, bbr->bbr_ack_state.ack_time, in_flight); 1002 1003 
maybe_enter_or_exit_probe_rtt(bbr, bbr->bbr_ack_state.ack_time, 1004 is_round_start, min_rtt_expired, in_flight); 1005 1006 // Calculate number of packets acked and lost. 1007 bytes_lost = bbr->bbr_ack_state.lost_bytes; 1008 1009 // After the model is updated, recalculate the pacing rate and congestion 1010 // window. 1011 calculate_pacing_rate(bbr); 1012 calculate_cwnd(bbr, bytes_acked, excess_acked); 1013 calculate_recovery_window(bbr, bytes_acked, bytes_lost, in_flight); 1014 1015 /* We don't need to clean up BW sampler */ 1016} 1017 1018 1019static void 1020lsquic_bbr_cleanup (void *cong_ctl) 1021{ 1022 struct lsquic_bbr *const bbr = cong_ctl; 1023 1024 LSQ_DEBUG("cleanup"); 1025} 1026 1027 1028static void 1029lsquic_bbr_loss (void *cong_ctl) { /* Noop */ } 1030 1031 1032static void 1033lsquic_bbr_timeout (void *cong_ctl) { /* Noop */ } 1034 1035 1036const struct cong_ctl_if lsquic_cong_bbr_if = 1037{ 1038 .cci_ack = lsquic_bbr_ack, 1039 .cci_begin_ack = lsquic_bbr_begin_ack, 1040 .cci_end_ack = lsquic_bbr_end_ack, 1041 .cci_cleanup = lsquic_bbr_cleanup, 1042 .cci_get_cwnd = lsquic_bbr_get_cwnd, 1043 .cci_init = lsquic_bbr_init, 1044 .cci_pacing_rate = lsquic_bbr_pacing_rate, 1045 .cci_loss = lsquic_bbr_loss, 1046 .cci_lost = lsquic_bbr_lost, 1047 .cci_timeout = lsquic_bbr_timeout, 1048 .cci_sent = lsquic_bbr_sent, 1049 .cci_was_quiet = lsquic_bbr_was_quiet, 1050}; 1051