lsquic_bbr.c revision a74702c6
1/* Copyright (c) 2017 - 2022 LiteSpeed Technologies Inc. See LICENSE. */ 2// Copyright 2016 The Chromium Authors. All rights reserved. 3// Use of this source code is governed by a BSD-style license that can be 4// found in the LICENSE.chrome file. 5 6#include <assert.h> 7#include <inttypes.h> 8#include <stdint.h> 9#include <stdlib.h> 10#include <string.h> 11#include <sys/queue.h> 12 13#include "lsquic.h" 14#include "lsquic_int_types.h" 15#include "lsquic_cong_ctl.h" 16#include "lsquic_minmax.h" 17#include "lsquic_packet_common.h" 18#include "lsquic_packet_out.h" 19#include "lsquic_bw_sampler.h" 20#include "lsquic_bbr.h" 21#include "lsquic_hash.h" 22#include "lsquic_conn.h" 23#include "lsquic_sfcw.h" 24#include "lsquic_conn_flow.h" 25#include "lsquic_varint.h" 26#include "lsquic_hq.h" 27#include "lsquic_stream.h" 28#include "lsquic_rtt.h" 29#include "lsquic_conn_public.h" 30#include "lsquic_util.h" 31#include "lsquic_malo.h" 32#include "lsquic_crand.h" 33#include "lsquic_mm.h" 34#include "lsquic_engine_public.h" 35 36#define LSQUIC_LOGGER_MODULE LSQLM_BBR 37#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(bbr->bbr_conn_pub->lconn) 38#include "lsquic_logger.h" 39 40#define MIN(a, b) ((a) < (b) ? (a) : (b)) 41#define MAX(a, b) ((a) > (b) ? (a) : (b)) 42 43#define ms(val_) ((val_) * 1000) 44#define sec(val_) ((val_) * 1000 * 1000) 45 46// Default maximum packet size used in the Linux TCP implementation. 47// Used in QUIC for congestion window computations in bytes. 48#define kDefaultTCPMSS 1460 49#define kMaxSegmentSize kDefaultTCPMSS 50 51// Constants based on TCP defaults. 52// The minimum CWND to ensure delayed acks don't reduce bandwidth measurements. 53// Does not inflate the pacing rate. 54#define kDefaultMinimumCongestionWindow (4 * kDefaultTCPMSS) 55 56// The gain used for the STARTUP, equal to 2/ln(2). 57#define kDefaultHighGain 2.885f 58 59// The newly derived gain for STARTUP, equal to 4 * ln(2) 60#define kDerivedHighGain 2.773f 61 62// The newly derived CWND gain for STARTUP, 2. 63#define kDerivedHighCWNDGain 2.0f 64 65// The gain used in STARTUP after loss has been detected. 66// 1.5 is enough to allow for 25% exogenous loss and still observe a 25% growth 67// in measured bandwidth. 68#define kStartupAfterLossGain 1.5f 69 70// We match SPDY's use of 32 (since we'd compete with SPDY). 71#define kInitialCongestionWindow 32 72 73/* Taken from send_algorithm_interface.h */ 74#define kDefaultMaxCongestionWindowPackets 2000 75 76// The time after which the current min_rtt value expires. 77#define kMinRttExpiry sec(10) 78 79// Coefficient to determine if a new RTT is sufficiently similar to min_rtt that 80// we don't need to enter PROBE_RTT. 81#define kSimilarMinRttThreshold 1.125f 82 83// If the bandwidth does not increase by the factor of |kStartupGrowthTarget| 84// within |kRoundTripsWithoutGrowthBeforeExitingStartup| rounds, the connection 85// will exit the STARTUP mode. 86#define kStartupGrowthTarget 1.25 87 88#define kRoundTripsWithoutGrowthBeforeExitingStartup 3 89 90#define startup_rate_reduction_multiplier_ 0 91 92// The cycle of gains used during the PROBE_BW stage. 93static const float kPacingGain[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1}; 94 95// The length of the gain cycle. 96static const size_t kGainCycleLength = sizeof(kPacingGain) 97 / sizeof(kPacingGain[0]); 98 99// Coefficient of target congestion window to use when basing PROBE_RTT on BDP. 100#define kModerateProbeRttMultiplier 0.75 101 102// The maximum packet size of any QUIC packet over IPv6, based on ethernet's max 103// size, minus the IP and UDP headers. IPv6 has a 40 byte header, UDP adds an 104// additional 8 bytes. This is a total overhead of 48 bytes. Ethernet's 105// max packet size is 1500 bytes, 1500 - 48 = 1452. 106#define kMaxV6PacketSize 1452 107// The maximum packet size of any QUIC packet over IPv4. 108// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472. 109#define kMaxV4PacketSize 1472 110// The maximum incoming packet size allowed. 111#define kMaxIncomingPacketSize kMaxV4PacketSize 112// The maximum outgoing packet size allowed. 113#define kMaxOutgoingPacketSize kMaxV6PacketSize 114 115// The minimum time the connection can spend in PROBE_RTT mode. 116#define kProbeRttTime ms(200) 117 118/* FLAG* are from net/quic/quic_flags_list.h */ 119 120// When in STARTUP and recovery, do not add bytes_acked to QUIC BBR's CWND in 121// CalculateCongestionWindow() 122#define FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery 0 123 124// When true, ensure BBR allows at least one MSS to be sent in response to an 125// ACK in packet conservation. 126#define FLAG_quic_bbr_one_mss_conservation 0 127 128/* From net/quic/quic_flags_list.h */ 129#define kCwndGain 2.0 130 131 132static uint64_t lsquic_bbr_get_cwnd (void *); 133 134 135static const char *const mode2str[] = 136{ 137 [BBR_MODE_STARTUP] = "STARTUP", 138 [BBR_MODE_DRAIN] = "DRAIN", 139 [BBR_MODE_PROBE_BW] = "PROBE_BW", 140 [BBR_MODE_PROBE_RTT] = "PROBE_RTT", 141}; 142 143 144static void 145set_mode (struct lsquic_bbr *bbr, enum bbr_mode mode) 146{ 147 if (bbr->bbr_mode != mode) 148 { 149 LSQ_DEBUG("mode change %s -> %s", mode2str[bbr->bbr_mode], 150 mode2str[mode]); 151 bbr->bbr_mode = mode; 152 } 153 else 154 LSQ_DEBUG("mode remains %s", mode2str[mode]); 155} 156 157 158static void 159set_startup_values (struct lsquic_bbr *bbr) 160{ 161 bbr->bbr_pacing_gain = bbr->bbr_high_gain; 162 bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain; 163} 164 165 166static void 167init_bbr (struct lsquic_bbr *bbr) 168{ 169 bbr->bbr_mode = BBR_MODE_STARTUP; 170 bbr->bbr_round_count = 0; 171 minmax_init(&bbr->bbr_max_bandwidth, 10); 172 minmax_init(&bbr->bbr_max_ack_height, 10); 173 bbr->bbr_aggregation_epoch_bytes = 0; 174 bbr->bbr_aggregation_epoch_start_time = 0; 175 bbr->bbr_min_rtt = 0; 176 bbr->bbr_min_rtt_timestamp = 0; 177 bbr->bbr_init_cwnd = kInitialCongestionWindow * kDefaultTCPMSS; 178 bbr->bbr_cwnd = kInitialCongestionWindow * kDefaultTCPMSS; 179 bbr->bbr_max_cwnd = kDefaultMaxCongestionWindowPackets * kDefaultTCPMSS; 180 bbr->bbr_min_cwnd = kDefaultMinimumCongestionWindow; 181 bbr->bbr_high_gain = kDefaultHighGain; 182 bbr->bbr_high_cwnd_gain = kDefaultHighGain; 183 bbr->bbr_drain_gain = 1.0f / kDefaultHighGain; 184 bbr->bbr_pacing_rate = BW_ZERO(); 185 bbr->bbr_pacing_gain = 1.0; 186 bbr->bbr_cwnd_gain = 1.0; 187 bbr->bbr_num_startup_rtts = kRoundTripsWithoutGrowthBeforeExitingStartup; 188 bbr->bbr_flags &= ~BBR_FLAG_EXIT_STARTUP_ON_LOSS; 189 bbr->bbr_cycle_current_offset = 0; 190 bbr->bbr_last_cycle_start = 0; 191 bbr->bbr_flags &= ~BBR_FLAG_IS_AT_FULL_BANDWIDTH; 192 bbr->bbr_round_wo_bw_gain = 0; 193 bbr->bbr_bw_at_last_round = BW_ZERO(); 194 bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE; 195 bbr->bbr_exit_probe_rtt_at = 0; 196 bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED; 197 bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 198 bbr->bbr_flags &= ~BBR_FLAG_HAS_NON_APP_LIMITED; 199 bbr->bbr_flags &= ~BBR_FLAG_FLEXIBLE_APP_LIMITED; 200 set_startup_values(bbr); 201} 202 203 204static void 205lsquic_bbr_init (void *cong_ctl, const struct lsquic_conn_public *conn_pub, 206 enum quic_ft_bit retx_frames) 207{ 208 struct lsquic_bbr *const bbr = cong_ctl; 209 bbr->bbr_conn_pub = conn_pub; 210 lsquic_bw_sampler_init(&bbr->bbr_bw_sampler, conn_pub->lconn, retx_frames); 211 bbr->bbr_rtt_stats = &conn_pub->rtt_stats; 212 213 init_bbr(bbr); 214 215 LSQ_DEBUG("initialized"); 216} 217 218 219static void 220lsquic_bbr_reinit (void *cong_ctl) 221{ 222 struct lsquic_bbr *const bbr = cong_ctl; 223 224 init_bbr(bbr); 225 226 LSQ_DEBUG("re-initialized"); 227} 228 229 230static lsquic_time_t 231get_min_rtt (const struct lsquic_bbr *bbr) 232{ 233 lsquic_time_t min_rtt; 234 235 if (bbr->bbr_min_rtt) 236 return bbr->bbr_min_rtt; 237 else 238 { 239 min_rtt = lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats); 240 if (min_rtt == 0) 241 min_rtt = 25000; 242 return min_rtt; 243 } 244} 245 246 247static uint64_t 248lsquic_bbr_pacing_rate (void *cong_ctl, int in_recovery) 249{ 250 struct lsquic_bbr *const bbr = cong_ctl; 251 lsquic_time_t min_rtt; 252 struct bandwidth bw; 253 254 if (!BW_IS_ZERO(&bbr->bbr_pacing_rate)) 255 bw = bbr->bbr_pacing_rate; 256 else 257 { 258 min_rtt = get_min_rtt(bbr); 259 bw = BW_FROM_BYTES_AND_DELTA(bbr->bbr_init_cwnd, min_rtt); 260 bw = BW_TIMES(&bw, bbr->bbr_high_cwnd_gain); 261 } 262 263 return BW_TO_BYTES_PER_SEC(&bw); 264} 265 266 267/* BbrSender::GetTargetCongestionWindow */ 268static uint64_t 269get_target_cwnd (const struct lsquic_bbr *bbr, float gain) 270{ 271 struct bandwidth bw; 272 uint64_t bdp, cwnd; 273 274 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 275 bdp = get_min_rtt(bbr) * BW_TO_BYTES_PER_SEC(&bw) / 1000000; 276 cwnd = gain * bdp; 277 278 // BDP estimate will be zero if no bandwidth samples are available yet. 279 if (cwnd == 0) 280 cwnd = gain * bbr->bbr_init_cwnd; 281 282 return MAX(cwnd, bbr->bbr_min_cwnd); 283} 284 285 286/* See BbrSender::IsPipeSufficientlyFull */ 287static int 288is_pipe_sufficiently_full (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 289{ 290 // See if we need more bytes in flight to see more bandwidth. 291 if (bbr->bbr_mode == BBR_MODE_STARTUP) 292 // STARTUP exits if it doesn't observe a 25% bandwidth increase, so 293 // the CWND must be more than 25% above the target. 294 return bytes_in_flight >= get_target_cwnd(bbr, 1.5); 295 else if (bbr->bbr_pacing_gain > 1) 296 // Super-unity PROBE_BW doesn't exit until 1.25 * BDP is achieved. 297 return bytes_in_flight >= get_target_cwnd(bbr, bbr->bbr_pacing_gain); 298 else 299 // If bytes_in_flight are above the target congestion window, it should 300 // be possible to observe the same or more bandwidth if it's available. 301 return bytes_in_flight >= get_target_cwnd(bbr, 1.1f); 302} 303 304 305static void 306lsquic_bbr_was_quiet (void *cong_ctl, lsquic_time_t now, uint64_t in_flight) 307{ 308 struct lsquic_bbr *const bbr = cong_ctl; 309 LSQ_DEBUG("was quiet"); /* Do nothing */ 310} 311 312 313/* See BbrSender::OnApplicationLimited */ 314static void 315bbr_app_limited (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 316{ 317 uint64_t cwnd; 318 319 cwnd = lsquic_bbr_get_cwnd(bbr); 320 if (bytes_in_flight >= cwnd) 321 return; 322 if ((bbr->bbr_flags & BBR_FLAG_FLEXIBLE_APP_LIMITED) 323 && is_pipe_sufficiently_full(bbr, bytes_in_flight)) 324 return; 325 326 bbr->bbr_flags |= BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT; 327 lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler); 328 LSQ_DEBUG("becoming application-limited. Last sent packet: %"PRIu64"; " 329 "CWND: %"PRIu64, bbr->bbr_last_sent_packno, cwnd); 330} 331 332 333static void 334lsquic_bbr_ack (void *cong_ctl, struct lsquic_packet_out *packet_out, 335 unsigned packet_sz, lsquic_time_t now_time, int app_limited) 336{ 337 struct lsquic_bbr *const bbr = cong_ctl; 338 struct bw_sample *sample; 339 340 assert(bbr->bbr_flags & BBR_FLAG_IN_ACK); 341 342 sample = lsquic_bw_sampler_packet_acked(&bbr->bbr_bw_sampler, packet_out, 343 bbr->bbr_ack_state.ack_time); 344 if (sample) 345 TAILQ_INSERT_TAIL(&bbr->bbr_ack_state.samples, sample, next); 346 347 if (!is_valid_packno(bbr->bbr_ack_state.max_packno) 348 /* Packet ordering is checked for, and warned about, in 349 * lsquic_senhist_add(). 350 */ 351 || packet_out->po_packno > bbr->bbr_ack_state.max_packno) 352 bbr->bbr_ack_state.max_packno = packet_out->po_packno; 353 bbr->bbr_ack_state.acked_bytes += packet_sz; 354} 355 356 357static void 358lsquic_bbr_sent (void *cong_ctl, struct lsquic_packet_out *packet_out, 359 uint64_t in_flight, int app_limited) 360{ 361 struct lsquic_bbr *const bbr = cong_ctl; 362 363 if (!(packet_out->po_flags & PO_MINI)) 364 lsquic_bw_sampler_packet_sent(&bbr->bbr_bw_sampler, packet_out, 365 in_flight); 366 367 /* Obviously we make an assumption that sent packet number are always 368 * increasing. 369 */ 370 bbr->bbr_last_sent_packno = packet_out->po_packno; 371 372 if (app_limited) 373 bbr_app_limited(bbr, in_flight); 374} 375 376 377static void 378lsquic_bbr_lost (void *cong_ctl, struct lsquic_packet_out *packet_out, 379 unsigned packet_sz) 380{ 381 struct lsquic_bbr *const bbr = cong_ctl; 382 383 lsquic_bw_sampler_packet_lost(&bbr->bbr_bw_sampler, packet_out); 384 bbr->bbr_ack_state.has_losses = 1; 385 bbr->bbr_ack_state.lost_bytes += packet_sz; 386} 387 388 389static void 390lsquic_bbr_begin_ack (void *cong_ctl, lsquic_time_t ack_time, uint64_t in_flight) 391{ 392 struct lsquic_bbr *const bbr = cong_ctl; 393 394 assert(!(bbr->bbr_flags & BBR_FLAG_IN_ACK)); 395 bbr->bbr_flags |= BBR_FLAG_IN_ACK; 396 memset(&bbr->bbr_ack_state, 0, sizeof(bbr->bbr_ack_state)); 397 TAILQ_INIT(&bbr->bbr_ack_state.samples); 398 bbr->bbr_ack_state.ack_time = ack_time; 399 bbr->bbr_ack_state.max_packno = UINT64_MAX; 400 bbr->bbr_ack_state.in_flight = in_flight; 401 bbr->bbr_ack_state.total_bytes_acked_before 402 = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler); 403} 404 405 406/* Based on BbrSender::ShouldExtendMinRttExpiry() */ 407static int 408should_extend_min_rtt_expiry (const struct lsquic_bbr *bbr) 409{ 410 int increased_since_last_probe; 411 412 if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 413 |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED)) 414 == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 415 |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED)) 416 // Extend the current min_rtt if we've been app limited recently. 417 return 1; 418 419 increased_since_last_probe = bbr->bbr_min_rtt_since_last_probe 420 > bbr->bbr_min_rtt * kSimilarMinRttThreshold; 421 if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 422 |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT)) 423 == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 424 |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT) 425 && !increased_since_last_probe) 426 // Extend the current min_rtt if we've been app limited recently and an 427 // rtt has been measured in that time that's less than 12.5% more than 428 // the current min_rtt. 429 return 1; 430 431 return 0; 432} 433 434 435/* Based on BbrSender::UpdateBandwidthAndMinRtt */ 436/* Returns true if min RTT expired, false otherwise */ 437static int 438update_bandwidth_and_min_rtt (struct lsquic_bbr *bbr) 439{ 440 struct bw_sample *sample, *next_sample; 441 uint64_t sample_min_rtt; 442 int min_rtt_expired; 443 444 sample_min_rtt = UINT64_MAX; 445 for (sample = TAILQ_FIRST(&bbr->bbr_ack_state.samples); sample; 446 sample = next_sample) 447 { 448 next_sample = TAILQ_NEXT(sample, next); 449 450 if (sample->is_app_limited) 451 bbr->bbr_flags |= BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 452 else 453 { 454 bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 455 bbr->bbr_flags |= BBR_FLAG_HAS_NON_APP_LIMITED; 456 } 457 458 if (sample_min_rtt == UINT64_MAX || sample->rtt < sample_min_rtt) 459 sample_min_rtt = sample->rtt; 460 461 if (!sample->is_app_limited 462 || BW_VALUE(&sample->bandwidth) 463 > minmax_get(&bbr->bbr_max_bandwidth)) 464 minmax_upmax(&bbr->bbr_max_bandwidth, bbr->bbr_round_count, 465 BW_VALUE(&sample->bandwidth)); 466 467 lsquic_malo_put(sample); 468 } 469 470 if (sample_min_rtt == UINT64_MAX) 471 return 0; 472 473 bbr->bbr_min_rtt_since_last_probe 474 = MIN(bbr->bbr_min_rtt_since_last_probe, sample_min_rtt); 475 476 min_rtt_expired = bbr->bbr_min_rtt != 0 && (bbr->bbr_ack_state.ack_time 477 > bbr->bbr_min_rtt_timestamp + kMinRttExpiry); 478 if (min_rtt_expired || sample_min_rtt < bbr->bbr_min_rtt 479 || 0 == bbr->bbr_min_rtt) 480 { 481 if (min_rtt_expired && should_extend_min_rtt_expiry(bbr)) 482 { 483 LSQ_DEBUG("min rtt expiration extended, stay at: %"PRIu64, 484 bbr->bbr_min_rtt); 485 min_rtt_expired = 0; 486 } 487 else 488 { 489 LSQ_DEBUG("min rtt updated: %"PRIu64" -> %"PRIu64, 490 bbr->bbr_min_rtt, sample_min_rtt); 491 bbr->bbr_min_rtt = sample_min_rtt; 492 } 493 bbr->bbr_min_rtt_timestamp = bbr->bbr_ack_state.ack_time; 494 bbr->bbr_min_rtt_since_last_probe = UINT64_MAX; 495 bbr->bbr_flags &= ~BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT; 496 } 497 498 return min_rtt_expired; 499} 500 501 502/* Based on BbrSender::UpdateRecoveryState() */ 503static void 504update_recovery_state (struct lsquic_bbr *bbr, int is_round_start) 505{ 506 // Exit recovery when there are no losses for a round. 507 if (bbr->bbr_ack_state.has_losses) 508 bbr->bbr_end_recovery_at = bbr->bbr_last_sent_packno; 509 510 switch (bbr->bbr_recovery_state) 511 { 512 case BBR_RS_NOT_IN_RECOVERY: 513 // Enter conservation on the first loss. 514 if (bbr->bbr_ack_state.has_losses) 515 { 516 bbr->bbr_recovery_state = BBR_RS_CONSERVATION; 517 // This will cause the |bbr_recovery_window| to be set to the 518 // correct value in CalculateRecoveryWindow(). 519 bbr->bbr_recovery_window = 0; 520 // Since the conservation phase is meant to be lasting for a whole 521 // round, extend the current round as if it were started right now. 522 bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno; 523 } 524 break; 525 case BBR_RS_CONSERVATION: 526 if (is_round_start) 527 bbr->bbr_recovery_state = BBR_RS_GROWTH; 528 /* Fall-through */ 529 case BBR_RS_GROWTH: 530 // Exit recovery if appropriate. 531 if (!bbr->bbr_ack_state.has_losses 532 && bbr->bbr_ack_state.max_packno > bbr->bbr_end_recovery_at) 533 bbr->bbr_recovery_state = BBR_RS_NOT_IN_RECOVERY; 534 break; 535 } 536} 537 538 539static uint64_t 540update_ack_aggregation_bytes (struct lsquic_bbr *bbr, 541 uint64_t newly_acked_bytes) 542{ 543 const lsquic_time_t ack_time = bbr->bbr_ack_state.ack_time; 544 uint64_t expected_bytes_acked, diff; 545 546 // Compute how many bytes are expected to be delivered, assuming max 547 // bandwidth is correct. 548 expected_bytes_acked = minmax_get(&bbr->bbr_max_bandwidth) 549 * (ack_time - bbr->bbr_aggregation_epoch_start_time); 550 551 // Reset the current aggregation epoch as soon as the ack arrival rate is 552 // less than or equal to the max bandwidth. 553 if (bbr->bbr_aggregation_epoch_bytes <= expected_bytes_acked) 554 { 555 // Reset to start measuring a new aggregation epoch. 556 bbr->bbr_aggregation_epoch_bytes = newly_acked_bytes; 557 bbr->bbr_aggregation_epoch_start_time = ack_time; 558 return 0; 559 } 560 561 // Compute how many extra bytes were delivered vs max bandwidth. 562 // Include the bytes most recently acknowledged to account for stretch acks. 563 bbr->bbr_aggregation_epoch_bytes += newly_acked_bytes; 564 diff = bbr->bbr_aggregation_epoch_bytes - expected_bytes_acked; 565 minmax_upmax(&bbr->bbr_max_ack_height, bbr->bbr_round_count, diff); 566 return diff; 567} 568 569 570/* See BbrSender::UpdateGainCyclePhase() */ 571static void 572update_gain_cycle_phase (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 573{ 574 const uint64_t prior_in_flight = bbr->bbr_ack_state.in_flight; 575 const lsquic_time_t now = bbr->bbr_ack_state.ack_time; 576 // In most cases, the cycle is advanced after an RTT passes. 577 int should_advance_gain_cycling 578 = now - bbr->bbr_last_cycle_start > get_min_rtt(bbr); 579 580 // If the pacing gain is above 1.0, the connection is trying to probe the 581 // bandwidth by increasing the number of bytes in flight to at least 582 // pacing_gain * BDP. Make sure that it actually reaches the target, as 583 // long as there are no losses suggesting that the buffers are not able to 584 // hold that much. 585 if (bbr->bbr_pacing_gain > 1.0 586 && !bbr->bbr_ack_state.has_losses 587 && prior_in_flight < get_target_cwnd(bbr, bbr->bbr_pacing_gain)) 588 should_advance_gain_cycling = 0; 589 590 /* Several optimizations are possible here: "else if" instead of "if", as 591 * well as not calling get_target_cwnd() if `should_advance_gain_cycling' 592 * is already set to the target value. 593 */ 594 595 // If pacing gain is below 1.0, the connection is trying to drain the extra 596 // queue which could have been incurred by probing prior to it. If the 597 // number of bytes in flight falls down to the estimated BDP value earlier, 598 // conclude that the queue has been successfully drained and exit this cycle 599 // early. 600 if (bbr->bbr_pacing_gain < 1.0 601 && bytes_in_flight <= get_target_cwnd(bbr, 1)) 602 should_advance_gain_cycling = 1; 603 604 if (should_advance_gain_cycling) 605 { 606 bbr->bbr_cycle_current_offset = 607 (bbr->bbr_cycle_current_offset + 1) % kGainCycleLength; 608 bbr->bbr_last_cycle_start = now; 609 // Stay in low gain mode until the target BDP is hit. Low gain mode 610 // will be exited immediately when the target BDP is achieved. 611 if ((bbr->bbr_flags & BBR_FLAG_DRAIN_TO_TARGET) 612 && bbr->bbr_pacing_gain < 1 613 && kPacingGain[bbr->bbr_cycle_current_offset] == 1 614 && bytes_in_flight > get_target_cwnd(bbr, 1)) 615 return; 616 bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset]; 617 LSQ_DEBUG("advanced gain cycle, pacing gain set to %.2f", 618 bbr->bbr_pacing_gain); 619 } 620} 621 622 623/* BbrSender::InRecovery() */ 624static int 625in_recovery (const struct lsquic_bbr *bbr) 626{ 627 return bbr->bbr_recovery_state != BBR_RS_NOT_IN_RECOVERY; 628} 629 630 631/* See BbrSender::CheckIfFullBandwidthReached() */ 632static void 633check_if_full_bw_reached (struct lsquic_bbr *bbr) 634{ 635 struct bandwidth target, bw; 636 637 if (bbr->bbr_flags & BBR_FLAG_LAST_SAMPLE_APP_LIMITED) 638 { 639 LSQ_DEBUG("last sample app limited: full BW not reached"); 640 return; 641 } 642 643 target = BW_TIMES(&bbr->bbr_bw_at_last_round, kStartupGrowthTarget); 644 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 645 if (BW_VALUE(&bw) >= BW_VALUE(&target)) 646 { 647 bbr->bbr_bw_at_last_round = bw; 648 bbr->bbr_round_wo_bw_gain = 0; 649 if (bbr->bbr_flags & BBR_FLAG_EXPIRE_ACK_AGG_IN_STARTUP) 650 // Expire old excess delivery measurements now that bandwidth 651 // increased. 652 minmax_reset(&bbr->bbr_max_ack_height, 653 ((struct minmax_sample) { bbr->bbr_round_count, 0, })); 654 LSQ_DEBUG("BW estimate %"PRIu64"bps greater than or equal to target " 655 "%"PRIu64"bps: full BW not reached", 656 BW_VALUE(&bw), BW_VALUE(&target)); 657 return; 658 } 659 660 ++bbr->bbr_round_wo_bw_gain; 661 if ((bbr->bbr_round_wo_bw_gain >= bbr->bbr_num_startup_rtts) 662 || ((bbr->bbr_flags & BBR_FLAG_EXIT_STARTUP_ON_LOSS) 663 && in_recovery(bbr))) 664 { 665 assert(bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED); /* DCHECK */ 666 bbr->bbr_flags |= BBR_FLAG_IS_AT_FULL_BANDWIDTH; 667 LSQ_DEBUG("reached full BW"); 668 } 669 else 670 LSQ_DEBUG("rounds w/o gain: %u, full BW not reached", 671 bbr->bbr_round_wo_bw_gain); 672} 673 674 675/* See BbrSender::OnExitStartup */ 676static void 677on_exit_startup (struct lsquic_bbr *bbr, lsquic_time_t now) 678{ 679 assert(bbr->bbr_mode == BBR_MODE_STARTUP); 680 /* Apparently this method is just to update stats, something that we 681 * don't do yet. 682 */ 683} 684 685 686/* See BbrSender::EnterProbeBandwidthMode */ 687static void 688enter_probe_bw_mode (struct lsquic_bbr *bbr, lsquic_time_t now) 689{ 690 uint8_t rand; 691 692 set_mode(bbr, BBR_MODE_PROBE_BW); 693 bbr->bbr_cwnd_gain = kCwndGain; 694 695 // Pick a random offset for the gain cycle out of {0, 2..7} range. 1 is 696 // excluded because in that case increased gain and decreased gain would not 697 // follow each other. 698 rand = lsquic_crand_get_byte(bbr->bbr_conn_pub->enpub->enp_crand); 699 bbr->bbr_cycle_current_offset = rand % (kGainCycleLength - 1); 700 if (bbr->bbr_cycle_current_offset >= 1) 701 ++bbr->bbr_cycle_current_offset; 702 703 bbr->bbr_last_cycle_start = now; 704 bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset]; 705} 706 707 708/* See BbrSender::EnterStartupMode */ 709static void 710enter_startup_mode (struct lsquic_bbr *bbr, lsquic_time_t now) 711{ 712 set_mode(bbr, BBR_MODE_STARTUP); 713 set_startup_values(bbr); 714} 715 716 717/* See BbrSender::MaybeExitStartupOrDrain() */ 718static void 719maybe_exit_startup_or_drain (struct lsquic_bbr *bbr, lsquic_time_t now, 720 uint64_t bytes_in_flight) 721{ 722 uint64_t target_cwnd; 723 724 if (bbr->bbr_mode == BBR_MODE_STARTUP 725 && (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 726 { 727 on_exit_startup(bbr, now); 728 set_mode(bbr, BBR_MODE_DRAIN); 729 bbr->bbr_pacing_gain = bbr->bbr_drain_gain; 730 bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain; 731 } 732 733 if (bbr->bbr_mode == BBR_MODE_DRAIN) 734 { 735 target_cwnd = get_target_cwnd(bbr, 1); 736 LSQ_DEBUG("%s: bytes in flight: %"PRIu64"; target cwnd: %"PRIu64, 737 __func__, bytes_in_flight, target_cwnd); 738 if (bytes_in_flight <= target_cwnd) 739 enter_probe_bw_mode(bbr, now); 740 } 741} 742 743 744static int 745in_slow_start (const struct lsquic_bbr *bbr) 746{ 747 return bbr->bbr_mode == BBR_MODE_STARTUP; 748} 749 750 751/* See QuicByteCount BbrSender::ProbeRttCongestionWindow() */ 752static uint64_t 753get_probe_rtt_cwnd (const struct lsquic_bbr *bbr) 754{ 755 if (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_BASED_ON_BDP) 756 return get_target_cwnd(bbr, kModerateProbeRttMultiplier); 757 else 758 return bbr->bbr_min_cwnd; 759} 760 761 762static uint64_t 763lsquic_bbr_get_cwnd (void *cong_ctl) 764{ 765 struct lsquic_bbr *const bbr = cong_ctl; 766 uint64_t cwnd; 767 768 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 769 cwnd = get_probe_rtt_cwnd(bbr); 770 else if (in_recovery(bbr) && 771 !((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP) 772 && bbr->bbr_mode == BBR_MODE_STARTUP)) 773 cwnd = MIN(bbr->bbr_cwnd, bbr->bbr_recovery_window); 774 else 775 cwnd = bbr->bbr_cwnd; 776 777 return cwnd; 778} 779 780 781/* See BbrSender::MaybeEnterOrExitProbeRtt */ 782static void 783maybe_enter_or_exit_probe_rtt (struct lsquic_bbr *bbr, lsquic_time_t now, 784 int is_round_start, int min_rtt_expired, uint64_t bytes_in_flight) 785{ 786 if (min_rtt_expired 787 && !(bbr->bbr_flags & BBR_FLAG_EXITING_QUIESCENCE) 788 && bbr->bbr_mode != BBR_MODE_PROBE_RTT) 789 { 790 if (in_slow_start(bbr)) 791 on_exit_startup(bbr, now); 792 set_mode(bbr, BBR_MODE_PROBE_RTT); 793 bbr->bbr_pacing_gain = 1; 794 // Do not decide on the time to exit PROBE_RTT until the 795 // |bytes_in_flight| is at the target small value. 796 bbr->bbr_exit_probe_rtt_at = 0; 797 } 798 799 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 800 { 801 lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler); 802 LSQ_DEBUG("%s: exit probe at: %"PRIu64"; now: %"PRIu64 803 "; round start: %d; round passed: %d; rtt: %"PRIu64" usec", 804 __func__, bbr->bbr_exit_probe_rtt_at, now, is_round_start, 805 !!(bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED), 806 lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)); 807 if (bbr->bbr_exit_probe_rtt_at == 0) 808 { 809 // If the window has reached the appropriate size, schedule exiting 810 // PROBE_RTT. The CWND during PROBE_RTT is 811 // kMinimumCongestionWindow, but we allow an extra packet since QUIC 812 // checks CWND before sending a packet. 813 if (bytes_in_flight 814 < get_probe_rtt_cwnd(bbr) + kMaxOutgoingPacketSize) 815 { 816 bbr->bbr_exit_probe_rtt_at = now + kProbeRttTime; 817 bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED; 818 } 819 } 820 else 821 { 822 if (is_round_start) 823 bbr->bbr_flags |= BBR_FLAG_PROBE_RTT_ROUND_PASSED; 824 if (now >= bbr->bbr_exit_probe_rtt_at 825 && (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED)) 826 { 827 bbr->bbr_min_rtt_timestamp = now; 828 if (!(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 829 enter_startup_mode(bbr, now); 830 else 831 enter_probe_bw_mode(bbr, now); 832 } 833 } 834 } 835 836 bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE; 837} 838 839 840/* See BbrSender::CalculatePacingRate */ 841static void 842calculate_pacing_rate (struct lsquic_bbr *bbr) 843{ 844 struct bandwidth bw, target_rate; 845 846 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 847 if (BW_IS_ZERO(&bw)) 848 return; 849 850 LSQ_DEBUG("BW estimate: %"PRIu64, BW_VALUE(&bw)); 851 852 target_rate = BW_TIMES(&bw, bbr->bbr_pacing_gain); 853 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 854 { 855 bbr->bbr_pacing_rate = target_rate; 856 return; 857 } 858 859 // Pace at the rate of initial_window / RTT as soon as RTT measurements are 860 // available. 861 if (BW_IS_ZERO(&bbr->bbr_pacing_rate) 862 && 0 != lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)) 863 { 864 bbr->bbr_pacing_rate = BW_FROM_BYTES_AND_DELTA( 865 bbr->bbr_init_cwnd, 866 lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)); 867 return; 868 } 869 870 // Slow the pacing rate in STARTUP once loss has ever been detected. 871 const int has_ever_detected_loss = bbr->bbr_end_recovery_at != 0; 872 if (has_ever_detected_loss 873 && (bbr->bbr_flags & (BBR_FLAG_SLOWER_STARTUP 874 |BBR_FLAG_HAS_NON_APP_LIMITED)) 875 == (BBR_FLAG_SLOWER_STARTUP|BBR_FLAG_HAS_NON_APP_LIMITED)) 876 { 877 bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupAfterLossGain); 878 return; 879 } 880 881 // Slow the pacing rate in STARTUP by the bytes_lost / CWND. 882 if (startup_rate_reduction_multiplier_ != 0 883 && has_ever_detected_loss 884 && (bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED)) 885 { 886 bbr->bbr_pacing_rate = BW_TIMES(&target_rate, 887 (1 - (bbr->bbr_startup_bytes_lost 888 * startup_rate_reduction_multiplier_ * 1.0f 889 / bbr->bbr_cwnd_gain))); 890 // Ensure the pacing rate doesn't drop below the startup growth target 891 // times the bandwidth estimate. 892 if (BW_VALUE(&bbr->bbr_pacing_rate) 893 < BW_VALUE(&bw) * kStartupGrowthTarget) 894 bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupGrowthTarget); 895 return; 896 } 897 898 // Do not decrease the pacing rate during startup. 899 if (BW_VALUE(&bbr->bbr_pacing_rate) < BW_VALUE(&target_rate)) 900 bbr->bbr_pacing_rate = target_rate; 901} 902 903 904/* See BbrSender::CalculateCongestionWindow */ 905static void 906calculate_cwnd (struct lsquic_bbr *bbr, uint64_t bytes_acked, 907 uint64_t excess_acked) 908{ 909 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 910 return; 911 912 uint64_t target_window = get_target_cwnd(bbr, bbr->bbr_cwnd_gain); 913 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 914 // Add the max recently measured ack aggregation to CWND. 915 target_window += minmax_get(&bbr->bbr_max_ack_height); 916 else if (bbr->bbr_flags & BBR_FLAG_ENABLE_ACK_AGG_IN_STARTUP) 917 // Add the most recent excess acked. Because CWND never decreases in 918 // STARTUP, this will automatically create a very localized max filter. 919 target_window += excess_acked; 920 921 // Instead of immediately setting the target CWND as the new one, BBR grows 922 // the CWND towards |target_window| by only increasing it |bytes_acked| at a 923 // time. 924 const int add_bytes_acked = 925 !FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery || !in_recovery(bbr); 926 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 927 bbr->bbr_cwnd = MIN(target_window, bbr->bbr_cwnd + bytes_acked); 928 else if (add_bytes_acked && 929 (bbr->bbr_cwnd < target_window || 930 lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler) 931 < bbr->bbr_init_cwnd)) 932 // If the connection is not yet out of startup phase, do not decrease 933 // the window. 934 bbr->bbr_cwnd += bytes_acked; 935 936 // Enforce the limits on the congestion window. 937 if (bbr->bbr_cwnd < bbr->bbr_min_cwnd) 938 bbr->bbr_cwnd = bbr->bbr_min_cwnd; 939 else if (bbr->bbr_cwnd > bbr->bbr_max_cwnd) 940 { 941 LSQ_DEBUG("exceed max cwnd"); 942 bbr->bbr_cwnd = bbr->bbr_max_cwnd; 943 } 944} 945 946 947/* See BbrSender::CalculateRecoveryWindow */ 948static void 949calculate_recovery_window (struct lsquic_bbr *bbr, uint64_t bytes_acked, 950 uint64_t bytes_lost, uint64_t bytes_in_flight) 951{ 952 if ((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP) 953 && bbr->bbr_mode == BBR_MODE_STARTUP) 954 return; 955 956 if (bbr->bbr_recovery_state == BBR_RS_NOT_IN_RECOVERY) 957 return; 958 959 // Set up the initial recovery window. 960 if (bbr->bbr_recovery_window == 0) 961 { 962 bbr->bbr_recovery_window = bytes_in_flight + bytes_acked; 963 bbr->bbr_recovery_window = MAX(bbr->bbr_min_cwnd, 964 bbr->bbr_recovery_window); 965 return; 966 } 967 968 // Remove losses from the recovery window, while accounting for a potential 969 // integer underflow. 970 if (bbr->bbr_recovery_window >= bytes_lost) 971 bbr->bbr_recovery_window -= bytes_lost; 972 else 973 bbr->bbr_recovery_window = kMaxSegmentSize; 974 975 // In CONSERVATION mode, just subtracting losses is sufficient. In GROWTH, 976 // release additional |bytes_acked| to achieve a slow-start-like behavior. 977 if (bbr->bbr_recovery_state == BBR_RS_GROWTH) 978 bbr->bbr_recovery_window += bytes_acked; 979 980 // Sanity checks. Ensure that we always allow to send at least an MSS or 981 // |bytes_acked| in response, whichever is larger. 982 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, 983 bytes_in_flight + bytes_acked); 984 if (FLAG_quic_bbr_one_mss_conservation) 985 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, 986 bytes_in_flight + kMaxSegmentSize); 987 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, bbr->bbr_min_cwnd); 988} 989 990 991static void 992lsquic_bbr_end_ack (void *cong_ctl, uint64_t in_flight) 993{ 994 struct lsquic_bbr *const bbr = cong_ctl; 995 int is_round_start, min_rtt_expired; 996 uint64_t bytes_acked, excess_acked, bytes_lost; 997 998 assert(bbr->bbr_flags & BBR_FLAG_IN_ACK); 999 bbr->bbr_flags &= ~BBR_FLAG_IN_ACK; 1000 1001 LSQ_DEBUG("end_ack; mode: %s; in_flight: %"PRIu64, mode2str[bbr->bbr_mode], 1002 in_flight); 1003 1004 bytes_acked = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler) 1005 - bbr->bbr_ack_state.total_bytes_acked_before; 1006 if (bbr->bbr_ack_state.acked_bytes) 1007 { 1008 is_round_start = bbr->bbr_ack_state.max_packno 1009 > bbr->bbr_current_round_trip_end 1010 || !is_valid_packno(bbr->bbr_current_round_trip_end); 1011 if (is_round_start) 1012 { 1013 ++bbr->bbr_round_count; 1014 bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno; 1015 LSQ_DEBUG("up round count to %"PRIu64"; new rt end: %"PRIu64, 1016 bbr->bbr_round_count, bbr->bbr_current_round_trip_end); 1017 } 1018 min_rtt_expired = update_bandwidth_and_min_rtt(bbr); 1019 update_recovery_state(bbr, is_round_start); 1020 excess_acked = update_ack_aggregation_bytes(bbr, bytes_acked); 1021 } 1022 else 1023 { 1024 is_round_start = 0; 1025 min_rtt_expired = 0; 1026 excess_acked = 0; 1027 } 1028 1029 if (bbr->bbr_mode == BBR_MODE_PROBE_BW) 1030 update_gain_cycle_phase(bbr, in_flight); 1031 1032 if (is_round_start && !(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 1033 check_if_full_bw_reached(bbr); 1034 1035 maybe_exit_startup_or_drain(bbr, bbr->bbr_ack_state.ack_time, in_flight); 1036 1037 maybe_enter_or_exit_probe_rtt(bbr, bbr->bbr_ack_state.ack_time, 1038 is_round_start, min_rtt_expired, in_flight); 1039 1040 // Calculate number of packets acked and lost. 1041 bytes_lost = bbr->bbr_ack_state.lost_bytes; 1042 1043 // After the model is updated, recalculate the pacing rate and congestion 1044 // window. 1045 calculate_pacing_rate(bbr); 1046 calculate_cwnd(bbr, bytes_acked, excess_acked); 1047 calculate_recovery_window(bbr, bytes_acked, bytes_lost, in_flight); 1048 1049 /* We don't need to clean up BW sampler */ 1050} 1051 1052 1053static void 1054lsquic_bbr_cleanup (void *cong_ctl) 1055{ 1056 struct lsquic_bbr *const bbr = cong_ctl; 1057 1058 lsquic_bw_sampler_cleanup(&bbr->bbr_bw_sampler); 1059 LSQ_DEBUG("cleanup"); 1060} 1061 1062 1063static void 1064lsquic_bbr_loss (void *cong_ctl) { /* Noop */ } 1065 1066 1067static void 1068lsquic_bbr_timeout (void *cong_ctl) { /* Noop */ } 1069 1070 1071const struct cong_ctl_if lsquic_cong_bbr_if = 1072{ 1073 .cci_ack = lsquic_bbr_ack, 1074 .cci_begin_ack = lsquic_bbr_begin_ack, 1075 .cci_end_ack = lsquic_bbr_end_ack, 1076 .cci_cleanup = lsquic_bbr_cleanup, 1077 .cci_get_cwnd = lsquic_bbr_get_cwnd, 1078 .cci_init = lsquic_bbr_init, 1079 .cci_pacing_rate = lsquic_bbr_pacing_rate, 1080 .cci_loss = lsquic_bbr_loss, 1081 .cci_lost = lsquic_bbr_lost, 1082 .cci_reinit = lsquic_bbr_reinit, 1083 .cci_timeout = lsquic_bbr_timeout, 1084 .cci_sent = lsquic_bbr_sent, 1085 .cci_was_quiet = lsquic_bbr_was_quiet, 1086}; 1087