lsquic_bbr.c revision 9fc12041
1/* Copyright (c) 2017 - 2020 LiteSpeed Technologies Inc. See LICENSE. */ 2// Copyright 2016 The Chromium Authors. All rights reserved. 3// Use of this source code is governed by a BSD-style license that can be 4// found in the LICENSE.chrome file. 5 6#include <assert.h> 7#include <inttypes.h> 8#include <stdint.h> 9#include <stdlib.h> 10#include <string.h> 11#include <sys/queue.h> 12 13#include "lsquic.h" 14#include "lsquic_int_types.h" 15#include "lsquic_cong_ctl.h" 16#include "lsquic_minmax.h" 17#include "lsquic_packet_common.h" 18#include "lsquic_packet_out.h" 19#include "lsquic_bw_sampler.h" 20#include "lsquic_bbr.h" 21#include "lsquic_hash.h" 22#include "lsquic_conn.h" 23#include "lsquic_sfcw.h" 24#include "lsquic_conn_flow.h" 25#include "lsquic_varint.h" 26#include "lsquic_hq.h" 27#include "lsquic_stream.h" 28#include "lsquic_rtt.h" 29#include "lsquic_conn_public.h" 30#include "lsquic_util.h" 31#include "lsquic_malo.h" 32#include "lsquic_crand.h" 33#include "lsquic_mm.h" 34#include "lsquic_engine_public.h" 35 36#define LSQUIC_LOGGER_MODULE LSQLM_BBR 37#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(bbr->bbr_conn_pub->lconn) 38#include "lsquic_logger.h" 39 40#define MIN(a, b) ((a) < (b) ? (a) : (b)) 41#define MAX(a, b) ((a) > (b) ? (a) : (b)) 42 43#define ms(val_) ((val_) * 1000) 44#define sec(val_) ((val_) * 1000 * 1000) 45 46// Default maximum packet size used in the Linux TCP implementation. 47// Used in QUIC for congestion window computations in bytes. 48#define kDefaultTCPMSS 1460 49#define kMaxSegmentSize kDefaultTCPMSS 50 51// Constants based on TCP defaults. 52// The minimum CWND to ensure delayed acks don't reduce bandwidth measurements. 53// Does not inflate the pacing rate. 54#define kDefaultMinimumCongestionWindow (4 * kDefaultTCPMSS) 55 56// The gain used for the STARTUP, equal to 2/ln(2). 57#define kDefaultHighGain 2.885f 58 59// The newly derived gain for STARTUP, equal to 4 * ln(2) 60#define kDerivedHighGain 2.773f 61 62// The newly derived CWND gain for STARTUP, 2. 63#define kDerivedHighCWNDGain 2.0f 64 65// The gain used in STARTUP after loss has been detected. 66// 1.5 is enough to allow for 25% exogenous loss and still observe a 25% growth 67// in measured bandwidth. 68#define kStartupAfterLossGain 1.5f 69 70// We match SPDY's use of 32 (since we'd compete with SPDY). 71#define kInitialCongestionWindow 32 72 73/* Taken from send_algorithm_interface.h */ 74#define kDefaultMaxCongestionWindowPackets 2000 75 76// The time after which the current min_rtt value expires. 77#define kMinRttExpiry sec(10) 78 79// Coefficient to determine if a new RTT is sufficiently similar to min_rtt that 80// we don't need to enter PROBE_RTT. 81#define kSimilarMinRttThreshold 1.125f 82 83// If the bandwidth does not increase by the factor of |kStartupGrowthTarget| 84// within |kRoundTripsWithoutGrowthBeforeExitingStartup| rounds, the connection 85// will exit the STARTUP mode. 86#define kStartupGrowthTarget 1.25 87 88#define kRoundTripsWithoutGrowthBeforeExitingStartup 3 89 90#define startup_rate_reduction_multiplier_ 0 91 92// The cycle of gains used during the PROBE_BW stage. 93static const float kPacingGain[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1}; 94 95// The length of the gain cycle. 96static const size_t kGainCycleLength = sizeof(kPacingGain) 97 / sizeof(kPacingGain[0]); 98 99// Coefficient of target congestion window to use when basing PROBE_RTT on BDP. 100#define kModerateProbeRttMultiplier 0.75 101 102// The maximum packet size of any QUIC packet over IPv6, based on ethernet's max 103// size, minus the IP and UDP headers. IPv6 has a 40 byte header, UDP adds an 104// additional 8 bytes. This is a total overhead of 48 bytes. Ethernet's 105// max packet size is 1500 bytes, 1500 - 48 = 1452. 106#define kMaxV6PacketSize 1452 107// The maximum packet size of any QUIC packet over IPv4. 108// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472. 109#define kMaxV4PacketSize 1472 110// The maximum incoming packet size allowed. 111#define kMaxIncomingPacketSize kMaxV4PacketSize 112// The maximum outgoing packet size allowed. 113#define kMaxOutgoingPacketSize kMaxV6PacketSize 114 115// The minimum time the connection can spend in PROBE_RTT mode. 116#define kProbeRttTime ms(200) 117 118/* FLAG* are from net/quic/quic_flags_list.h */ 119 120// When in STARTUP and recovery, do not add bytes_acked to QUIC BBR's CWND in 121// CalculateCongestionWindow() 122#define FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery 0 123 124// When true, ensure BBR allows at least one MSS to be sent in response to an 125// ACK in packet conservation. 126#define FLAG_quic_bbr_one_mss_conservation 0 127 128/* From net/quic/quic_flags_list.h */ 129#define kCwndGain 2.0 130 131 132static uint64_t lsquic_bbr_get_cwnd (void *); 133 134 135static const char *const mode2str[] = 136{ 137 [BBR_MODE_STARTUP] = "STARTUP", 138 [BBR_MODE_DRAIN] = "DRAIN", 139 [BBR_MODE_PROBE_BW] = "PROBE_BW", 140 [BBR_MODE_PROBE_RTT] = "PROBE_RTT", 141}; 142 143 144static void 145set_mode (struct lsquic_bbr *bbr, enum bbr_mode mode) 146{ 147 if (bbr->bbr_mode != mode) 148 { 149 LSQ_DEBUG("mode change %s -> %s", mode2str[bbr->bbr_mode], 150 mode2str[mode]); 151 bbr->bbr_mode = mode; 152 } 153 else 154 LSQ_DEBUG("mode remains %s", mode2str[mode]); 155} 156 157 158static void 159set_startup_values (struct lsquic_bbr *bbr) 160{ 161 bbr->bbr_pacing_gain = bbr->bbr_high_gain; 162 bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain; 163} 164 165 166static void 167init_bbr (struct lsquic_bbr *bbr) 168{ 169 bbr->bbr_mode = BBR_MODE_STARTUP; 170 bbr->bbr_round_count = 0; 171 minmax_init(&bbr->bbr_max_bandwidth, 10); 172 minmax_init(&bbr->bbr_max_ack_height, 10); 173 bbr->bbr_aggregation_epoch_bytes = 0; 174 bbr->bbr_aggregation_epoch_start_time = 0; 175 bbr->bbr_min_rtt = 0; 176 bbr->bbr_min_rtt_timestamp = 0; 177 bbr->bbr_init_cwnd = kInitialCongestionWindow * kDefaultTCPMSS; 178 bbr->bbr_cwnd = kInitialCongestionWindow * kDefaultTCPMSS; 179 bbr->bbr_max_cwnd = kDefaultMaxCongestionWindowPackets * kDefaultTCPMSS; 180 bbr->bbr_min_cwnd = kDefaultMinimumCongestionWindow; 181 bbr->bbr_high_gain = kDefaultHighGain; 182 bbr->bbr_high_cwnd_gain = kDefaultHighGain; 183 bbr->bbr_drain_gain = 1.0 / kDefaultHighGain; 184 bbr->bbr_pacing_rate = BW_ZERO(); 185 bbr->bbr_pacing_gain = 1.0; 186 bbr->bbr_cwnd_gain = 1.0; 187 bbr->bbr_num_startup_rtts = kRoundTripsWithoutGrowthBeforeExitingStartup; 188 bbr->bbr_flags &= ~BBR_FLAG_EXIT_STARTUP_ON_LOSS; 189 bbr->bbr_cycle_current_offset = 0; 190 bbr->bbr_last_cycle_start = 0; 191 bbr->bbr_flags &= ~BBR_FLAG_IS_AT_FULL_BANDWIDTH; 192 bbr->bbr_round_wo_bw_gain = 0; 193 bbr->bbr_bw_at_last_round = BW_ZERO(); 194 bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE; 195 bbr->bbr_exit_probe_rtt_at = 0; 196 bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED; 197 bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 198 bbr->bbr_flags &= ~BBR_FLAG_HAS_NON_APP_LIMITED; 199 bbr->bbr_flags &= ~BBR_FLAG_FLEXIBLE_APP_LIMITED; 200 set_startup_values(bbr); 201} 202 203 204static void 205lsquic_bbr_init (void *cong_ctl, const struct lsquic_conn_public *conn_pub, 206 enum quic_ft_bit retx_frames) 207{ 208 struct lsquic_bbr *const bbr = cong_ctl; 209 bbr->bbr_conn_pub = conn_pub; 210 lsquic_bw_sampler_init(&bbr->bbr_bw_sampler, conn_pub->lconn, retx_frames); 211 bbr->bbr_rtt_stats = &conn_pub->rtt_stats; 212 213 init_bbr(bbr); 214 215 LSQ_DEBUG("initialized"); 216} 217 218 219static void 220lsquic_bbr_reinit (void *cong_ctl) 221{ 222 struct lsquic_bbr *const bbr = cong_ctl; 223 224 init_bbr(bbr); 225 226 LSQ_DEBUG("re-initialized"); 227} 228 229 230static lsquic_time_t 231get_min_rtt (const struct lsquic_bbr *bbr) 232{ 233 lsquic_time_t min_rtt; 234 235 if (bbr->bbr_min_rtt) 236 return bbr->bbr_min_rtt; 237 else 238 { 239 min_rtt = lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats); 240 if (min_rtt == 0) 241 min_rtt = 25000; 242 return min_rtt; 243 } 244} 245 246 247static uint64_t 248lsquic_bbr_pacing_rate (void *cong_ctl, int in_recovery) 249{ 250 struct lsquic_bbr *const bbr = cong_ctl; 251 lsquic_time_t min_rtt; 252 struct bandwidth bw; 253 254 if (!BW_IS_ZERO(&bbr->bbr_pacing_rate)) 255 bw = bbr->bbr_pacing_rate; 256 else 257 { 258 min_rtt = get_min_rtt(bbr); 259 bw = BW_FROM_BYTES_AND_DELTA(bbr->bbr_init_cwnd, min_rtt); 260 bw = BW_TIMES(&bw, bbr->bbr_high_cwnd_gain); 261 } 262 263 return BW_TO_BYTES_PER_SEC(&bw); 264} 265 266 267/* BbrSender::GetTargetCongestionWindow */ 268static uint64_t 269get_target_cwnd (const struct lsquic_bbr *bbr, float gain) 270{ 271 struct bandwidth bw; 272 uint64_t bdp, cwnd; 273 274 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 275 bdp = get_min_rtt(bbr) * BW_TO_BYTES_PER_SEC(&bw) / 1000000; 276 cwnd = gain * bdp; 277 278 // BDP estimate will be zero if no bandwidth samples are available yet. 279 if (cwnd == 0) 280 cwnd = gain * bbr->bbr_init_cwnd; 281 282 return MAX(cwnd, bbr->bbr_min_cwnd); 283} 284 285 286/* See BbrSender::IsPipeSufficientlyFull */ 287static int 288is_pipe_sufficiently_full (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 289{ 290 // See if we need more bytes in flight to see more bandwidth. 291 if (bbr->bbr_mode == BBR_MODE_STARTUP) 292 // STARTUP exits if it doesn't observe a 25% bandwidth increase, so 293 // the CWND must be more than 25% above the target. 294 return bytes_in_flight >= get_target_cwnd(bbr, 1.5); 295 else if (bbr->bbr_pacing_gain > 1) 296 // Super-unity PROBE_BW doesn't exit until 1.25 * BDP is achieved. 297 return bytes_in_flight >= get_target_cwnd(bbr, bbr->bbr_pacing_gain); 298 else 299 // If bytes_in_flight are above the target congestion window, it should 300 // be possible to observe the same or more bandwidth if it's available. 301 return bytes_in_flight >= get_target_cwnd(bbr, 1.1); 302} 303 304 305static void 306lsquic_bbr_was_quiet (void *cong_ctl, lsquic_time_t now, uint64_t in_flight) 307{ 308 struct lsquic_bbr *const bbr = cong_ctl; 309 LSQ_DEBUG("was quiet"); /* Do nothing */ 310} 311 312 313/* See BbrSender::OnApplicationLimited */ 314static void 315bbr_app_limited (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 316{ 317 uint64_t cwnd; 318 319 cwnd = lsquic_bbr_get_cwnd(bbr); 320 if (bytes_in_flight >= cwnd) 321 return; 322 if ((bbr->bbr_flags & BBR_FLAG_FLEXIBLE_APP_LIMITED) 323 && is_pipe_sufficiently_full(bbr, bytes_in_flight)) 324 return; 325 326 bbr->bbr_flags |= BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT; 327 lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler); 328 LSQ_DEBUG("becoming application-limited. Last sent packet: %"PRIu64"; " 329 "CWND: %"PRIu64, bbr->bbr_last_sent_packno, cwnd); 330} 331 332 333static void 334lsquic_bbr_ack (void *cong_ctl, struct lsquic_packet_out *packet_out, 335 unsigned packet_sz, lsquic_time_t now_time, int app_limited) 336{ 337 struct lsquic_bbr *const bbr = cong_ctl; 338 struct bw_sample *sample; 339 340 assert(bbr->bbr_flags & BBR_FLAG_IN_ACK); 341 342 sample = lsquic_bw_sampler_packet_acked(&bbr->bbr_bw_sampler, packet_out, 343 bbr->bbr_ack_state.ack_time); 344 if (sample) 345 TAILQ_INSERT_TAIL(&bbr->bbr_ack_state.samples, sample, next); 346 347 if (is_valid_packno(bbr->bbr_ack_state.max_packno)) 348 /* We assume packet numbers are ordered */ 349 assert(packet_out->po_packno > bbr->bbr_ack_state.max_packno); 350 bbr->bbr_ack_state.max_packno = packet_out->po_packno; 351 bbr->bbr_ack_state.acked_bytes += packet_sz; 352} 353 354 355static void 356lsquic_bbr_sent (void *cong_ctl, struct lsquic_packet_out *packet_out, 357 uint64_t in_flight, int app_limited) 358{ 359 struct lsquic_bbr *const bbr = cong_ctl; 360 361 if (!(packet_out->po_flags & PO_MINI)) 362 lsquic_bw_sampler_packet_sent(&bbr->bbr_bw_sampler, packet_out, 363 in_flight); 364 365 /* Obviously we make an assumption that sent packet number are always 366 * increasing. 367 */ 368 bbr->bbr_last_sent_packno = packet_out->po_packno; 369 370 if (app_limited) 371 bbr_app_limited(bbr, in_flight); 372} 373 374 375static void 376lsquic_bbr_lost (void *cong_ctl, struct lsquic_packet_out *packet_out, 377 unsigned packet_sz) 378{ 379 struct lsquic_bbr *const bbr = cong_ctl; 380 381 lsquic_bw_sampler_packet_lost(&bbr->bbr_bw_sampler, packet_out); 382 bbr->bbr_ack_state.has_losses = 1; 383 bbr->bbr_ack_state.lost_bytes += packet_sz; 384} 385 386 387static void 388lsquic_bbr_begin_ack (void *cong_ctl, lsquic_time_t ack_time, uint64_t in_flight) 389{ 390 struct lsquic_bbr *const bbr = cong_ctl; 391 392 assert(!(bbr->bbr_flags & BBR_FLAG_IN_ACK)); 393 bbr->bbr_flags |= BBR_FLAG_IN_ACK; 394 memset(&bbr->bbr_ack_state, 0, sizeof(bbr->bbr_ack_state)); 395 TAILQ_INIT(&bbr->bbr_ack_state.samples); 396 bbr->bbr_ack_state.ack_time = ack_time; 397 bbr->bbr_ack_state.max_packno = UINT64_MAX; 398 bbr->bbr_ack_state.in_flight = in_flight; 399 bbr->bbr_ack_state.total_bytes_acked_before 400 = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler); 401} 402 403 404/* Based on BbrSender::ShouldExtendMinRttExpiry() */ 405static int 406should_extend_min_rtt_expiry (const struct lsquic_bbr *bbr) 407{ 408 int increased_since_last_probe; 409 410 if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 411 |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED)) 412 == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 413 |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED)) 414 // Extend the current min_rtt if we've been app limited recently. 415 return 1; 416 417 increased_since_last_probe = bbr->bbr_min_rtt_since_last_probe 418 > bbr->bbr_min_rtt * kSimilarMinRttThreshold; 419 if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 420 |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT)) 421 == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 422 |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT) 423 && !increased_since_last_probe) 424 // Extend the current min_rtt if we've been app limited recently and an 425 // rtt has been measured in that time that's less than 12.5% more than 426 // the current min_rtt. 427 return 1; 428 429 return 0; 430} 431 432 433/* Based on BbrSender::UpdateBandwidthAndMinRtt */ 434/* Returns true if min RTT expired, false otherwise */ 435static int 436update_bandwidth_and_min_rtt (struct lsquic_bbr *bbr) 437{ 438 struct bw_sample *sample, *next_sample; 439 uint64_t sample_min_rtt; 440 int min_rtt_expired; 441 442 sample_min_rtt = UINT64_MAX; 443 for (sample = TAILQ_FIRST(&bbr->bbr_ack_state.samples); sample; 444 sample = next_sample) 445 { 446 next_sample = TAILQ_NEXT(sample, next); 447 448 if (sample->is_app_limited) 449 bbr->bbr_flags |= BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 450 else 451 { 452 bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 453 bbr->bbr_flags |= BBR_FLAG_HAS_NON_APP_LIMITED; 454 } 455 456 if (sample_min_rtt == UINT64_MAX || sample->rtt < sample_min_rtt) 457 sample_min_rtt = sample->rtt; 458 459 if (!sample->is_app_limited 460 || BW_VALUE(&sample->bandwidth) 461 > minmax_get(&bbr->bbr_max_bandwidth)) 462 minmax_upmax(&bbr->bbr_max_bandwidth, bbr->bbr_round_count, 463 BW_VALUE(&sample->bandwidth)); 464 465 lsquic_malo_put(sample); 466 } 467 468 if (sample_min_rtt == UINT64_MAX) 469 return 0; 470 471 bbr->bbr_min_rtt_since_last_probe 472 = MIN(bbr->bbr_min_rtt_since_last_probe, sample_min_rtt); 473 474 min_rtt_expired = bbr->bbr_min_rtt != 0 && (bbr->bbr_ack_state.ack_time 475 > bbr->bbr_min_rtt_timestamp + kMinRttExpiry); 476 if (min_rtt_expired || sample_min_rtt < bbr->bbr_min_rtt 477 || 0 == bbr->bbr_min_rtt) 478 { 479 if (min_rtt_expired && should_extend_min_rtt_expiry(bbr)) 480 { 481 LSQ_DEBUG("min rtt expiration extended, stay at: %"PRIu64, 482 bbr->bbr_min_rtt); 483 min_rtt_expired = 0; 484 } 485 else 486 { 487 LSQ_DEBUG("min rtt updated: %"PRIu64" -> %"PRIu64, 488 bbr->bbr_min_rtt, sample_min_rtt); 489 bbr->bbr_min_rtt = sample_min_rtt; 490 } 491 bbr->bbr_min_rtt_timestamp = bbr->bbr_ack_state.ack_time; 492 bbr->bbr_min_rtt_since_last_probe = UINT64_MAX; 493 bbr->bbr_flags &= ~BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT; 494 } 495 496 return min_rtt_expired; 497} 498 499 500/* Based on BbrSender::UpdateRecoveryState() */ 501static void 502update_recovery_state (struct lsquic_bbr *bbr, int is_round_start) 503{ 504 // Exit recovery when there are no losses for a round. 505 if (bbr->bbr_ack_state.has_losses) 506 bbr->bbr_end_recovery_at = bbr->bbr_last_sent_packno; 507 508 switch (bbr->bbr_recovery_state) 509 { 510 case BBR_RS_NOT_IN_RECOVERY: 511 // Enter conservation on the first loss. 512 if (bbr->bbr_ack_state.has_losses) 513 { 514 bbr->bbr_recovery_state = BBR_RS_CONSERVATION; 515 // This will cause the |bbr_recovery_window| to be set to the 516 // correct value in CalculateRecoveryWindow(). 517 bbr->bbr_recovery_window = 0; 518 // Since the conservation phase is meant to be lasting for a whole 519 // round, extend the current round as if it were started right now. 520 bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno; 521 } 522 break; 523 case BBR_RS_CONSERVATION: 524 if (is_round_start) 525 bbr->bbr_recovery_state = BBR_RS_GROWTH; 526 /* Fall-through */ 527 case BBR_RS_GROWTH: 528 // Exit recovery if appropriate. 529 if (!bbr->bbr_ack_state.has_losses 530 && bbr->bbr_ack_state.max_packno > bbr->bbr_end_recovery_at) 531 bbr->bbr_recovery_state = BBR_RS_NOT_IN_RECOVERY; 532 break; 533 } 534} 535 536 537static uint64_t 538update_ack_aggregation_bytes (struct lsquic_bbr *bbr, 539 uint64_t newly_acked_bytes) 540{ 541 const lsquic_time_t ack_time = bbr->bbr_ack_state.ack_time; 542 uint64_t expected_bytes_acked, diff; 543 544 // Compute how many bytes are expected to be delivered, assuming max 545 // bandwidth is correct. 546 expected_bytes_acked = minmax_get(&bbr->bbr_max_bandwidth) 547 * (ack_time - bbr->bbr_aggregation_epoch_start_time); 548 549 // Reset the current aggregation epoch as soon as the ack arrival rate is 550 // less than or equal to the max bandwidth. 551 if (bbr->bbr_aggregation_epoch_bytes <= expected_bytes_acked) 552 { 553 // Reset to start measuring a new aggregation epoch. 554 bbr->bbr_aggregation_epoch_bytes = newly_acked_bytes; 555 bbr->bbr_aggregation_epoch_start_time = ack_time; 556 return 0; 557 } 558 559 // Compute how many extra bytes were delivered vs max bandwidth. 560 // Include the bytes most recently acknowledged to account for stretch acks. 561 bbr->bbr_aggregation_epoch_bytes += newly_acked_bytes; 562 diff = bbr->bbr_aggregation_epoch_bytes - expected_bytes_acked; 563 minmax_upmax(&bbr->bbr_max_ack_height, bbr->bbr_round_count, diff); 564 return diff; 565} 566 567 568/* See BbrSender::UpdateGainCyclePhase() */ 569static void 570update_gain_cycle_phase (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 571{ 572 const uint64_t prior_in_flight = bbr->bbr_ack_state.in_flight; 573 const lsquic_time_t now = bbr->bbr_ack_state.ack_time; 574 // In most cases, the cycle is advanced after an RTT passes. 575 int should_advance_gain_cycling 576 = now - bbr->bbr_last_cycle_start > get_min_rtt(bbr); 577 578 // If the pacing gain is above 1.0, the connection is trying to probe the 579 // bandwidth by increasing the number of bytes in flight to at least 580 // pacing_gain * BDP. Make sure that it actually reaches the target, as 581 // long as there are no losses suggesting that the buffers are not able to 582 // hold that much. 583 if (bbr->bbr_pacing_gain > 1.0 584 && !bbr->bbr_ack_state.has_losses 585 && prior_in_flight < get_target_cwnd(bbr, bbr->bbr_pacing_gain)) 586 should_advance_gain_cycling = 0; 587 588 /* Several optimizations are possible here: "else if" instead of "if", as 589 * well as not calling get_target_cwnd() if `should_advance_gain_cycling' 590 * is already set to the target value. 591 */ 592 593 // If pacing gain is below 1.0, the connection is trying to drain the extra 594 // queue which could have been incurred by probing prior to it. If the 595 // number of bytes in flight falls down to the estimated BDP value earlier, 596 // conclude that the queue has been successfully drained and exit this cycle 597 // early. 598 if (bbr->bbr_pacing_gain < 1.0 599 && bytes_in_flight <= get_target_cwnd(bbr, 1)) 600 should_advance_gain_cycling = 1; 601 602 if (should_advance_gain_cycling) 603 { 604 bbr->bbr_cycle_current_offset = 605 (bbr->bbr_cycle_current_offset + 1) % kGainCycleLength; 606 bbr->bbr_last_cycle_start = now; 607 // Stay in low gain mode until the target BDP is hit. Low gain mode 608 // will be exited immediately when the target BDP is achieved. 609 if ((bbr->bbr_flags & BBR_FLAG_DRAIN_TO_TARGET) 610 && bbr->bbr_pacing_gain < 1 611 && kPacingGain[bbr->bbr_cycle_current_offset] == 1 612 && bytes_in_flight > get_target_cwnd(bbr, 1)) 613 return; 614 bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset]; 615 LSQ_DEBUG("advanced gain cycle, pacing gain set to %.2f", 616 bbr->bbr_pacing_gain); 617 } 618} 619 620 621/* BbrSender::InRecovery() */ 622static int 623in_recovery (const struct lsquic_bbr *bbr) 624{ 625 return bbr->bbr_recovery_state != BBR_RS_NOT_IN_RECOVERY; 626} 627 628 629/* See BbrSender::CheckIfFullBandwidthReached() */ 630static void 631check_if_full_bw_reached (struct lsquic_bbr *bbr) 632{ 633 struct bandwidth target, bw; 634 635 if (bbr->bbr_flags & BBR_FLAG_LAST_SAMPLE_APP_LIMITED) 636 { 637 LSQ_DEBUG("last sample app limited: full BW not reached"); 638 return; 639 } 640 641 target = BW_TIMES(&bbr->bbr_bw_at_last_round, kStartupGrowthTarget); 642 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 643 if (BW_VALUE(&bw) >= BW_VALUE(&target)) 644 { 645 bbr->bbr_bw_at_last_round = bw; 646 bbr->bbr_round_wo_bw_gain = 0; 647 if (bbr->bbr_flags & BBR_FLAG_EXPIRE_ACK_AGG_IN_STARTUP) 648 // Expire old excess delivery measurements now that bandwidth 649 // increased. 650 minmax_reset(&bbr->bbr_max_ack_height, 651 ((struct minmax_sample) { bbr->bbr_round_count, 0, })); 652 LSQ_DEBUG("BW estimate %"PRIu64"bps greater than or equal to target " 653 "%"PRIu64"bps: full BW not reached", 654 BW_VALUE(&bw), BW_VALUE(&target)); 655 return; 656 } 657 658 ++bbr->bbr_round_wo_bw_gain; 659 if ((bbr->bbr_round_wo_bw_gain >= bbr->bbr_num_startup_rtts) 660 || ((bbr->bbr_flags & BBR_FLAG_EXIT_STARTUP_ON_LOSS) 661 && in_recovery(bbr))) 662 { 663 assert(bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED); /* DCHECK */ 664 bbr->bbr_flags |= BBR_FLAG_IS_AT_FULL_BANDWIDTH; 665 LSQ_DEBUG("reached full BW"); 666 } 667 else 668 LSQ_DEBUG("rounds w/o gain: %u, full BW not reached", 669 bbr->bbr_round_wo_bw_gain); 670} 671 672 673/* See BbrSender::OnExitStartup */ 674static void 675on_exit_startup (struct lsquic_bbr *bbr, lsquic_time_t now) 676{ 677 assert(bbr->bbr_mode == BBR_MODE_STARTUP); 678 /* Apparently this method is just to update stats, something that we 679 * don't do yet. 680 */ 681} 682 683 684/* See BbrSender::EnterProbeBandwidthMode */ 685static void 686enter_probe_bw_mode (struct lsquic_bbr *bbr, lsquic_time_t now) 687{ 688 uint8_t rand; 689 690 set_mode(bbr, BBR_MODE_PROBE_BW); 691 bbr->bbr_cwnd_gain = kCwndGain; 692 693 // Pick a random offset for the gain cycle out of {0, 2..7} range. 1 is 694 // excluded because in that case increased gain and decreased gain would not 695 // follow each other. 696 rand = lsquic_crand_get_byte(bbr->bbr_conn_pub->enpub->enp_crand); 697 bbr->bbr_cycle_current_offset = rand % (kGainCycleLength - 1); 698 if (bbr->bbr_cycle_current_offset >= 1) 699 ++bbr->bbr_cycle_current_offset; 700 701 bbr->bbr_last_cycle_start = now; 702 bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset]; 703} 704 705 706/* See BbrSender::EnterStartupMode */ 707static void 708enter_startup_mode (struct lsquic_bbr *bbr, lsquic_time_t now) 709{ 710 set_mode(bbr, BBR_MODE_STARTUP); 711 set_startup_values(bbr); 712} 713 714 715/* See BbrSender::MaybeExitStartupOrDrain() */ 716static void 717maybe_exit_startup_or_drain (struct lsquic_bbr *bbr, lsquic_time_t now, 718 uint64_t bytes_in_flight) 719{ 720 uint64_t target_cwnd; 721 722 if (bbr->bbr_mode == BBR_MODE_STARTUP 723 && (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 724 { 725 on_exit_startup(bbr, now); 726 set_mode(bbr, BBR_MODE_DRAIN); 727 bbr->bbr_pacing_gain = bbr->bbr_drain_gain; 728 bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain; 729 } 730 731 if (bbr->bbr_mode == BBR_MODE_DRAIN) 732 { 733 target_cwnd = get_target_cwnd(bbr, 1); 734 LSQ_DEBUG("%s: bytes in flight: %"PRIu64"; target cwnd: %"PRIu64, 735 __func__, bytes_in_flight, target_cwnd); 736 if (bytes_in_flight <= target_cwnd) 737 enter_probe_bw_mode(bbr, now); 738 } 739} 740 741 742static int 743in_slow_start (const struct lsquic_bbr *bbr) 744{ 745 return bbr->bbr_mode == BBR_MODE_STARTUP; 746} 747 748 749/* See QuicByteCount BbrSender::ProbeRttCongestionWindow() */ 750static uint64_t 751get_probe_rtt_cwnd (const struct lsquic_bbr *bbr) 752{ 753 if (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_BASED_ON_BDP) 754 return get_target_cwnd(bbr, kModerateProbeRttMultiplier); 755 else 756 return bbr->bbr_min_cwnd; 757} 758 759 760static uint64_t 761lsquic_bbr_get_cwnd (void *cong_ctl) 762{ 763 struct lsquic_bbr *const bbr = cong_ctl; 764 uint64_t cwnd; 765 766 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 767 cwnd = get_probe_rtt_cwnd(bbr); 768 else if (in_recovery(bbr) && 769 !((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP) 770 && bbr->bbr_mode == BBR_MODE_STARTUP)) 771 cwnd = MIN(bbr->bbr_cwnd, bbr->bbr_recovery_window); 772 else 773 cwnd = bbr->bbr_cwnd; 774 775 return cwnd; 776} 777 778 779/* See BbrSender::MaybeEnterOrExitProbeRtt */ 780static void 781maybe_enter_or_exit_probe_rtt (struct lsquic_bbr *bbr, lsquic_time_t now, 782 int is_round_start, int min_rtt_expired, uint64_t bytes_in_flight) 783{ 784 if (min_rtt_expired 785 && !(bbr->bbr_flags & BBR_FLAG_EXITING_QUIESCENCE) 786 && bbr->bbr_mode != BBR_MODE_PROBE_RTT) 787 { 788 if (in_slow_start(bbr)) 789 on_exit_startup(bbr, now); 790 set_mode(bbr, BBR_MODE_PROBE_RTT); 791 bbr->bbr_pacing_gain = 1; 792 // Do not decide on the time to exit PROBE_RTT until the 793 // |bytes_in_flight| is at the target small value. 794 bbr->bbr_exit_probe_rtt_at = 0; 795 } 796 797 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 798 { 799 lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler); 800 LSQ_DEBUG("%s: exit probe at: %"PRIu64"; now: %"PRIu64 801 "; round start: %d; round passed: %d; rtt: %"PRIu64" usec", 802 __func__, bbr->bbr_exit_probe_rtt_at, now, is_round_start, 803 !!(bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED), 804 lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)); 805 if (bbr->bbr_exit_probe_rtt_at == 0) 806 { 807 // If the window has reached the appropriate size, schedule exiting 808 // PROBE_RTT. The CWND during PROBE_RTT is 809 // kMinimumCongestionWindow, but we allow an extra packet since QUIC 810 // checks CWND before sending a packet. 811 if (bytes_in_flight 812 < get_probe_rtt_cwnd(bbr) + kMaxOutgoingPacketSize) 813 { 814 bbr->bbr_exit_probe_rtt_at = now + kProbeRttTime; 815 bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED; 816 } 817 } 818 else 819 { 820 if (is_round_start) 821 bbr->bbr_flags |= BBR_FLAG_PROBE_RTT_ROUND_PASSED; 822 if (now >= bbr->bbr_exit_probe_rtt_at 823 && (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED)) 824 { 825 bbr->bbr_min_rtt_timestamp = now; 826 if (!(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 827 enter_startup_mode(bbr, now); 828 else 829 enter_probe_bw_mode(bbr, now); 830 } 831 } 832 } 833 834 bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE; 835} 836 837 838/* See BbrSender::CalculatePacingRate */ 839static void 840calculate_pacing_rate (struct lsquic_bbr *bbr) 841{ 842 struct bandwidth bw, target_rate; 843 844 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 845 if (BW_IS_ZERO(&bw)) 846 return; 847 848 LSQ_DEBUG("BW estimate: %"PRIu64, BW_VALUE(&bw)); 849 850 target_rate = BW_TIMES(&bw, bbr->bbr_pacing_gain); 851 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 852 { 853 bbr->bbr_pacing_rate = target_rate; 854 return; 855 } 856 857 // Pace at the rate of initial_window / RTT as soon as RTT measurements are 858 // available. 859 if (BW_IS_ZERO(&bbr->bbr_pacing_rate) 860 && 0 != lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)) 861 { 862 bbr->bbr_pacing_rate = BW_FROM_BYTES_AND_DELTA( 863 bbr->bbr_init_cwnd, 864 lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)); 865 return; 866 } 867 868 // Slow the pacing rate in STARTUP once loss has ever been detected. 869 const int has_ever_detected_loss = bbr->bbr_end_recovery_at != 0; 870 if (has_ever_detected_loss 871 && (bbr->bbr_flags & (BBR_FLAG_SLOWER_STARTUP 872 |BBR_FLAG_HAS_NON_APP_LIMITED)) 873 == (BBR_FLAG_SLOWER_STARTUP|BBR_FLAG_HAS_NON_APP_LIMITED)) 874 { 875 bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupAfterLossGain); 876 return; 877 } 878 879 // Slow the pacing rate in STARTUP by the bytes_lost / CWND. 880 if (startup_rate_reduction_multiplier_ != 0 881 && has_ever_detected_loss 882 && (bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED)) 883 { 884 bbr->bbr_pacing_rate = BW_TIMES(&target_rate, 885 (1 - (bbr->bbr_startup_bytes_lost 886 * startup_rate_reduction_multiplier_ * 1.0f 887 / bbr->bbr_cwnd_gain))); 888 // Ensure the pacing rate doesn't drop below the startup growth target 889 // times the bandwidth estimate. 890 if (BW_VALUE(&bbr->bbr_pacing_rate) 891 < BW_VALUE(&bw) * kStartupGrowthTarget) 892 bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupGrowthTarget); 893 return; 894 } 895 896 // Do not decrease the pacing rate during startup. 897 if (BW_VALUE(&bbr->bbr_pacing_rate) < BW_VALUE(&target_rate)) 898 bbr->bbr_pacing_rate = target_rate; 899} 900 901 902/* See BbrSender::CalculateCongestionWindow */ 903static void 904calculate_cwnd (struct lsquic_bbr *bbr, uint64_t bytes_acked, 905 uint64_t excess_acked) 906{ 907 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 908 return; 909 910 uint64_t target_window = get_target_cwnd(bbr, bbr->bbr_cwnd_gain); 911 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 912 // Add the max recently measured ack aggregation to CWND. 913 target_window += minmax_get(&bbr->bbr_max_ack_height); 914 else if (bbr->bbr_flags & BBR_FLAG_ENABLE_ACK_AGG_IN_STARTUP) 915 // Add the most recent excess acked. Because CWND never decreases in 916 // STARTUP, this will automatically create a very localized max filter. 917 target_window += excess_acked; 918 919 // Instead of immediately setting the target CWND as the new one, BBR grows 920 // the CWND towards |target_window| by only increasing it |bytes_acked| at a 921 // time. 922 const int add_bytes_acked = 923 !FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery || !in_recovery(bbr); 924 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 925 bbr->bbr_cwnd = MIN(target_window, bbr->bbr_cwnd + bytes_acked); 926 else if (add_bytes_acked && 927 (bbr->bbr_cwnd_gain < target_window || 928 lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler) 929 < bbr->bbr_init_cwnd)) 930 // If the connection is not yet out of startup phase, do not decrease 931 // the window. 932 bbr->bbr_cwnd += bytes_acked; 933 934 // Enforce the limits on the congestion window. 935 if (bbr->bbr_cwnd < bbr->bbr_min_cwnd) 936 bbr->bbr_cwnd = bbr->bbr_min_cwnd; 937 else if (bbr->bbr_cwnd > bbr->bbr_max_cwnd) 938 { 939 LSQ_DEBUG("exceed max cwnd"); 940 bbr->bbr_cwnd = bbr->bbr_max_cwnd; 941 } 942} 943 944 945/* See BbrSender::CalculateRecoveryWindow */ 946static void 947calculate_recovery_window (struct lsquic_bbr *bbr, uint64_t bytes_acked, 948 uint64_t bytes_lost, uint64_t bytes_in_flight) 949{ 950 if ((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP) 951 && bbr->bbr_mode == BBR_MODE_STARTUP) 952 return; 953 954 if (bbr->bbr_recovery_state == BBR_RS_NOT_IN_RECOVERY) 955 return; 956 957 // Set up the initial recovery window. 958 if (bbr->bbr_recovery_window == 0) 959 { 960 bbr->bbr_recovery_window = bytes_in_flight + bytes_acked; 961 bbr->bbr_recovery_window = MAX(bbr->bbr_min_cwnd, 962 bbr->bbr_recovery_window); 963 return; 964 } 965 966 // Remove losses from the recovery window, while accounting for a potential 967 // integer underflow. 968 if (bbr->bbr_recovery_window >= bytes_lost) 969 bbr->bbr_recovery_window -= bytes_lost; 970 else 971 bbr->bbr_recovery_window = kMaxSegmentSize; 972 973 // In CONSERVATION mode, just subtracting losses is sufficient. In GROWTH, 974 // release additional |bytes_acked| to achieve a slow-start-like behavior. 975 if (bbr->bbr_recovery_state == BBR_RS_GROWTH) 976 bbr->bbr_recovery_window += bytes_acked; 977 978 // Sanity checks. Ensure that we always allow to send at least an MSS or 979 // |bytes_acked| in response, whichever is larger. 980 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, 981 bytes_in_flight + bytes_acked); 982 if (FLAG_quic_bbr_one_mss_conservation) 983 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, 984 bytes_in_flight + kMaxSegmentSize); 985 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, bbr->bbr_min_cwnd); 986} 987 988 989static void 990lsquic_bbr_end_ack (void *cong_ctl, uint64_t in_flight) 991{ 992 struct lsquic_bbr *const bbr = cong_ctl; 993 int is_round_start, min_rtt_expired; 994 uint64_t bytes_acked, excess_acked, bytes_lost; 995 996 assert(bbr->bbr_flags & BBR_FLAG_IN_ACK); 997 bbr->bbr_flags &= ~BBR_FLAG_IN_ACK; 998 999 LSQ_DEBUG("end_ack; mode: %s; in_flight: %"PRIu64, mode2str[bbr->bbr_mode], 1000 in_flight); 1001 1002 bytes_acked = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler) 1003 - bbr->bbr_ack_state.total_bytes_acked_before; 1004 if (bbr->bbr_ack_state.acked_bytes) 1005 { 1006 is_round_start = bbr->bbr_ack_state.max_packno 1007 > bbr->bbr_current_round_trip_end 1008 || !is_valid_packno(bbr->bbr_current_round_trip_end); 1009 if (is_round_start) 1010 { 1011 ++bbr->bbr_round_count; 1012 bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno; 1013 LSQ_DEBUG("up round count to %"PRIu64"; new rt end: %"PRIu64, 1014 bbr->bbr_round_count, bbr->bbr_current_round_trip_end); 1015 } 1016 min_rtt_expired = update_bandwidth_and_min_rtt(bbr); 1017 update_recovery_state(bbr, is_round_start); 1018 excess_acked = update_ack_aggregation_bytes(bbr, bytes_acked); 1019 } 1020 else 1021 { 1022 is_round_start = 0; 1023 min_rtt_expired = 0; 1024 excess_acked = 0; 1025 } 1026 1027 if (bbr->bbr_mode == BBR_MODE_PROBE_BW) 1028 update_gain_cycle_phase(bbr, in_flight); 1029 1030 if (is_round_start && !(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 1031 check_if_full_bw_reached(bbr); 1032 1033 maybe_exit_startup_or_drain(bbr, bbr->bbr_ack_state.ack_time, in_flight); 1034 1035 maybe_enter_or_exit_probe_rtt(bbr, bbr->bbr_ack_state.ack_time, 1036 is_round_start, min_rtt_expired, in_flight); 1037 1038 // Calculate number of packets acked and lost. 1039 bytes_lost = bbr->bbr_ack_state.lost_bytes; 1040 1041 // After the model is updated, recalculate the pacing rate and congestion 1042 // window. 1043 calculate_pacing_rate(bbr); 1044 calculate_cwnd(bbr, bytes_acked, excess_acked); 1045 calculate_recovery_window(bbr, bytes_acked, bytes_lost, in_flight); 1046 1047 /* We don't need to clean up BW sampler */ 1048} 1049 1050 1051static void 1052lsquic_bbr_cleanup (void *cong_ctl) 1053{ 1054 struct lsquic_bbr *const bbr = cong_ctl; 1055 1056 lsquic_bw_sampler_cleanup(&bbr->bbr_bw_sampler); 1057 LSQ_DEBUG("cleanup"); 1058} 1059 1060 1061static void 1062lsquic_bbr_loss (void *cong_ctl) { /* Noop */ } 1063 1064 1065static void 1066lsquic_bbr_timeout (void *cong_ctl) { /* Noop */ } 1067 1068 1069const struct cong_ctl_if lsquic_cong_bbr_if = 1070{ 1071 .cci_ack = lsquic_bbr_ack, 1072 .cci_begin_ack = lsquic_bbr_begin_ack, 1073 .cci_end_ack = lsquic_bbr_end_ack, 1074 .cci_cleanup = lsquic_bbr_cleanup, 1075 .cci_get_cwnd = lsquic_bbr_get_cwnd, 1076 .cci_init = lsquic_bbr_init, 1077 .cci_pacing_rate = lsquic_bbr_pacing_rate, 1078 .cci_loss = lsquic_bbr_loss, 1079 .cci_lost = lsquic_bbr_lost, 1080 .cci_reinit = lsquic_bbr_reinit, 1081 .cci_timeout = lsquic_bbr_timeout, 1082 .cci_sent = lsquic_bbr_sent, 1083 .cci_was_quiet = lsquic_bbr_was_quiet, 1084}; 1085