lsquic_bbr.c revision 10c41073
/* Copyright (c) 2017 - 2020 LiteSpeed Technologies Inc.  See LICENSE. */
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE.chrome file.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

#include "lsquic.h"
#include "lsquic_int_types.h"
#include "lsquic_cong_ctl.h"
#include "lsquic_minmax.h"
#include "lsquic_packet_common.h"
#include "lsquic_packet_out.h"
#include "lsquic_bw_sampler.h"
#include "lsquic_bbr.h"
#include "lsquic_hash.h"
#include "lsquic_conn.h"
#include "lsquic_sfcw.h"
#include "lsquic_conn_flow.h"
#include "lsquic_varint.h"
#include "lsquic_hq.h"
#include "lsquic_stream.h"
#include "lsquic_rtt.h"
#include "lsquic_conn_public.h"
#include "lsquic_util.h"
#include "lsquic_malo.h"
#include "lsquic_crand.h"
#include "lsquic_mm.h"
#include "lsquic_engine_public.h"

#define LSQUIC_LOGGER_MODULE LSQLM_BBR
#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(bbr->bbr_conn_pub->lconn)
#include "lsquic_logger.h"

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Convert milliseconds/seconds to the microsecond units used throughout */
#define ms(val_) ((val_) * 1000)
#define sec(val_) ((val_) * 1000 * 1000)

// Default maximum packet size used in the Linux TCP implementation.
// Used in QUIC for congestion window computations in bytes.
#define kDefaultTCPMSS 1460
#define kMaxSegmentSize kDefaultTCPMSS

// Constants based on TCP defaults.
// The minimum CWND to ensure delayed acks don't reduce bandwidth measurements.
// Does not inflate the pacing rate.
#define kDefaultMinimumCongestionWindow  (4 * kDefaultTCPMSS)

// The gain used for the STARTUP, equal to 2/ln(2).
#define kDefaultHighGain 2.885f

// The newly derived gain for STARTUP, equal to 4 * ln(2)
#define kDerivedHighGain 2.773f

// The newly derived CWND gain for STARTUP, 2.
#define kDerivedHighCWNDGain 2.0f

// The gain used in STARTUP after loss has been detected.
// 1.5 is enough to allow for 25% exogenous loss and still observe a 25% growth
// in measured bandwidth.
#define kStartupAfterLossGain 1.5f

// We match SPDY's use of 32 (since we'd compete with SPDY).
#define kInitialCongestionWindow 32

/* Taken from send_algorithm_interface.h */
#define kDefaultMaxCongestionWindowPackets 2000

// The time after which the current min_rtt value expires.
#define kMinRttExpiry sec(10)

// Coefficient to determine if a new RTT is sufficiently similar to min_rtt that
// we don't need to enter PROBE_RTT.
#define kSimilarMinRttThreshold 1.125f

// If the bandwidth does not increase by the factor of |kStartupGrowthTarget|
// within |kRoundTripsWithoutGrowthBeforeExitingStartup| rounds, the connection
// will exit the STARTUP mode.
#define kStartupGrowthTarget 1.25

#define kRoundTripsWithoutGrowthBeforeExitingStartup 3

// Compile-time disabled: STARTUP pacing-rate reduction based on lost bytes.
#define startup_rate_reduction_multiplier_ 0

// The cycle of gains used during the PROBE_BW stage.
static const float kPacingGain[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1};

// The length of the gain cycle.
static const size_t kGainCycleLength = sizeof(kPacingGain)
                                                    / sizeof(kPacingGain[0]);

// Coefficient of target congestion window to use when basing PROBE_RTT on BDP.
#define kModerateProbeRttMultiplier 0.75

// The maximum packet size of any QUIC packet over IPv6, based on ethernet's max
// size, minus the IP and UDP headers. IPv6 has a 40 byte header, UDP adds an
// additional 8 bytes.  This is a total overhead of 48 bytes.  Ethernet's
// max packet size is 1500 bytes,  1500 - 48 = 1452.
106#define kMaxV6PacketSize 1452 107// The maximum packet size of any QUIC packet over IPv4. 108// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472. 109#define kMaxV4PacketSize 1472 110// The maximum incoming packet size allowed. 111#define kMaxIncomingPacketSize kMaxV4PacketSize 112// The maximum outgoing packet size allowed. 113#define kMaxOutgoingPacketSize kMaxV6PacketSize 114 115// The minimum time the connection can spend in PROBE_RTT mode. 116#define kProbeRttTime ms(200) 117 118/* FLAG* are from net/quic/quic_flags_list.h */ 119 120// When in STARTUP and recovery, do not add bytes_acked to QUIC BBR's CWND in 121// CalculateCongestionWindow() 122#define FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery 0 123 124// When true, ensure BBR allows at least one MSS to be sent in response to an 125// ACK in packet conservation. 126#define FLAG_quic_bbr_one_mss_conservation 0 127 128/* From net/quic/quic_flags_list.h */ 129#define kCwndGain 2.0 130 131 132static uint64_t lsquic_bbr_get_cwnd (void *); 133 134 135static const char *const mode2str[] = 136{ 137 [BBR_MODE_STARTUP] = "STARTUP", 138 [BBR_MODE_DRAIN] = "DRAIN", 139 [BBR_MODE_PROBE_BW] = "PROBE_BW", 140 [BBR_MODE_PROBE_RTT] = "PROBE_RTT", 141}; 142 143 144static void 145set_mode (struct lsquic_bbr *bbr, enum bbr_mode mode) 146{ 147 if (bbr->bbr_mode != mode) 148 { 149 LSQ_DEBUG("mode change %s -> %s", mode2str[bbr->bbr_mode], 150 mode2str[mode]); 151 bbr->bbr_mode = mode; 152 } 153 else 154 LSQ_DEBUG("mode remains %s", mode2str[mode]); 155} 156 157 158static void 159set_startup_values (struct lsquic_bbr *bbr) 160{ 161 bbr->bbr_pacing_gain = bbr->bbr_high_gain; 162 bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain; 163} 164 165 166static void 167lsquic_bbr_init (void *cong_ctl, const struct lsquic_conn_public *conn_pub, 168 enum quic_ft_bit retx_frames) 169{ 170 struct lsquic_bbr *const bbr = cong_ctl; 171 bbr->bbr_conn_pub = conn_pub; 172 lsquic_bw_sampler_init(&bbr->bbr_bw_sampler, conn_pub->lconn, 
retx_frames); 173 174 bbr->bbr_rtt_stats = &conn_pub->rtt_stats; 175 bbr->bbr_mode = BBR_MODE_STARTUP; 176 bbr->bbr_round_count = 0; 177 minmax_init(&bbr->bbr_max_bandwidth, 10); 178 minmax_init(&bbr->bbr_max_ack_height, 10); 179 bbr->bbr_aggregation_epoch_bytes = 0; 180 bbr->bbr_aggregation_epoch_start_time = 0; 181 bbr->bbr_min_rtt = 0; 182 bbr->bbr_min_rtt_timestamp = 0; 183 bbr->bbr_init_cwnd = kInitialCongestionWindow * kDefaultTCPMSS; 184 bbr->bbr_cwnd = kInitialCongestionWindow * kDefaultTCPMSS; 185 bbr->bbr_max_cwnd = kDefaultMaxCongestionWindowPackets * kDefaultTCPMSS; 186 bbr->bbr_min_cwnd = kDefaultMinimumCongestionWindow; 187 bbr->bbr_high_gain = kDefaultHighGain; 188 bbr->bbr_high_cwnd_gain = kDefaultHighGain; 189 bbr->bbr_drain_gain = 1.0 / kDefaultHighGain; 190 bbr->bbr_pacing_rate = BW_ZERO(); 191 bbr->bbr_pacing_gain = 1.0; 192 bbr->bbr_cwnd_gain = 1.0; 193 bbr->bbr_num_startup_rtts = kRoundTripsWithoutGrowthBeforeExitingStartup; 194 bbr->bbr_flags &= ~BBR_FLAG_EXIT_STARTUP_ON_LOSS; 195 bbr->bbr_cycle_current_offset = 0; 196 bbr->bbr_last_cycle_start = 0; 197 bbr->bbr_flags &= ~BBR_FLAG_IS_AT_FULL_BANDWIDTH; 198 bbr->bbr_round_wo_bw_gain = 0; 199 bbr->bbr_bw_at_last_round = BW_ZERO(); 200 bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE; 201 bbr->bbr_exit_probe_rtt_at = 0; 202 bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED; 203 bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 204 bbr->bbr_flags &= ~BBR_FLAG_HAS_NON_APP_LIMITED; 205 bbr->bbr_flags &= ~BBR_FLAG_FLEXIBLE_APP_LIMITED; 206 207 set_startup_values(bbr); 208 209 LSQ_DEBUG("initialized"); 210} 211 212 213static lsquic_time_t 214get_min_rtt (const struct lsquic_bbr *bbr) 215{ 216 lsquic_time_t min_rtt; 217 218 if (bbr->bbr_min_rtt) 219 return bbr->bbr_min_rtt; 220 else 221 { 222 min_rtt = lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats); 223 if (min_rtt == 0) 224 min_rtt = 25000; 225 return min_rtt; 226 } 227} 228 229 230static uint64_t 231lsquic_bbr_pacing_rate (void *cong_ctl, int 
in_recovery) 232{ 233 struct lsquic_bbr *const bbr = cong_ctl; 234 lsquic_time_t min_rtt; 235 struct bandwidth bw; 236 237 if (!BW_IS_ZERO(&bbr->bbr_pacing_rate)) 238 bw = bbr->bbr_pacing_rate; 239 else 240 { 241 min_rtt = get_min_rtt(bbr); 242 bw = BW_FROM_BYTES_AND_DELTA(bbr->bbr_init_cwnd, min_rtt); 243 bw = BW_TIMES(&bw, bbr->bbr_high_cwnd_gain); 244 } 245 246 return BW_TO_BYTES_PER_SEC(&bw); 247} 248 249 250/* BbrSender::GetTargetCongestionWindow */ 251static uint64_t 252get_target_cwnd (const struct lsquic_bbr *bbr, float gain) 253{ 254 struct bandwidth bw; 255 uint64_t bdp, cwnd; 256 257 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 258 bdp = get_min_rtt(bbr) * BW_TO_BYTES_PER_SEC(&bw) / 1000000; 259 cwnd = gain * bdp; 260 261 // BDP estimate will be zero if no bandwidth samples are available yet. 262 if (cwnd == 0) 263 cwnd = gain * bbr->bbr_init_cwnd; 264 265 return MAX(cwnd, bbr->bbr_min_cwnd); 266} 267 268 269/* See BbrSender::IsPipeSufficientlyFull */ 270static int 271is_pipe_sufficiently_full (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 272{ 273 // See if we need more bytes in flight to see more bandwidth. 274 if (bbr->bbr_mode == BBR_MODE_STARTUP) 275 // STARTUP exits if it doesn't observe a 25% bandwidth increase, so 276 // the CWND must be more than 25% above the target. 277 return bytes_in_flight >= get_target_cwnd(bbr, 1.5); 278 else if (bbr->bbr_pacing_gain > 1) 279 // Super-unity PROBE_BW doesn't exit until 1.25 * BDP is achieved. 280 return bytes_in_flight >= get_target_cwnd(bbr, bbr->bbr_pacing_gain); 281 else 282 // If bytes_in_flight are above the target congestion window, it should 283 // be possible to observe the same or more bandwidth if it's available. 
284 return bytes_in_flight >= get_target_cwnd(bbr, 1.1); 285} 286 287 288static void 289lsquic_bbr_was_quiet (void *cong_ctl, lsquic_time_t now, uint64_t in_flight) 290{ 291 struct lsquic_bbr *const bbr = cong_ctl; 292 LSQ_DEBUG("was quiet"); /* Do nothing */ 293} 294 295 296/* See BbrSender::OnApplicationLimited */ 297static void 298bbr_app_limited (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 299{ 300 uint64_t cwnd; 301 302 cwnd = lsquic_bbr_get_cwnd(bbr); 303 if (bytes_in_flight >= cwnd) 304 return; 305 if ((bbr->bbr_flags & BBR_FLAG_FLEXIBLE_APP_LIMITED) 306 && is_pipe_sufficiently_full(bbr, bytes_in_flight)) 307 return; 308 309 bbr->bbr_flags |= BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT; 310 lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler); 311 LSQ_DEBUG("becoming application-limited. Last sent packet: %"PRIu64"; " 312 "CWND: %"PRIu64, bbr->bbr_last_sent_packno, cwnd); 313} 314 315 316static void 317lsquic_bbr_ack (void *cong_ctl, struct lsquic_packet_out *packet_out, 318 unsigned packet_sz, lsquic_time_t now_time, int app_limited) 319{ 320 struct lsquic_bbr *const bbr = cong_ctl; 321 struct bw_sample *sample; 322 323 assert(bbr->bbr_flags & BBR_FLAG_IN_ACK); 324 325 sample = lsquic_bw_sampler_packet_acked(&bbr->bbr_bw_sampler, packet_out, 326 bbr->bbr_ack_state.ack_time); 327 if (sample) 328 TAILQ_INSERT_TAIL(&bbr->bbr_ack_state.samples, sample, next); 329 330 if (is_valid_packno(bbr->bbr_ack_state.max_packno)) 331 /* We assume packet numbers are ordered */ 332 assert(packet_out->po_packno > bbr->bbr_ack_state.max_packno); 333 bbr->bbr_ack_state.max_packno = packet_out->po_packno; 334 bbr->bbr_ack_state.acked_bytes += packet_sz; 335} 336 337 338static void 339lsquic_bbr_sent (void *cong_ctl, struct lsquic_packet_out *packet_out, 340 uint64_t in_flight, int app_limited) 341{ 342 struct lsquic_bbr *const bbr = cong_ctl; 343 344 if (!(packet_out->po_flags & PO_MINI)) 345 lsquic_bw_sampler_packet_sent(&bbr->bbr_bw_sampler, packet_out, 346 in_flight); 347 
348 /* Obviously we make an assumption that sent packet number are always 349 * increasing. 350 */ 351 bbr->bbr_last_sent_packno = packet_out->po_packno; 352 353 if (app_limited) 354 bbr_app_limited(bbr, in_flight); 355} 356 357 358static void 359lsquic_bbr_lost (void *cong_ctl, struct lsquic_packet_out *packet_out, 360 unsigned packet_sz) 361{ 362 struct lsquic_bbr *const bbr = cong_ctl; 363 364 lsquic_bw_sampler_packet_lost(&bbr->bbr_bw_sampler, packet_out); 365 bbr->bbr_ack_state.has_losses = 1; 366 bbr->bbr_ack_state.lost_bytes += packet_sz; 367} 368 369 370static void 371lsquic_bbr_begin_ack (void *cong_ctl, lsquic_time_t ack_time, uint64_t in_flight) 372{ 373 struct lsquic_bbr *const bbr = cong_ctl; 374 375 assert(!(bbr->bbr_flags & BBR_FLAG_IN_ACK)); 376 bbr->bbr_flags |= BBR_FLAG_IN_ACK; 377 memset(&bbr->bbr_ack_state, 0, sizeof(bbr->bbr_ack_state)); 378 TAILQ_INIT(&bbr->bbr_ack_state.samples); 379 bbr->bbr_ack_state.ack_time = ack_time; 380 bbr->bbr_ack_state.max_packno = UINT64_MAX; 381 bbr->bbr_ack_state.in_flight = in_flight; 382 bbr->bbr_ack_state.total_bytes_acked_before 383 = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler); 384} 385 386 387/* Based on BbrSender::ShouldExtendMinRttExpiry() */ 388static int 389should_extend_min_rtt_expiry (const struct lsquic_bbr *bbr) 390{ 391 int increased_since_last_probe; 392 393 if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 394 |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED)) 395 == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 396 |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED)) 397 // Extend the current min_rtt if we've been app limited recently. 
398 return 1; 399 400 increased_since_last_probe = bbr->bbr_min_rtt_since_last_probe 401 > bbr->bbr_min_rtt * kSimilarMinRttThreshold; 402 if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 403 |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT)) 404 == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 405 |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT) 406 && !increased_since_last_probe) 407 // Extend the current min_rtt if we've been app limited recently and an 408 // rtt has been measured in that time that's less than 12.5% more than 409 // the current min_rtt. 410 return 1; 411 412 return 0; 413} 414 415 416/* Based on BbrSender::UpdateBandwidthAndMinRtt */ 417/* Returns true if min RTT expired, false otherwise */ 418static int 419update_bandwidth_and_min_rtt (struct lsquic_bbr *bbr) 420{ 421 struct bw_sample *sample, *next_sample; 422 uint64_t sample_min_rtt; 423 int min_rtt_expired; 424 425 sample_min_rtt = UINT64_MAX; 426 for (sample = TAILQ_FIRST(&bbr->bbr_ack_state.samples); sample; 427 sample = next_sample) 428 { 429 next_sample = TAILQ_NEXT(sample, next); 430 431 if (sample->is_app_limited) 432 bbr->bbr_flags |= BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 433 else 434 { 435 bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 436 bbr->bbr_flags |= BBR_FLAG_HAS_NON_APP_LIMITED; 437 } 438 439 if (sample_min_rtt == UINT64_MAX || sample->rtt < sample_min_rtt) 440 sample_min_rtt = sample->rtt; 441 442 if (!sample->is_app_limited 443 || BW_VALUE(&sample->bandwidth) 444 > minmax_get(&bbr->bbr_max_bandwidth)) 445 minmax_upmax(&bbr->bbr_max_bandwidth, bbr->bbr_round_count, 446 BW_VALUE(&sample->bandwidth)); 447 448 lsquic_malo_put(sample); 449 } 450 451 if (sample_min_rtt == UINT64_MAX) 452 return 0; 453 454 bbr->bbr_min_rtt_since_last_probe 455 = MIN(bbr->bbr_min_rtt_since_last_probe, sample_min_rtt); 456 457 min_rtt_expired = bbr->bbr_min_rtt != 0 && (bbr->bbr_ack_state.ack_time 458 > bbr->bbr_min_rtt_timestamp + kMinRttExpiry); 459 if (min_rtt_expired || sample_min_rtt < 
bbr->bbr_min_rtt 460 || 0 == bbr->bbr_min_rtt) 461 { 462 if (min_rtt_expired && should_extend_min_rtt_expiry(bbr)) 463 { 464 LSQ_DEBUG("min rtt expiration extended, stay at: %"PRIu64, 465 bbr->bbr_min_rtt); 466 min_rtt_expired = 0; 467 } 468 else 469 { 470 LSQ_DEBUG("min rtt updated: %"PRIu64" -> %"PRIu64, 471 bbr->bbr_min_rtt, sample_min_rtt); 472 bbr->bbr_min_rtt = sample_min_rtt; 473 } 474 bbr->bbr_min_rtt_timestamp = bbr->bbr_ack_state.ack_time; 475 bbr->bbr_min_rtt_since_last_probe = UINT64_MAX; 476 bbr->bbr_flags &= ~BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT; 477 } 478 479 return min_rtt_expired; 480} 481 482 483/* Based on BbrSender::UpdateRecoveryState() */ 484static void 485update_recovery_state (struct lsquic_bbr *bbr, int is_round_start) 486{ 487 // Exit recovery when there are no losses for a round. 488 if (bbr->bbr_ack_state.has_losses) 489 bbr->bbr_end_recovery_at = bbr->bbr_last_sent_packno; 490 491 switch (bbr->bbr_recovery_state) 492 { 493 case BBR_RS_NOT_IN_RECOVERY: 494 // Enter conservation on the first loss. 495 if (bbr->bbr_ack_state.has_losses) 496 { 497 bbr->bbr_recovery_state = BBR_RS_CONSERVATION; 498 // This will cause the |bbr_recovery_window| to be set to the 499 // correct value in CalculateRecoveryWindow(). 500 bbr->bbr_recovery_window = 0; 501 // Since the conservation phase is meant to be lasting for a whole 502 // round, extend the current round as if it were started right now. 503 bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno; 504 } 505 break; 506 case BBR_RS_CONSERVATION: 507 if (is_round_start) 508 bbr->bbr_recovery_state = BBR_RS_GROWTH; 509 /* Fall-through */ 510 case BBR_RS_GROWTH: 511 // Exit recovery if appropriate. 
512 if (!bbr->bbr_ack_state.has_losses 513 && bbr->bbr_ack_state.max_packno > bbr->bbr_end_recovery_at) 514 bbr->bbr_recovery_state = BBR_RS_NOT_IN_RECOVERY; 515 break; 516 } 517} 518 519 520static uint64_t 521update_ack_aggregation_bytes (struct lsquic_bbr *bbr, 522 uint64_t newly_acked_bytes) 523{ 524 const lsquic_time_t ack_time = bbr->bbr_ack_state.ack_time; 525 uint64_t expected_bytes_acked, diff; 526 527 // Compute how many bytes are expected to be delivered, assuming max 528 // bandwidth is correct. 529 expected_bytes_acked = minmax_get(&bbr->bbr_max_bandwidth) 530 * (ack_time - bbr->bbr_aggregation_epoch_start_time); 531 532 // Reset the current aggregation epoch as soon as the ack arrival rate is 533 // less than or equal to the max bandwidth. 534 if (bbr->bbr_aggregation_epoch_bytes <= expected_bytes_acked) 535 { 536 // Reset to start measuring a new aggregation epoch. 537 bbr->bbr_aggregation_epoch_bytes = newly_acked_bytes; 538 bbr->bbr_aggregation_epoch_start_time = ack_time; 539 return 0; 540 } 541 542 // Compute how many extra bytes were delivered vs max bandwidth. 543 // Include the bytes most recently acknowledged to account for stretch acks. 544 bbr->bbr_aggregation_epoch_bytes += newly_acked_bytes; 545 diff = bbr->bbr_aggregation_epoch_bytes - expected_bytes_acked; 546 minmax_upmax(&bbr->bbr_max_ack_height, bbr->bbr_round_count, diff); 547 return diff; 548} 549 550 551/* See BbrSender::UpdateGainCyclePhase() */ 552static void 553update_gain_cycle_phase (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 554{ 555 const uint64_t prior_in_flight = bbr->bbr_ack_state.in_flight; 556 const lsquic_time_t now = bbr->bbr_ack_state.ack_time; 557 // In most cases, the cycle is advanced after an RTT passes. 
558 int should_advance_gain_cycling 559 = now - bbr->bbr_last_cycle_start > get_min_rtt(bbr); 560 561 // If the pacing gain is above 1.0, the connection is trying to probe the 562 // bandwidth by increasing the number of bytes in flight to at least 563 // pacing_gain * BDP. Make sure that it actually reaches the target, as 564 // long as there are no losses suggesting that the buffers are not able to 565 // hold that much. 566 if (bbr->bbr_pacing_gain > 1.0 567 && !bbr->bbr_ack_state.has_losses 568 && prior_in_flight < get_target_cwnd(bbr, bbr->bbr_pacing_gain)) 569 should_advance_gain_cycling = 0; 570 571 /* Several optimizations are possible here: "else if" instead of "if", as 572 * well as not calling get_target_cwnd() if `should_advance_gain_cycling' 573 * is already set to the target value. 574 */ 575 576 // If pacing gain is below 1.0, the connection is trying to drain the extra 577 // queue which could have been incurred by probing prior to it. If the 578 // number of bytes in flight falls down to the estimated BDP value earlier, 579 // conclude that the queue has been successfully drained and exit this cycle 580 // early. 581 if (bbr->bbr_pacing_gain < 1.0 582 && bytes_in_flight <= get_target_cwnd(bbr, 1)) 583 should_advance_gain_cycling = 1; 584 585 if (should_advance_gain_cycling) 586 { 587 bbr->bbr_cycle_current_offset = 588 (bbr->bbr_cycle_current_offset + 1) % kGainCycleLength; 589 bbr->bbr_last_cycle_start = now; 590 // Stay in low gain mode until the target BDP is hit. Low gain mode 591 // will be exited immediately when the target BDP is achieved. 
592 if ((bbr->bbr_flags & BBR_FLAG_DRAIN_TO_TARGET) 593 && bbr->bbr_pacing_gain < 1 594 && kPacingGain[bbr->bbr_cycle_current_offset] == 1 595 && bytes_in_flight > get_target_cwnd(bbr, 1)) 596 return; 597 bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset]; 598 LSQ_DEBUG("advanced gain cycle, pacing gain set to %.2f", 599 bbr->bbr_pacing_gain); 600 } 601} 602 603 604/* BbrSender::InRecovery() */ 605static int 606in_recovery (const struct lsquic_bbr *bbr) 607{ 608 return bbr->bbr_recovery_state != BBR_RS_NOT_IN_RECOVERY; 609} 610 611 612/* See BbrSender::CheckIfFullBandwidthReached() */ 613static void 614check_if_full_bw_reached (struct lsquic_bbr *bbr) 615{ 616 struct bandwidth target, bw; 617 618 if (bbr->bbr_flags & BBR_FLAG_LAST_SAMPLE_APP_LIMITED) 619 { 620 LSQ_DEBUG("last sample app limited: full BW not reached"); 621 return; 622 } 623 624 target = BW_TIMES(&bbr->bbr_bw_at_last_round, kStartupGrowthTarget); 625 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 626 if (BW_VALUE(&bw) >= BW_VALUE(&target)) 627 { 628 bbr->bbr_bw_at_last_round = bw; 629 bbr->bbr_round_wo_bw_gain = 0; 630 if (bbr->bbr_flags & BBR_FLAG_EXPIRE_ACK_AGG_IN_STARTUP) 631 // Expire old excess delivery measurements now that bandwidth 632 // increased. 
633 minmax_reset(&bbr->bbr_max_ack_height, 634 ((struct minmax_sample) { bbr->bbr_round_count, 0, })); 635 LSQ_DEBUG("BW estimate %"PRIu64"bps greater than or equal to target " 636 "%"PRIu64"bps: full BW not reached", 637 BW_VALUE(&bw), BW_VALUE(&target)); 638 return; 639 } 640 641 ++bbr->bbr_round_wo_bw_gain; 642 if ((bbr->bbr_round_wo_bw_gain >= bbr->bbr_num_startup_rtts) 643 || ((bbr->bbr_flags & BBR_FLAG_EXIT_STARTUP_ON_LOSS) 644 && in_recovery(bbr))) 645 { 646 assert(bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED); /* DCHECK */ 647 bbr->bbr_flags |= BBR_FLAG_IS_AT_FULL_BANDWIDTH; 648 LSQ_DEBUG("reached full BW"); 649 } 650 else 651 LSQ_DEBUG("rounds w/o gain: %u, full BW not reached", 652 bbr->bbr_round_wo_bw_gain); 653} 654 655 656/* See BbrSender::OnExitStartup */ 657static void 658on_exit_startup (struct lsquic_bbr *bbr, lsquic_time_t now) 659{ 660 assert(bbr->bbr_mode == BBR_MODE_STARTUP); 661 /* Apparently this method is just to update stats, something that we 662 * don't do yet. 663 */ 664} 665 666 667/* See BbrSender::EnterProbeBandwidthMode */ 668static void 669enter_probe_bw_mode (struct lsquic_bbr *bbr, lsquic_time_t now) 670{ 671 uint8_t rand; 672 673 set_mode(bbr, BBR_MODE_PROBE_BW); 674 bbr->bbr_cwnd_gain = kCwndGain; 675 676 // Pick a random offset for the gain cycle out of {0, 2..7} range. 1 is 677 // excluded because in that case increased gain and decreased gain would not 678 // follow each other. 
679 rand = lsquic_crand_get_byte(bbr->bbr_conn_pub->enpub->enp_crand); 680 bbr->bbr_cycle_current_offset = rand % (kGainCycleLength - 1); 681 if (bbr->bbr_cycle_current_offset >= 1) 682 ++bbr->bbr_cycle_current_offset; 683 684 bbr->bbr_last_cycle_start = now; 685 bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset]; 686} 687 688 689/* See BbrSender::EnterStartupMode */ 690static void 691enter_startup_mode (struct lsquic_bbr *bbr, lsquic_time_t now) 692{ 693 set_mode(bbr, BBR_MODE_STARTUP); 694 set_startup_values(bbr); 695} 696 697 698/* See BbrSender::MaybeExitStartupOrDrain() */ 699static void 700maybe_exit_startup_or_drain (struct lsquic_bbr *bbr, lsquic_time_t now, 701 uint64_t bytes_in_flight) 702{ 703 uint64_t target_cwnd; 704 705 if (bbr->bbr_mode == BBR_MODE_STARTUP 706 && (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 707 { 708 on_exit_startup(bbr, now); 709 set_mode(bbr, BBR_MODE_DRAIN); 710 bbr->bbr_pacing_gain = bbr->bbr_drain_gain; 711 bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain; 712 } 713 714 if (bbr->bbr_mode == BBR_MODE_DRAIN) 715 { 716 target_cwnd = get_target_cwnd(bbr, 1); 717 LSQ_DEBUG("%s: bytes in flight: %"PRIu64"; target cwnd: %"PRIu64, 718 __func__, bytes_in_flight, target_cwnd); 719 if (bytes_in_flight <= target_cwnd) 720 enter_probe_bw_mode(bbr, now); 721 } 722} 723 724 725static int 726in_slow_start (const struct lsquic_bbr *bbr) 727{ 728 return bbr->bbr_mode == BBR_MODE_STARTUP; 729} 730 731 732/* See QuicByteCount BbrSender::ProbeRttCongestionWindow() */ 733static uint64_t 734get_probe_rtt_cwnd (const struct lsquic_bbr *bbr) 735{ 736 if (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_BASED_ON_BDP) 737 return get_target_cwnd(bbr, kModerateProbeRttMultiplier); 738 else 739 return bbr->bbr_min_cwnd; 740} 741 742 743static uint64_t 744lsquic_bbr_get_cwnd (void *cong_ctl) 745{ 746 struct lsquic_bbr *const bbr = cong_ctl; 747 uint64_t cwnd; 748 749 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 750 cwnd = get_probe_rtt_cwnd(bbr); 751 else if 
(in_recovery(bbr) && 752 !((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP) 753 && bbr->bbr_mode == BBR_MODE_STARTUP)) 754 cwnd = MIN(bbr->bbr_cwnd, bbr->bbr_recovery_window); 755 else 756 cwnd = bbr->bbr_cwnd; 757 758 return cwnd; 759} 760 761 762/* See BbrSender::MaybeEnterOrExitProbeRtt */ 763static void 764maybe_enter_or_exit_probe_rtt (struct lsquic_bbr *bbr, lsquic_time_t now, 765 int is_round_start, int min_rtt_expired, uint64_t bytes_in_flight) 766{ 767 if (min_rtt_expired 768 && !(bbr->bbr_flags & BBR_FLAG_EXITING_QUIESCENCE) 769 && bbr->bbr_mode != BBR_MODE_PROBE_RTT) 770 { 771 if (in_slow_start(bbr)) 772 on_exit_startup(bbr, now); 773 set_mode(bbr, BBR_MODE_PROBE_RTT); 774 bbr->bbr_pacing_gain = 1; 775 // Do not decide on the time to exit PROBE_RTT until the 776 // |bytes_in_flight| is at the target small value. 777 bbr->bbr_exit_probe_rtt_at = 0; 778 } 779 780 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 781 { 782 lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler); 783 LSQ_DEBUG("%s: exit probe at: %"PRIu64"; now: %"PRIu64 784 "; round start: %d; round passed: %d; rtt: %"PRIu64" usec", 785 __func__, bbr->bbr_exit_probe_rtt_at, now, is_round_start, 786 !!(bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED), 787 lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)); 788 if (bbr->bbr_exit_probe_rtt_at == 0) 789 { 790 // If the window has reached the appropriate size, schedule exiting 791 // PROBE_RTT. The CWND during PROBE_RTT is 792 // kMinimumCongestionWindow, but we allow an extra packet since QUIC 793 // checks CWND before sending a packet. 
794 if (bytes_in_flight 795 < get_probe_rtt_cwnd(bbr) + kMaxOutgoingPacketSize) 796 { 797 bbr->bbr_exit_probe_rtt_at = now + kProbeRttTime; 798 bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED; 799 } 800 } 801 else 802 { 803 if (is_round_start) 804 bbr->bbr_flags |= BBR_FLAG_PROBE_RTT_ROUND_PASSED; 805 if (now >= bbr->bbr_exit_probe_rtt_at 806 && (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED)) 807 { 808 bbr->bbr_min_rtt_timestamp = now; 809 if (!(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 810 enter_startup_mode(bbr, now); 811 else 812 enter_probe_bw_mode(bbr, now); 813 } 814 } 815 } 816 817 bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE; 818} 819 820 821/* See BbrSender::CalculatePacingRate */ 822static void 823calculate_pacing_rate (struct lsquic_bbr *bbr) 824{ 825 struct bandwidth bw, target_rate; 826 827 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 828 if (BW_IS_ZERO(&bw)) 829 return; 830 831 LSQ_DEBUG("BW estimate: %"PRIu64, BW_VALUE(&bw)); 832 833 target_rate = BW_TIMES(&bw, bbr->bbr_pacing_gain); 834 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 835 { 836 bbr->bbr_pacing_rate = target_rate; 837 return; 838 } 839 840 // Pace at the rate of initial_window / RTT as soon as RTT measurements are 841 // available. 842 if (BW_IS_ZERO(&bbr->bbr_pacing_rate) 843 && 0 != lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)) 844 { 845 bbr->bbr_pacing_rate = BW_FROM_BYTES_AND_DELTA( 846 bbr->bbr_init_cwnd, 847 lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)); 848 return; 849 } 850 851 // Slow the pacing rate in STARTUP once loss has ever been detected. 852 const int has_ever_detected_loss = bbr->bbr_end_recovery_at != 0; 853 if (has_ever_detected_loss 854 && (bbr->bbr_flags & (BBR_FLAG_SLOWER_STARTUP 855 |BBR_FLAG_HAS_NON_APP_LIMITED)) 856 == (BBR_FLAG_SLOWER_STARTUP|BBR_FLAG_HAS_NON_APP_LIMITED)) 857 { 858 bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupAfterLossGain); 859 return; 860 } 861 862 // Slow the pacing rate in STARTUP by the bytes_lost / CWND. 
863 if (startup_rate_reduction_multiplier_ != 0 864 && has_ever_detected_loss 865 && (bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED)) 866 { 867 bbr->bbr_pacing_rate = BW_TIMES(&target_rate, 868 (1 - (bbr->bbr_startup_bytes_lost 869 * startup_rate_reduction_multiplier_ * 1.0f 870 / bbr->bbr_cwnd_gain))); 871 // Ensure the pacing rate doesn't drop below the startup growth target 872 // times the bandwidth estimate. 873 if (BW_VALUE(&bbr->bbr_pacing_rate) 874 < BW_VALUE(&bw) * kStartupGrowthTarget) 875 bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupGrowthTarget); 876 return; 877 } 878 879 // Do not decrease the pacing rate during startup. 880 if (BW_VALUE(&bbr->bbr_pacing_rate) < BW_VALUE(&target_rate)) 881 bbr->bbr_pacing_rate = target_rate; 882} 883 884 885/* See BbrSender::CalculateCongestionWindow */ 886static void 887calculate_cwnd (struct lsquic_bbr *bbr, uint64_t bytes_acked, 888 uint64_t excess_acked) 889{ 890 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 891 return; 892 893 uint64_t target_window = get_target_cwnd(bbr, bbr->bbr_cwnd_gain); 894 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 895 // Add the max recently measured ack aggregation to CWND. 896 target_window += minmax_get(&bbr->bbr_max_ack_height); 897 else if (bbr->bbr_flags & BBR_FLAG_ENABLE_ACK_AGG_IN_STARTUP) 898 // Add the most recent excess acked. Because CWND never decreases in 899 // STARTUP, this will automatically create a very localized max filter. 900 target_window += excess_acked; 901 902 // Instead of immediately setting the target CWND as the new one, BBR grows 903 // the CWND towards |target_window| by only increasing it |bytes_acked| at a 904 // time. 
905 const int add_bytes_acked = 906 !FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery || !in_recovery(bbr); 907 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 908 bbr->bbr_cwnd = MIN(target_window, bbr->bbr_cwnd + bytes_acked); 909 else if (add_bytes_acked && 910 (bbr->bbr_cwnd_gain < target_window || 911 lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler) 912 < bbr->bbr_init_cwnd)) 913 // If the connection is not yet out of startup phase, do not decrease 914 // the window. 915 bbr->bbr_cwnd += bytes_acked; 916 917 // Enforce the limits on the congestion window. 918 if (bbr->bbr_cwnd < bbr->bbr_min_cwnd) 919 bbr->bbr_cwnd = bbr->bbr_min_cwnd; 920 else if (bbr->bbr_cwnd > bbr->bbr_max_cwnd) 921 { 922 LSQ_DEBUG("exceed max cwnd"); 923 bbr->bbr_cwnd = bbr->bbr_max_cwnd; 924 } 925} 926 927 928/* See BbrSender::CalculateRecoveryWindow */ 929static void 930calculate_recovery_window (struct lsquic_bbr *bbr, uint64_t bytes_acked, 931 uint64_t bytes_lost, uint64_t bytes_in_flight) 932{ 933 if ((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP) 934 && bbr->bbr_mode == BBR_MODE_STARTUP) 935 return; 936 937 if (bbr->bbr_recovery_state == BBR_RS_NOT_IN_RECOVERY) 938 return; 939 940 // Set up the initial recovery window. 941 if (bbr->bbr_recovery_window == 0) 942 { 943 bbr->bbr_recovery_window = bytes_in_flight + bytes_acked; 944 bbr->bbr_recovery_window = MAX(bbr->bbr_min_cwnd, 945 bbr->bbr_recovery_window); 946 return; 947 } 948 949 // Remove losses from the recovery window, while accounting for a potential 950 // integer underflow. 951 if (bbr->bbr_recovery_window >= bytes_lost) 952 bbr->bbr_recovery_window -= bytes_lost; 953 else 954 bbr->bbr_recovery_window = kMaxSegmentSize; 955 956 // In CONSERVATION mode, just subtracting losses is sufficient. In GROWTH, 957 // release additional |bytes_acked| to achieve a slow-start-like behavior. 958 if (bbr->bbr_recovery_state == BBR_RS_GROWTH) 959 bbr->bbr_recovery_window += bytes_acked; 960 961 // Sanity checks. 
Ensure that we always allow to send at least an MSS or 962 // |bytes_acked| in response, whichever is larger. 963 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, 964 bytes_in_flight + bytes_acked); 965 if (FLAG_quic_bbr_one_mss_conservation) 966 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, 967 bytes_in_flight + kMaxSegmentSize); 968 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, bbr->bbr_min_cwnd); 969} 970 971 972static void 973lsquic_bbr_end_ack (void *cong_ctl, uint64_t in_flight) 974{ 975 struct lsquic_bbr *const bbr = cong_ctl; 976 int is_round_start, min_rtt_expired; 977 uint64_t bytes_acked, excess_acked, bytes_lost; 978 979 assert(bbr->bbr_flags & BBR_FLAG_IN_ACK); 980 bbr->bbr_flags &= ~BBR_FLAG_IN_ACK; 981 982 LSQ_DEBUG("end_ack; mode: %s; in_flight: %"PRIu64, mode2str[bbr->bbr_mode], 983 in_flight); 984 985 bytes_acked = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler) 986 - bbr->bbr_ack_state.total_bytes_acked_before; 987 if (bbr->bbr_ack_state.acked_bytes) 988 { 989 is_round_start = bbr->bbr_ack_state.max_packno 990 > bbr->bbr_current_round_trip_end 991 || !is_valid_packno(bbr->bbr_current_round_trip_end); 992 if (is_round_start) 993 { 994 ++bbr->bbr_round_count; 995 bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno; 996 LSQ_DEBUG("up round count to %"PRIu64"; new rt end: %"PRIu64, 997 bbr->bbr_round_count, bbr->bbr_current_round_trip_end); 998 } 999 min_rtt_expired = update_bandwidth_and_min_rtt(bbr); 1000 update_recovery_state(bbr, is_round_start); 1001 excess_acked = update_ack_aggregation_bytes(bbr, bytes_acked); 1002 } 1003 else 1004 { 1005 is_round_start = 0; 1006 min_rtt_expired = 0; 1007 excess_acked = 0; 1008 } 1009 1010 if (bbr->bbr_mode == BBR_MODE_PROBE_BW) 1011 update_gain_cycle_phase(bbr, in_flight); 1012 1013 if (is_round_start && !(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 1014 check_if_full_bw_reached(bbr); 1015 1016 maybe_exit_startup_or_drain(bbr, bbr->bbr_ack_state.ack_time, 
in_flight); 1017 1018 maybe_enter_or_exit_probe_rtt(bbr, bbr->bbr_ack_state.ack_time, 1019 is_round_start, min_rtt_expired, in_flight); 1020 1021 // Calculate number of packets acked and lost. 1022 bytes_lost = bbr->bbr_ack_state.lost_bytes; 1023 1024 // After the model is updated, recalculate the pacing rate and congestion 1025 // window. 1026 calculate_pacing_rate(bbr); 1027 calculate_cwnd(bbr, bytes_acked, excess_acked); 1028 calculate_recovery_window(bbr, bytes_acked, bytes_lost, in_flight); 1029 1030 /* We don't need to clean up BW sampler */ 1031} 1032 1033 1034static void 1035lsquic_bbr_cleanup (void *cong_ctl) 1036{ 1037 struct lsquic_bbr *const bbr = cong_ctl; 1038 1039 lsquic_bw_sampler_cleanup(&bbr->bbr_bw_sampler); 1040 LSQ_DEBUG("cleanup"); 1041} 1042 1043 1044static void 1045lsquic_bbr_loss (void *cong_ctl) { /* Noop */ } 1046 1047 1048static void 1049lsquic_bbr_timeout (void *cong_ctl) { /* Noop */ } 1050 1051 1052const struct cong_ctl_if lsquic_cong_bbr_if = 1053{ 1054 .cci_ack = lsquic_bbr_ack, 1055 .cci_begin_ack = lsquic_bbr_begin_ack, 1056 .cci_end_ack = lsquic_bbr_end_ack, 1057 .cci_cleanup = lsquic_bbr_cleanup, 1058 .cci_get_cwnd = lsquic_bbr_get_cwnd, 1059 .cci_init = lsquic_bbr_init, 1060 .cci_pacing_rate = lsquic_bbr_pacing_rate, 1061 .cci_loss = lsquic_bbr_loss, 1062 .cci_lost = lsquic_bbr_lost, 1063 .cci_timeout = lsquic_bbr_timeout, 1064 .cci_sent = lsquic_bbr_sent, 1065 .cci_was_quiet = lsquic_bbr_was_quiet, 1066}; 1067