lsquic_bbr.c revision 7d09751d
1/* Copyright (c) 2017 - 2020 LiteSpeed Technologies Inc. See LICENSE. */ 2// Copyright 2016 The Chromium Authors. All rights reserved. 3// Use of this source code is governed by a BSD-style license that can be 4// found in the LICENSE.chrome file. 5 6#include <assert.h> 7#include <inttypes.h> 8#include <stdint.h> 9#include <stdlib.h> 10#include <string.h> 11#include <sys/queue.h> 12 13#include "lsquic.h" 14#include "lsquic_int_types.h" 15#include "lsquic_cong_ctl.h" 16#include "lsquic_minmax.h" 17#include "lsquic_packet_common.h" 18#include "lsquic_packet_out.h" 19#include "lsquic_bw_sampler.h" 20#include "lsquic_bbr.h" 21#include "lsquic_hash.h" 22#include "lsquic_conn.h" 23#include "lsquic_sfcw.h" 24#include "lsquic_conn_flow.h" 25#include "lsquic_varint.h" 26#include "lsquic_hq.h" 27#include "lsquic_stream.h" 28#include "lsquic_rtt.h" 29#include "lsquic_conn_public.h" 30#include "lsquic_util.h" 31#include "lsquic_malo.h" 32 33#define LSQUIC_LOGGER_MODULE LSQLM_BBR 34#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(bbr->bbr_conn) 35#include "lsquic_logger.h" 36 37#define MIN(a, b) ((a) < (b) ? (a) : (b)) 38#define MAX(a, b) ((a) > (b) ? (a) : (b)) 39 40#define ms(val_) ((val_) * 1000) 41#define sec(val_) ((val_) * 1000 * 1000) 42 43// Default maximum packet size used in the Linux TCP implementation. 44// Used in QUIC for congestion window computations in bytes. 45#define kDefaultTCPMSS 1460 46#define kMaxSegmentSize kDefaultTCPMSS 47 48// Constants based on TCP defaults. 49// The minimum CWND to ensure delayed acks don't reduce bandwidth measurements. 50// Does not inflate the pacing rate. 51#define kDefaultMinimumCongestionWindow (4 * kDefaultTCPMSS) 52 53// The gain used for the STARTUP, equal to 2/ln(2). 54#define kDefaultHighGain 2.885f 55 56// The newly derived gain for STARTUP, equal to 4 * ln(2) 57#define kDerivedHighGain 2.773f 58 59// The newly derived CWND gain for STARTUP, 2. 60#define kDerivedHighCWNDGain 2.0f 61 62// The gain used in STARTUP after loss has been detected. 63// 1.5 is enough to allow for 25% exogenous loss and still observe a 25% growth 64// in measured bandwidth. 65#define kStartupAfterLossGain 1.5f 66 67// We match SPDY's use of 32 (since we'd compete with SPDY). 68#define kInitialCongestionWindow 32 69 70/* Taken from send_algorithm_interface.h */ 71#define kDefaultMaxCongestionWindowPackets 2000 72 73// The time after which the current min_rtt value expires. 74#define kMinRttExpiry sec(10) 75 76// Coefficient to determine if a new RTT is sufficiently similar to min_rtt that 77// we don't need to enter PROBE_RTT. 78#define kSimilarMinRttThreshold 1.125f 79 80// If the bandwidth does not increase by the factor of |kStartupGrowthTarget| 81// within |kRoundTripsWithoutGrowthBeforeExitingStartup| rounds, the connection 82// will exit the STARTUP mode. 83#define kStartupGrowthTarget 1.25 84 85#define kRoundTripsWithoutGrowthBeforeExitingStartup 3 86 87#define startup_rate_reduction_multiplier_ 0 88 89// The cycle of gains used during the PROBE_BW stage. 90static const float kPacingGain[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1}; 91 92// The length of the gain cycle. 93static const size_t kGainCycleLength = sizeof(kPacingGain) 94 / sizeof(kPacingGain[0]); 95 96// Coefficient of target congestion window to use when basing PROBE_RTT on BDP. 97#define kModerateProbeRttMultiplier 0.75 98 99// The maximum packet size of any QUIC packet over IPv6, based on ethernet's max 100// size, minus the IP and UDP headers. IPv6 has a 40 byte header, UDP adds an 101// additional 8 bytes. This is a total overhead of 48 bytes. Ethernet's 102// max packet size is 1500 bytes, 1500 - 48 = 1452. 103#define kMaxV6PacketSize 1452 104// The maximum packet size of any QUIC packet over IPv4. 105// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472. 106#define kMaxV4PacketSize 1472 107// The maximum incoming packet size allowed. 108#define kMaxIncomingPacketSize kMaxV4PacketSize 109// The maximum outgoing packet size allowed. 110#define kMaxOutgoingPacketSize kMaxV6PacketSize 111 112// The minimum time the connection can spend in PROBE_RTT mode. 113#define kProbeRttTime ms(200) 114 115/* FLAG* are from net/quic/quic_flags_list.h */ 116 117// When in STARTUP and recovery, do not add bytes_acked to QUIC BBR's CWND in 118// CalculateCongestionWindow() 119#define FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery 0 120 121// When true, ensure BBR allows at least one MSS to be sent in response to an 122// ACK in packet conservation. 123#define FLAG_quic_bbr_one_mss_conservation 0 124 125/* From net/quic/quic_flags_list.h */ 126#define kCwndGain 2.0 127 128 129static uint64_t lsquic_bbr_get_cwnd (void *); 130 131 132static const char *const mode2str[] = 133{ 134 [BBR_MODE_STARTUP] = "STARTUP", 135 [BBR_MODE_DRAIN] = "DRAIN", 136 [BBR_MODE_PROBE_BW] = "PROBE_BW", 137 [BBR_MODE_PROBE_RTT] = "PROBE_RTT", 138}; 139 140 141static void 142set_mode (struct lsquic_bbr *bbr, enum bbr_mode mode) 143{ 144 if (bbr->bbr_mode != mode) 145 { 146 LSQ_DEBUG("mode change %s -> %s", mode2str[bbr->bbr_mode], 147 mode2str[mode]); 148 bbr->bbr_mode = mode; 149 } 150 else 151 LSQ_DEBUG("mode remains %s", mode2str[mode]); 152} 153 154 155static void 156set_startup_values (struct lsquic_bbr *bbr) 157{ 158 bbr->bbr_pacing_gain = bbr->bbr_high_gain; 159 bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain; 160} 161 162 163static void 164lsquic_bbr_init (void *cong_ctl, const struct lsquic_conn_public *conn_pub, 165 enum quic_ft_bit retx_frames) 166{ 167 struct lsquic_bbr *const bbr = cong_ctl; 168 bbr->bbr_conn = conn_pub->lconn; 169 lsquic_bw_sampler_init(&bbr->bbr_bw_sampler, bbr->bbr_conn, retx_frames); 170 171 bbr->bbr_rtt_stats = &conn_pub->rtt_stats; 172 bbr->bbr_mode = BBR_MODE_STARTUP; 173 bbr->bbr_round_count = 0; 174 minmax_init(&bbr->bbr_max_bandwidth, 10); 175 minmax_init(&bbr->bbr_max_ack_height, 10); 176 bbr->bbr_aggregation_epoch_bytes = 0; 177 bbr->bbr_aggregation_epoch_start_time = 0; 178 bbr->bbr_min_rtt = 0; 179 bbr->bbr_min_rtt_timestamp = 0; 180 bbr->bbr_init_cwnd = kInitialCongestionWindow * kDefaultTCPMSS; 181 bbr->bbr_cwnd = kInitialCongestionWindow * kDefaultTCPMSS; 182 bbr->bbr_max_cwnd = kDefaultMaxCongestionWindowPackets * kDefaultTCPMSS; 183 bbr->bbr_min_cwnd = kDefaultMinimumCongestionWindow; 184 bbr->bbr_high_gain = kDefaultHighGain; 185 bbr->bbr_high_cwnd_gain = kDefaultHighGain; 186 bbr->bbr_drain_gain = 1.0 / kDefaultHighGain; 187 bbr->bbr_pacing_rate = BW_ZERO(); 188 bbr->bbr_pacing_gain = 1.0; 189 bbr->bbr_cwnd_gain = 1.0; 190 bbr->bbr_num_startup_rtts = kRoundTripsWithoutGrowthBeforeExitingStartup; 191 bbr->bbr_flags &= ~BBR_FLAG_EXIT_STARTUP_ON_LOSS; 192 bbr->bbr_cycle_current_offset = 0; 193 bbr->bbr_last_cycle_start = 0; 194 bbr->bbr_flags &= ~BBR_FLAG_IS_AT_FULL_BANDWIDTH; 195 bbr->bbr_round_wo_bw_gain = 0; 196 bbr->bbr_bw_at_last_round = BW_ZERO(); 197 bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE; 198 bbr->bbr_exit_probe_rtt_at = 0; 199 bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED; 200 bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 201 bbr->bbr_flags &= ~BBR_FLAG_HAS_NON_APP_LIMITED; 202 bbr->bbr_flags &= ~BBR_FLAG_FLEXIBLE_APP_LIMITED; 203 204 set_startup_values(bbr); 205 206 LSQ_DEBUG("initialized"); 207} 208 209 210static lsquic_time_t 211get_min_rtt (const struct lsquic_bbr *bbr) 212{ 213 lsquic_time_t min_rtt; 214 215 if (bbr->bbr_min_rtt) 216 return bbr->bbr_min_rtt; 217 else 218 { 219 min_rtt = lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats); 220 if (min_rtt == 0) 221 min_rtt = 25000; 222 return min_rtt; 223 } 224} 225 226 227static uint64_t 228lsquic_bbr_pacing_rate (void *cong_ctl, int in_recovery) 229{ 230 struct lsquic_bbr *const bbr = cong_ctl; 231 lsquic_time_t min_rtt; 232 struct bandwidth bw; 233 234 if (!BW_IS_ZERO(&bbr->bbr_pacing_rate)) 235 bw = bbr->bbr_pacing_rate; 236 else 237 { 238 min_rtt = get_min_rtt(bbr); 239 bw = BW_FROM_BYTES_AND_DELTA(bbr->bbr_init_cwnd, min_rtt); 240 bw = BW_TIMES(&bw, bbr->bbr_high_cwnd_gain); 241 } 242 243 return BW_TO_BYTES_PER_SEC(&bw); 244} 245 246 247/* BbrSender::GetTargetCongestionWindow */ 248static uint64_t 249get_target_cwnd (const struct lsquic_bbr *bbr, float gain) 250{ 251 struct bandwidth bw; 252 uint64_t bdp, cwnd; 253 254 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 255 bdp = get_min_rtt(bbr) * BW_TO_BYTES_PER_SEC(&bw) / 1000000; 256 cwnd = gain * bdp; 257 258 // BDP estimate will be zero if no bandwidth samples are available yet. 259 if (cwnd == 0) 260 cwnd = gain * bbr->bbr_init_cwnd; 261 262 return MAX(cwnd, bbr->bbr_min_cwnd); 263} 264 265 266/* See BbrSender::IsPipeSufficientlyFull */ 267static int 268is_pipe_sufficiently_full (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 269{ 270 // See if we need more bytes in flight to see more bandwidth. 271 if (bbr->bbr_mode == BBR_MODE_STARTUP) 272 // STARTUP exits if it doesn't observe a 25% bandwidth increase, so 273 // the CWND must be more than 25% above the target. 274 return bytes_in_flight >= get_target_cwnd(bbr, 1.5); 275 else if (bbr->bbr_pacing_gain > 1) 276 // Super-unity PROBE_BW doesn't exit until 1.25 * BDP is achieved. 277 return bytes_in_flight >= get_target_cwnd(bbr, bbr->bbr_pacing_gain); 278 else 279 // If bytes_in_flight are above the target congestion window, it should 280 // be possible to observe the same or more bandwidth if it's available. 281 return bytes_in_flight >= get_target_cwnd(bbr, 1.1); 282} 283 284 285static void 286lsquic_bbr_was_quiet (void *cong_ctl, lsquic_time_t now, uint64_t in_flight) 287{ 288 struct lsquic_bbr *const bbr = cong_ctl; 289 LSQ_DEBUG("was quiet"); /* Do nothing */ 290} 291 292 293/* See BbrSender::OnApplicationLimited */ 294static void 295bbr_app_limited (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 296{ 297 uint64_t cwnd; 298 299 cwnd = lsquic_bbr_get_cwnd(bbr); 300 if (bytes_in_flight >= cwnd) 301 return; 302 if ((bbr->bbr_flags & BBR_FLAG_FLEXIBLE_APP_LIMITED) 303 && is_pipe_sufficiently_full(bbr, bytes_in_flight)) 304 return; 305 306 bbr->bbr_flags |= BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT; 307 lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler); 308 LSQ_DEBUG("becoming application-limited. Last sent packet: %"PRIu64"; " 309 "CWND: %"PRIu64, bbr->bbr_last_sent_packno, cwnd); 310} 311 312 313static void 314lsquic_bbr_ack (void *cong_ctl, struct lsquic_packet_out *packet_out, 315 unsigned packet_sz, lsquic_time_t now_time, int app_limited) 316{ 317 struct lsquic_bbr *const bbr = cong_ctl; 318 struct bw_sample *sample; 319 320 assert(bbr->bbr_flags & BBR_FLAG_IN_ACK); 321 322 sample = lsquic_bw_sampler_packet_acked(&bbr->bbr_bw_sampler, packet_out, 323 bbr->bbr_ack_state.ack_time); 324 if (sample) 325 TAILQ_INSERT_TAIL(&bbr->bbr_ack_state.samples, sample, next); 326 327 if (is_valid_packno(bbr->bbr_ack_state.max_packno)) 328 /* We assume packet numbers are ordered */ 329 assert(packet_out->po_packno > bbr->bbr_ack_state.max_packno); 330 bbr->bbr_ack_state.max_packno = packet_out->po_packno; 331 bbr->bbr_ack_state.acked_bytes += packet_sz; 332} 333 334 335static void 336lsquic_bbr_sent (void *cong_ctl, struct lsquic_packet_out *packet_out, 337 uint64_t in_flight, int app_limited) 338{ 339 struct lsquic_bbr *const bbr = cong_ctl; 340 341 if (!(packet_out->po_flags & PO_MINI)) 342 lsquic_bw_sampler_packet_sent(&bbr->bbr_bw_sampler, packet_out, 343 in_flight); 344 345 /* Obviously we make an assumption that sent packet number are always 346 * increasing. 347 */ 348 bbr->bbr_last_sent_packno = packet_out->po_packno; 349 350 if (app_limited) 351 bbr_app_limited(bbr, in_flight); 352} 353 354 355static void 356lsquic_bbr_lost (void *cong_ctl, struct lsquic_packet_out *packet_out, 357 unsigned packet_sz) 358{ 359 struct lsquic_bbr *const bbr = cong_ctl; 360 361 lsquic_bw_sampler_packet_lost(&bbr->bbr_bw_sampler, packet_out); 362 bbr->bbr_ack_state.has_losses = 1; 363 bbr->bbr_ack_state.lost_bytes += packet_sz; 364} 365 366 367static void 368lsquic_bbr_begin_ack (void *cong_ctl, lsquic_time_t ack_time, uint64_t in_flight) 369{ 370 struct lsquic_bbr *const bbr = cong_ctl; 371 372 assert(!(bbr->bbr_flags & BBR_FLAG_IN_ACK)); 373 bbr->bbr_flags |= BBR_FLAG_IN_ACK; 374 memset(&bbr->bbr_ack_state, 0, sizeof(bbr->bbr_ack_state)); 375 TAILQ_INIT(&bbr->bbr_ack_state.samples); 376 bbr->bbr_ack_state.ack_time = ack_time; 377 bbr->bbr_ack_state.max_packno = UINT64_MAX; 378 bbr->bbr_ack_state.in_flight = in_flight; 379 bbr->bbr_ack_state.total_bytes_acked_before 380 = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler); 381} 382 383 384/* Based on BbrSender::ShouldExtendMinRttExpiry() */ 385static int 386should_extend_min_rtt_expiry (const struct lsquic_bbr *bbr) 387{ 388 int increased_since_last_probe; 389 390 if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 391 |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED)) 392 == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 393 |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED)) 394 // Extend the current min_rtt if we've been app limited recently. 395 return 1; 396 397 increased_since_last_probe = bbr->bbr_min_rtt_since_last_probe 398 > bbr->bbr_min_rtt * kSimilarMinRttThreshold; 399 if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 400 |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT)) 401 == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT 402 |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT) 403 && !increased_since_last_probe) 404 // Extend the current min_rtt if we've been app limited recently and an 405 // rtt has been measured in that time that's less than 12.5% more than 406 // the current min_rtt. 407 return 1; 408 409 return 0; 410} 411 412 413/* Based on BbrSender::UpdateBandwidthAndMinRtt */ 414/* Returns true if min RTT expired, false otherwise */ 415static int 416update_bandwidth_and_min_rtt (struct lsquic_bbr *bbr) 417{ 418 struct bw_sample *sample, *next_sample; 419 uint64_t sample_min_rtt; 420 int min_rtt_expired; 421 422 sample_min_rtt = UINT64_MAX; 423 for (sample = TAILQ_FIRST(&bbr->bbr_ack_state.samples); sample; 424 sample = next_sample) 425 { 426 next_sample = TAILQ_NEXT(sample, next); 427 428 if (sample->is_app_limited) 429 bbr->bbr_flags |= BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 430 else 431 { 432 bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED; 433 bbr->bbr_flags |= BBR_FLAG_HAS_NON_APP_LIMITED; 434 } 435 436 if (sample_min_rtt == UINT64_MAX || sample->rtt < sample_min_rtt) 437 sample_min_rtt = sample->rtt; 438 439 if (!sample->is_app_limited 440 || BW_VALUE(&sample->bandwidth) 441 > minmax_get(&bbr->bbr_max_bandwidth)) 442 minmax_upmax(&bbr->bbr_max_bandwidth, bbr->bbr_round_count, 443 BW_VALUE(&sample->bandwidth)); 444 445 lsquic_malo_put(sample); 446 } 447 448 if (sample_min_rtt == UINT64_MAX) 449 return 0; 450 451 bbr->bbr_min_rtt_since_last_probe 452 = MIN(bbr->bbr_min_rtt_since_last_probe, sample_min_rtt); 453 454 min_rtt_expired = bbr->bbr_min_rtt != 0 && (bbr->bbr_ack_state.ack_time 455 > bbr->bbr_min_rtt_timestamp + kMinRttExpiry); 456 if (min_rtt_expired || sample_min_rtt < bbr->bbr_min_rtt 457 || 0 == bbr->bbr_min_rtt) 458 { 459 if (min_rtt_expired && should_extend_min_rtt_expiry(bbr)) 460 { 461 LSQ_DEBUG("min rtt expiration extended, stay at: %"PRIu64, 462 bbr->bbr_min_rtt); 463 min_rtt_expired = 0; 464 } 465 else 466 { 467 LSQ_DEBUG("min rtt updated: %"PRIu64" -> %"PRIu64, 468 bbr->bbr_min_rtt, sample_min_rtt); 469 bbr->bbr_min_rtt = sample_min_rtt; 470 } 471 bbr->bbr_min_rtt_timestamp = bbr->bbr_ack_state.ack_time; 472 bbr->bbr_min_rtt_since_last_probe = UINT64_MAX; 473 bbr->bbr_flags &= ~BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT; 474 } 475 476 return min_rtt_expired; 477} 478 479 480/* Based on BbrSender::UpdateRecoveryState() */ 481static void 482update_recovery_state (struct lsquic_bbr *bbr, int is_round_start) 483{ 484 // Exit recovery when there are no losses for a round. 485 if (bbr->bbr_ack_state.has_losses) 486 bbr->bbr_end_recovery_at = bbr->bbr_last_sent_packno; 487 488 switch (bbr->bbr_recovery_state) 489 { 490 case BBR_RS_NOT_IN_RECOVERY: 491 // Enter conservation on the first loss. 492 if (bbr->bbr_ack_state.has_losses) 493 { 494 bbr->bbr_recovery_state = BBR_RS_CONSERVATION; 495 // This will cause the |bbr_recovery_window| to be set to the 496 // correct value in CalculateRecoveryWindow(). 497 bbr->bbr_recovery_window = 0; 498 // Since the conservation phase is meant to be lasting for a whole 499 // round, extend the current round as if it were started right now. 500 bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno; 501 } 502 break; 503 case BBR_RS_CONSERVATION: 504 if (is_round_start) 505 bbr->bbr_recovery_state = BBR_RS_GROWTH; 506 /* Fall-through */ 507 case BBR_RS_GROWTH: 508 // Exit recovery if appropriate. 509 if (!bbr->bbr_ack_state.has_losses 510 && bbr->bbr_ack_state.max_packno > bbr->bbr_end_recovery_at) 511 bbr->bbr_recovery_state = BBR_RS_NOT_IN_RECOVERY; 512 break; 513 } 514} 515 516 517static uint64_t 518update_ack_aggregation_bytes (struct lsquic_bbr *bbr, 519 uint64_t newly_acked_bytes) 520{ 521 const lsquic_time_t ack_time = bbr->bbr_ack_state.ack_time; 522 uint64_t expected_bytes_acked, diff; 523 524 // Compute how many bytes are expected to be delivered, assuming max 525 // bandwidth is correct. 526 expected_bytes_acked = minmax_get(&bbr->bbr_max_bandwidth) 527 * (ack_time - bbr->bbr_aggregation_epoch_start_time); 528 529 // Reset the current aggregation epoch as soon as the ack arrival rate is 530 // less than or equal to the max bandwidth. 531 if (bbr->bbr_aggregation_epoch_bytes <= expected_bytes_acked) 532 { 533 // Reset to start measuring a new aggregation epoch. 534 bbr->bbr_aggregation_epoch_bytes = newly_acked_bytes; 535 bbr->bbr_aggregation_epoch_start_time = ack_time; 536 return 0; 537 } 538 539 // Compute how many extra bytes were delivered vs max bandwidth. 540 // Include the bytes most recently acknowledged to account for stretch acks. 541 bbr->bbr_aggregation_epoch_bytes += newly_acked_bytes; 542 diff = bbr->bbr_aggregation_epoch_bytes - expected_bytes_acked; 543 minmax_upmax(&bbr->bbr_max_ack_height, bbr->bbr_round_count, diff); 544 return diff; 545} 546 547 548/* See BbrSender::UpdateGainCyclePhase() */ 549static void 550update_gain_cycle_phase (struct lsquic_bbr *bbr, uint64_t bytes_in_flight) 551{ 552 const uint64_t prior_in_flight = bbr->bbr_ack_state.in_flight; 553 const lsquic_time_t now = bbr->bbr_ack_state.ack_time; 554 // In most cases, the cycle is advanced after an RTT passes. 555 int should_advance_gain_cycling 556 = now - bbr->bbr_last_cycle_start > get_min_rtt(bbr); 557 558 // If the pacing gain is above 1.0, the connection is trying to probe the 559 // bandwidth by increasing the number of bytes in flight to at least 560 // pacing_gain * BDP. Make sure that it actually reaches the target, as 561 // long as there are no losses suggesting that the buffers are not able to 562 // hold that much. 563 if (bbr->bbr_pacing_gain > 1.0 564 && !bbr->bbr_ack_state.has_losses 565 && prior_in_flight < get_target_cwnd(bbr, bbr->bbr_pacing_gain)) 566 should_advance_gain_cycling = 0; 567 568 /* Several optimizations are possible here: "else if" instead of "if", as 569 * well as not calling get_target_cwnd() if `should_advance_gain_cycling' 570 * is already set to the target value. 571 */ 572 573 // If pacing gain is below 1.0, the connection is trying to drain the extra 574 // queue which could have been incurred by probing prior to it. If the 575 // number of bytes in flight falls down to the estimated BDP value earlier, 576 // conclude that the queue has been successfully drained and exit this cycle 577 // early. 578 if (bbr->bbr_pacing_gain < 1.0 579 && bytes_in_flight <= get_target_cwnd(bbr, 1)) 580 should_advance_gain_cycling = 1; 581 582 if (should_advance_gain_cycling) 583 { 584 bbr->bbr_cycle_current_offset = 585 (bbr->bbr_cycle_current_offset + 1) % kGainCycleLength; 586 bbr->bbr_last_cycle_start = now; 587 // Stay in low gain mode until the target BDP is hit. Low gain mode 588 // will be exited immediately when the target BDP is achieved. 589 if ((bbr->bbr_flags & BBR_FLAG_DRAIN_TO_TARGET) 590 && bbr->bbr_pacing_gain < 1 591 && kPacingGain[bbr->bbr_cycle_current_offset] == 1 592 && bytes_in_flight > get_target_cwnd(bbr, 1)) 593 return; 594 bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset]; 595 LSQ_DEBUG("advanced gain cycle, pacing gain set to %.2f", 596 bbr->bbr_pacing_gain); 597 } 598} 599 600 601/* BbrSender::InRecovery() */ 602static int 603in_recovery (const struct lsquic_bbr *bbr) 604{ 605 return bbr->bbr_recovery_state != BBR_RS_NOT_IN_RECOVERY; 606} 607 608 609/* See BbrSender::CheckIfFullBandwidthReached() */ 610static void 611check_if_full_bw_reached (struct lsquic_bbr *bbr) 612{ 613 struct bandwidth target, bw; 614 615 if (bbr->bbr_flags & BBR_FLAG_LAST_SAMPLE_APP_LIMITED) 616 { 617 LSQ_DEBUG("last sample app limited: full BW not reached"); 618 return; 619 } 620 621 target = BW_TIMES(&bbr->bbr_bw_at_last_round, kStartupGrowthTarget); 622 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 623 if (BW_VALUE(&bw) >= BW_VALUE(&target)) 624 { 625 bbr->bbr_bw_at_last_round = bw; 626 bbr->bbr_round_wo_bw_gain = 0; 627 if (bbr->bbr_flags & BBR_FLAG_EXPIRE_ACK_AGG_IN_STARTUP) 628 // Expire old excess delivery measurements now that bandwidth 629 // increased. 630 minmax_reset(&bbr->bbr_max_ack_height, 631 ((struct minmax_sample) { bbr->bbr_round_count, 0, })); 632 LSQ_DEBUG("BW estimate %"PRIu64"bps greater than or equal to target " 633 "%"PRIu64"bps: full BW not reached", 634 BW_VALUE(&bw), BW_VALUE(&target)); 635 return; 636 } 637 638 ++bbr->bbr_round_wo_bw_gain; 639 if ((bbr->bbr_round_wo_bw_gain >= bbr->bbr_num_startup_rtts) 640 || ((bbr->bbr_flags & BBR_FLAG_EXIT_STARTUP_ON_LOSS) 641 && in_recovery(bbr))) 642 { 643 assert(bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED); /* DCHECK */ 644 bbr->bbr_flags |= BBR_FLAG_IS_AT_FULL_BANDWIDTH; 645 LSQ_DEBUG("reached full BW"); 646 } 647 else 648 LSQ_DEBUG("rounds w/o gain: %u, full BW not reached", 649 bbr->bbr_round_wo_bw_gain); 650} 651 652 653/* See BbrSender::OnExitStartup */ 654static void 655on_exit_startup (struct lsquic_bbr *bbr, lsquic_time_t now) 656{ 657 assert(bbr->bbr_mode == BBR_MODE_STARTUP); 658 /* Apparently this method is just to update stats, something that we 659 * don't do yet. 660 */ 661} 662 663 664/* See BbrSender::EnterProbeBandwidthMode */ 665static void 666enter_probe_bw_mode (struct lsquic_bbr *bbr, lsquic_time_t now) 667{ 668 set_mode(bbr, BBR_MODE_PROBE_BW); 669 bbr->bbr_cwnd_gain = kCwndGain; 670 671 // Pick a random offset for the gain cycle out of {0, 2..7} range. 1 is 672 // excluded because in that case increased gain and decreased gain would not 673 // follow each other. 674 bbr->bbr_cycle_current_offset = rand() % (kGainCycleLength - 1); 675 if (bbr->bbr_cycle_current_offset >= 1) 676 ++bbr->bbr_cycle_current_offset; 677 678 bbr->bbr_last_cycle_start = now; 679 bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset]; 680} 681 682 683/* See BbrSender::EnterStartupMode */ 684static void 685enter_startup_mode (struct lsquic_bbr *bbr, lsquic_time_t now) 686{ 687 set_mode(bbr, BBR_MODE_STARTUP); 688 set_startup_values(bbr); 689} 690 691 692/* See BbrSender::MaybeExitStartupOrDrain() */ 693static void 694maybe_exit_startup_or_drain (struct lsquic_bbr *bbr, lsquic_time_t now, 695 uint64_t bytes_in_flight) 696{ 697 uint64_t target_cwnd; 698 699 if (bbr->bbr_mode == BBR_MODE_STARTUP 700 && (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 701 { 702 on_exit_startup(bbr, now); 703 set_mode(bbr, BBR_MODE_DRAIN); 704 bbr->bbr_pacing_gain = bbr->bbr_drain_gain; 705 bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain; 706 } 707 708 if (bbr->bbr_mode == BBR_MODE_DRAIN) 709 { 710 target_cwnd = get_target_cwnd(bbr, 1); 711 LSQ_DEBUG("%s: bytes in flight: %"PRIu64"; target cwnd: %"PRIu64, 712 __func__, bytes_in_flight, target_cwnd); 713 if (bytes_in_flight <= target_cwnd) 714 enter_probe_bw_mode(bbr, now); 715 } 716} 717 718 719static int 720in_slow_start (const struct lsquic_bbr *bbr) 721{ 722 return bbr->bbr_mode == BBR_MODE_STARTUP; 723} 724 725 726/* See QuicByteCount BbrSender::ProbeRttCongestionWindow() */ 727static uint64_t 728get_probe_rtt_cwnd (const struct lsquic_bbr *bbr) 729{ 730 if (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_BASED_ON_BDP) 731 return get_target_cwnd(bbr, kModerateProbeRttMultiplier); 732 else 733 return bbr->bbr_min_cwnd; 734} 735 736 737static uint64_t 738lsquic_bbr_get_cwnd (void *cong_ctl) 739{ 740 struct lsquic_bbr *const bbr = cong_ctl; 741 uint64_t cwnd; 742 743 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 744 cwnd = get_probe_rtt_cwnd(bbr); 745 else if (in_recovery(bbr) && 746 !((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP) 747 && bbr->bbr_mode == BBR_MODE_STARTUP)) 748 cwnd = MIN(bbr->bbr_cwnd, bbr->bbr_recovery_window); 749 else 750 cwnd = bbr->bbr_cwnd; 751 752 return cwnd; 753} 754 755 756/* See BbrSender::MaybeEnterOrExitProbeRtt */ 757static void 758maybe_enter_or_exit_probe_rtt (struct lsquic_bbr *bbr, lsquic_time_t now, 759 int is_round_start, int min_rtt_expired, uint64_t bytes_in_flight) 760{ 761 if (min_rtt_expired 762 && !(bbr->bbr_flags & BBR_FLAG_EXITING_QUIESCENCE) 763 && bbr->bbr_mode != BBR_MODE_PROBE_RTT) 764 { 765 if (in_slow_start(bbr)) 766 on_exit_startup(bbr, now); 767 set_mode(bbr, BBR_MODE_PROBE_RTT); 768 bbr->bbr_pacing_gain = 1; 769 // Do not decide on the time to exit PROBE_RTT until the 770 // |bytes_in_flight| is at the target small value. 771 bbr->bbr_exit_probe_rtt_at = 0; 772 } 773 774 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 775 { 776 lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler); 777 LSQ_DEBUG("%s: exit probe at: %"PRIu64"; now: %"PRIu64 778 "; round start: %d; round passed: %d; rtt: %"PRIu64" usec", 779 __func__, bbr->bbr_exit_probe_rtt_at, now, is_round_start, 780 !!(bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED), 781 lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)); 782 if (bbr->bbr_exit_probe_rtt_at == 0) 783 { 784 // If the window has reached the appropriate size, schedule exiting 785 // PROBE_RTT. The CWND during PROBE_RTT is 786 // kMinimumCongestionWindow, but we allow an extra packet since QUIC 787 // checks CWND before sending a packet. 788 if (bytes_in_flight 789 < get_probe_rtt_cwnd(bbr) + kMaxOutgoingPacketSize) 790 { 791 bbr->bbr_exit_probe_rtt_at = now + kProbeRttTime; 792 bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED; 793 } 794 } 795 else 796 { 797 if (is_round_start) 798 bbr->bbr_flags |= BBR_FLAG_PROBE_RTT_ROUND_PASSED; 799 if (now >= bbr->bbr_exit_probe_rtt_at 800 && (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED)) 801 { 802 bbr->bbr_min_rtt_timestamp = now; 803 if (!(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 804 enter_startup_mode(bbr, now); 805 else 806 enter_probe_bw_mode(bbr, now); 807 } 808 } 809 } 810 811 bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE; 812} 813 814 815/* See BbrSender::CalculatePacingRate */ 816static void 817calculate_pacing_rate (struct lsquic_bbr *bbr) 818{ 819 struct bandwidth bw, target_rate; 820 821 bw = BW(minmax_get(&bbr->bbr_max_bandwidth)); 822 if (BW_IS_ZERO(&bw)) 823 return; 824 825 LSQ_DEBUG("BW estimate: %"PRIu64, BW_VALUE(&bw)); 826 827 target_rate = BW_TIMES(&bw, bbr->bbr_pacing_gain); 828 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 829 { 830 bbr->bbr_pacing_rate = target_rate; 831 return; 832 } 833 834 // Pace at the rate of initial_window / RTT as soon as RTT measurements are 835 // available. 836 if (BW_IS_ZERO(&bbr->bbr_pacing_rate) 837 && 0 != lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)) 838 { 839 bbr->bbr_pacing_rate = BW_FROM_BYTES_AND_DELTA( 840 bbr->bbr_init_cwnd, 841 lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats)); 842 return; 843 } 844 845 // Slow the pacing rate in STARTUP once loss has ever been detected. 846 const int has_ever_detected_loss = bbr->bbr_end_recovery_at != 0; 847 if (has_ever_detected_loss 848 && (bbr->bbr_flags & (BBR_FLAG_SLOWER_STARTUP 849 |BBR_FLAG_HAS_NON_APP_LIMITED)) 850 == (BBR_FLAG_SLOWER_STARTUP|BBR_FLAG_HAS_NON_APP_LIMITED)) 851 { 852 bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupAfterLossGain); 853 return; 854 } 855 856 // Slow the pacing rate in STARTUP by the bytes_lost / CWND. 857 if (startup_rate_reduction_multiplier_ != 0 858 && has_ever_detected_loss 859 && (bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED)) 860 { 861 bbr->bbr_pacing_rate = BW_TIMES(&target_rate, 862 (1 - (bbr->bbr_startup_bytes_lost 863 * startup_rate_reduction_multiplier_ * 1.0f 864 / bbr->bbr_cwnd_gain))); 865 // Ensure the pacing rate doesn't drop below the startup growth target 866 // times the bandwidth estimate. 867 if (BW_VALUE(&bbr->bbr_pacing_rate) 868 < BW_VALUE(&bw) * kStartupGrowthTarget) 869 bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupGrowthTarget); 870 return; 871 } 872 873 // Do not decrease the pacing rate during startup. 874 if (BW_VALUE(&bbr->bbr_pacing_rate) < BW_VALUE(&target_rate)) 875 bbr->bbr_pacing_rate = target_rate; 876} 877 878 879/* See BbrSender::CalculateCongestionWindow */ 880static void 881calculate_cwnd (struct lsquic_bbr *bbr, uint64_t bytes_acked, 882 uint64_t excess_acked) 883{ 884 if (bbr->bbr_mode == BBR_MODE_PROBE_RTT) 885 return; 886 887 uint64_t target_window = get_target_cwnd(bbr, bbr->bbr_cwnd_gain); 888 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 889 // Add the max recently measured ack aggregation to CWND. 890 target_window += minmax_get(&bbr->bbr_max_ack_height); 891 else if (bbr->bbr_flags & BBR_FLAG_ENABLE_ACK_AGG_IN_STARTUP) 892 // Add the most recent excess acked. Because CWND never decreases in 893 // STARTUP, this will automatically create a very localized max filter. 894 target_window += excess_acked; 895 896 // Instead of immediately setting the target CWND as the new one, BBR grows 897 // the CWND towards |target_window| by only increasing it |bytes_acked| at a 898 // time. 899 const int add_bytes_acked = 900 !FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery || !in_recovery(bbr); 901 if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH) 902 bbr->bbr_cwnd = MIN(target_window, bbr->bbr_cwnd + bytes_acked); 903 else if (add_bytes_acked && 904 (bbr->bbr_cwnd_gain < target_window || 905 lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler) 906 < bbr->bbr_init_cwnd)) 907 // If the connection is not yet out of startup phase, do not decrease 908 // the window. 909 bbr->bbr_cwnd += bytes_acked; 910 911 // Enforce the limits on the congestion window. 912 if (bbr->bbr_cwnd < bbr->bbr_min_cwnd) 913 bbr->bbr_cwnd = bbr->bbr_min_cwnd; 914 else if (bbr->bbr_cwnd > bbr->bbr_max_cwnd) 915 { 916 LSQ_DEBUG("exceed max cwnd"); 917 bbr->bbr_cwnd = bbr->bbr_max_cwnd; 918 } 919} 920 921 922/* See BbrSender::CalculateRecoveryWindow */ 923static void 924calculate_recovery_window (struct lsquic_bbr *bbr, uint64_t bytes_acked, 925 uint64_t bytes_lost, uint64_t bytes_in_flight) 926{ 927 if ((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP) 928 && bbr->bbr_mode == BBR_MODE_STARTUP) 929 return; 930 931 if (bbr->bbr_recovery_state == BBR_RS_NOT_IN_RECOVERY) 932 return; 933 934 // Set up the initial recovery window. 935 if (bbr->bbr_recovery_window == 0) 936 { 937 bbr->bbr_recovery_window = bytes_in_flight + bytes_acked; 938 bbr->bbr_recovery_window = MAX(bbr->bbr_min_cwnd, 939 bbr->bbr_recovery_window); 940 return; 941 } 942 943 // Remove losses from the recovery window, while accounting for a potential 944 // integer underflow. 945 if (bbr->bbr_recovery_window >= bytes_lost) 946 bbr->bbr_recovery_window -= bytes_lost; 947 else 948 bbr->bbr_recovery_window = kMaxSegmentSize; 949 950 // In CONSERVATION mode, just subtracting losses is sufficient. In GROWTH, 951 // release additional |bytes_acked| to achieve a slow-start-like behavior. 952 if (bbr->bbr_recovery_state == BBR_RS_GROWTH) 953 bbr->bbr_recovery_window += bytes_acked; 954 955 // Sanity checks. Ensure that we always allow to send at least an MSS or 956 // |bytes_acked| in response, whichever is larger. 957 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, 958 bytes_in_flight + bytes_acked); 959 if (FLAG_quic_bbr_one_mss_conservation) 960 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, 961 bytes_in_flight + kMaxSegmentSize); 962 bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, bbr->bbr_min_cwnd); 963} 964 965 966static void 967lsquic_bbr_end_ack (void *cong_ctl, uint64_t in_flight) 968{ 969 struct lsquic_bbr *const bbr = cong_ctl; 970 int is_round_start, min_rtt_expired; 971 uint64_t bytes_acked, excess_acked, bytes_lost; 972 973 assert(bbr->bbr_flags & BBR_FLAG_IN_ACK); 974 bbr->bbr_flags &= ~BBR_FLAG_IN_ACK; 975 976 LSQ_DEBUG("end_ack; mode: %s; in_flight: %"PRIu64, mode2str[bbr->bbr_mode], 977 in_flight); 978 979 bytes_acked = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler) 980 - bbr->bbr_ack_state.total_bytes_acked_before; 981 if (bbr->bbr_ack_state.acked_bytes) 982 { 983 is_round_start = bbr->bbr_ack_state.max_packno 984 > bbr->bbr_current_round_trip_end 985 || !is_valid_packno(bbr->bbr_current_round_trip_end); 986 if (is_round_start) 987 { 988 ++bbr->bbr_round_count; 989 bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno; 990 LSQ_DEBUG("up round count to %"PRIu64"; new rt end: %"PRIu64, 991 bbr->bbr_round_count, bbr->bbr_current_round_trip_end); 992 } 993 min_rtt_expired = update_bandwidth_and_min_rtt(bbr); 994 update_recovery_state(bbr, is_round_start); 995 excess_acked = update_ack_aggregation_bytes(bbr, bytes_acked); 996 } 997 else 998 { 999 is_round_start = 0; 1000 min_rtt_expired = 0; 1001 excess_acked = 0; 1002 } 1003 1004 if (bbr->bbr_mode == BBR_MODE_PROBE_BW) 1005 update_gain_cycle_phase(bbr, in_flight); 1006 1007 if (is_round_start && !(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)) 1008 check_if_full_bw_reached(bbr); 1009 1010 maybe_exit_startup_or_drain(bbr, bbr->bbr_ack_state.ack_time, in_flight); 1011 1012 maybe_enter_or_exit_probe_rtt(bbr, bbr->bbr_ack_state.ack_time, 1013 is_round_start, min_rtt_expired, in_flight); 1014 1015 // Calculate number of packets acked and lost. 1016 bytes_lost = bbr->bbr_ack_state.lost_bytes; 1017 1018 // After the model is updated, recalculate the pacing rate and congestion 1019 // window. 1020 calculate_pacing_rate(bbr); 1021 calculate_cwnd(bbr, bytes_acked, excess_acked); 1022 calculate_recovery_window(bbr, bytes_acked, bytes_lost, in_flight); 1023 1024 /* We don't need to clean up BW sampler */ 1025} 1026 1027 1028static void 1029lsquic_bbr_cleanup (void *cong_ctl) 1030{ 1031 struct lsquic_bbr *const bbr = cong_ctl; 1032 1033 lsquic_bw_sampler_cleanup(&bbr->bbr_bw_sampler); 1034 LSQ_DEBUG("cleanup"); 1035} 1036 1037 1038static void 1039lsquic_bbr_loss (void *cong_ctl) { /* Noop */ } 1040 1041 1042static void 1043lsquic_bbr_timeout (void *cong_ctl) { /* Noop */ } 1044 1045 1046const struct cong_ctl_if lsquic_cong_bbr_if = 1047{ 1048 .cci_ack = lsquic_bbr_ack, 1049 .cci_begin_ack = lsquic_bbr_begin_ack, 1050 .cci_end_ack = lsquic_bbr_end_ack, 1051 .cci_cleanup = lsquic_bbr_cleanup, 1052 .cci_get_cwnd = lsquic_bbr_get_cwnd, 1053 .cci_init = lsquic_bbr_init, 1054 .cci_pacing_rate = lsquic_bbr_pacing_rate, 1055 .cci_loss = lsquic_bbr_loss, 1056 .cci_lost = lsquic_bbr_lost, 1057 .cci_timeout = lsquic_bbr_timeout, 1058 .cci_sent = lsquic_bbr_sent, 1059 .cci_was_quiet = lsquic_bbr_was_quiet, 1060}; 1061