/* lsquic_bbr.c revision 5392f7a3 */
1/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc.  See LICENSE. */
2// Copyright 2016 The Chromium Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE.chrome file.
5
6#include <assert.h>
7#include <inttypes.h>
8#include <stdint.h>
9#include <stdlib.h>
10#include <string.h>
11#include <sys/queue.h>
12
13#include "lsquic.h"
14#include "lsquic_int_types.h"
15#include "lsquic_cong_ctl.h"
16#include "lsquic_minmax.h"
17#include "lsquic_packet_common.h"
18#include "lsquic_packet_out.h"
19#include "lsquic_bw_sampler.h"
20#include "lsquic_bbr.h"
21#include "lsquic_hash.h"
22#include "lsquic_conn.h"
23#include "lsquic_sfcw.h"
24#include "lsquic_conn_flow.h"
25#include "lsquic_varint.h"
26#include "lsquic_hq.h"
27#include "lsquic_stream.h"
28#include "lsquic_rtt.h"
29#include "lsquic_conn_public.h"
30#include "lsquic_util.h"
31#include "lsquic_malo.h"
32
33#define LSQUIC_LOGGER_MODULE LSQLM_BBR
34#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(bbr->bbr_conn)
35#include "lsquic_logger.h"
36
37#define MIN(a, b) ((a) < (b) ? (a) : (b))
38#define MAX(a, b) ((a) > (b) ? (a) : (b))
39
40#define ms(val_) ((val_) * 1000)
41#define sec(val_) ((val_) * 1000 * 1000)
42
43// Default maximum packet size used in the Linux TCP implementation.
44// Used in QUIC for congestion window computations in bytes.
45#define kDefaultTCPMSS 1460
46#define kMaxSegmentSize kDefaultTCPMSS
47
48// Constants based on TCP defaults.
49// The minimum CWND to ensure delayed acks don't reduce bandwidth measurements.
50// Does not inflate the pacing rate.
51#define kDefaultMinimumCongestionWindow  (4 * kDefaultTCPMSS)
52
53// The gain used for the STARTUP, equal to 2/ln(2).
54#define kDefaultHighGain 2.885f
55
56// The newly derived gain for STARTUP, equal to 4 * ln(2)
57#define kDerivedHighGain 2.773f
58
59// The newly derived CWND gain for STARTUP, 2.
60#define kDerivedHighCWNDGain 2.0f
61
62// The gain used in STARTUP after loss has been detected.
63// 1.5 is enough to allow for 25% exogenous loss and still observe a 25% growth
64// in measured bandwidth.
65#define kStartupAfterLossGain 1.5f
66
67// We match SPDY's use of 32 (since we'd compete with SPDY).
68#define kInitialCongestionWindow 32
69
70/* Taken from send_algorithm_interface.h */
71#define kDefaultMaxCongestionWindowPackets 2000
72
73// The time after which the current min_rtt value expires.
74#define kMinRttExpiry sec(10)
75
76// Coefficient to determine if a new RTT is sufficiently similar to min_rtt that
77// we don't need to enter PROBE_RTT.
78#define kSimilarMinRttThreshold 1.125f
79
80// If the bandwidth does not increase by the factor of |kStartupGrowthTarget|
81// within |kRoundTripsWithoutGrowthBeforeExitingStartup| rounds, the connection
82// will exit the STARTUP mode.
83#define kStartupGrowthTarget 1.25
84
85#define kRoundTripsWithoutGrowthBeforeExitingStartup 3
86
87#define startup_rate_reduction_multiplier_ 0
88
89// The cycle of gains used during the PROBE_BW stage.
90static const float kPacingGain[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1};
91
92// The length of the gain cycle.
93static const size_t kGainCycleLength = sizeof(kPacingGain)
94                                                    / sizeof(kPacingGain[0]);
95
96// Coefficient of target congestion window to use when basing PROBE_RTT on BDP.
97#define kModerateProbeRttMultiplier 0.75
98
99// The maximum packet size of any QUIC packet over IPv6, based on ethernet's max
100// size, minus the IP and UDP headers. IPv6 has a 40 byte header, UDP adds an
101// additional 8 bytes.  This is a total overhead of 48 bytes.  Ethernet's
102// max packet size is 1500 bytes,  1500 - 48 = 1452.
103#define kMaxV6PacketSize 1452
104// The maximum packet size of any QUIC packet over IPv4.
105// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472.
106#define kMaxV4PacketSize 1472
107// The maximum incoming packet size allowed.
108#define kMaxIncomingPacketSize kMaxV4PacketSize
109// The maximum outgoing packet size allowed.
110#define kMaxOutgoingPacketSize kMaxV6PacketSize
111
112// The minimum time the connection can spend in PROBE_RTT mode.
113#define kProbeRttTime ms(200)
114
115/* FLAG* are from net/quic/quic_flags_list.h */
116
117// When in STARTUP and recovery, do not add bytes_acked to QUIC BBR's CWND in
118// CalculateCongestionWindow()
119#define FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery 0
120
121// When true, ensure BBR allows at least one MSS to be sent in response to an
122// ACK in packet conservation.
123#define FLAG_quic_bbr_one_mss_conservation 0
124
125/* From net/quic/quic_flags_list.h */
126#define kCwndGain 2.0
127
128
129static uint64_t lsquic_bbr_get_cwnd (void *);
130
131
/* Human-readable names for enum bbr_mode values, used only in debug logs: */
static const char *const mode2str[] =
{
    [BBR_MODE_STARTUP]   = "STARTUP",
    [BBR_MODE_DRAIN]     = "DRAIN",
    [BBR_MODE_PROBE_BW]  = "PROBE_BW",
    [BBR_MODE_PROBE_RTT] = "PROBE_RTT",
};
139
140
141static void
142set_mode (struct lsquic_bbr *bbr, enum bbr_mode mode)
143{
144    if (bbr->bbr_mode != mode)
145    {
146        LSQ_DEBUG("mode change %s -> %s", mode2str[bbr->bbr_mode],
147                                                        mode2str[mode]);
148        bbr->bbr_mode = mode;
149    }
150    else
151        LSQ_DEBUG("mode remains %s", mode2str[mode]);
152}
153
154
/* Apply the STARTUP pacing and CWND gains.  Called at initialization and
 * when (re-)entering STARTUP mode.
 */
static void
set_startup_values (struct lsquic_bbr *bbr)
{
    bbr->bbr_pacing_gain = bbr->bbr_high_gain;
    bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain;
}
161
162
/* Initialize the BBR congestion controller to its STARTUP defaults.
 * `retx_frames' is forwarded to the bandwidth sampler (which frame types
 * count as retransmittable).  The long run of assignments mirrors the
 * member initializers of the Chromium BbrSender constructor this code is
 * derived from.
 */
static void
lsquic_bbr_init (void *cong_ctl, const struct lsquic_conn_public *conn_pub,
                                                enum quic_ft_bit retx_frames)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    bbr->bbr_conn = conn_pub->lconn;
    lsquic_bw_sampler_init(&bbr->bbr_bw_sampler, bbr->bbr_conn, retx_frames);

    bbr->bbr_rtt_stats = &conn_pub->rtt_stats;
    bbr->bbr_mode = BBR_MODE_STARTUP;
    bbr->bbr_round_count = 0;
    /* Windowed max filters for bandwidth and ack height (window size 10): */
    minmax_init(&bbr->bbr_max_bandwidth, 10);
    minmax_init(&bbr->bbr_max_ack_height, 10);
    bbr->bbr_aggregation_epoch_bytes = 0;
    bbr->bbr_aggregation_epoch_start_time = 0;
    bbr->bbr_min_rtt = 0;   /* Zero means "no min RTT sample yet" */
    bbr->bbr_min_rtt_timestamp = 0;
    bbr->bbr_init_cwnd = kInitialCongestionWindow * kDefaultTCPMSS;
    bbr->bbr_cwnd = kInitialCongestionWindow * kDefaultTCPMSS;
    bbr->bbr_max_cwnd = kDefaultMaxCongestionWindowPackets * kDefaultTCPMSS;
    bbr->bbr_min_cwnd = kDefaultMinimumCongestionWindow;
    bbr->bbr_high_gain = kDefaultHighGain;
    bbr->bbr_high_cwnd_gain = kDefaultHighGain;
    bbr->bbr_drain_gain = 1.0 / kDefaultHighGain;
    bbr->bbr_pacing_rate = BW_ZERO();
    bbr->bbr_pacing_gain = 1.0;
    bbr->bbr_cwnd_gain = 1.0;
    bbr->bbr_num_startup_rtts = kRoundTripsWithoutGrowthBeforeExitingStartup;
    bbr->bbr_flags &= ~BBR_FLAG_EXIT_STARTUP_ON_LOSS;
    bbr->bbr_cycle_current_offset = 0;
    bbr->bbr_last_cycle_start = 0;
    bbr->bbr_flags &= ~BBR_FLAG_IS_AT_FULL_BANDWIDTH;
    bbr->bbr_round_wo_bw_gain = 0;
    bbr->bbr_bw_at_last_round = BW_ZERO();
    bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE;
    bbr->bbr_exit_probe_rtt_at = 0;
    bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED;
    bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
    bbr->bbr_flags &= ~BBR_FLAG_HAS_NON_APP_LIMITED;
    bbr->bbr_flags &= ~BBR_FLAG_FLEXIBLE_APP_LIMITED;

    /* Overwrite the unity gains above with the STARTUP gains: */
    set_startup_values(bbr);

    LSQ_DEBUG("initialized");
}
208
209
210static lsquic_time_t
211get_min_rtt (const struct lsquic_bbr *bbr)
212{
213    lsquic_time_t min_rtt;
214
215    if (bbr->bbr_min_rtt)
216        return bbr->bbr_min_rtt;
217    else
218    {
219        min_rtt = lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats);
220        if (min_rtt == 0)
221            min_rtt = 25000;
222        return min_rtt;
223    }
224}
225
226
227static uint64_t
228lsquic_bbr_pacing_rate (void *cong_ctl, int in_recovery)
229{
230    struct lsquic_bbr *const bbr = cong_ctl;
231    lsquic_time_t min_rtt;
232    struct bandwidth bw;
233
234    if (!BW_IS_ZERO(&bbr->bbr_pacing_rate))
235        bw = bbr->bbr_pacing_rate;
236    else
237    {
238        min_rtt = get_min_rtt(bbr);
239        bw = BW_FROM_BYTES_AND_DELTA(bbr->bbr_init_cwnd, min_rtt);
240        bw = BW_TIMES(&bw, bbr->bbr_high_cwnd_gain);
241    }
242
243    return BW_TO_BYTES_PER_SEC(&bw);
244}
245
246
247/* BbrSender::GetTargetCongestionWindow */
248static uint64_t
249get_target_cwnd (const struct lsquic_bbr *bbr, float gain)
250{
251    struct bandwidth bw;
252    uint64_t bdp, cwnd;
253
254    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
255    bdp = get_min_rtt(bbr) * BW_TO_BYTES_PER_SEC(&bw) / 1000000;
256    cwnd = gain * bdp;
257
258    // BDP estimate will be zero if no bandwidth samples are available yet.
259    if (cwnd == 0)
260        cwnd = gain * bbr->bbr_init_cwnd;
261
262    return MAX(cwnd, bbr->bbr_min_cwnd);
263}
264
265
266/* See BbrSender::IsPipeSufficientlyFull */
267static int
268is_pipe_sufficiently_full (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
269{
270    // See if we need more bytes in flight to see more bandwidth.
271    if (bbr->bbr_mode == BBR_MODE_STARTUP)
272        // STARTUP exits if it doesn't observe a 25% bandwidth increase, so
273        // the CWND must be more than 25% above the target.
274        return bytes_in_flight >= get_target_cwnd(bbr, 1.5);
275    else if (bbr->bbr_pacing_gain > 1)
276        // Super-unity PROBE_BW doesn't exit until 1.25 * BDP is achieved.
277        return bytes_in_flight >= get_target_cwnd(bbr, bbr->bbr_pacing_gain);
278    else
279        // If bytes_in_flight are above the target congestion window, it should
280        // be possible to observe the same or more bandwidth if it's available.
281        return bytes_in_flight >= get_target_cwnd(bbr, 1.1);
282}
283
284
285/* See BbrSender::OnApplicationLimited */
286static void
287lsquic_bbr_was_quiet (void *cong_ctl, lsquic_time_t now,
288                                                    uint64_t bytes_in_flight)
289{
290    struct lsquic_bbr *const bbr = cong_ctl;
291    uint64_t cwnd;
292
293    cwnd = lsquic_bbr_get_cwnd(cong_ctl);
294    if (bytes_in_flight >= cwnd)
295        return;
296    if ((bbr->bbr_flags & BBR_FLAG_FLEXIBLE_APP_LIMITED)
297                            && is_pipe_sufficiently_full(bbr, bytes_in_flight))
298        return;
299
300    bbr->bbr_flags |= BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT;
301    lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler);
302    LSQ_DEBUG("becoming application-limited.  Last sent packet: %"PRIu64"; "
303                            "CWND: %"PRIu64, bbr->bbr_last_sent_packno, cwnd);
304}
305
306
/* Process one acked packet.  Must be called while an ACK is being
 * processed (between begin-ack and end-ack, as indicated by
 * BBR_FLAG_IN_ACK).  Collects a bandwidth sample for the packet, if the
 * sampler produces one, and updates the per-ACK accounting.
 */
static void
lsquic_bbr_ack (void *cong_ctl, struct lsquic_packet_out *packet_out,
                  unsigned packet_sz, lsquic_time_t now_time, int app_limited)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    struct bw_sample *sample;

    assert(bbr->bbr_flags & BBR_FLAG_IN_ACK);

    /* The sample, if any, is consumed and freed later, in
     * update_bandwidth_and_min_rtt():
     */
    sample = lsquic_bw_sampler_packet_acked(&bbr->bbr_bw_sampler, packet_out,
                                                bbr->bbr_ack_state.ack_time);
    if (sample)
        TAILQ_INSERT_TAIL(&bbr->bbr_ack_state.samples, sample, next);

    if (is_valid_packno(bbr->bbr_ack_state.max_packno))
        /* We assume packet numbers are ordered */
        assert(packet_out->po_packno > bbr->bbr_ack_state.max_packno);
    bbr->bbr_ack_state.max_packno = packet_out->po_packno;
    bbr->bbr_ack_state.acked_bytes += packet_sz;
}
327
328
329static void
330lsquic_bbr_sent (void *cong_ctl, struct lsquic_packet_out *packet_out,
331                                                        uint64_t in_flight)
332{
333    struct lsquic_bbr *const bbr = cong_ctl;
334
335    if (!(packet_out->po_flags & PO_MINI))
336        lsquic_bw_sampler_packet_sent(&bbr->bbr_bw_sampler, packet_out,
337                                                                in_flight);
338
339    /* Obviously we make an assumption that sent packet number are always
340     * increasing.
341     */
342    bbr->bbr_last_sent_packno = packet_out->po_packno;
343}
344
345
346static void
347lsquic_bbr_lost (void *cong_ctl, struct lsquic_packet_out *packet_out,
348                                                        unsigned packet_sz)
349{
350    struct lsquic_bbr *const bbr = cong_ctl;
351
352    lsquic_bw_sampler_packet_lost(&bbr->bbr_bw_sampler, packet_out);
353    bbr->bbr_ack_state.has_losses = 1;
354    bbr->bbr_ack_state.lost_bytes += packet_sz;
355}
356
357
/* Begin processing an ACK received at `ack_time' when `in_flight' bytes
 * were outstanding.  Resets the per-ACK state that subsequent
 * lsquic_bbr_ack() calls accumulate into.
 */
static void
lsquic_bbr_begin_ack (void *cong_ctl, lsquic_time_t ack_time, uint64_t in_flight)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    assert(!(bbr->bbr_flags & BBR_FLAG_IN_ACK));
    bbr->bbr_flags |= BBR_FLAG_IN_ACK;
    memset(&bbr->bbr_ack_state, 0, sizeof(bbr->bbr_ack_state));
    TAILQ_INIT(&bbr->bbr_ack_state.samples);
    bbr->bbr_ack_state.ack_time = ack_time;
    /* Sentinel meaning "no packet seen yet"; lsquic_bbr_ack() tests it
     * with is_valid_packno():
     */
    bbr->bbr_ack_state.max_packno = UINT64_MAX;
    bbr->bbr_ack_state.in_flight = in_flight;
    bbr->bbr_ack_state.total_bytes_acked_before
                        = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler);
}
373
374
/* Based on BbrSender::ShouldExtendMinRttExpiry() */
/* Decide whether an expired min_rtt should be kept (its expiry extended)
 * instead of forcing a PROBE_RTT.  Each test below requires both the
 * "app limited since last probe" flag and the corresponding opt-in flag
 * to be set, hence the mask-and-compare idiom.
 */
static int
should_extend_min_rtt_expiry (const struct lsquic_bbr *bbr)
{
    int increased_since_last_probe;

    if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
                          |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED))
            == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
               |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED))
        // Extend the current min_rtt if we've been app limited recently.
        return 1;

    increased_since_last_probe = bbr->bbr_min_rtt_since_last_probe
                                > bbr->bbr_min_rtt * kSimilarMinRttThreshold;
    if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
                          |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT))
            == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
               |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT)
            && !increased_since_last_probe)
        // Extend the current min_rtt if we've been app limited recently and an
        // rtt has been measured in that time that's less than 12.5% more than
        // the current min_rtt.
        return 1;

    return 0;
}
402
403
/* Based on BbrSender::UpdateBandwidthAndMinRtt */
/* Returns true if min RTT expired, false otherwise */
static int
update_bandwidth_and_min_rtt (struct lsquic_bbr *bbr)
{
    struct bw_sample *sample, *next_sample;
    uint64_t sample_min_rtt;
    int min_rtt_expired;

    /* Drain the samples collected during this ACK: feed each into the
     * max-bandwidth filter and track the smallest RTT observed.
     */
    sample_min_rtt = UINT64_MAX;
    for (sample = TAILQ_FIRST(&bbr->bbr_ack_state.samples); sample;
                                                        sample = next_sample)
    {
        next_sample = TAILQ_NEXT(sample, next);

        if (sample->is_app_limited)
            bbr->bbr_flags |= BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
        else
        {
            bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
            bbr->bbr_flags |=  BBR_FLAG_HAS_NON_APP_LIMITED;
        }

        if (sample_min_rtt == UINT64_MAX || sample->rtt < sample_min_rtt)
            sample_min_rtt = sample->rtt;

        /* App-limited samples may understate bandwidth; admit them into
         * the filter only if they would raise the maximum anyway.
         */
        if (!sample->is_app_limited
                    || BW_VALUE(&sample->bandwidth)
                                    > minmax_get(&bbr->bbr_max_bandwidth))
            minmax_upmax(&bbr->bbr_max_bandwidth, bbr->bbr_round_count,
                                                BW_VALUE(&sample->bandwidth));

        /* Samples were allocated by the bandwidth sampler; return them: */
        lsquic_malo_put(sample);
    }

    /* No samples means no new RTT information from this ACK: */
    if (sample_min_rtt == UINT64_MAX)
        return 0;

    bbr->bbr_min_rtt_since_last_probe
                    = MIN(bbr->bbr_min_rtt_since_last_probe, sample_min_rtt);

    /* min_rtt expires when it has not been refreshed for kMinRttExpiry: */
    min_rtt_expired = bbr->bbr_min_rtt != 0 && (bbr->bbr_ack_state.ack_time
                                > bbr->bbr_min_rtt_timestamp + kMinRttExpiry);
    if (min_rtt_expired || sample_min_rtt < bbr->bbr_min_rtt
                                                    || 0 == bbr->bbr_min_rtt)
    {
        if (min_rtt_expired && should_extend_min_rtt_expiry(bbr))
        {
            LSQ_DEBUG("min rtt expiration extended, stay at: %"PRIu64,
                bbr->bbr_min_rtt);
            min_rtt_expired = 0;
        }
        else
        {
            LSQ_DEBUG("min rtt updated: %"PRIu64" -> %"PRIu64,
                bbr->bbr_min_rtt, sample_min_rtt);
            bbr->bbr_min_rtt = sample_min_rtt;
        }
        /* Either way, the expiry clock restarts now: */
        bbr->bbr_min_rtt_timestamp = bbr->bbr_ack_state.ack_time;
        bbr->bbr_min_rtt_since_last_probe = UINT64_MAX;
        bbr->bbr_flags &= ~BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT;
    }

    return min_rtt_expired;
}
469
470
/* Based on BbrSender::UpdateRecoveryState() */
/* Advance the recovery state machine:
 *   NOT_IN_RECOVERY -> CONSERVATION on first loss;
 *   CONSERVATION -> GROWTH after a full round;
 *   CONSERVATION/GROWTH -> NOT_IN_RECOVERY once a loss-free ACK covers a
 *   packet sent after the last loss.
 */
static void
update_recovery_state (struct lsquic_bbr *bbr, int is_round_start)
{
    // Exit recovery when there are no losses for a round.
    if (bbr->bbr_ack_state.has_losses)
        bbr->bbr_end_recovery_at = bbr->bbr_last_sent_packno;

    switch (bbr->bbr_recovery_state)
    {
    case BBR_RS_NOT_IN_RECOVERY:
        // Enter conservation on the first loss.
        if (bbr->bbr_ack_state.has_losses)
        {
            bbr->bbr_recovery_state = BBR_RS_CONSERVATION;
            // This will cause the |bbr_recovery_window| to be set to the
            // correct value in CalculateRecoveryWindow().
            bbr->bbr_recovery_window = 0;
            // Since the conservation phase is meant to be lasting for a whole
            // round, extend the current round as if it were started right now.
            bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno;
        }
        break;
    case BBR_RS_CONSERVATION:
        if (is_round_start)
            bbr->bbr_recovery_state = BBR_RS_GROWTH;
        /* Fall-through */
    case BBR_RS_GROWTH:
        // Exit recovery if appropriate.
        if (!bbr->bbr_ack_state.has_losses
                && bbr->bbr_ack_state.max_packno > bbr->bbr_end_recovery_at)
            bbr->bbr_recovery_state = BBR_RS_NOT_IN_RECOVERY;
        break;
    }
}
506
507
/* Track ack aggregation: compare the bytes actually acked during the
 * current aggregation epoch against what the max-bandwidth estimate
 * predicts, and feed the excess into the max-ack-height filter.  Returns
 * the excess ("stretch ack") bytes, or 0 when a new epoch is started.
 */
static uint64_t
update_ack_aggregation_bytes (struct lsquic_bbr *bbr,
                                                uint64_t newly_acked_bytes)
{
    const lsquic_time_t ack_time = bbr->bbr_ack_state.ack_time;
    uint64_t expected_bytes_acked, diff;

    // Compute how many bytes are expected to be delivered, assuming max
    // bandwidth is correct.
    expected_bytes_acked = minmax_get(&bbr->bbr_max_bandwidth)
                        * (ack_time - bbr->bbr_aggregation_epoch_start_time);

    // Reset the current aggregation epoch as soon as the ack arrival rate is
    // less than or equal to the max bandwidth.
    if (bbr->bbr_aggregation_epoch_bytes <= expected_bytes_acked)
    {
        // Reset to start measuring a new aggregation epoch.
        bbr->bbr_aggregation_epoch_bytes = newly_acked_bytes;
        bbr->bbr_aggregation_epoch_start_time = ack_time;
        return 0;
    }

    // Compute how many extra bytes were delivered vs max bandwidth.
    // Include the bytes most recently acknowledged to account for stretch acks.
    bbr->bbr_aggregation_epoch_bytes += newly_acked_bytes;
    diff = bbr->bbr_aggregation_epoch_bytes - expected_bytes_acked;
    minmax_upmax(&bbr->bbr_max_ack_height, bbr->bbr_round_count, diff);
    return diff;
}
537
538
/* See BbrSender::UpdateGainCyclePhase() */
/* While in PROBE_BW, decide whether to advance to the next entry of the
 * kPacingGain cycle.  Normally the cycle advances once per min RTT, but
 * probing phases (gain > 1) are held until the target is reached, and
 * draining phases (gain < 1) are cut short once the queue is drained.
 */
static void
update_gain_cycle_phase (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
{
    const uint64_t prior_in_flight = bbr->bbr_ack_state.in_flight;
    const lsquic_time_t now = bbr->bbr_ack_state.ack_time;
    // In most cases, the cycle is advanced after an RTT passes.
    int should_advance_gain_cycling
        = now - bbr->bbr_last_cycle_start > get_min_rtt(bbr);

    // If the pacing gain is above 1.0, the connection is trying to probe the
    // bandwidth by increasing the number of bytes in flight to at least
    // pacing_gain * BDP.  Make sure that it actually reaches the target, as
    // long as there are no losses suggesting that the buffers are not able to
    // hold that much.
    if (bbr->bbr_pacing_gain > 1.0
            && !bbr->bbr_ack_state.has_losses
            && prior_in_flight < get_target_cwnd(bbr, bbr->bbr_pacing_gain))
        should_advance_gain_cycling = 0;

    /* Several optimizations are possible here: "else if" instead of "if", as
     * well as not calling get_target_cwnd() if `should_advance_gain_cycling'
     * is already set to the target value.
     */

    // If pacing gain is below 1.0, the connection is trying to drain the extra
    // queue which could have been incurred by probing prior to it.  If the
    // number of bytes in flight falls down to the estimated BDP value earlier,
    // conclude that the queue has been successfully drained and exit this cycle
    // early.
    if (bbr->bbr_pacing_gain < 1.0
                                && bytes_in_flight <= get_target_cwnd(bbr, 1))
        should_advance_gain_cycling = 1;

    if (should_advance_gain_cycling)
    {
        bbr->bbr_cycle_current_offset =
                        (bbr->bbr_cycle_current_offset + 1) % kGainCycleLength;
        bbr->bbr_last_cycle_start = now;
        // Stay in low gain mode until the target BDP is hit.  Low gain mode
        // will be exited immediately when the target BDP is achieved.
        if ((bbr->bbr_flags & BBR_FLAG_DRAIN_TO_TARGET)
                && bbr->bbr_pacing_gain < 1
                && kPacingGain[bbr->bbr_cycle_current_offset] == 1
                && bytes_in_flight > get_target_cwnd(bbr, 1))
              return;
        bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset];
        LSQ_DEBUG("advanced gain cycle, pacing gain set to %.2f",
                                                        bbr->bbr_pacing_gain);
    }
}
590
591
592/* BbrSender::InRecovery() */
593static int
594in_recovery (const struct lsquic_bbr *bbr)
595{
596    return bbr->bbr_recovery_state != BBR_RS_NOT_IN_RECOVERY;
597}
598
599
/* See BbrSender::CheckIfFullBandwidthReached() */
/* Called once per round while in STARTUP: sets
 * BBR_FLAG_IS_AT_FULL_BANDWIDTH when the bandwidth estimate has failed to
 * grow by kStartupGrowthTarget for bbr_num_startup_rtts consecutive
 * rounds (or, if enabled, when a loss puts the connection in recovery).
 */
static void
check_if_full_bw_reached (struct lsquic_bbr *bbr)
{
    struct bandwidth target, bw;

    if (bbr->bbr_flags & BBR_FLAG_LAST_SAMPLE_APP_LIMITED)
    {
        /* App-limited samples cannot prove lack of growth: */
        LSQ_DEBUG("last sample app limited: full BW not reached");
        return;
    }

    target = BW_TIMES(&bbr->bbr_bw_at_last_round, kStartupGrowthTarget);
    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
    if (BW_VALUE(&bw) >= BW_VALUE(&target))
    {
        /* Still growing: rebase the target and reset the counter. */
        bbr->bbr_bw_at_last_round = bw;
        bbr->bbr_round_wo_bw_gain = 0;
        if (bbr->bbr_flags & BBR_FLAG_EXPIRE_ACK_AGG_IN_STARTUP)
            // Expire old excess delivery measurements now that bandwidth
            // increased.
            minmax_reset(&bbr->bbr_max_ack_height,
                        ((struct minmax_sample) { bbr->bbr_round_count, 0, }));
        LSQ_DEBUG("BW estimate %"PRIu64"bps greater than or equal to target "
            "%"PRIu64"bps: full BW not reached",
            BW_VALUE(&bw), BW_VALUE(&target));
        return;
    }

    ++bbr->bbr_round_wo_bw_gain;
    if ((bbr->bbr_round_wo_bw_gain >= bbr->bbr_num_startup_rtts)
            || ((bbr->bbr_flags & BBR_FLAG_EXIT_STARTUP_ON_LOSS)
                                                    && in_recovery(bbr)))
    {
        assert(bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED);  /* DCHECK */
        bbr->bbr_flags |= BBR_FLAG_IS_AT_FULL_BANDWIDTH;
        LSQ_DEBUG("reached full BW");
    }
    else
        LSQ_DEBUG("rounds w/o gain: %u, full BW not reached",
                                                bbr->bbr_round_wo_bw_gain);
}
642
643
644/* See BbrSender::OnExitStartup */
645static void
646on_exit_startup (struct lsquic_bbr *bbr, lsquic_time_t now)
647{
648    assert(bbr->bbr_mode == BBR_MODE_STARTUP);
649    /* Apparently this method is just to update stats, something that we
650     * don't do yet.
651     */
652}
653
654
655/* See BbrSender::EnterProbeBandwidthMode */
656static void
657enter_probe_bw_mode (struct lsquic_bbr *bbr, lsquic_time_t now)
658{
659    set_mode(bbr, BBR_MODE_PROBE_BW);
660    bbr->bbr_cwnd_gain = kCwndGain;
661
662    // Pick a random offset for the gain cycle out of {0, 2..7} range. 1 is
663    // excluded because in that case increased gain and decreased gain would not
664    // follow each other.
665    bbr->bbr_cycle_current_offset = rand() % (kGainCycleLength - 1);
666    if (bbr->bbr_cycle_current_offset >= 1)
667        ++bbr->bbr_cycle_current_offset;
668
669    bbr->bbr_last_cycle_start = now;
670    bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset];
671}
672
673
/* See BbrSender::EnterStartupMode */
static void
enter_startup_mode (struct lsquic_bbr *bbr, lsquic_time_t now)
{
    /* `now' is currently unused here. */
    set_mode(bbr, BBR_MODE_STARTUP);
    set_startup_values(bbr);
}
681
682
683/* See  BbrSender::MaybeExitStartupOrDrain() */
684static void
685maybe_exit_startup_or_drain (struct lsquic_bbr *bbr, lsquic_time_t now,
686                                                    uint64_t bytes_in_flight)
687{
688    uint64_t target_cwnd;
689
690    if (bbr->bbr_mode == BBR_MODE_STARTUP
691                        && (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
692    {
693        on_exit_startup(bbr, now);
694        set_mode(bbr, BBR_MODE_DRAIN);
695        bbr->bbr_pacing_gain = bbr->bbr_drain_gain;
696        bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain;
697    }
698
699    if (bbr->bbr_mode == BBR_MODE_DRAIN)
700    {
701        target_cwnd = get_target_cwnd(bbr, 1);
702        LSQ_DEBUG("%s: bytes in flight: %"PRIu64"; target cwnd: %"PRIu64,
703                                    __func__, bytes_in_flight, target_cwnd);
704        if (bytes_in_flight <= target_cwnd)
705            enter_probe_bw_mode(bbr, now);
706    }
707}
708
709
710static int
711in_slow_start (const struct lsquic_bbr *bbr)
712{
713    return bbr->bbr_mode == BBR_MODE_STARTUP;
714}
715
716
717/* See QuicByteCount BbrSender::ProbeRttCongestionWindow() */
718static uint64_t
719get_probe_rtt_cwnd (const struct lsquic_bbr *bbr)
720{
721    if (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_BASED_ON_BDP)
722        return get_target_cwnd(bbr, kModerateProbeRttMultiplier);
723    else
724        return bbr->bbr_min_cwnd;
725}
726
727
728static uint64_t
729lsquic_bbr_get_cwnd (void *cong_ctl)
730{
731    struct lsquic_bbr *const bbr = cong_ctl;
732    uint64_t cwnd;
733
734    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
735        cwnd = get_probe_rtt_cwnd(bbr);
736    else if (in_recovery(bbr) &&
737                !((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP)
738                                    && bbr->bbr_mode == BBR_MODE_STARTUP))
739        cwnd = MIN(bbr->bbr_cwnd, bbr->bbr_recovery_window);
740    else
741        cwnd = bbr->bbr_cwnd;
742
743    return cwnd;
744}
745
746
/* See BbrSender::MaybeEnterOrExitProbeRtt */
/* Enter PROBE_RTT when the min RTT estimate has expired; once in
 * PROBE_RTT, first wait for bytes in flight to drop to the small probe
 * window, then stay there for kProbeRttTime plus one full round before
 * moving on to STARTUP or PROBE_BW.
 */
static void
maybe_enter_or_exit_probe_rtt (struct lsquic_bbr *bbr, lsquic_time_t now,
            int is_round_start, int min_rtt_expired, uint64_t bytes_in_flight)
{
    if (min_rtt_expired
            && !(bbr->bbr_flags & BBR_FLAG_EXITING_QUIESCENCE)
                && bbr->bbr_mode != BBR_MODE_PROBE_RTT)
    {
        if (in_slow_start(bbr))
            on_exit_startup(bbr, now);
        set_mode(bbr, BBR_MODE_PROBE_RTT);
        bbr->bbr_pacing_gain = 1;
        // Do not decide on the time to exit PROBE_RTT until the
        // |bytes_in_flight| is at the target small value.
        bbr->bbr_exit_probe_rtt_at = 0;
    }

    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
    {
        lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler);
        LSQ_DEBUG("%s: exit probe at: %"PRIu64"; now: %"PRIu64
            "; round start: %d; round passed: %d; rtt: %"PRIu64" usec",
            __func__, bbr->bbr_exit_probe_rtt_at, now, is_round_start,
            !!(bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED),
            lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats));
        if (bbr->bbr_exit_probe_rtt_at == 0)
        {
            // If the window has reached the appropriate size, schedule exiting
            // PROBE_RTT.  The CWND during PROBE_RTT is
            // kMinimumCongestionWindow, but we allow an extra packet since QUIC
            // checks CWND before sending a packet.
            if (bytes_in_flight
                        < get_probe_rtt_cwnd(bbr) + kMaxOutgoingPacketSize)
            {
                bbr->bbr_exit_probe_rtt_at = now + kProbeRttTime;
                bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED;
            }
        }
        else
        {
            if (is_round_start)
                bbr->bbr_flags |= BBR_FLAG_PROBE_RTT_ROUND_PASSED;
            /* Exit requires both the timer and a full round to elapse: */
            if (now >= bbr->bbr_exit_probe_rtt_at
                        && (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED))
            {
                bbr->bbr_min_rtt_timestamp = now;
                if (!(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
                    enter_startup_mode(bbr, now);
                else
                    enter_probe_bw_mode(bbr, now);
            }
        }
    }

    bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE;
}
804
805
/* See BbrSender::CalculatePacingRate */
/* Recompute bbr_pacing_rate from the current bandwidth estimate.  After
 * full bandwidth is reached the rate is simply gain * estimate; before
 * that, a series of prioritized STARTUP adjustments applies (each branch
 * returns early).
 */
static void
calculate_pacing_rate (struct lsquic_bbr *bbr)
{
    struct bandwidth bw, target_rate;

    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
    if (BW_IS_ZERO(&bw))
        return;

    LSQ_DEBUG("BW estimate: %"PRIu64, BW_VALUE(&bw));

    target_rate = BW_TIMES(&bw, bbr->bbr_pacing_gain);
    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
    {
        bbr->bbr_pacing_rate = target_rate;
        return;
    }

    // Pace at the rate of initial_window / RTT as soon as RTT measurements are
    // available.
    if (BW_IS_ZERO(&bbr->bbr_pacing_rate)
            && 0 != lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats))
    {
        bbr->bbr_pacing_rate = BW_FROM_BYTES_AND_DELTA(
            bbr->bbr_init_cwnd,
            lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats));
        return;
    }

    // Slow the pacing rate in STARTUP once loss has ever been detected.
    const int has_ever_detected_loss = bbr->bbr_end_recovery_at != 0;
    if (has_ever_detected_loss
            && (bbr->bbr_flags & (BBR_FLAG_SLOWER_STARTUP
                                  |BBR_FLAG_HAS_NON_APP_LIMITED))
                == (BBR_FLAG_SLOWER_STARTUP|BBR_FLAG_HAS_NON_APP_LIMITED))
    {
        bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupAfterLossGain);
        return;
    }

    // Slow the pacing rate in STARTUP by the bytes_lost / CWND.
    /* NOTE: startup_rate_reduction_multiplier_ is a compile-time 0, so
     * this branch is currently dead code kept for parity with Chromium.
     */
    if (startup_rate_reduction_multiplier_ != 0
            && has_ever_detected_loss
                && (bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED))
    {
        bbr->bbr_pacing_rate = BW_TIMES(&target_rate,
            (1 - (bbr->bbr_startup_bytes_lost
                    * startup_rate_reduction_multiplier_ * 1.0f
                        / bbr->bbr_cwnd_gain)));
        // Ensure the pacing rate doesn't drop below the startup growth target
        // times  the bandwidth estimate.
        if (BW_VALUE(&bbr->bbr_pacing_rate)
                                        < BW_VALUE(&bw) * kStartupGrowthTarget)
            bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupGrowthTarget);
        return;
    }

    // Do not decrease the pacing rate during startup.
    if (BW_VALUE(&bbr->bbr_pacing_rate) < BW_VALUE(&target_rate))
        bbr->bbr_pacing_rate = target_rate;
}
868
869
870/* See BbrSender::CalculateCongestionWindow */
871static void
872calculate_cwnd (struct lsquic_bbr *bbr, uint64_t bytes_acked,
873                                                  uint64_t excess_acked)
874{
875    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
876        return;
877
878    uint64_t target_window = get_target_cwnd(bbr, bbr->bbr_cwnd_gain);
879    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
880        // Add the max recently measured ack aggregation to CWND.
881        target_window += minmax_get(&bbr->bbr_max_ack_height);
882    else if (bbr->bbr_flags & BBR_FLAG_ENABLE_ACK_AGG_IN_STARTUP)
883        // Add the most recent excess acked.  Because CWND never decreases in
884        // STARTUP, this will automatically create a very localized max filter.
885        target_window += excess_acked;
886
887    // Instead of immediately setting the target CWND as the new one, BBR grows
888    // the CWND towards |target_window| by only increasing it |bytes_acked| at a
889    // time.
890    const int add_bytes_acked =
891	!FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery || !in_recovery(bbr);
892    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
893        bbr->bbr_cwnd = MIN(target_window, bbr->bbr_cwnd + bytes_acked);
894    else if (add_bytes_acked &&
895             (bbr->bbr_cwnd_gain < target_window ||
896              lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler)
897                                                        < bbr->bbr_init_cwnd))
898        // If the connection is not yet out of startup phase, do not decrease
899        // the window.
900        bbr->bbr_cwnd += bytes_acked;
901
902    // Enforce the limits on the congestion window.
903    if (bbr->bbr_cwnd < bbr->bbr_min_cwnd)
904        bbr->bbr_cwnd = bbr->bbr_min_cwnd;
905    else if (bbr->bbr_cwnd > bbr->bbr_max_cwnd)
906    {
907        LSQ_DEBUG("exceed max cwnd");
908        bbr->bbr_cwnd = bbr->bbr_max_cwnd;
909    }
910}
911
912
913/* See BbrSender::CalculateRecoveryWindow */
914static void
915calculate_recovery_window (struct lsquic_bbr *bbr, uint64_t bytes_acked,
916                                uint64_t bytes_lost, uint64_t bytes_in_flight)
917{
918    if ((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP)
919                                        && bbr->bbr_mode == BBR_MODE_STARTUP)
920        return;
921
922    if (bbr->bbr_recovery_state == BBR_RS_NOT_IN_RECOVERY)
923        return;
924
925    // Set up the initial recovery window.
926    if (bbr->bbr_recovery_window == 0)
927    {
928        bbr->bbr_recovery_window = bytes_in_flight + bytes_acked;
929        bbr->bbr_recovery_window = MAX(bbr->bbr_min_cwnd,
930                                                    bbr->bbr_recovery_window);
931        return;
932    }
933
934    // Remove losses from the recovery window, while accounting for a potential
935    // integer underflow.
936    if (bbr->bbr_recovery_window >= bytes_lost)
937        bbr->bbr_recovery_window -= bytes_lost;
938    else
939        bbr->bbr_recovery_window = kMaxSegmentSize;
940
941    // In CONSERVATION mode, just subtracting losses is sufficient.  In GROWTH,
942    // release additional |bytes_acked| to achieve a slow-start-like behavior.
943    if (bbr->bbr_recovery_state == BBR_RS_GROWTH)
944        bbr->bbr_recovery_window += bytes_acked;
945
946    // Sanity checks.  Ensure that we always allow to send at least an MSS or
947    // |bytes_acked| in response, whichever is larger.
948    bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window,
949                                            bytes_in_flight + bytes_acked);
950    if (FLAG_quic_bbr_one_mss_conservation)
951        bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window,
952                                            bytes_in_flight + kMaxSegmentSize);
953    bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, bbr->bbr_min_cwnd);
954}
955
956
957static void
958lsquic_bbr_end_ack (void *cong_ctl, uint64_t in_flight)
959{
960    struct lsquic_bbr *const bbr = cong_ctl;
961    int is_round_start, min_rtt_expired;
962    uint64_t bytes_acked, excess_acked, bytes_lost;
963
964    assert(bbr->bbr_flags & BBR_FLAG_IN_ACK);
965    bbr->bbr_flags &= ~BBR_FLAG_IN_ACK;
966
967    LSQ_DEBUG("end_ack; mode: %s; in_flight: %"PRIu64, mode2str[bbr->bbr_mode],
968                                                                    in_flight);
969
970    bytes_acked = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler)
971                            - bbr->bbr_ack_state.total_bytes_acked_before;
972    if (bbr->bbr_ack_state.acked_bytes)
973    {
974        is_round_start = bbr->bbr_ack_state.max_packno
975                                    > bbr->bbr_current_round_trip_end
976                    || !is_valid_packno(bbr->bbr_current_round_trip_end);
977        if (is_round_start)
978        {
979            ++bbr->bbr_round_count;
980            bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno;
981            LSQ_DEBUG("up round count to %"PRIu64"; new rt end: %"PRIu64,
982                        bbr->bbr_round_count, bbr->bbr_current_round_trip_end);
983        }
984        min_rtt_expired = update_bandwidth_and_min_rtt(bbr);
985        update_recovery_state(bbr, is_round_start);
986        excess_acked = update_ack_aggregation_bytes(bbr, bytes_acked);
987    }
988    else
989    {
990        is_round_start = 0;
991        min_rtt_expired = 0;
992        excess_acked = 0;
993    }
994
995    if (bbr->bbr_mode == BBR_MODE_PROBE_BW)
996        update_gain_cycle_phase(bbr, in_flight);
997
998    if (is_round_start && !(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
999        check_if_full_bw_reached(bbr);
1000
1001    maybe_exit_startup_or_drain(bbr, bbr->bbr_ack_state.ack_time, in_flight);
1002
1003    maybe_enter_or_exit_probe_rtt(bbr, bbr->bbr_ack_state.ack_time,
1004                                is_round_start, min_rtt_expired, in_flight);
1005
1006    // Calculate number of packets acked and lost.
1007    bytes_lost = bbr->bbr_ack_state.lost_bytes;
1008
1009    // After the model is updated, recalculate the pacing rate and congestion
1010    // window.
1011    calculate_pacing_rate(bbr);
1012    calculate_cwnd(bbr, bytes_acked, excess_acked);
1013    calculate_recovery_window(bbr, bytes_acked, bytes_lost, in_flight);
1014
1015    /* We don't need to clean up BW sampler */
1016}
1017
1018
/* Tear-down hook: BBR keeps no separately allocated state, so this only
 * logs.  (The local must be named `bbr': the LSQUIC_LOG_CONN_ID macro
 * references it.)
 */
static void
lsquic_bbr_cleanup (void *cong_ctl)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    LSQ_DEBUG("cleanup");
}
1026
1027
/* Loss event hook: intentionally a no-op -- BBR reacts to losses via the
 * recovery state updated in the ACK path, not via this callback.
 */
static void
lsquic_bbr_loss (void *cong_ctl) {   /* Noop */   }
1030
1031
/* Retransmission-timeout hook: intentionally a no-op for BBR. */
static void
lsquic_bbr_timeout (void *cong_ctl) {   /* Noop */   }
1034
1035
/* Congestion-control vtable exposing the BBR implementation to the rest of
 * lsquic via the generic cong_ctl interface.
 */
const struct cong_ctl_if lsquic_cong_bbr_if =
{
    .cci_ack           = lsquic_bbr_ack,
    .cci_begin_ack     = lsquic_bbr_begin_ack,
    .cci_end_ack       = lsquic_bbr_end_ack,
    .cci_cleanup       = lsquic_bbr_cleanup,
    .cci_get_cwnd      = lsquic_bbr_get_cwnd,
    .cci_init          = lsquic_bbr_init,
    .cci_pacing_rate   = lsquic_bbr_pacing_rate,
    .cci_loss          = lsquic_bbr_loss,
    .cci_lost          = lsquic_bbr_lost,
    .cci_timeout       = lsquic_bbr_timeout,
    .cci_sent          = lsquic_bbr_sent,
    .cci_was_quiet     = lsquic_bbr_was_quiet,
};
1051