lsquic_bbr.c revision 10c41073
1/* Copyright (c) 2017 - 2020 LiteSpeed Technologies Inc.  See LICENSE. */
2// Copyright 2016 The Chromium Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE.chrome file.
5
6#include <assert.h>
7#include <inttypes.h>
8#include <stdint.h>
9#include <stdlib.h>
10#include <string.h>
11#include <sys/queue.h>
12
13#include "lsquic.h"
14#include "lsquic_int_types.h"
15#include "lsquic_cong_ctl.h"
16#include "lsquic_minmax.h"
17#include "lsquic_packet_common.h"
18#include "lsquic_packet_out.h"
19#include "lsquic_bw_sampler.h"
20#include "lsquic_bbr.h"
21#include "lsquic_hash.h"
22#include "lsquic_conn.h"
23#include "lsquic_sfcw.h"
24#include "lsquic_conn_flow.h"
25#include "lsquic_varint.h"
26#include "lsquic_hq.h"
27#include "lsquic_stream.h"
28#include "lsquic_rtt.h"
29#include "lsquic_conn_public.h"
30#include "lsquic_util.h"
31#include "lsquic_malo.h"
32#include "lsquic_crand.h"
33#include "lsquic_mm.h"
34#include "lsquic_engine_public.h"
35
36#define LSQUIC_LOGGER_MODULE LSQLM_BBR
37#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(bbr->bbr_conn_pub->lconn)
38#include "lsquic_logger.h"
39
40#define MIN(a, b) ((a) < (b) ? (a) : (b))
41#define MAX(a, b) ((a) > (b) ? (a) : (b))
42
43#define ms(val_) ((val_) * 1000)
44#define sec(val_) ((val_) * 1000 * 1000)
45
46// Default maximum packet size used in the Linux TCP implementation.
47// Used in QUIC for congestion window computations in bytes.
48#define kDefaultTCPMSS 1460
49#define kMaxSegmentSize kDefaultTCPMSS
50
51// Constants based on TCP defaults.
52// The minimum CWND to ensure delayed acks don't reduce bandwidth measurements.
53// Does not inflate the pacing rate.
54#define kDefaultMinimumCongestionWindow  (4 * kDefaultTCPMSS)
55
56// The gain used for the STARTUP, equal to 2/ln(2).
57#define kDefaultHighGain 2.885f
58
59// The newly derived gain for STARTUP, equal to 4 * ln(2)
60#define kDerivedHighGain 2.773f
61
62// The newly derived CWND gain for STARTUP, 2.
63#define kDerivedHighCWNDGain 2.0f
64
65// The gain used in STARTUP after loss has been detected.
66// 1.5 is enough to allow for 25% exogenous loss and still observe a 25% growth
67// in measured bandwidth.
68#define kStartupAfterLossGain 1.5f
69
70// We match SPDY's use of 32 (since we'd compete with SPDY).
71#define kInitialCongestionWindow 32
72
73/* Taken from send_algorithm_interface.h */
74#define kDefaultMaxCongestionWindowPackets 2000
75
76// The time after which the current min_rtt value expires.
77#define kMinRttExpiry sec(10)
78
79// Coefficient to determine if a new RTT is sufficiently similar to min_rtt that
80// we don't need to enter PROBE_RTT.
81#define kSimilarMinRttThreshold 1.125f
82
83// If the bandwidth does not increase by the factor of |kStartupGrowthTarget|
84// within |kRoundTripsWithoutGrowthBeforeExitingStartup| rounds, the connection
85// will exit the STARTUP mode.
86#define kStartupGrowthTarget 1.25
87
88#define kRoundTripsWithoutGrowthBeforeExitingStartup 3
89
90#define startup_rate_reduction_multiplier_ 0
91
// The cycle of gains used during the PROBE_BW stage: one probing phase at
// 1.25x, one draining phase at 0.75x, then six cruising phases at unity gain.
static const float kPacingGain[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1};

// The length of the gain cycle (number of phases in kPacingGain).
static const size_t kGainCycleLength = sizeof(kPacingGain)
                                                    / sizeof(kPacingGain[0]);
98
99// Coefficient of target congestion window to use when basing PROBE_RTT on BDP.
100#define kModerateProbeRttMultiplier 0.75
101
102// The maximum packet size of any QUIC packet over IPv6, based on ethernet's max
103// size, minus the IP and UDP headers. IPv6 has a 40 byte header, UDP adds an
104// additional 8 bytes.  This is a total overhead of 48 bytes.  Ethernet's
105// max packet size is 1500 bytes,  1500 - 48 = 1452.
106#define kMaxV6PacketSize 1452
107// The maximum packet size of any QUIC packet over IPv4.
108// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472.
109#define kMaxV4PacketSize 1472
110// The maximum incoming packet size allowed.
111#define kMaxIncomingPacketSize kMaxV4PacketSize
112// The maximum outgoing packet size allowed.
113#define kMaxOutgoingPacketSize kMaxV6PacketSize
114
115// The minimum time the connection can spend in PROBE_RTT mode.
116#define kProbeRttTime ms(200)
117
118/* FLAG* are from net/quic/quic_flags_list.h */
119
120// When in STARTUP and recovery, do not add bytes_acked to QUIC BBR's CWND in
121// CalculateCongestionWindow()
122#define FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery 0
123
124// When true, ensure BBR allows at least one MSS to be sent in response to an
125// ACK in packet conservation.
126#define FLAG_quic_bbr_one_mss_conservation 0
127
128/* From net/quic/quic_flags_list.h */
129#define kCwndGain 2.0
130
131
132static uint64_t lsquic_bbr_get_cwnd (void *);
133
134
/* Human-readable names for enum bbr_mode values; used in debug logging. */
static const char *const mode2str[] =
{
    [BBR_MODE_STARTUP]   = "STARTUP",
    [BBR_MODE_DRAIN]     = "DRAIN",
    [BBR_MODE_PROBE_BW]  = "PROBE_BW",
    [BBR_MODE_PROBE_RTT] = "PROBE_RTT",
};
142
143
144static void
145set_mode (struct lsquic_bbr *bbr, enum bbr_mode mode)
146{
147    if (bbr->bbr_mode != mode)
148    {
149        LSQ_DEBUG("mode change %s -> %s", mode2str[bbr->bbr_mode],
150                                                        mode2str[mode]);
151        bbr->bbr_mode = mode;
152    }
153    else
154        LSQ_DEBUG("mode remains %s", mode2str[mode]);
155}
156
157
158static void
159set_startup_values (struct lsquic_bbr *bbr)
160{
161    bbr->bbr_pacing_gain = bbr->bbr_high_gain;
162    bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain;
163}
164
165
166static void
167lsquic_bbr_init (void *cong_ctl, const struct lsquic_conn_public *conn_pub,
168                                                enum quic_ft_bit retx_frames)
169{
170    struct lsquic_bbr *const bbr = cong_ctl;
171    bbr->bbr_conn_pub = conn_pub;
172    lsquic_bw_sampler_init(&bbr->bbr_bw_sampler, conn_pub->lconn, retx_frames);
173
174    bbr->bbr_rtt_stats = &conn_pub->rtt_stats;
175    bbr->bbr_mode = BBR_MODE_STARTUP;
176    bbr->bbr_round_count = 0;
177    minmax_init(&bbr->bbr_max_bandwidth, 10);
178    minmax_init(&bbr->bbr_max_ack_height, 10);
179    bbr->bbr_aggregation_epoch_bytes = 0;
180    bbr->bbr_aggregation_epoch_start_time = 0;
181    bbr->bbr_min_rtt = 0;
182    bbr->bbr_min_rtt_timestamp = 0;
183    bbr->bbr_init_cwnd = kInitialCongestionWindow * kDefaultTCPMSS;
184    bbr->bbr_cwnd = kInitialCongestionWindow * kDefaultTCPMSS;
185    bbr->bbr_max_cwnd = kDefaultMaxCongestionWindowPackets * kDefaultTCPMSS;
186    bbr->bbr_min_cwnd = kDefaultMinimumCongestionWindow;
187    bbr->bbr_high_gain = kDefaultHighGain;
188    bbr->bbr_high_cwnd_gain = kDefaultHighGain;
189    bbr->bbr_drain_gain = 1.0 / kDefaultHighGain;
190    bbr->bbr_pacing_rate = BW_ZERO();
191    bbr->bbr_pacing_gain = 1.0;
192    bbr->bbr_cwnd_gain = 1.0;
193    bbr->bbr_num_startup_rtts = kRoundTripsWithoutGrowthBeforeExitingStartup;
194    bbr->bbr_flags &= ~BBR_FLAG_EXIT_STARTUP_ON_LOSS;
195    bbr->bbr_cycle_current_offset = 0;
196    bbr->bbr_last_cycle_start = 0;
197    bbr->bbr_flags &= ~BBR_FLAG_IS_AT_FULL_BANDWIDTH;
198    bbr->bbr_round_wo_bw_gain = 0;
199    bbr->bbr_bw_at_last_round = BW_ZERO();
200    bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE;
201    bbr->bbr_exit_probe_rtt_at = 0;
202    bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED;
203    bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
204    bbr->bbr_flags &= ~BBR_FLAG_HAS_NON_APP_LIMITED;
205    bbr->bbr_flags &= ~BBR_FLAG_FLEXIBLE_APP_LIMITED;
206
207    set_startup_values(bbr);
208
209    LSQ_DEBUG("initialized");
210}
211
212
213static lsquic_time_t
214get_min_rtt (const struct lsquic_bbr *bbr)
215{
216    lsquic_time_t min_rtt;
217
218    if (bbr->bbr_min_rtt)
219        return bbr->bbr_min_rtt;
220    else
221    {
222        min_rtt = lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats);
223        if (min_rtt == 0)
224            min_rtt = 25000;
225        return min_rtt;
226    }
227}
228
229
230static uint64_t
231lsquic_bbr_pacing_rate (void *cong_ctl, int in_recovery)
232{
233    struct lsquic_bbr *const bbr = cong_ctl;
234    lsquic_time_t min_rtt;
235    struct bandwidth bw;
236
237    if (!BW_IS_ZERO(&bbr->bbr_pacing_rate))
238        bw = bbr->bbr_pacing_rate;
239    else
240    {
241        min_rtt = get_min_rtt(bbr);
242        bw = BW_FROM_BYTES_AND_DELTA(bbr->bbr_init_cwnd, min_rtt);
243        bw = BW_TIMES(&bw, bbr->bbr_high_cwnd_gain);
244    }
245
246    return BW_TO_BYTES_PER_SEC(&bw);
247}
248
249
250/* BbrSender::GetTargetCongestionWindow */
251static uint64_t
252get_target_cwnd (const struct lsquic_bbr *bbr, float gain)
253{
254    struct bandwidth bw;
255    uint64_t bdp, cwnd;
256
257    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
258    bdp = get_min_rtt(bbr) * BW_TO_BYTES_PER_SEC(&bw) / 1000000;
259    cwnd = gain * bdp;
260
261    // BDP estimate will be zero if no bandwidth samples are available yet.
262    if (cwnd == 0)
263        cwnd = gain * bbr->bbr_init_cwnd;
264
265    return MAX(cwnd, bbr->bbr_min_cwnd);
266}
267
268
269/* See BbrSender::IsPipeSufficientlyFull */
270static int
271is_pipe_sufficiently_full (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
272{
273    // See if we need more bytes in flight to see more bandwidth.
274    if (bbr->bbr_mode == BBR_MODE_STARTUP)
275        // STARTUP exits if it doesn't observe a 25% bandwidth increase, so
276        // the CWND must be more than 25% above the target.
277        return bytes_in_flight >= get_target_cwnd(bbr, 1.5);
278    else if (bbr->bbr_pacing_gain > 1)
279        // Super-unity PROBE_BW doesn't exit until 1.25 * BDP is achieved.
280        return bytes_in_flight >= get_target_cwnd(bbr, bbr->bbr_pacing_gain);
281    else
282        // If bytes_in_flight are above the target congestion window, it should
283        // be possible to observe the same or more bandwidth if it's available.
284        return bytes_in_flight >= get_target_cwnd(bbr, 1.1);
285}
286
287
288static void
289lsquic_bbr_was_quiet (void *cong_ctl, lsquic_time_t now, uint64_t in_flight)
290{
291    struct lsquic_bbr *const bbr = cong_ctl;
292    LSQ_DEBUG("was quiet");         /* Do nothing */
293}
294
295
296/* See BbrSender::OnApplicationLimited */
297static void
298bbr_app_limited (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
299{
300    uint64_t cwnd;
301
302    cwnd = lsquic_bbr_get_cwnd(bbr);
303    if (bytes_in_flight >= cwnd)
304        return;
305    if ((bbr->bbr_flags & BBR_FLAG_FLEXIBLE_APP_LIMITED)
306                            && is_pipe_sufficiently_full(bbr, bytes_in_flight))
307        return;
308
309    bbr->bbr_flags |= BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT;
310    lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler);
311    LSQ_DEBUG("becoming application-limited.  Last sent packet: %"PRIu64"; "
312                            "CWND: %"PRIu64, bbr->bbr_last_sent_packno, cwnd);
313}
314
315
/* Record one acked packet.  Must be called between lsquic_bbr_begin_ack()
 * and the end of ACK processing (BBR_FLAG_IN_ACK is asserted).  Collects
 * the packet's bandwidth sample (if any) and updates per-ACK-event
 * accounting of the largest acked packet number and acked byte count.
 * `now_time' and `app_limited' are unused here.
 */
static void
lsquic_bbr_ack (void *cong_ctl, struct lsquic_packet_out *packet_out,
                  unsigned packet_sz, lsquic_time_t now_time, int app_limited)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    struct bw_sample *sample;

    assert(bbr->bbr_flags & BBR_FLAG_IN_ACK);

    /* The sampler may or may not produce a bandwidth sample for this
     * packet; queue it for update_bandwidth_and_min_rtt() to consume.
     */
    sample = lsquic_bw_sampler_packet_acked(&bbr->bbr_bw_sampler, packet_out,
                                                bbr->bbr_ack_state.ack_time);
    if (sample)
        TAILQ_INSERT_TAIL(&bbr->bbr_ack_state.samples, sample, next);

    if (is_valid_packno(bbr->bbr_ack_state.max_packno))
        /* We assume packet numbers are ordered */
        assert(packet_out->po_packno > bbr->bbr_ack_state.max_packno);
    bbr->bbr_ack_state.max_packno = packet_out->po_packno;
    bbr->bbr_ack_state.acked_bytes += packet_sz;
}
336
337
338static void
339lsquic_bbr_sent (void *cong_ctl, struct lsquic_packet_out *packet_out,
340                                        uint64_t in_flight, int app_limited)
341{
342    struct lsquic_bbr *const bbr = cong_ctl;
343
344    if (!(packet_out->po_flags & PO_MINI))
345        lsquic_bw_sampler_packet_sent(&bbr->bbr_bw_sampler, packet_out,
346                                                                in_flight);
347
348    /* Obviously we make an assumption that sent packet number are always
349     * increasing.
350     */
351    bbr->bbr_last_sent_packno = packet_out->po_packno;
352
353    if (app_limited)
354        bbr_app_limited(bbr, in_flight);
355}
356
357
358static void
359lsquic_bbr_lost (void *cong_ctl, struct lsquic_packet_out *packet_out,
360                                                        unsigned packet_sz)
361{
362    struct lsquic_bbr *const bbr = cong_ctl;
363
364    lsquic_bw_sampler_packet_lost(&bbr->bbr_bw_sampler, packet_out);
365    bbr->bbr_ack_state.has_losses = 1;
366    bbr->bbr_ack_state.lost_bytes += packet_sz;
367}
368
369
/* Begin an ACK-processing session.  Resets the per-ACK-event state that
 * subsequent lsquic_bbr_ack() calls accumulate into, and snapshots the ACK
 * arrival time, bytes in flight, and total bytes acked before this event.
 */
static void
lsquic_bbr_begin_ack (void *cong_ctl, lsquic_time_t ack_time, uint64_t in_flight)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    assert(!(bbr->bbr_flags & BBR_FLAG_IN_ACK));
    bbr->bbr_flags |= BBR_FLAG_IN_ACK;
    memset(&bbr->bbr_ack_state, 0, sizeof(bbr->bbr_ack_state));
    TAILQ_INIT(&bbr->bbr_ack_state.samples);
    bbr->bbr_ack_state.ack_time = ack_time;
    /* "No packet seen yet" sentinel; presumably is_valid_packno() treats
     * UINT64_MAX as invalid -- see the check in lsquic_bbr_ack().
     */
    bbr->bbr_ack_state.max_packno = UINT64_MAX;
    bbr->bbr_ack_state.in_flight = in_flight;
    bbr->bbr_ack_state.total_bytes_acked_before
                        = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler);
}
385
386
387/* Based on BbrSender::ShouldExtendMinRttExpiry() */
388static int
389should_extend_min_rtt_expiry (const struct lsquic_bbr *bbr)
390{
391    int increased_since_last_probe;
392
393    if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
394                          |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED))
395            == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
396               |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED))
397        // Extend the current min_rtt if we've been app limited recently.
398        return 1;
399
400    increased_since_last_probe = bbr->bbr_min_rtt_since_last_probe
401                                > bbr->bbr_min_rtt * kSimilarMinRttThreshold;
402    if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
403                          |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT))
404            == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
405               |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT)
406            && !increased_since_last_probe)
407        // Extend the current min_rtt if we've been app limited recently and an
408        // rtt has been measured in that time that's less than 12.5% more than
409        // the current min_rtt.
410        return 1;
411
412    return 0;
413}
414
415
/* Based on BbrSender::UpdateBandwidthAndMinRtt */
/* Consume all bandwidth samples queued during the current ACK event,
 * feeding each into the max-bandwidth filter and tracking the smallest
 * observed RTT; then refresh the min_rtt estimate.
 * Returns true if min RTT expired, false otherwise.
 */
static int
update_bandwidth_and_min_rtt (struct lsquic_bbr *bbr)
{
    struct bw_sample *sample, *next_sample;
    uint64_t sample_min_rtt;
    int min_rtt_expired;

    sample_min_rtt = UINT64_MAX;
    for (sample = TAILQ_FIRST(&bbr->bbr_ack_state.samples); sample;
                                                        sample = next_sample)
    {
        next_sample = TAILQ_NEXT(sample, next);

        if (sample->is_app_limited)
            bbr->bbr_flags |= BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
        else
        {
            bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
            bbr->bbr_flags |=  BBR_FLAG_HAS_NON_APP_LIMITED;
        }

        if (sample_min_rtt == UINT64_MAX || sample->rtt < sample_min_rtt)
            sample_min_rtt = sample->rtt;

        /* App-limited samples can understate bandwidth, so one is only
         * admitted if it would raise the current maximum anyway.
         */
        if (!sample->is_app_limited
                    || BW_VALUE(&sample->bandwidth)
                                    > minmax_get(&bbr->bbr_max_bandwidth))
            minmax_upmax(&bbr->bbr_max_bandwidth, bbr->bbr_round_count,
                                                BW_VALUE(&sample->bandwidth));

        /* Sample is consumed: return it to the allocator. */
        lsquic_malo_put(sample);
    }

    /* No samples this ACK event: nothing to update. */
    if (sample_min_rtt == UINT64_MAX)
        return 0;

    bbr->bbr_min_rtt_since_last_probe
                    = MIN(bbr->bbr_min_rtt_since_last_probe, sample_min_rtt);

    /* min_rtt expires when it has not been refreshed within kMinRttExpiry. */
    min_rtt_expired = bbr->bbr_min_rtt != 0 && (bbr->bbr_ack_state.ack_time
                                > bbr->bbr_min_rtt_timestamp + kMinRttExpiry);
    if (min_rtt_expired || sample_min_rtt < bbr->bbr_min_rtt
                                                    || 0 == bbr->bbr_min_rtt)
    {
        if (min_rtt_expired && should_extend_min_rtt_expiry(bbr))
        {
            LSQ_DEBUG("min rtt expiration extended, stay at: %"PRIu64,
                bbr->bbr_min_rtt);
            min_rtt_expired = 0;
        }
        else
        {
            LSQ_DEBUG("min rtt updated: %"PRIu64" -> %"PRIu64,
                bbr->bbr_min_rtt, sample_min_rtt);
            bbr->bbr_min_rtt = sample_min_rtt;
        }
        /* In either case, restart the expiry clock and reset the
         * since-last-probe state.
         */
        bbr->bbr_min_rtt_timestamp = bbr->bbr_ack_state.ack_time;
        bbr->bbr_min_rtt_since_last_probe = UINT64_MAX;
        bbr->bbr_flags &= ~BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT;
    }

    return min_rtt_expired;
}
481
482
/* Based on BbrSender::UpdateRecoveryState() */
/* Advance the recovery state machine (NOT_IN_RECOVERY -> CONSERVATION ->
 * GROWTH -> NOT_IN_RECOVERY) based on whether the current ACK event
 * carried losses and whether a new round trip has started.
 */
static void
update_recovery_state (struct lsquic_bbr *bbr, int is_round_start)
{
    // Exit recovery when there are no losses for a round.
    if (bbr->bbr_ack_state.has_losses)
        bbr->bbr_end_recovery_at = bbr->bbr_last_sent_packno;

    switch (bbr->bbr_recovery_state)
    {
    case BBR_RS_NOT_IN_RECOVERY:
        // Enter conservation on the first loss.
        if (bbr->bbr_ack_state.has_losses)
        {
            bbr->bbr_recovery_state = BBR_RS_CONSERVATION;
            // This will cause the |bbr_recovery_window| to be set to the
            // correct value in CalculateRecoveryWindow().
            bbr->bbr_recovery_window = 0;
            // Since the conservation phase is meant to be lasting for a whole
            // round, extend the current round as if it were started right now.
            bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno;
        }
        break;
    case BBR_RS_CONSERVATION:
        if (is_round_start)
            bbr->bbr_recovery_state = BBR_RS_GROWTH;
        /* Fall-through */
    case BBR_RS_GROWTH:
        // Exit recovery if appropriate.
        if (!bbr->bbr_ack_state.has_losses
                && bbr->bbr_ack_state.max_packno > bbr->bbr_end_recovery_at)
            bbr->bbr_recovery_state = BBR_RS_NOT_IN_RECOVERY;
        break;
    }
}
518
519
/* Track ACK aggregation: how far ACK arrivals run ahead of what the
 * max-bandwidth estimate predicts for the current aggregation epoch.
 * Returns the excess acked bytes for the epoch, or 0 when a new epoch is
 * started.  The running maximum of the excess is kept in
 * bbr_max_ack_height.
 */
static uint64_t
update_ack_aggregation_bytes (struct lsquic_bbr *bbr,
                                                uint64_t newly_acked_bytes)
{
    const lsquic_time_t ack_time = bbr->bbr_ack_state.ack_time;
    uint64_t expected_bytes_acked, diff;

    // Compute how many bytes are expected to be delivered, assuming max
    // bandwidth is correct.
    expected_bytes_acked = minmax_get(&bbr->bbr_max_bandwidth)
                        * (ack_time - bbr->bbr_aggregation_epoch_start_time);

    // Reset the current aggregation epoch as soon as the ack arrival rate is
    // less than or equal to the max bandwidth.
    if (bbr->bbr_aggregation_epoch_bytes <= expected_bytes_acked)
    {
        // Reset to start measuring a new aggregation epoch.
        bbr->bbr_aggregation_epoch_bytes = newly_acked_bytes;
        bbr->bbr_aggregation_epoch_start_time = ack_time;
        return 0;
    }

    // Compute how many extra bytes were delivered vs max bandwidth.
    // Include the bytes most recently acknowledged to account for stretch acks.
    bbr->bbr_aggregation_epoch_bytes += newly_acked_bytes;
    diff = bbr->bbr_aggregation_epoch_bytes - expected_bytes_acked;
    minmax_upmax(&bbr->bbr_max_ack_height, bbr->bbr_round_count, diff);
    return diff;
}
549
550
/* See BbrSender::UpdateGainCyclePhase() */
/* In PROBE_BW, decide whether to advance to the next phase of the pacing
 * gain cycle (kPacingGain) and update bbr_pacing_gain accordingly.
 */
static void
update_gain_cycle_phase (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
{
    const uint64_t prior_in_flight = bbr->bbr_ack_state.in_flight;
    const lsquic_time_t now = bbr->bbr_ack_state.ack_time;
    // In most cases, the cycle is advanced after an RTT passes.
    int should_advance_gain_cycling
        = now - bbr->bbr_last_cycle_start > get_min_rtt(bbr);

    // If the pacing gain is above 1.0, the connection is trying to probe the
    // bandwidth by increasing the number of bytes in flight to at least
    // pacing_gain * BDP.  Make sure that it actually reaches the target, as
    // long as there are no losses suggesting that the buffers are not able to
    // hold that much.
    if (bbr->bbr_pacing_gain > 1.0
            && !bbr->bbr_ack_state.has_losses
            && prior_in_flight < get_target_cwnd(bbr, bbr->bbr_pacing_gain))
        should_advance_gain_cycling = 0;

    /* Several optimizations are possible here: "else if" instead of "if", as
     * well as not calling get_target_cwnd() if `should_advance_gain_cycling'
     * is already set to the target value.
     */

    // If pacing gain is below 1.0, the connection is trying to drain the extra
    // queue which could have been incurred by probing prior to it.  If the
    // number of bytes in flight falls down to the estimated BDP value earlier,
    // conclude that the queue has been successfully drained and exit this cycle
    // early.
    if (bbr->bbr_pacing_gain < 1.0
                                && bytes_in_flight <= get_target_cwnd(bbr, 1))
        should_advance_gain_cycling = 1;

    if (should_advance_gain_cycling)
    {
        bbr->bbr_cycle_current_offset =
                        (bbr->bbr_cycle_current_offset + 1) % kGainCycleLength;
        bbr->bbr_last_cycle_start = now;
        // Stay in low gain mode until the target BDP is hit.  Low gain mode
        // will be exited immediately when the target BDP is achieved.
        if ((bbr->bbr_flags & BBR_FLAG_DRAIN_TO_TARGET)
                && bbr->bbr_pacing_gain < 1
                && kPacingGain[bbr->bbr_cycle_current_offset] == 1
                && bytes_in_flight > get_target_cwnd(bbr, 1))
              return;
        bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset];
        LSQ_DEBUG("advanced gain cycle, pacing gain set to %.2f",
                                                        bbr->bbr_pacing_gain);
    }
}
602
603
604/* BbrSender::InRecovery() */
605static int
606in_recovery (const struct lsquic_bbr *bbr)
607{
608    return bbr->bbr_recovery_state != BBR_RS_NOT_IN_RECOVERY;
609}
610
611
/* See BbrSender::CheckIfFullBandwidthReached() */
/* Determine whether STARTUP has stopped discovering new bandwidth: if the
 * bandwidth estimate fails to grow by kStartupGrowthTarget for
 * bbr_num_startup_rtts consecutive rounds (or, optionally, upon entering
 * recovery), set BBR_FLAG_IS_AT_FULL_BANDWIDTH.
 */
static void
check_if_full_bw_reached (struct lsquic_bbr *bbr)
{
    struct bandwidth target, bw;

    /* An app-limited sample cannot prove that growth has stalled. */
    if (bbr->bbr_flags & BBR_FLAG_LAST_SAMPLE_APP_LIMITED)
    {
        LSQ_DEBUG("last sample app limited: full BW not reached");
        return;
    }

    target = BW_TIMES(&bbr->bbr_bw_at_last_round, kStartupGrowthTarget);
    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
    if (BW_VALUE(&bw) >= BW_VALUE(&target))
    {
        /* Still growing: reset the no-growth counter and rebase the target. */
        bbr->bbr_bw_at_last_round = bw;
        bbr->bbr_round_wo_bw_gain = 0;
        if (bbr->bbr_flags & BBR_FLAG_EXPIRE_ACK_AGG_IN_STARTUP)
            // Expire old excess delivery measurements now that bandwidth
            // increased.
            minmax_reset(&bbr->bbr_max_ack_height,
                        ((struct minmax_sample) { bbr->bbr_round_count, 0, }));
        LSQ_DEBUG("BW estimate %"PRIu64"bps greater than or equal to target "
            "%"PRIu64"bps: full BW not reached",
            BW_VALUE(&bw), BW_VALUE(&target));
        return;
    }

    ++bbr->bbr_round_wo_bw_gain;
    if ((bbr->bbr_round_wo_bw_gain >= bbr->bbr_num_startup_rtts)
            || ((bbr->bbr_flags & BBR_FLAG_EXIT_STARTUP_ON_LOSS)
                                                    && in_recovery(bbr)))
    {
        assert(bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED);  /* DCHECK */
        bbr->bbr_flags |= BBR_FLAG_IS_AT_FULL_BANDWIDTH;
        LSQ_DEBUG("reached full BW");
    }
    else
        LSQ_DEBUG("rounds w/o gain: %u, full BW not reached",
                                                bbr->bbr_round_wo_bw_gain);
}
654
655
656/* See BbrSender::OnExitStartup */
657static void
658on_exit_startup (struct lsquic_bbr *bbr, lsquic_time_t now)
659{
660    assert(bbr->bbr_mode == BBR_MODE_STARTUP);
661    /* Apparently this method is just to update stats, something that we
662     * don't do yet.
663     */
664}
665
666
/* See BbrSender::EnterProbeBandwidthMode */
/* Switch to PROBE_BW: set the CWND gain, pick a random starting phase of
 * the pacing gain cycle, and apply that phase's pacing gain.
 */
static void
enter_probe_bw_mode (struct lsquic_bbr *bbr, lsquic_time_t now)
{
    uint8_t rand;

    set_mode(bbr, BBR_MODE_PROBE_BW);
    bbr->bbr_cwnd_gain = kCwndGain;

    // Pick a random offset for the gain cycle out of {0, 2..7} range. 1 is
    // excluded because in that case increased gain and decreased gain would not
    // follow each other.
    rand = lsquic_crand_get_byte(bbr->bbr_conn_pub->enpub->enp_crand);
    /* rand % (len - 1) yields 0..6; bumping values >= 1 maps that onto
     * {0, 2..7}, skipping the drain phase at offset 1.
     */
    bbr->bbr_cycle_current_offset = rand % (kGainCycleLength - 1);
    if (bbr->bbr_cycle_current_offset >= 1)
        ++bbr->bbr_cycle_current_offset;

    bbr->bbr_last_cycle_start = now;
    bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset];
}
687
688
689/* See BbrSender::EnterStartupMode */
690static void
691enter_startup_mode (struct lsquic_bbr *bbr, lsquic_time_t now)
692{
693    set_mode(bbr, BBR_MODE_STARTUP);
694    set_startup_values(bbr);
695}
696
697
/* See  BbrSender::MaybeExitStartupOrDrain() */
/* Transition STARTUP -> DRAIN once full bandwidth is reached, and
 * DRAIN -> PROBE_BW once the in-flight data has drained down to the
 * estimated BDP.  Both transitions can happen within one call.
 */
static void
maybe_exit_startup_or_drain (struct lsquic_bbr *bbr, lsquic_time_t now,
                                                    uint64_t bytes_in_flight)
{
    uint64_t target_cwnd;

    if (bbr->bbr_mode == BBR_MODE_STARTUP
                        && (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
    {
        on_exit_startup(bbr, now);
        set_mode(bbr, BBR_MODE_DRAIN);
        /* Sub-unity pacing gain drains the queue built up during STARTUP,
         * while the CWND gain stays high.
         */
        bbr->bbr_pacing_gain = bbr->bbr_drain_gain;
        bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain;
    }

    if (bbr->bbr_mode == BBR_MODE_DRAIN)
    {
        target_cwnd = get_target_cwnd(bbr, 1);
        LSQ_DEBUG("%s: bytes in flight: %"PRIu64"; target cwnd: %"PRIu64,
                                    __func__, bytes_in_flight, target_cwnd);
        if (bytes_in_flight <= target_cwnd)
            enter_probe_bw_mode(bbr, now);
    }
}
723
724
725static int
726in_slow_start (const struct lsquic_bbr *bbr)
727{
728    return bbr->bbr_mode == BBR_MODE_STARTUP;
729}
730
731
732/* See QuicByteCount BbrSender::ProbeRttCongestionWindow() */
733static uint64_t
734get_probe_rtt_cwnd (const struct lsquic_bbr *bbr)
735{
736    if (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_BASED_ON_BDP)
737        return get_target_cwnd(bbr, kModerateProbeRttMultiplier);
738    else
739        return bbr->bbr_min_cwnd;
740}
741
742
743static uint64_t
744lsquic_bbr_get_cwnd (void *cong_ctl)
745{
746    struct lsquic_bbr *const bbr = cong_ctl;
747    uint64_t cwnd;
748
749    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
750        cwnd = get_probe_rtt_cwnd(bbr);
751    else if (in_recovery(bbr) &&
752                !((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP)
753                                    && bbr->bbr_mode == BBR_MODE_STARTUP))
754        cwnd = MIN(bbr->bbr_cwnd, bbr->bbr_recovery_window);
755    else
756        cwnd = bbr->bbr_cwnd;
757
758    return cwnd;
759}
760
761
/* See BbrSender::MaybeEnterOrExitProbeRtt */
/* Enter PROBE_RTT when the min_rtt estimate has expired; while in
 * PROBE_RTT, wait for in-flight data to shrink to the probe window, hold
 * there for kProbeRttTime plus a full round, then exit to STARTUP or
 * PROBE_BW depending on whether full bandwidth was reached.
 */
static void
maybe_enter_or_exit_probe_rtt (struct lsquic_bbr *bbr, lsquic_time_t now,
            int is_round_start, int min_rtt_expired, uint64_t bytes_in_flight)
{
    if (min_rtt_expired
            && !(bbr->bbr_flags & BBR_FLAG_EXITING_QUIESCENCE)
                && bbr->bbr_mode != BBR_MODE_PROBE_RTT)
    {
        if (in_slow_start(bbr))
            on_exit_startup(bbr, now);
        set_mode(bbr, BBR_MODE_PROBE_RTT);
        bbr->bbr_pacing_gain = 1;
        // Do not decide on the time to exit PROBE_RTT until the
        // |bytes_in_flight| is at the target small value.
        bbr->bbr_exit_probe_rtt_at = 0;
    }

    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
    {
        lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler);
        LSQ_DEBUG("%s: exit probe at: %"PRIu64"; now: %"PRIu64
            "; round start: %d; round passed: %d; rtt: %"PRIu64" usec",
            __func__, bbr->bbr_exit_probe_rtt_at, now, is_round_start,
            !!(bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED),
            lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats));
        if (bbr->bbr_exit_probe_rtt_at == 0)
        {
            // If the window has reached the appropriate size, schedule exiting
            // PROBE_RTT.  The CWND during PROBE_RTT is
            // kMinimumCongestionWindow, but we allow an extra packet since QUIC
            // checks CWND before sending a packet.
            if (bytes_in_flight
                        < get_probe_rtt_cwnd(bbr) + kMaxOutgoingPacketSize)
            {
                bbr->bbr_exit_probe_rtt_at = now + kProbeRttTime;
                bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED;
            }
        }
        else
        {
            if (is_round_start)
                bbr->bbr_flags |= BBR_FLAG_PROBE_RTT_ROUND_PASSED;
            /* Exit only after both the timer elapsed and a round passed. */
            if (now >= bbr->bbr_exit_probe_rtt_at
                        && (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED))
            {
                bbr->bbr_min_rtt_timestamp = now;
                if (!(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
                    enter_startup_mode(bbr, now);
                else
                    enter_probe_bw_mode(bbr, now);
            }
        }
    }

    bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE;
}
819
820
/* See BbrSender::CalculatePacingRate */
/* Recompute bbr_pacing_rate from the bandwidth estimate and pacing gain.
 * The policies are evaluated in priority order: full-bandwidth rate,
 * initial-window rate, slower-startup rate after loss, startup rate
 * reduction, and finally never decreasing the rate during startup.
 */
static void
calculate_pacing_rate (struct lsquic_bbr *bbr)
{
    struct bandwidth bw, target_rate;

    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
    /* No bandwidth estimate yet: keep whatever rate is in effect. */
    if (BW_IS_ZERO(&bw))
        return;

    LSQ_DEBUG("BW estimate: %"PRIu64, BW_VALUE(&bw));

    target_rate = BW_TIMES(&bw, bbr->bbr_pacing_gain);
    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
    {
        bbr->bbr_pacing_rate = target_rate;
        return;
    }

    // Pace at the rate of initial_window / RTT as soon as RTT measurements are
    // available.
    if (BW_IS_ZERO(&bbr->bbr_pacing_rate)
            && 0 != lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats))
    {
        bbr->bbr_pacing_rate = BW_FROM_BYTES_AND_DELTA(
            bbr->bbr_init_cwnd,
            lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats));
        return;
    }

    // Slow the pacing rate in STARTUP once loss has ever been detected.
    /* A nonzero bbr_end_recovery_at means recovery was entered at least
     * once, i.e. a loss has been detected at some point.
     */
    const int has_ever_detected_loss = bbr->bbr_end_recovery_at != 0;
    if (has_ever_detected_loss
            && (bbr->bbr_flags & (BBR_FLAG_SLOWER_STARTUP
                                  |BBR_FLAG_HAS_NON_APP_LIMITED))
                == (BBR_FLAG_SLOWER_STARTUP|BBR_FLAG_HAS_NON_APP_LIMITED))
    {
        bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupAfterLossGain);
        return;
    }

    // Slow the pacing rate in STARTUP by the bytes_lost / CWND.
    /* Note: startup_rate_reduction_multiplier_ is a compile-time constant
     * (currently 0), so this branch is disabled in this build.
     */
    if (startup_rate_reduction_multiplier_ != 0
            && has_ever_detected_loss
                && (bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED))
    {
        bbr->bbr_pacing_rate = BW_TIMES(&target_rate,
            (1 - (bbr->bbr_startup_bytes_lost
                    * startup_rate_reduction_multiplier_ * 1.0f
                        / bbr->bbr_cwnd_gain)));
        // Ensure the pacing rate doesn't drop below the startup growth target
        // times  the bandwidth estimate.
        if (BW_VALUE(&bbr->bbr_pacing_rate)
                                        < BW_VALUE(&bw) * kStartupGrowthTarget)
            bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupGrowthTarget);
        return;
    }

    // Do not decrease the pacing rate during startup.
    if (BW_VALUE(&bbr->bbr_pacing_rate) < BW_VALUE(&target_rate))
        bbr->bbr_pacing_rate = target_rate;
}
883
884
885/* See BbrSender::CalculateCongestionWindow */
886static void
887calculate_cwnd (struct lsquic_bbr *bbr, uint64_t bytes_acked,
888                                                  uint64_t excess_acked)
889{
890    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
891        return;
892
893    uint64_t target_window = get_target_cwnd(bbr, bbr->bbr_cwnd_gain);
894    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
895        // Add the max recently measured ack aggregation to CWND.
896        target_window += minmax_get(&bbr->bbr_max_ack_height);
897    else if (bbr->bbr_flags & BBR_FLAG_ENABLE_ACK_AGG_IN_STARTUP)
898        // Add the most recent excess acked.  Because CWND never decreases in
899        // STARTUP, this will automatically create a very localized max filter.
900        target_window += excess_acked;
901
902    // Instead of immediately setting the target CWND as the new one, BBR grows
903    // the CWND towards |target_window| by only increasing it |bytes_acked| at a
904    // time.
905    const int add_bytes_acked =
906	!FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery || !in_recovery(bbr);
907    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
908        bbr->bbr_cwnd = MIN(target_window, bbr->bbr_cwnd + bytes_acked);
909    else if (add_bytes_acked &&
910             (bbr->bbr_cwnd_gain < target_window ||
911              lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler)
912                                                        < bbr->bbr_init_cwnd))
913        // If the connection is not yet out of startup phase, do not decrease
914        // the window.
915        bbr->bbr_cwnd += bytes_acked;
916
917    // Enforce the limits on the congestion window.
918    if (bbr->bbr_cwnd < bbr->bbr_min_cwnd)
919        bbr->bbr_cwnd = bbr->bbr_min_cwnd;
920    else if (bbr->bbr_cwnd > bbr->bbr_max_cwnd)
921    {
922        LSQ_DEBUG("exceed max cwnd");
923        bbr->bbr_cwnd = bbr->bbr_max_cwnd;
924    }
925}
926
927
928/* See BbrSender::CalculateRecoveryWindow */
929static void
930calculate_recovery_window (struct lsquic_bbr *bbr, uint64_t bytes_acked,
931                                uint64_t bytes_lost, uint64_t bytes_in_flight)
932{
933    if ((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP)
934                                        && bbr->bbr_mode == BBR_MODE_STARTUP)
935        return;
936
937    if (bbr->bbr_recovery_state == BBR_RS_NOT_IN_RECOVERY)
938        return;
939
940    // Set up the initial recovery window.
941    if (bbr->bbr_recovery_window == 0)
942    {
943        bbr->bbr_recovery_window = bytes_in_flight + bytes_acked;
944        bbr->bbr_recovery_window = MAX(bbr->bbr_min_cwnd,
945                                                    bbr->bbr_recovery_window);
946        return;
947    }
948
949    // Remove losses from the recovery window, while accounting for a potential
950    // integer underflow.
951    if (bbr->bbr_recovery_window >= bytes_lost)
952        bbr->bbr_recovery_window -= bytes_lost;
953    else
954        bbr->bbr_recovery_window = kMaxSegmentSize;
955
956    // In CONSERVATION mode, just subtracting losses is sufficient.  In GROWTH,
957    // release additional |bytes_acked| to achieve a slow-start-like behavior.
958    if (bbr->bbr_recovery_state == BBR_RS_GROWTH)
959        bbr->bbr_recovery_window += bytes_acked;
960
961    // Sanity checks.  Ensure that we always allow to send at least an MSS or
962    // |bytes_acked| in response, whichever is larger.
963    bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window,
964                                            bytes_in_flight + bytes_acked);
965    if (FLAG_quic_bbr_one_mss_conservation)
966        bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window,
967                                            bytes_in_flight + kMaxSegmentSize);
968    bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, bbr->bbr_min_cwnd);
969}
970
971
972static void
973lsquic_bbr_end_ack (void *cong_ctl, uint64_t in_flight)
974{
975    struct lsquic_bbr *const bbr = cong_ctl;
976    int is_round_start, min_rtt_expired;
977    uint64_t bytes_acked, excess_acked, bytes_lost;
978
979    assert(bbr->bbr_flags & BBR_FLAG_IN_ACK);
980    bbr->bbr_flags &= ~BBR_FLAG_IN_ACK;
981
982    LSQ_DEBUG("end_ack; mode: %s; in_flight: %"PRIu64, mode2str[bbr->bbr_mode],
983                                                                    in_flight);
984
985    bytes_acked = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler)
986                            - bbr->bbr_ack_state.total_bytes_acked_before;
987    if (bbr->bbr_ack_state.acked_bytes)
988    {
989        is_round_start = bbr->bbr_ack_state.max_packno
990                                    > bbr->bbr_current_round_trip_end
991                    || !is_valid_packno(bbr->bbr_current_round_trip_end);
992        if (is_round_start)
993        {
994            ++bbr->bbr_round_count;
995            bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno;
996            LSQ_DEBUG("up round count to %"PRIu64"; new rt end: %"PRIu64,
997                        bbr->bbr_round_count, bbr->bbr_current_round_trip_end);
998        }
999        min_rtt_expired = update_bandwidth_and_min_rtt(bbr);
1000        update_recovery_state(bbr, is_round_start);
1001        excess_acked = update_ack_aggregation_bytes(bbr, bytes_acked);
1002    }
1003    else
1004    {
1005        is_round_start = 0;
1006        min_rtt_expired = 0;
1007        excess_acked = 0;
1008    }
1009
1010    if (bbr->bbr_mode == BBR_MODE_PROBE_BW)
1011        update_gain_cycle_phase(bbr, in_flight);
1012
1013    if (is_round_start && !(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
1014        check_if_full_bw_reached(bbr);
1015
1016    maybe_exit_startup_or_drain(bbr, bbr->bbr_ack_state.ack_time, in_flight);
1017
1018    maybe_enter_or_exit_probe_rtt(bbr, bbr->bbr_ack_state.ack_time,
1019                                is_round_start, min_rtt_expired, in_flight);
1020
1021    // Calculate number of packets acked and lost.
1022    bytes_lost = bbr->bbr_ack_state.lost_bytes;
1023
1024    // After the model is updated, recalculate the pacing rate and congestion
1025    // window.
1026    calculate_pacing_rate(bbr);
1027    calculate_cwnd(bbr, bytes_acked, excess_acked);
1028    calculate_recovery_window(bbr, bytes_acked, bytes_lost, in_flight);
1029
1030    /* We don't need to clean up BW sampler */
1031}
1032
1033
/* Release resources held by the BBR instance.  Only the bandwidth sampler
 * owns dynamic state; the lsquic_bbr struct itself is freed by the caller.
 */
static void
lsquic_bbr_cleanup (void *cong_ctl)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    lsquic_bw_sampler_cleanup(&bbr->bbr_bw_sampler);
    LSQ_DEBUG("cleanup");
}
1042
1043
/* BBR reacts to loss via the recovery-window logic in end_ack, so the
 * generic loss callback is intentionally a no-op. */
static void
lsquic_bbr_loss (void *cong_ctl) {   /* Noop */   }
1046
1047
/* Unlike loss-based controllers, BBR does not collapse its window on a
 * retransmission timeout, so this callback is intentionally a no-op. */
static void
lsquic_bbr_timeout (void *cong_ctl) {   /* Noop */   }
1050
1051
/* Congestion-control vtable exported to the engine; the engine invokes BBR
 * exclusively through these callbacks. */
const struct cong_ctl_if lsquic_cong_bbr_if =
{
    .cci_ack           = lsquic_bbr_ack,
    .cci_begin_ack     = lsquic_bbr_begin_ack,
    .cci_end_ack       = lsquic_bbr_end_ack,
    .cci_cleanup       = lsquic_bbr_cleanup,
    .cci_get_cwnd      = lsquic_bbr_get_cwnd,
    .cci_init          = lsquic_bbr_init,
    .cci_pacing_rate   = lsquic_bbr_pacing_rate,
    .cci_loss          = lsquic_bbr_loss,
    .cci_lost          = lsquic_bbr_lost,
    .cci_timeout       = lsquic_bbr_timeout,
    .cci_sent          = lsquic_bbr_sent,
    .cci_was_quiet     = lsquic_bbr_was_quiet,
};
1067