lsquic_bbr.c revision cca25415
1/* Copyright (c) 2017 - 2019 LiteSpeed Technologies Inc.  See LICENSE. */
2// Copyright 2016 The Chromium Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE.chrome file.
5
6#include <assert.h>
7#include <inttypes.h>
8#include <stdint.h>
9#include <stdlib.h>
10#include <string.h>
11#include <sys/queue.h>
12
13#include "lsquic.h"
14#include "lsquic_int_types.h"
15#include "lsquic_cong_ctl.h"
16#include "lsquic_minmax.h"
17#include "lsquic_packet_common.h"
18#include "lsquic_packet_out.h"
19#include "lsquic_bw_sampler.h"
20#include "lsquic_bbr.h"
21#include "lsquic_hash.h"
22#include "lsquic_conn.h"
23#include "lsquic_sfcw.h"
24#include "lsquic_conn_flow.h"
25#include "lsquic_varint.h"
26#include "lsquic_hq.h"
27#include "lsquic_stream.h"
28#include "lsquic_rtt.h"
29#include "lsquic_conn_public.h"
30#include "lsquic_util.h"
31#include "lsquic_malo.h"
32
33#define LSQUIC_LOGGER_MODULE LSQLM_BBR
34#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(bbr->bbr_conn)
35#include "lsquic_logger.h"
36
37#define MIN(a, b) ((a) < (b) ? (a) : (b))
38#define MAX(a, b) ((a) > (b) ? (a) : (b))
39
40#define ms(val_) ((val_) * 1000)
41#define sec(val_) ((val_) * 1000 * 1000)
42
43// Default maximum packet size used in the Linux TCP implementation.
44// Used in QUIC for congestion window computations in bytes.
45#define kDefaultTCPMSS 1460
46#define kMaxSegmentSize kDefaultTCPMSS
47
48// Constants based on TCP defaults.
49// The minimum CWND to ensure delayed acks don't reduce bandwidth measurements.
50// Does not inflate the pacing rate.
51#define kDefaultMinimumCongestionWindow  (4 * kDefaultTCPMSS)
52
53// The gain used for the STARTUP, equal to 2/ln(2).
54#define kDefaultHighGain 2.885f
55
56// The newly derived gain for STARTUP, equal to 4 * ln(2)
57#define kDerivedHighGain 2.773f
58
59// The newly derived CWND gain for STARTUP, 2.
60#define kDerivedHighCWNDGain 2.0f
61
62// The gain used in STARTUP after loss has been detected.
63// 1.5 is enough to allow for 25% exogenous loss and still observe a 25% growth
64// in measured bandwidth.
65#define kStartupAfterLossGain 1.5f
66
67// We match SPDY's use of 32 (since we'd compete with SPDY).
68#define kInitialCongestionWindow 32
69
70/* Taken from send_algorithm_interface.h */
71#define kDefaultMaxCongestionWindowPackets 2000
72
73// The time after which the current min_rtt value expires.
74#define kMinRttExpiry sec(10)
75
76// Coefficient to determine if a new RTT is sufficiently similar to min_rtt that
77// we don't need to enter PROBE_RTT.
78#define kSimilarMinRttThreshold 1.125f
79
80// If the bandwidth does not increase by the factor of |kStartupGrowthTarget|
81// within |kRoundTripsWithoutGrowthBeforeExitingStartup| rounds, the connection
82// will exit the STARTUP mode.
83#define kStartupGrowthTarget 1.25
84
85#define kRoundTripsWithoutGrowthBeforeExitingStartup 3
86
87#define startup_rate_reduction_multiplier_ 0
88
89// The cycle of gains used during the PROBE_BW stage.
90static const float kPacingGain[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1};
91
92// The length of the gain cycle.
93static const size_t kGainCycleLength = sizeof(kPacingGain)
94                                                    / sizeof(kPacingGain[0]);
95
96// Coefficient of target congestion window to use when basing PROBE_RTT on BDP.
97#define kModerateProbeRttMultiplier 0.75
98
99// The maximum packet size of any QUIC packet over IPv6, based on ethernet's max
100// size, minus the IP and UDP headers. IPv6 has a 40 byte header, UDP adds an
101// additional 8 bytes.  This is a total overhead of 48 bytes.  Ethernet's
102// max packet size is 1500 bytes,  1500 - 48 = 1452.
103#define kMaxV6PacketSize 1452
104// The maximum packet size of any QUIC packet over IPv4.
105// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472.
106#define kMaxV4PacketSize 1472
107// The maximum incoming packet size allowed.
108#define kMaxIncomingPacketSize kMaxV4PacketSize
109// The maximum outgoing packet size allowed.
110#define kMaxOutgoingPacketSize kMaxV6PacketSize
111
112// The minimum time the connection can spend in PROBE_RTT mode.
113#define kProbeRttTime ms(200)
114
115/* FLAG* are from net/quic/quic_flags_list.h */
116
117// When in STARTUP and recovery, do not add bytes_acked to QUIC BBR's CWND in
118// CalculateCongestionWindow()
119#define FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery 0
120
121// When true, ensure BBR allows at least one MSS to be sent in response to an
122// ACK in packet conservation.
123#define FLAG_quic_bbr_one_mss_conservation 0
124
125/* From net/quic/quic_flags_list.h */
126#define kCwndGain 2.0
127
128
129static uint64_t lsquic_bbr_get_cwnd (void *);
130
131
132static const char *const mode2str[] =
133{
134    [BBR_MODE_STARTUP]   = "STARTUP",
135    [BBR_MODE_DRAIN]     = "DRAIN",
136    [BBR_MODE_PROBE_BW]  = "PROBE_BW",
137    [BBR_MODE_PROBE_RTT] = "PROBE_RTT",
138};
139
140
141static void
142set_mode (struct lsquic_bbr *bbr, enum bbr_mode mode)
143{
144    if (bbr->bbr_mode != mode)
145    {
146        LSQ_DEBUG("mode change %s -> %s", mode2str[bbr->bbr_mode],
147                                                        mode2str[mode]);
148        bbr->bbr_mode = mode;
149    }
150    else
151        LSQ_DEBUG("mode remains %s", mode2str[mode]);
152}
153
154
155static void
156set_startup_values (struct lsquic_bbr *bbr)
157{
158    bbr->bbr_pacing_gain = bbr->bbr_high_gain;
159    bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain;
160}
161
162
163static void
164lsquic_bbr_init (void *cong_ctl, const struct lsquic_conn_public *conn_pub,
165                                                enum quic_ft_bit retx_frames)
166{
167    struct lsquic_bbr *const bbr = cong_ctl;
168    bbr->bbr_conn = conn_pub->lconn;
169    lsquic_bw_sampler_init(&bbr->bbr_bw_sampler, bbr->bbr_conn, retx_frames);
170
171    bbr->bbr_rtt_stats = &conn_pub->rtt_stats;
172    bbr->bbr_mode = BBR_MODE_STARTUP;
173    bbr->bbr_round_count = 0;
174    minmax_init(&bbr->bbr_max_bandwidth, 10);
175    minmax_init(&bbr->bbr_max_ack_height, 10);
176    bbr->bbr_aggregation_epoch_bytes = 0;
177    bbr->bbr_aggregation_epoch_start_time = 0;
178    bbr->bbr_min_rtt = 0;
179    bbr->bbr_min_rtt_timestamp = 0;
180    bbr->bbr_init_cwnd = kInitialCongestionWindow * kDefaultTCPMSS;
181    bbr->bbr_cwnd = kInitialCongestionWindow * kDefaultTCPMSS;
182    bbr->bbr_max_cwnd = kDefaultMaxCongestionWindowPackets * kDefaultTCPMSS;
183    bbr->bbr_min_cwnd = kDefaultMinimumCongestionWindow;
184    bbr->bbr_high_gain = kDefaultHighGain;
185    bbr->bbr_high_cwnd_gain = kDefaultHighGain;
186    bbr->bbr_drain_gain = 1.0 / kDefaultHighGain;
187    bbr->bbr_pacing_rate = BW_ZERO();
188    bbr->bbr_pacing_gain = 1.0;
189    bbr->bbr_cwnd_gain = 1.0;
190    bbr->bbr_num_startup_rtts = kRoundTripsWithoutGrowthBeforeExitingStartup;
191    bbr->bbr_flags &= ~BBR_FLAG_EXIT_STARTUP_ON_LOSS;
192    bbr->bbr_cycle_current_offset = 0;
193    bbr->bbr_last_cycle_start = 0;
194    bbr->bbr_flags &= ~BBR_FLAG_IS_AT_FULL_BANDWIDTH;
195    bbr->bbr_round_wo_bw_gain = 0;
196    bbr->bbr_bw_at_last_round = BW_ZERO();
197    bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE;
198    bbr->bbr_exit_probe_rtt_at = 0;
199    bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED;
200    bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
201    bbr->bbr_flags &= ~BBR_FLAG_HAS_NON_APP_LIMITED;
202    bbr->bbr_flags &= ~BBR_FLAG_FLEXIBLE_APP_LIMITED;
203
204    set_startup_values(bbr);
205
206    LSQ_DEBUG("initialized");
207}
208
209
210static lsquic_time_t
211get_min_rtt (const struct lsquic_bbr *bbr)
212{
213    lsquic_time_t min_rtt;
214
215    if (bbr->bbr_min_rtt)
216        return bbr->bbr_min_rtt;
217    else
218    {
219        min_rtt = lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats);
220        if (min_rtt == 0)
221            min_rtt = 25000;
222        return min_rtt;
223    }
224}
225
226
227static uint64_t
228lsquic_bbr_pacing_rate (void *cong_ctl, int in_recovery)
229{
230    struct lsquic_bbr *const bbr = cong_ctl;
231    lsquic_time_t min_rtt;
232    struct bandwidth bw;
233
234    if (!BW_IS_ZERO(&bbr->bbr_pacing_rate))
235        bw = bbr->bbr_pacing_rate;
236    else
237    {
238        min_rtt = get_min_rtt(bbr);
239        bw = BW_FROM_BYTES_AND_DELTA(bbr->bbr_init_cwnd, min_rtt);
240        bw = BW_TIMES(&bw, bbr->bbr_high_cwnd_gain);
241    }
242
243    return BW_TO_BYTES_PER_SEC(&bw);
244}
245
246
247/* BbrSender::GetTargetCongestionWindow */
248static uint64_t
249get_target_cwnd (const struct lsquic_bbr *bbr, float gain)
250{
251    struct bandwidth bw;
252    uint64_t bdp, cwnd;
253
254    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
255    bdp = get_min_rtt(bbr) * BW_TO_BYTES_PER_SEC(&bw) / 1000000;
256    cwnd = gain * bdp;
257
258    // BDP estimate will be zero if no bandwidth samples are available yet.
259    if (cwnd == 0)
260        cwnd = gain * bbr->bbr_init_cwnd;
261
262    return MAX(cwnd, bbr->bbr_min_cwnd);
263}
264
265
266/* See BbrSender::IsPipeSufficientlyFull */
267static int
268is_pipe_sufficiently_full (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
269{
270    // See if we need more bytes in flight to see more bandwidth.
271    if (bbr->bbr_mode == BBR_MODE_STARTUP)
272        // STARTUP exits if it doesn't observe a 25% bandwidth increase, so
273        // the CWND must be more than 25% above the target.
274        return bytes_in_flight >= get_target_cwnd(bbr, 1.5);
275    else if (bbr->bbr_pacing_gain > 1)
276        // Super-unity PROBE_BW doesn't exit until 1.25 * BDP is achieved.
277        return bytes_in_flight >= get_target_cwnd(bbr, bbr->bbr_pacing_gain);
278    else
279        // If bytes_in_flight are above the target congestion window, it should
280        // be possible to observe the same or more bandwidth if it's available.
281        return bytes_in_flight >= get_target_cwnd(bbr, 1.1);
282}
283
284
285static void
286lsquic_bbr_was_quiet (void *cong_ctl, lsquic_time_t now, uint64_t in_flight)
287{
288    struct lsquic_bbr *const bbr = cong_ctl;
289    LSQ_DEBUG("was quiet");         /* Do nothing */
290}
291
292
293/* See BbrSender::OnApplicationLimited */
294static void
295bbr_app_limited (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
296{
297    uint64_t cwnd;
298
299    cwnd = lsquic_bbr_get_cwnd(bbr);
300    if (bytes_in_flight >= cwnd)
301        return;
302    if ((bbr->bbr_flags & BBR_FLAG_FLEXIBLE_APP_LIMITED)
303                            && is_pipe_sufficiently_full(bbr, bytes_in_flight))
304        return;
305
306    bbr->bbr_flags |= BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT;
307    lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler);
308    LSQ_DEBUG("becoming application-limited.  Last sent packet: %"PRIu64"; "
309                            "CWND: %"PRIu64, bbr->bbr_last_sent_packno, cwnd);
310}
311
312
313static void
314lsquic_bbr_ack (void *cong_ctl, struct lsquic_packet_out *packet_out,
315                  unsigned packet_sz, lsquic_time_t now_time, int app_limited)
316{
317    struct lsquic_bbr *const bbr = cong_ctl;
318    struct bw_sample *sample;
319
320    assert(bbr->bbr_flags & BBR_FLAG_IN_ACK);
321
322    sample = lsquic_bw_sampler_packet_acked(&bbr->bbr_bw_sampler, packet_out,
323                                                bbr->bbr_ack_state.ack_time);
324    if (sample)
325        TAILQ_INSERT_TAIL(&bbr->bbr_ack_state.samples, sample, next);
326
327    if (is_valid_packno(bbr->bbr_ack_state.max_packno))
328        /* We assume packet numbers are ordered */
329        assert(packet_out->po_packno > bbr->bbr_ack_state.max_packno);
330    bbr->bbr_ack_state.max_packno = packet_out->po_packno;
331    bbr->bbr_ack_state.acked_bytes += packet_sz;
332}
333
334
335static void
336lsquic_bbr_sent (void *cong_ctl, struct lsquic_packet_out *packet_out,
337                                        uint64_t in_flight, int app_limited)
338{
339    struct lsquic_bbr *const bbr = cong_ctl;
340
341    if (!(packet_out->po_flags & PO_MINI))
342        lsquic_bw_sampler_packet_sent(&bbr->bbr_bw_sampler, packet_out,
343                                                                in_flight);
344
345    /* Obviously we make an assumption that sent packet number are always
346     * increasing.
347     */
348    bbr->bbr_last_sent_packno = packet_out->po_packno;
349
350    if (app_limited)
351        bbr_app_limited(bbr, in_flight);
352}
353
354
355static void
356lsquic_bbr_lost (void *cong_ctl, struct lsquic_packet_out *packet_out,
357                                                        unsigned packet_sz)
358{
359    struct lsquic_bbr *const bbr = cong_ctl;
360
361    lsquic_bw_sampler_packet_lost(&bbr->bbr_bw_sampler, packet_out);
362    bbr->bbr_ack_state.has_losses = 1;
363    bbr->bbr_ack_state.lost_bytes += packet_sz;
364}
365
366
367static void
368lsquic_bbr_begin_ack (void *cong_ctl, lsquic_time_t ack_time, uint64_t in_flight)
369{
370    struct lsquic_bbr *const bbr = cong_ctl;
371
372    assert(!(bbr->bbr_flags & BBR_FLAG_IN_ACK));
373    bbr->bbr_flags |= BBR_FLAG_IN_ACK;
374    memset(&bbr->bbr_ack_state, 0, sizeof(bbr->bbr_ack_state));
375    TAILQ_INIT(&bbr->bbr_ack_state.samples);
376    bbr->bbr_ack_state.ack_time = ack_time;
377    bbr->bbr_ack_state.max_packno = UINT64_MAX;
378    bbr->bbr_ack_state.in_flight = in_flight;
379    bbr->bbr_ack_state.total_bytes_acked_before
380                        = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler);
381}
382
383
384/* Based on BbrSender::ShouldExtendMinRttExpiry() */
385static int
386should_extend_min_rtt_expiry (const struct lsquic_bbr *bbr)
387{
388    int increased_since_last_probe;
389
390    if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
391                          |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED))
392            == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
393               |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED))
394        // Extend the current min_rtt if we've been app limited recently.
395        return 1;
396
397    increased_since_last_probe = bbr->bbr_min_rtt_since_last_probe
398                                > bbr->bbr_min_rtt * kSimilarMinRttThreshold;
399    if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
400                          |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT))
401            == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
402               |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT)
403            && !increased_since_last_probe)
404        // Extend the current min_rtt if we've been app limited recently and an
405        // rtt has been measured in that time that's less than 12.5% more than
406        // the current min_rtt.
407        return 1;
408
409    return 0;
410}
411
412
413/* Based on BbrSender::UpdateBandwidthAndMinRtt */
414/* Returns true if min RTT expired, false otherwise */
415static int
416update_bandwidth_and_min_rtt (struct lsquic_bbr *bbr)
417{
418    struct bw_sample *sample, *next_sample;
419    uint64_t sample_min_rtt;
420    int min_rtt_expired;
421
422    sample_min_rtt = UINT64_MAX;
423    for (sample = TAILQ_FIRST(&bbr->bbr_ack_state.samples); sample;
424                                                        sample = next_sample)
425    {
426        next_sample = TAILQ_NEXT(sample, next);
427
428        if (sample->is_app_limited)
429            bbr->bbr_flags |= BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
430        else
431        {
432            bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
433            bbr->bbr_flags |=  BBR_FLAG_HAS_NON_APP_LIMITED;
434        }
435
436        if (sample_min_rtt == UINT64_MAX || sample->rtt < sample_min_rtt)
437            sample_min_rtt = sample->rtt;
438
439        if (!sample->is_app_limited
440                    || BW_VALUE(&sample->bandwidth)
441                                    > minmax_get(&bbr->bbr_max_bandwidth))
442            minmax_upmax(&bbr->bbr_max_bandwidth, bbr->bbr_round_count,
443                                                BW_VALUE(&sample->bandwidth));
444
445        lsquic_malo_put(sample);
446    }
447
448    if (sample_min_rtt == UINT64_MAX)
449        return 0;
450
451    bbr->bbr_min_rtt_since_last_probe
452                    = MIN(bbr->bbr_min_rtt_since_last_probe, sample_min_rtt);
453
454    min_rtt_expired = bbr->bbr_min_rtt != 0 && (bbr->bbr_ack_state.ack_time
455                                > bbr->bbr_min_rtt_timestamp + kMinRttExpiry);
456    if (min_rtt_expired || sample_min_rtt < bbr->bbr_min_rtt
457                                                    || 0 == bbr->bbr_min_rtt)
458    {
459        if (min_rtt_expired && should_extend_min_rtt_expiry(bbr))
460        {
461            LSQ_DEBUG("min rtt expiration extended, stay at: %"PRIu64,
462                bbr->bbr_min_rtt);
463            min_rtt_expired = 0;
464        }
465        else
466        {
467            LSQ_DEBUG("min rtt updated: %"PRIu64" -> %"PRIu64,
468                bbr->bbr_min_rtt, sample_min_rtt);
469            bbr->bbr_min_rtt = sample_min_rtt;
470        }
471        bbr->bbr_min_rtt_timestamp = bbr->bbr_ack_state.ack_time;
472        bbr->bbr_min_rtt_since_last_probe = UINT64_MAX;
473        bbr->bbr_flags &= ~BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT;
474    }
475
476    return min_rtt_expired;
477}
478
479
480/* Based on BbrSender::UpdateRecoveryState() */
481static void
482update_recovery_state (struct lsquic_bbr *bbr, int is_round_start)
483{
484    // Exit recovery when there are no losses for a round.
485    if (bbr->bbr_ack_state.has_losses)
486        bbr->bbr_end_recovery_at = bbr->bbr_last_sent_packno;
487
488    switch (bbr->bbr_recovery_state)
489    {
490    case BBR_RS_NOT_IN_RECOVERY:
491        // Enter conservation on the first loss.
492        if (bbr->bbr_ack_state.has_losses)
493        {
494            bbr->bbr_recovery_state = BBR_RS_CONSERVATION;
495            // This will cause the |bbr_recovery_window| to be set to the
496            // correct value in CalculateRecoveryWindow().
497            bbr->bbr_recovery_window = 0;
498            // Since the conservation phase is meant to be lasting for a whole
499            // round, extend the current round as if it were started right now.
500            bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno;
501        }
502        break;
503    case BBR_RS_CONSERVATION:
504        if (is_round_start)
505            bbr->bbr_recovery_state = BBR_RS_GROWTH;
506        /* Fall-through */
507    case BBR_RS_GROWTH:
508        // Exit recovery if appropriate.
509        if (!bbr->bbr_ack_state.has_losses
510                && bbr->bbr_ack_state.max_packno > bbr->bbr_end_recovery_at)
511            bbr->bbr_recovery_state = BBR_RS_NOT_IN_RECOVERY;
512        break;
513    }
514}
515
516
517static uint64_t
518update_ack_aggregation_bytes (struct lsquic_bbr *bbr,
519                                                uint64_t newly_acked_bytes)
520{
521    const lsquic_time_t ack_time = bbr->bbr_ack_state.ack_time;
522    uint64_t expected_bytes_acked, diff;
523
524    // Compute how many bytes are expected to be delivered, assuming max
525    // bandwidth is correct.
526    expected_bytes_acked = minmax_get(&bbr->bbr_max_bandwidth)
527                        * (ack_time - bbr->bbr_aggregation_epoch_start_time);
528
529    // Reset the current aggregation epoch as soon as the ack arrival rate is
530    // less than or equal to the max bandwidth.
531    if (bbr->bbr_aggregation_epoch_bytes <= expected_bytes_acked)
532    {
533        // Reset to start measuring a new aggregation epoch.
534        bbr->bbr_aggregation_epoch_bytes = newly_acked_bytes;
535        bbr->bbr_aggregation_epoch_start_time = ack_time;
536        return 0;
537    }
538
539    // Compute how many extra bytes were delivered vs max bandwidth.
540    // Include the bytes most recently acknowledged to account for stretch acks.
541    bbr->bbr_aggregation_epoch_bytes += newly_acked_bytes;
542    diff = bbr->bbr_aggregation_epoch_bytes - expected_bytes_acked;
543    minmax_upmax(&bbr->bbr_max_ack_height, bbr->bbr_round_count, diff);
544    return diff;
545}
546
547
548/* See BbrSender::UpdateGainCyclePhase() */
549static void
550update_gain_cycle_phase (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
551{
552    const uint64_t prior_in_flight = bbr->bbr_ack_state.in_flight;
553    const lsquic_time_t now = bbr->bbr_ack_state.ack_time;
554    // In most cases, the cycle is advanced after an RTT passes.
555    int should_advance_gain_cycling
556        = now - bbr->bbr_last_cycle_start > get_min_rtt(bbr);
557
558    // If the pacing gain is above 1.0, the connection is trying to probe the
559    // bandwidth by increasing the number of bytes in flight to at least
560    // pacing_gain * BDP.  Make sure that it actually reaches the target, as
561    // long as there are no losses suggesting that the buffers are not able to
562    // hold that much.
563    if (bbr->bbr_pacing_gain > 1.0
564            && !bbr->bbr_ack_state.has_losses
565            && prior_in_flight < get_target_cwnd(bbr, bbr->bbr_pacing_gain))
566        should_advance_gain_cycling = 0;
567
568    /* Several optimizations are possible here: "else if" instead of "if", as
569     * well as not calling get_target_cwnd() if `should_advance_gain_cycling'
570     * is already set to the target value.
571     */
572
573    // If pacing gain is below 1.0, the connection is trying to drain the extra
574    // queue which could have been incurred by probing prior to it.  If the
575    // number of bytes in flight falls down to the estimated BDP value earlier,
576    // conclude that the queue has been successfully drained and exit this cycle
577    // early.
578    if (bbr->bbr_pacing_gain < 1.0
579                                && bytes_in_flight <= get_target_cwnd(bbr, 1))
580        should_advance_gain_cycling = 1;
581
582    if (should_advance_gain_cycling)
583    {
584        bbr->bbr_cycle_current_offset =
585                        (bbr->bbr_cycle_current_offset + 1) % kGainCycleLength;
586        bbr->bbr_last_cycle_start = now;
587        // Stay in low gain mode until the target BDP is hit.  Low gain mode
588        // will be exited immediately when the target BDP is achieved.
589        if ((bbr->bbr_flags & BBR_FLAG_DRAIN_TO_TARGET)
590                && bbr->bbr_pacing_gain < 1
591                && kPacingGain[bbr->bbr_cycle_current_offset] == 1
592                && bytes_in_flight > get_target_cwnd(bbr, 1))
593              return;
594        bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset];
595        LSQ_DEBUG("advanced gain cycle, pacing gain set to %.2f",
596                                                        bbr->bbr_pacing_gain);
597    }
598}
599
600
601/* BbrSender::InRecovery() */
602static int
603in_recovery (const struct lsquic_bbr *bbr)
604{
605    return bbr->bbr_recovery_state != BBR_RS_NOT_IN_RECOVERY;
606}
607
608
609/* See BbrSender::CheckIfFullBandwidthReached() */
610static void
611check_if_full_bw_reached (struct lsquic_bbr *bbr)
612{
613    struct bandwidth target, bw;
614
615    if (bbr->bbr_flags & BBR_FLAG_LAST_SAMPLE_APP_LIMITED)
616    {
617        LSQ_DEBUG("last sample app limited: full BW not reached");
618        return;
619    }
620
621    target = BW_TIMES(&bbr->bbr_bw_at_last_round, kStartupGrowthTarget);
622    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
623    if (BW_VALUE(&bw) >= BW_VALUE(&target))
624    {
625        bbr->bbr_bw_at_last_round = bw;
626        bbr->bbr_round_wo_bw_gain = 0;
627        if (bbr->bbr_flags & BBR_FLAG_EXPIRE_ACK_AGG_IN_STARTUP)
628            // Expire old excess delivery measurements now that bandwidth
629            // increased.
630            minmax_reset(&bbr->bbr_max_ack_height,
631                        ((struct minmax_sample) { bbr->bbr_round_count, 0, }));
632        LSQ_DEBUG("BW estimate %"PRIu64"bps greater than or equal to target "
633            "%"PRIu64"bps: full BW not reached",
634            BW_VALUE(&bw), BW_VALUE(&target));
635        return;
636    }
637
638    ++bbr->bbr_round_wo_bw_gain;
639    if ((bbr->bbr_round_wo_bw_gain >= bbr->bbr_num_startup_rtts)
640            || ((bbr->bbr_flags & BBR_FLAG_EXIT_STARTUP_ON_LOSS)
641                                                    && in_recovery(bbr)))
642    {
643        assert(bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED);  /* DCHECK */
644        bbr->bbr_flags |= BBR_FLAG_IS_AT_FULL_BANDWIDTH;
645        LSQ_DEBUG("reached full BW");
646    }
647    else
648        LSQ_DEBUG("rounds w/o gain: %u, full BW not reached",
649                                                bbr->bbr_round_wo_bw_gain);
650}
651
652
653/* See BbrSender::OnExitStartup */
654static void
655on_exit_startup (struct lsquic_bbr *bbr, lsquic_time_t now)
656{
657    assert(bbr->bbr_mode == BBR_MODE_STARTUP);
658    /* Apparently this method is just to update stats, something that we
659     * don't do yet.
660     */
661}
662
663
664/* See BbrSender::EnterProbeBandwidthMode */
665static void
666enter_probe_bw_mode (struct lsquic_bbr *bbr, lsquic_time_t now)
667{
668    set_mode(bbr, BBR_MODE_PROBE_BW);
669    bbr->bbr_cwnd_gain = kCwndGain;
670
671    // Pick a random offset for the gain cycle out of {0, 2..7} range. 1 is
672    // excluded because in that case increased gain and decreased gain would not
673    // follow each other.
674    bbr->bbr_cycle_current_offset = rand() % (kGainCycleLength - 1);
675    if (bbr->bbr_cycle_current_offset >= 1)
676        ++bbr->bbr_cycle_current_offset;
677
678    bbr->bbr_last_cycle_start = now;
679    bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset];
680}
681
682
683/* See BbrSender::EnterStartupMode */
684static void
685enter_startup_mode (struct lsquic_bbr *bbr, lsquic_time_t now)
686{
687    set_mode(bbr, BBR_MODE_STARTUP);
688    set_startup_values(bbr);
689}
690
691
692/* See  BbrSender::MaybeExitStartupOrDrain() */
693static void
694maybe_exit_startup_or_drain (struct lsquic_bbr *bbr, lsquic_time_t now,
695                                                    uint64_t bytes_in_flight)
696{
697    uint64_t target_cwnd;
698
699    if (bbr->bbr_mode == BBR_MODE_STARTUP
700                        && (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
701    {
702        on_exit_startup(bbr, now);
703        set_mode(bbr, BBR_MODE_DRAIN);
704        bbr->bbr_pacing_gain = bbr->bbr_drain_gain;
705        bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain;
706    }
707
708    if (bbr->bbr_mode == BBR_MODE_DRAIN)
709    {
710        target_cwnd = get_target_cwnd(bbr, 1);
711        LSQ_DEBUG("%s: bytes in flight: %"PRIu64"; target cwnd: %"PRIu64,
712                                    __func__, bytes_in_flight, target_cwnd);
713        if (bytes_in_flight <= target_cwnd)
714            enter_probe_bw_mode(bbr, now);
715    }
716}
717
718
719static int
720in_slow_start (const struct lsquic_bbr *bbr)
721{
722    return bbr->bbr_mode == BBR_MODE_STARTUP;
723}
724
725
726/* See QuicByteCount BbrSender::ProbeRttCongestionWindow() */
727static uint64_t
728get_probe_rtt_cwnd (const struct lsquic_bbr *bbr)
729{
730    if (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_BASED_ON_BDP)
731        return get_target_cwnd(bbr, kModerateProbeRttMultiplier);
732    else
733        return bbr->bbr_min_cwnd;
734}
735
736
737static uint64_t
738lsquic_bbr_get_cwnd (void *cong_ctl)
739{
740    struct lsquic_bbr *const bbr = cong_ctl;
741    uint64_t cwnd;
742
743    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
744        cwnd = get_probe_rtt_cwnd(bbr);
745    else if (in_recovery(bbr) &&
746                !((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP)
747                                    && bbr->bbr_mode == BBR_MODE_STARTUP))
748        cwnd = MIN(bbr->bbr_cwnd, bbr->bbr_recovery_window);
749    else
750        cwnd = bbr->bbr_cwnd;
751
752    return cwnd;
753}
754
755
756/* See BbrSender::MaybeEnterOrExitProbeRtt */
757static void
758maybe_enter_or_exit_probe_rtt (struct lsquic_bbr *bbr, lsquic_time_t now,
759            int is_round_start, int min_rtt_expired, uint64_t bytes_in_flight)
760{
761    if (min_rtt_expired
762            && !(bbr->bbr_flags & BBR_FLAG_EXITING_QUIESCENCE)
763                && bbr->bbr_mode != BBR_MODE_PROBE_RTT)
764    {
765        if (in_slow_start(bbr))
766            on_exit_startup(bbr, now);
767        set_mode(bbr, BBR_MODE_PROBE_RTT);
768        bbr->bbr_pacing_gain = 1;
769        // Do not decide on the time to exit PROBE_RTT until the
770        // |bytes_in_flight| is at the target small value.
771        bbr->bbr_exit_probe_rtt_at = 0;
772    }
773
774    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
775    {
776        lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler);
777        LSQ_DEBUG("%s: exit probe at: %"PRIu64"; now: %"PRIu64
778            "; round start: %d; round passed: %d; rtt: %"PRIu64" usec",
779            __func__, bbr->bbr_exit_probe_rtt_at, now, is_round_start,
780            !!(bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED),
781            lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats));
782        if (bbr->bbr_exit_probe_rtt_at == 0)
783        {
784            // If the window has reached the appropriate size, schedule exiting
785            // PROBE_RTT.  The CWND during PROBE_RTT is
786            // kMinimumCongestionWindow, but we allow an extra packet since QUIC
787            // checks CWND before sending a packet.
788            if (bytes_in_flight
789                        < get_probe_rtt_cwnd(bbr) + kMaxOutgoingPacketSize)
790            {
791                bbr->bbr_exit_probe_rtt_at = now + kProbeRttTime;
792                bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED;
793            }
794        }
795        else
796        {
797            if (is_round_start)
798                bbr->bbr_flags |= BBR_FLAG_PROBE_RTT_ROUND_PASSED;
799            if (now >= bbr->bbr_exit_probe_rtt_at
800                        && (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED))
801            {
802                bbr->bbr_min_rtt_timestamp = now;
803                if (!(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
804                    enter_startup_mode(bbr, now);
805                else
806                    enter_probe_bw_mode(bbr, now);
807            }
808        }
809    }
810
811    bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE;
812}
813
814
815/* See BbrSender::CalculatePacingRate */
816static void
817calculate_pacing_rate (struct lsquic_bbr *bbr)
818{
819    struct bandwidth bw, target_rate;
820
821    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
822    if (BW_IS_ZERO(&bw))
823        return;
824
825    LSQ_DEBUG("BW estimate: %"PRIu64, BW_VALUE(&bw));
826
827    target_rate = BW_TIMES(&bw, bbr->bbr_pacing_gain);
828    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
829    {
830        bbr->bbr_pacing_rate = target_rate;
831        return;
832    }
833
834    // Pace at the rate of initial_window / RTT as soon as RTT measurements are
835    // available.
836    if (BW_IS_ZERO(&bbr->bbr_pacing_rate)
837            && 0 != lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats))
838    {
839        bbr->bbr_pacing_rate = BW_FROM_BYTES_AND_DELTA(
840            bbr->bbr_init_cwnd,
841            lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats));
842        return;
843    }
844
845    // Slow the pacing rate in STARTUP once loss has ever been detected.
846    const int has_ever_detected_loss = bbr->bbr_end_recovery_at != 0;
847    if (has_ever_detected_loss
848            && (bbr->bbr_flags & (BBR_FLAG_SLOWER_STARTUP
849                                  |BBR_FLAG_HAS_NON_APP_LIMITED))
850                == (BBR_FLAG_SLOWER_STARTUP|BBR_FLAG_HAS_NON_APP_LIMITED))
851    {
852        bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupAfterLossGain);
853        return;
854    }
855
856    // Slow the pacing rate in STARTUP by the bytes_lost / CWND.
857    if (startup_rate_reduction_multiplier_ != 0
858            && has_ever_detected_loss
859                && (bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED))
860    {
861        bbr->bbr_pacing_rate = BW_TIMES(&target_rate,
862            (1 - (bbr->bbr_startup_bytes_lost
863                    * startup_rate_reduction_multiplier_ * 1.0f
864                        / bbr->bbr_cwnd_gain)));
865        // Ensure the pacing rate doesn't drop below the startup growth target
866        // times  the bandwidth estimate.
867        if (BW_VALUE(&bbr->bbr_pacing_rate)
868                                        < BW_VALUE(&bw) * kStartupGrowthTarget)
869            bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupGrowthTarget);
870        return;
871    }
872
873    // Do not decrease the pacing rate during startup.
874    if (BW_VALUE(&bbr->bbr_pacing_rate) < BW_VALUE(&target_rate))
875        bbr->bbr_pacing_rate = target_rate;
876}
877
878
879/* See BbrSender::CalculateCongestionWindow */
880static void
881calculate_cwnd (struct lsquic_bbr *bbr, uint64_t bytes_acked,
882                                                  uint64_t excess_acked)
883{
884    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
885        return;
886
887    uint64_t target_window = get_target_cwnd(bbr, bbr->bbr_cwnd_gain);
888    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
889        // Add the max recently measured ack aggregation to CWND.
890        target_window += minmax_get(&bbr->bbr_max_ack_height);
891    else if (bbr->bbr_flags & BBR_FLAG_ENABLE_ACK_AGG_IN_STARTUP)
892        // Add the most recent excess acked.  Because CWND never decreases in
893        // STARTUP, this will automatically create a very localized max filter.
894        target_window += excess_acked;
895
896    // Instead of immediately setting the target CWND as the new one, BBR grows
897    // the CWND towards |target_window| by only increasing it |bytes_acked| at a
898    // time.
899    const int add_bytes_acked =
900	!FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery || !in_recovery(bbr);
901    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
902        bbr->bbr_cwnd = MIN(target_window, bbr->bbr_cwnd + bytes_acked);
903    else if (add_bytes_acked &&
904             (bbr->bbr_cwnd_gain < target_window ||
905              lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler)
906                                                        < bbr->bbr_init_cwnd))
907        // If the connection is not yet out of startup phase, do not decrease
908        // the window.
909        bbr->bbr_cwnd += bytes_acked;
910
911    // Enforce the limits on the congestion window.
912    if (bbr->bbr_cwnd < bbr->bbr_min_cwnd)
913        bbr->bbr_cwnd = bbr->bbr_min_cwnd;
914    else if (bbr->bbr_cwnd > bbr->bbr_max_cwnd)
915    {
916        LSQ_DEBUG("exceed max cwnd");
917        bbr->bbr_cwnd = bbr->bbr_max_cwnd;
918    }
919}
920
921
922/* See BbrSender::CalculateRecoveryWindow */
923static void
924calculate_recovery_window (struct lsquic_bbr *bbr, uint64_t bytes_acked,
925                                uint64_t bytes_lost, uint64_t bytes_in_flight)
926{
927    if ((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP)
928                                        && bbr->bbr_mode == BBR_MODE_STARTUP)
929        return;
930
931    if (bbr->bbr_recovery_state == BBR_RS_NOT_IN_RECOVERY)
932        return;
933
934    // Set up the initial recovery window.
935    if (bbr->bbr_recovery_window == 0)
936    {
937        bbr->bbr_recovery_window = bytes_in_flight + bytes_acked;
938        bbr->bbr_recovery_window = MAX(bbr->bbr_min_cwnd,
939                                                    bbr->bbr_recovery_window);
940        return;
941    }
942
943    // Remove losses from the recovery window, while accounting for a potential
944    // integer underflow.
945    if (bbr->bbr_recovery_window >= bytes_lost)
946        bbr->bbr_recovery_window -= bytes_lost;
947    else
948        bbr->bbr_recovery_window = kMaxSegmentSize;
949
950    // In CONSERVATION mode, just subtracting losses is sufficient.  In GROWTH,
951    // release additional |bytes_acked| to achieve a slow-start-like behavior.
952    if (bbr->bbr_recovery_state == BBR_RS_GROWTH)
953        bbr->bbr_recovery_window += bytes_acked;
954
955    // Sanity checks.  Ensure that we always allow to send at least an MSS or
956    // |bytes_acked| in response, whichever is larger.
957    bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window,
958                                            bytes_in_flight + bytes_acked);
959    if (FLAG_quic_bbr_one_mss_conservation)
960        bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window,
961                                            bytes_in_flight + kMaxSegmentSize);
962    bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, bbr->bbr_min_cwnd);
963}
964
965
966static void
967lsquic_bbr_end_ack (void *cong_ctl, uint64_t in_flight)
968{
969    struct lsquic_bbr *const bbr = cong_ctl;
970    int is_round_start, min_rtt_expired;
971    uint64_t bytes_acked, excess_acked, bytes_lost;
972
973    assert(bbr->bbr_flags & BBR_FLAG_IN_ACK);
974    bbr->bbr_flags &= ~BBR_FLAG_IN_ACK;
975
976    LSQ_DEBUG("end_ack; mode: %s; in_flight: %"PRIu64, mode2str[bbr->bbr_mode],
977                                                                    in_flight);
978
979    bytes_acked = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler)
980                            - bbr->bbr_ack_state.total_bytes_acked_before;
981    if (bbr->bbr_ack_state.acked_bytes)
982    {
983        is_round_start = bbr->bbr_ack_state.max_packno
984                                    > bbr->bbr_current_round_trip_end
985                    || !is_valid_packno(bbr->bbr_current_round_trip_end);
986        if (is_round_start)
987        {
988            ++bbr->bbr_round_count;
989            bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno;
990            LSQ_DEBUG("up round count to %"PRIu64"; new rt end: %"PRIu64,
991                        bbr->bbr_round_count, bbr->bbr_current_round_trip_end);
992        }
993        min_rtt_expired = update_bandwidth_and_min_rtt(bbr);
994        update_recovery_state(bbr, is_round_start);
995        excess_acked = update_ack_aggregation_bytes(bbr, bytes_acked);
996    }
997    else
998    {
999        is_round_start = 0;
1000        min_rtt_expired = 0;
1001        excess_acked = 0;
1002    }
1003
1004    if (bbr->bbr_mode == BBR_MODE_PROBE_BW)
1005        update_gain_cycle_phase(bbr, in_flight);
1006
1007    if (is_round_start && !(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
1008        check_if_full_bw_reached(bbr);
1009
1010    maybe_exit_startup_or_drain(bbr, bbr->bbr_ack_state.ack_time, in_flight);
1011
1012    maybe_enter_or_exit_probe_rtt(bbr, bbr->bbr_ack_state.ack_time,
1013                                is_round_start, min_rtt_expired, in_flight);
1014
1015    // Calculate number of packets acked and lost.
1016    bytes_lost = bbr->bbr_ack_state.lost_bytes;
1017
1018    // After the model is updated, recalculate the pacing rate and congestion
1019    // window.
1020    calculate_pacing_rate(bbr);
1021    calculate_cwnd(bbr, bytes_acked, excess_acked);
1022    calculate_recovery_window(bbr, bytes_acked, bytes_lost, in_flight);
1023
1024    /* We don't need to clean up BW sampler */
1025}
1026
1027
1028static void
1029lsquic_bbr_cleanup (void *cong_ctl)
1030{
1031    struct lsquic_bbr *const bbr = cong_ctl;
1032
1033    LSQ_DEBUG("cleanup");
1034}
1035
1036
1037static void
1038lsquic_bbr_loss (void *cong_ctl) {   /* Noop */   }
1039
1040
1041static void
1042lsquic_bbr_timeout (void *cong_ctl) {   /* Noop */   }
1043
1044
1045const struct cong_ctl_if lsquic_cong_bbr_if =
1046{
1047    .cci_ack           = lsquic_bbr_ack,
1048    .cci_begin_ack     = lsquic_bbr_begin_ack,
1049    .cci_end_ack       = lsquic_bbr_end_ack,
1050    .cci_cleanup       = lsquic_bbr_cleanup,
1051    .cci_get_cwnd      = lsquic_bbr_get_cwnd,
1052    .cci_init          = lsquic_bbr_init,
1053    .cci_pacing_rate   = lsquic_bbr_pacing_rate,
1054    .cci_loss          = lsquic_bbr_loss,
1055    .cci_lost          = lsquic_bbr_lost,
1056    .cci_timeout       = lsquic_bbr_timeout,
1057    .cci_sent          = lsquic_bbr_sent,
1058    .cci_was_quiet     = lsquic_bbr_was_quiet,
1059};
1060