/* lsquic_bbr.c revision 9fc12041 */
1/* Copyright (c) 2017 - 2020 LiteSpeed Technologies Inc.  See LICENSE. */
2// Copyright 2016 The Chromium Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE.chrome file.
5
6#include <assert.h>
7#include <inttypes.h>
8#include <stdint.h>
9#include <stdlib.h>
10#include <string.h>
11#include <sys/queue.h>
12
13#include "lsquic.h"
14#include "lsquic_int_types.h"
15#include "lsquic_cong_ctl.h"
16#include "lsquic_minmax.h"
17#include "lsquic_packet_common.h"
18#include "lsquic_packet_out.h"
19#include "lsquic_bw_sampler.h"
20#include "lsquic_bbr.h"
21#include "lsquic_hash.h"
22#include "lsquic_conn.h"
23#include "lsquic_sfcw.h"
24#include "lsquic_conn_flow.h"
25#include "lsquic_varint.h"
26#include "lsquic_hq.h"
27#include "lsquic_stream.h"
28#include "lsquic_rtt.h"
29#include "lsquic_conn_public.h"
30#include "lsquic_util.h"
31#include "lsquic_malo.h"
32#include "lsquic_crand.h"
33#include "lsquic_mm.h"
34#include "lsquic_engine_public.h"
35
36#define LSQUIC_LOGGER_MODULE LSQLM_BBR
37#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(bbr->bbr_conn_pub->lconn)
38#include "lsquic_logger.h"
39
40#define MIN(a, b) ((a) < (b) ? (a) : (b))
41#define MAX(a, b) ((a) > (b) ? (a) : (b))
42
43#define ms(val_) ((val_) * 1000)
44#define sec(val_) ((val_) * 1000 * 1000)
45
46// Default maximum packet size used in the Linux TCP implementation.
47// Used in QUIC for congestion window computations in bytes.
48#define kDefaultTCPMSS 1460
49#define kMaxSegmentSize kDefaultTCPMSS
50
51// Constants based on TCP defaults.
52// The minimum CWND to ensure delayed acks don't reduce bandwidth measurements.
53// Does not inflate the pacing rate.
54#define kDefaultMinimumCongestionWindow  (4 * kDefaultTCPMSS)
55
56// The gain used for the STARTUP, equal to 2/ln(2).
57#define kDefaultHighGain 2.885f
58
59// The newly derived gain for STARTUP, equal to 4 * ln(2)
60#define kDerivedHighGain 2.773f
61
62// The newly derived CWND gain for STARTUP, 2.
63#define kDerivedHighCWNDGain 2.0f
64
65// The gain used in STARTUP after loss has been detected.
66// 1.5 is enough to allow for 25% exogenous loss and still observe a 25% growth
67// in measured bandwidth.
68#define kStartupAfterLossGain 1.5f
69
70// We match SPDY's use of 32 (since we'd compete with SPDY).
71#define kInitialCongestionWindow 32
72
73/* Taken from send_algorithm_interface.h */
74#define kDefaultMaxCongestionWindowPackets 2000
75
76// The time after which the current min_rtt value expires.
77#define kMinRttExpiry sec(10)
78
79// Coefficient to determine if a new RTT is sufficiently similar to min_rtt that
80// we don't need to enter PROBE_RTT.
81#define kSimilarMinRttThreshold 1.125f
82
83// If the bandwidth does not increase by the factor of |kStartupGrowthTarget|
84// within |kRoundTripsWithoutGrowthBeforeExitingStartup| rounds, the connection
85// will exit the STARTUP mode.
86#define kStartupGrowthTarget 1.25
87
88#define kRoundTripsWithoutGrowthBeforeExitingStartup 3
89
90#define startup_rate_reduction_multiplier_ 0
91
92// The cycle of gains used during the PROBE_BW stage.
93static const float kPacingGain[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1};
94
95// The length of the gain cycle.
96static const size_t kGainCycleLength = sizeof(kPacingGain)
97                                                    / sizeof(kPacingGain[0]);
98
99// Coefficient of target congestion window to use when basing PROBE_RTT on BDP.
100#define kModerateProbeRttMultiplier 0.75
101
102// The maximum packet size of any QUIC packet over IPv6, based on ethernet's max
103// size, minus the IP and UDP headers. IPv6 has a 40 byte header, UDP adds an
104// additional 8 bytes.  This is a total overhead of 48 bytes.  Ethernet's
105// max packet size is 1500 bytes,  1500 - 48 = 1452.
106#define kMaxV6PacketSize 1452
107// The maximum packet size of any QUIC packet over IPv4.
108// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472.
109#define kMaxV4PacketSize 1472
110// The maximum incoming packet size allowed.
111#define kMaxIncomingPacketSize kMaxV4PacketSize
112// The maximum outgoing packet size allowed.
113#define kMaxOutgoingPacketSize kMaxV6PacketSize
114
115// The minimum time the connection can spend in PROBE_RTT mode.
116#define kProbeRttTime ms(200)
117
118/* FLAG* are from net/quic/quic_flags_list.h */
119
120// When in STARTUP and recovery, do not add bytes_acked to QUIC BBR's CWND in
121// CalculateCongestionWindow()
122#define FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery 0
123
124// When true, ensure BBR allows at least one MSS to be sent in response to an
125// ACK in packet conservation.
126#define FLAG_quic_bbr_one_mss_conservation 0
127
128/* From net/quic/quic_flags_list.h */
129#define kCwndGain 2.0
130
131
132static uint64_t lsquic_bbr_get_cwnd (void *);
133
134
135static const char *const mode2str[] =
136{
137    [BBR_MODE_STARTUP]   = "STARTUP",
138    [BBR_MODE_DRAIN]     = "DRAIN",
139    [BBR_MODE_PROBE_BW]  = "PROBE_BW",
140    [BBR_MODE_PROBE_RTT] = "PROBE_RTT",
141};
142
143
144static void
145set_mode (struct lsquic_bbr *bbr, enum bbr_mode mode)
146{
147    if (bbr->bbr_mode != mode)
148    {
149        LSQ_DEBUG("mode change %s -> %s", mode2str[bbr->bbr_mode],
150                                                        mode2str[mode]);
151        bbr->bbr_mode = mode;
152    }
153    else
154        LSQ_DEBUG("mode remains %s", mode2str[mode]);
155}
156
157
158static void
159set_startup_values (struct lsquic_bbr *bbr)
160{
161    bbr->bbr_pacing_gain = bbr->bbr_high_gain;
162    bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain;
163}
164
165
166static void
167init_bbr (struct lsquic_bbr *bbr)
168{
169    bbr->bbr_mode = BBR_MODE_STARTUP;
170    bbr->bbr_round_count = 0;
171    minmax_init(&bbr->bbr_max_bandwidth, 10);
172    minmax_init(&bbr->bbr_max_ack_height, 10);
173    bbr->bbr_aggregation_epoch_bytes = 0;
174    bbr->bbr_aggregation_epoch_start_time = 0;
175    bbr->bbr_min_rtt = 0;
176    bbr->bbr_min_rtt_timestamp = 0;
177    bbr->bbr_init_cwnd = kInitialCongestionWindow * kDefaultTCPMSS;
178    bbr->bbr_cwnd = kInitialCongestionWindow * kDefaultTCPMSS;
179    bbr->bbr_max_cwnd = kDefaultMaxCongestionWindowPackets * kDefaultTCPMSS;
180    bbr->bbr_min_cwnd = kDefaultMinimumCongestionWindow;
181    bbr->bbr_high_gain = kDefaultHighGain;
182    bbr->bbr_high_cwnd_gain = kDefaultHighGain;
183    bbr->bbr_drain_gain = 1.0 / kDefaultHighGain;
184    bbr->bbr_pacing_rate = BW_ZERO();
185    bbr->bbr_pacing_gain = 1.0;
186    bbr->bbr_cwnd_gain = 1.0;
187    bbr->bbr_num_startup_rtts = kRoundTripsWithoutGrowthBeforeExitingStartup;
188    bbr->bbr_flags &= ~BBR_FLAG_EXIT_STARTUP_ON_LOSS;
189    bbr->bbr_cycle_current_offset = 0;
190    bbr->bbr_last_cycle_start = 0;
191    bbr->bbr_flags &= ~BBR_FLAG_IS_AT_FULL_BANDWIDTH;
192    bbr->bbr_round_wo_bw_gain = 0;
193    bbr->bbr_bw_at_last_round = BW_ZERO();
194    bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE;
195    bbr->bbr_exit_probe_rtt_at = 0;
196    bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED;
197    bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
198    bbr->bbr_flags &= ~BBR_FLAG_HAS_NON_APP_LIMITED;
199    bbr->bbr_flags &= ~BBR_FLAG_FLEXIBLE_APP_LIMITED;
200    set_startup_values(bbr);
201}
202
203
/* Set up the congestion controller: attach connection state, initialize
 * the bandwidth sampler with the set of retransmittable frame types, and
 * reset all BBR variables to their startup values.
 */
static void
lsquic_bbr_init (void *cong_ctl, const struct lsquic_conn_public *conn_pub,
                                                enum quic_ft_bit retx_frames)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    bbr->bbr_conn_pub = conn_pub;
    lsquic_bw_sampler_init(&bbr->bbr_bw_sampler, conn_pub->lconn, retx_frames);
    /* RTT statistics are maintained elsewhere; BBR only reads them */
    bbr->bbr_rtt_stats = &conn_pub->rtt_stats;

    init_bbr(bbr);

    LSQ_DEBUG("initialized");
}
217
218
/* Reset BBR to its initial state without touching the bandwidth sampler
 * or the references to connection state.
 */
static void
lsquic_bbr_reinit (void *cong_ctl)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    init_bbr(bbr);
    LSQ_DEBUG("re-initialized");
}
228
229
230static lsquic_time_t
231get_min_rtt (const struct lsquic_bbr *bbr)
232{
233    lsquic_time_t min_rtt;
234
235    if (bbr->bbr_min_rtt)
236        return bbr->bbr_min_rtt;
237    else
238    {
239        min_rtt = lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats);
240        if (min_rtt == 0)
241            min_rtt = 25000;
242        return min_rtt;
243    }
244}
245
246
247static uint64_t
248lsquic_bbr_pacing_rate (void *cong_ctl, int in_recovery)
249{
250    struct lsquic_bbr *const bbr = cong_ctl;
251    lsquic_time_t min_rtt;
252    struct bandwidth bw;
253
254    if (!BW_IS_ZERO(&bbr->bbr_pacing_rate))
255        bw = bbr->bbr_pacing_rate;
256    else
257    {
258        min_rtt = get_min_rtt(bbr);
259        bw = BW_FROM_BYTES_AND_DELTA(bbr->bbr_init_cwnd, min_rtt);
260        bw = BW_TIMES(&bw, bbr->bbr_high_cwnd_gain);
261    }
262
263    return BW_TO_BYTES_PER_SEC(&bw);
264}
265
266
267/* BbrSender::GetTargetCongestionWindow */
268static uint64_t
269get_target_cwnd (const struct lsquic_bbr *bbr, float gain)
270{
271    struct bandwidth bw;
272    uint64_t bdp, cwnd;
273
274    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
275    bdp = get_min_rtt(bbr) * BW_TO_BYTES_PER_SEC(&bw) / 1000000;
276    cwnd = gain * bdp;
277
278    // BDP estimate will be zero if no bandwidth samples are available yet.
279    if (cwnd == 0)
280        cwnd = gain * bbr->bbr_init_cwnd;
281
282    return MAX(cwnd, bbr->bbr_min_cwnd);
283}
284
285
286/* See BbrSender::IsPipeSufficientlyFull */
287static int
288is_pipe_sufficiently_full (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
289{
290    // See if we need more bytes in flight to see more bandwidth.
291    if (bbr->bbr_mode == BBR_MODE_STARTUP)
292        // STARTUP exits if it doesn't observe a 25% bandwidth increase, so
293        // the CWND must be more than 25% above the target.
294        return bytes_in_flight >= get_target_cwnd(bbr, 1.5);
295    else if (bbr->bbr_pacing_gain > 1)
296        // Super-unity PROBE_BW doesn't exit until 1.25 * BDP is achieved.
297        return bytes_in_flight >= get_target_cwnd(bbr, bbr->bbr_pacing_gain);
298    else
299        // If bytes_in_flight are above the target congestion window, it should
300        // be possible to observe the same or more bandwidth if it's available.
301        return bytes_in_flight >= get_target_cwnd(bbr, 1.1);
302}
303
304
/* Callback invoked after a period of quiescence.  BBR takes no action
 * here beyond logging.
 */
static void
lsquic_bbr_was_quiet (void *cong_ctl, lsquic_time_t now, uint64_t in_flight)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    LSQ_DEBUG("was quiet");         /* Do nothing */
}
311
312
313/* See BbrSender::OnApplicationLimited */
314static void
315bbr_app_limited (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
316{
317    uint64_t cwnd;
318
319    cwnd = lsquic_bbr_get_cwnd(bbr);
320    if (bytes_in_flight >= cwnd)
321        return;
322    if ((bbr->bbr_flags & BBR_FLAG_FLEXIBLE_APP_LIMITED)
323                            && is_pipe_sufficiently_full(bbr, bytes_in_flight))
324        return;
325
326    bbr->bbr_flags |= BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT;
327    lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler);
328    LSQ_DEBUG("becoming application-limited.  Last sent packet: %"PRIu64"; "
329                            "CWND: %"PRIu64, bbr->bbr_last_sent_packno, cwnd);
330}
331
332
/* Record receipt of an ACK for `packet_out'.  Must be called while an ACK
 * event is in progress -- BBR_FLAG_IN_ACK is set by lsquic_bbr_begin_ack()
 * -- and accumulates per-ACK state in bbr->bbr_ack_state.
 */
static void
lsquic_bbr_ack (void *cong_ctl, struct lsquic_packet_out *packet_out,
                  unsigned packet_sz, lsquic_time_t now_time, int app_limited)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    struct bw_sample *sample;

    assert(bbr->bbr_flags & BBR_FLAG_IN_ACK);

    /* The sampler may or may not produce a bandwidth sample for this packet;
     * samples are queued for processing once the whole ACK is consumed.
     */
    sample = lsquic_bw_sampler_packet_acked(&bbr->bbr_bw_sampler, packet_out,
                                                bbr->bbr_ack_state.ack_time);
    if (sample)
        TAILQ_INSERT_TAIL(&bbr->bbr_ack_state.samples, sample, next);

    if (is_valid_packno(bbr->bbr_ack_state.max_packno))
        /* We assume packet numbers are ordered */
        assert(packet_out->po_packno > bbr->bbr_ack_state.max_packno);
    bbr->bbr_ack_state.max_packno = packet_out->po_packno;
    bbr->bbr_ack_state.acked_bytes += packet_sz;
}
353
354
/* Account for a sent packet: feed it to the bandwidth sampler (mini
 * connection packets are excluded), remember its packet number, and enter
 * the app-limited state when the sender has run out of data to send.
 */
static void
lsquic_bbr_sent (void *cong_ctl, struct lsquic_packet_out *packet_out,
                                        uint64_t in_flight, int app_limited)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    if (!(packet_out->po_flags & PO_MINI))
        lsquic_bw_sampler_packet_sent(&bbr->bbr_bw_sampler, packet_out,
                                                                in_flight);

    /* Obviously we make an assumption that sent packet number are always
     * increasing.
     */
    bbr->bbr_last_sent_packno = packet_out->po_packno;

    if (app_limited)
        bbr_app_limited(bbr, in_flight);
}
373
374
/* Account for a lost packet: inform the bandwidth sampler and record the
 * loss in the current ACK state so that recovery logic can react to it.
 */
static void
lsquic_bbr_lost (void *cong_ctl, struct lsquic_packet_out *packet_out,
                                                        unsigned packet_sz)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    lsquic_bw_sampler_packet_lost(&bbr->bbr_bw_sampler, packet_out);
    bbr->bbr_ack_state.has_losses = 1;
    bbr->bbr_ack_state.lost_bytes += packet_sz;
}
385
386
/* Begin processing an incoming ACK: reset the per-ACK accounting state.
 * Individual acked packets are then reported via lsquic_bbr_ack().
 */
static void
lsquic_bbr_begin_ack (void *cong_ctl, lsquic_time_t ack_time, uint64_t in_flight)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    assert(!(bbr->bbr_flags & BBR_FLAG_IN_ACK));
    bbr->bbr_flags |= BBR_FLAG_IN_ACK;
    memset(&bbr->bbr_ack_state, 0, sizeof(bbr->bbr_ack_state));
    /* The sample list head must be re-initialized after the memset above */
    TAILQ_INIT(&bbr->bbr_ack_state.samples);
    bbr->bbr_ack_state.ack_time = ack_time;
    /* UINT64_MAX fails the is_valid_packno() check in lsquic_bbr_ack(),
     * marking that no packet number has been recorded yet.
     */
    bbr->bbr_ack_state.max_packno = UINT64_MAX;
    bbr->bbr_ack_state.in_flight = in_flight;
    bbr->bbr_ack_state.total_bytes_acked_before
                        = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler);
}
402
403
404/* Based on BbrSender::ShouldExtendMinRttExpiry() */
405static int
406should_extend_min_rtt_expiry (const struct lsquic_bbr *bbr)
407{
408    int increased_since_last_probe;
409
410    if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
411                          |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED))
412            == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
413               |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED))
414        // Extend the current min_rtt if we've been app limited recently.
415        return 1;
416
417    increased_since_last_probe = bbr->bbr_min_rtt_since_last_probe
418                                > bbr->bbr_min_rtt * kSimilarMinRttThreshold;
419    if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
420                          |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT))
421            == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
422               |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT)
423            && !increased_since_last_probe)
424        // Extend the current min_rtt if we've been app limited recently and an
425        // rtt has been measured in that time that's less than 12.5% more than
426        // the current min_rtt.
427        return 1;
428
429    return 0;
430}
431
432
/* Based on BbrSender::UpdateBandwidthAndMinRtt */
/* Returns true if min RTT expired, false otherwise */
static int
update_bandwidth_and_min_rtt (struct lsquic_bbr *bbr)
{
    struct bw_sample *sample, *next_sample;
    uint64_t sample_min_rtt;
    int min_rtt_expired;

    /* Walk the bandwidth samples collected during this ACK, feeding them
     * into the windowed max-bandwidth filter and tracking the smallest RTT
     * seen.  Each sample is freed as it is consumed.
     */
    sample_min_rtt = UINT64_MAX;
    for (sample = TAILQ_FIRST(&bbr->bbr_ack_state.samples); sample;
                                                        sample = next_sample)
    {
        next_sample = TAILQ_NEXT(sample, next);

        if (sample->is_app_limited)
            bbr->bbr_flags |= BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
        else
        {
            bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
            bbr->bbr_flags |=  BBR_FLAG_HAS_NON_APP_LIMITED;
        }

        if (sample_min_rtt == UINT64_MAX || sample->rtt < sample_min_rtt)
            sample_min_rtt = sample->rtt;

        /* App-limited samples may under-estimate bandwidth, so they are
         * only used when they exceed the current maximum.
         */
        if (!sample->is_app_limited
                    || BW_VALUE(&sample->bandwidth)
                                    > minmax_get(&bbr->bbr_max_bandwidth))
            minmax_upmax(&bbr->bbr_max_bandwidth, bbr->bbr_round_count,
                                                BW_VALUE(&sample->bandwidth));

        lsquic_malo_put(sample);
    }

    /* No samples in this ACK: nothing to update, min RTT cannot expire */
    if (sample_min_rtt == UINT64_MAX)
        return 0;

    bbr->bbr_min_rtt_since_last_probe
                    = MIN(bbr->bbr_min_rtt_since_last_probe, sample_min_rtt);

    /* The recorded min RTT expires kMinRttExpiry (10 sec) after it was set */
    min_rtt_expired = bbr->bbr_min_rtt != 0 && (bbr->bbr_ack_state.ack_time
                                > bbr->bbr_min_rtt_timestamp + kMinRttExpiry);
    if (min_rtt_expired || sample_min_rtt < bbr->bbr_min_rtt
                                                    || 0 == bbr->bbr_min_rtt)
    {
        if (min_rtt_expired && should_extend_min_rtt_expiry(bbr))
        {
            LSQ_DEBUG("min rtt expiration extended, stay at: %"PRIu64,
                bbr->bbr_min_rtt);
            min_rtt_expired = 0;
        }
        else
        {
            LSQ_DEBUG("min rtt updated: %"PRIu64" -> %"PRIu64,
                bbr->bbr_min_rtt, sample_min_rtt);
            bbr->bbr_min_rtt = sample_min_rtt;
        }
        /* Restart the expiry clock and reset per-probe tracking state */
        bbr->bbr_min_rtt_timestamp = bbr->bbr_ack_state.ack_time;
        bbr->bbr_min_rtt_since_last_probe = UINT64_MAX;
        bbr->bbr_flags &= ~BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT;
    }

    return min_rtt_expired;
}
498
499
/* Based on BbrSender::UpdateRecoveryState() */
/* Advance the loss-recovery state machine: NOT_IN_RECOVERY -> CONSERVATION
 * (on first loss) -> GROWTH (at the next round start) -> NOT_IN_RECOVERY
 * (once a full round passes without losses).
 */
static void
update_recovery_state (struct lsquic_bbr *bbr, int is_round_start)
{
    // Exit recovery when there are no losses for a round.
    if (bbr->bbr_ack_state.has_losses)
        bbr->bbr_end_recovery_at = bbr->bbr_last_sent_packno;

    switch (bbr->bbr_recovery_state)
    {
    case BBR_RS_NOT_IN_RECOVERY:
        // Enter conservation on the first loss.
        if (bbr->bbr_ack_state.has_losses)
        {
            bbr->bbr_recovery_state = BBR_RS_CONSERVATION;
            // This will cause the |bbr_recovery_window| to be set to the
            // correct value in CalculateRecoveryWindow().
            bbr->bbr_recovery_window = 0;
            // Since the conservation phase is meant to be lasting for a whole
            // round, extend the current round as if it were started right now.
            bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno;
        }
        break;
    case BBR_RS_CONSERVATION:
        if (is_round_start)
            bbr->bbr_recovery_state = BBR_RS_GROWTH;
        /* Fall-through */
    case BBR_RS_GROWTH:
        // Exit recovery if appropriate.
        if (!bbr->bbr_ack_state.has_losses
                && bbr->bbr_ack_state.max_packno > bbr->bbr_end_recovery_at)
            bbr->bbr_recovery_state = BBR_RS_NOT_IN_RECOVERY;
        break;
    }
}
535
536
/* Track "ack aggregation": how many bytes were acknowledged beyond what
 * the maximum bandwidth estimate predicts for the elapsed time.  Returns
 * the excess, or 0 when a new aggregation epoch is started.
 */
static uint64_t
update_ack_aggregation_bytes (struct lsquic_bbr *bbr,
                                                uint64_t newly_acked_bytes)
{
    const lsquic_time_t ack_time = bbr->bbr_ack_state.ack_time;
    uint64_t expected_bytes_acked, diff;

    // Compute how many bytes are expected to be delivered, assuming max
    // bandwidth is correct.
    expected_bytes_acked = minmax_get(&bbr->bbr_max_bandwidth)
                        * (ack_time - bbr->bbr_aggregation_epoch_start_time);

    // Reset the current aggregation epoch as soon as the ack arrival rate is
    // less than or equal to the max bandwidth.
    if (bbr->bbr_aggregation_epoch_bytes <= expected_bytes_acked)
    {
        // Reset to start measuring a new aggregation epoch.
        bbr->bbr_aggregation_epoch_bytes = newly_acked_bytes;
        bbr->bbr_aggregation_epoch_start_time = ack_time;
        return 0;
    }

    // Compute how many extra bytes were delivered vs max bandwidth.
    // Include the bytes most recently acknowledged to account for stretch acks.
    bbr->bbr_aggregation_epoch_bytes += newly_acked_bytes;
    diff = bbr->bbr_aggregation_epoch_bytes - expected_bytes_acked;
    /* The maximum observed excess is kept in a windowed filter and later
     * feeds the congestion window calculation.
     */
    minmax_upmax(&bbr->bbr_max_ack_height, bbr->bbr_round_count, diff);
    return diff;
}
566
567
/* See BbrSender::UpdateGainCyclePhase() */
/* While in PROBE_BW, rotate through the kPacingGain cycle, normally
 * advancing once per min RTT, with adjustments so that probing phases
 * actually reach their target and draining phases can end early.
 */
static void
update_gain_cycle_phase (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
{
    const uint64_t prior_in_flight = bbr->bbr_ack_state.in_flight;
    const lsquic_time_t now = bbr->bbr_ack_state.ack_time;
    // In most cases, the cycle is advanced after an RTT passes.
    int should_advance_gain_cycling
        = now - bbr->bbr_last_cycle_start > get_min_rtt(bbr);

    // If the pacing gain is above 1.0, the connection is trying to probe the
    // bandwidth by increasing the number of bytes in flight to at least
    // pacing_gain * BDP.  Make sure that it actually reaches the target, as
    // long as there are no losses suggesting that the buffers are not able to
    // hold that much.
    if (bbr->bbr_pacing_gain > 1.0
            && !bbr->bbr_ack_state.has_losses
            && prior_in_flight < get_target_cwnd(bbr, bbr->bbr_pacing_gain))
        should_advance_gain_cycling = 0;

    /* Several optimizations are possible here: "else if" instead of "if", as
     * well as not calling get_target_cwnd() if `should_advance_gain_cycling'
     * is already set to the target value.
     */

    // If pacing gain is below 1.0, the connection is trying to drain the extra
    // queue which could have been incurred by probing prior to it.  If the
    // number of bytes in flight falls down to the estimated BDP value earlier,
    // conclude that the queue has been successfully drained and exit this cycle
    // early.
    if (bbr->bbr_pacing_gain < 1.0
                                && bytes_in_flight <= get_target_cwnd(bbr, 1))
        should_advance_gain_cycling = 1;

    if (should_advance_gain_cycling)
    {
        bbr->bbr_cycle_current_offset =
                        (bbr->bbr_cycle_current_offset + 1) % kGainCycleLength;
        bbr->bbr_last_cycle_start = now;
        // Stay in low gain mode until the target BDP is hit.  Low gain mode
        // will be exited immediately when the target BDP is achieved.
        if ((bbr->bbr_flags & BBR_FLAG_DRAIN_TO_TARGET)
                && bbr->bbr_pacing_gain < 1
                && kPacingGain[bbr->bbr_cycle_current_offset] == 1
                && bytes_in_flight > get_target_cwnd(bbr, 1))
              return;
        bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset];
        LSQ_DEBUG("advanced gain cycle, pacing gain set to %.2f",
                                                        bbr->bbr_pacing_gain);
    }
}
619
620
621/* BbrSender::InRecovery() */
622static int
623in_recovery (const struct lsquic_bbr *bbr)
624{
625    return bbr->bbr_recovery_state != BBR_RS_NOT_IN_RECOVERY;
626}
627
628
/* See BbrSender::CheckIfFullBandwidthReached() */
/* STARTUP considers the pipe full when measured bandwidth fails to grow by
 * kStartupGrowthTarget (25%) for bbr_num_startup_rtts consecutive rounds,
 * or -- when BBR_FLAG_EXIT_STARTUP_ON_LOSS is set -- when the connection
 * enters loss recovery.  Sets BBR_FLAG_IS_AT_FULL_BANDWIDTH on success.
 */
static void
check_if_full_bw_reached (struct lsquic_bbr *bbr)
{
    struct bandwidth target, bw;

    if (bbr->bbr_flags & BBR_FLAG_LAST_SAMPLE_APP_LIMITED)
    {
        /* An app-limited sample cannot prove lack of bandwidth growth */
        LSQ_DEBUG("last sample app limited: full BW not reached");
        return;
    }

    target = BW_TIMES(&bbr->bbr_bw_at_last_round, kStartupGrowthTarget);
    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
    if (BW_VALUE(&bw) >= BW_VALUE(&target))
    {
        /* Bandwidth is still growing: restart the no-growth count */
        bbr->bbr_bw_at_last_round = bw;
        bbr->bbr_round_wo_bw_gain = 0;
        if (bbr->bbr_flags & BBR_FLAG_EXPIRE_ACK_AGG_IN_STARTUP)
            // Expire old excess delivery measurements now that bandwidth
            // increased.
            minmax_reset(&bbr->bbr_max_ack_height,
                        ((struct minmax_sample) { bbr->bbr_round_count, 0, }));
        LSQ_DEBUG("BW estimate %"PRIu64"bps greater than or equal to target "
            "%"PRIu64"bps: full BW not reached",
            BW_VALUE(&bw), BW_VALUE(&target));
        return;
    }

    ++bbr->bbr_round_wo_bw_gain;
    if ((bbr->bbr_round_wo_bw_gain >= bbr->bbr_num_startup_rtts)
            || ((bbr->bbr_flags & BBR_FLAG_EXIT_STARTUP_ON_LOSS)
                                                    && in_recovery(bbr)))
    {
        assert(bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED);  /* DCHECK */
        bbr->bbr_flags |= BBR_FLAG_IS_AT_FULL_BANDWIDTH;
        LSQ_DEBUG("reached full BW");
    }
    else
        LSQ_DEBUG("rounds w/o gain: %u, full BW not reached",
                                                bbr->bbr_round_wo_bw_gain);
}
671
672
/* See BbrSender::OnExitStartup */
/* Hook invoked when leaving the STARTUP mode; currently a no-op
 * placeholder (upstream uses it for statistics).
 */
static void
on_exit_startup (struct lsquic_bbr *bbr, lsquic_time_t now)
{
    assert(bbr->bbr_mode == BBR_MODE_STARTUP);
    /* Apparently this method is just to update stats, something that we
     * don't do yet.
     */
}
682
683
/* See BbrSender::EnterProbeBandwidthMode */
/* Switch to PROBE_BW: set the CWND gain and start the pacing-gain cycle
 * at a randomized offset.
 */
static void
enter_probe_bw_mode (struct lsquic_bbr *bbr, lsquic_time_t now)
{
    uint8_t rand;

    set_mode(bbr, BBR_MODE_PROBE_BW);
    bbr->bbr_cwnd_gain = kCwndGain;

    // Pick a random offset for the gain cycle out of {0, 2..7} range. 1 is
    // excluded because in that case increased gain and decreased gain would not
    // follow each other.
    rand = lsquic_crand_get_byte(bbr->bbr_conn_pub->enpub->enp_crand);
    bbr->bbr_cycle_current_offset = rand % (kGainCycleLength - 1);
    /* Map the seven values {0..6} onto {0, 2..7} by skipping index 1 */
    if (bbr->bbr_cycle_current_offset >= 1)
        ++bbr->bbr_cycle_current_offset;

    bbr->bbr_last_cycle_start = now;
    bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset];
}
704
705
706/* See BbrSender::EnterStartupMode */
707static void
708enter_startup_mode (struct lsquic_bbr *bbr, lsquic_time_t now)
709{
710    set_mode(bbr, BBR_MODE_STARTUP);
711    set_startup_values(bbr);
712}
713
714
/* See  BbrSender::MaybeExitStartupOrDrain() */
/* STARTUP is exited into DRAIN once full bandwidth has been reached;
 * DRAIN is exited into PROBE_BW once bytes in flight fall to the
 * estimated BDP.
 */
static void
maybe_exit_startup_or_drain (struct lsquic_bbr *bbr, lsquic_time_t now,
                                                    uint64_t bytes_in_flight)
{
    uint64_t target_cwnd;

    if (bbr->bbr_mode == BBR_MODE_STARTUP
                        && (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
    {
        on_exit_startup(bbr, now);
        set_mode(bbr, BBR_MODE_DRAIN);
        /* Pace below the estimated bandwidth to drain the queue built up
         * during STARTUP, while keeping the CWND gain high.
         */
        bbr->bbr_pacing_gain = bbr->bbr_drain_gain;
        bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain;
    }

    /* Deliberately not "else if": the DRAIN check below may fire in the
     * same call that entered DRAIN above.
     */
    if (bbr->bbr_mode == BBR_MODE_DRAIN)
    {
        target_cwnd = get_target_cwnd(bbr, 1);
        LSQ_DEBUG("%s: bytes in flight: %"PRIu64"; target cwnd: %"PRIu64,
                                    __func__, bytes_in_flight, target_cwnd);
        if (bytes_in_flight <= target_cwnd)
            enter_probe_bw_mode(bbr, now);
    }
}
740
741
742static int
743in_slow_start (const struct lsquic_bbr *bbr)
744{
745    return bbr->bbr_mode == BBR_MODE_STARTUP;
746}
747
748
749/* See QuicByteCount BbrSender::ProbeRttCongestionWindow() */
750static uint64_t
751get_probe_rtt_cwnd (const struct lsquic_bbr *bbr)
752{
753    if (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_BASED_ON_BDP)
754        return get_target_cwnd(bbr, kModerateProbeRttMultiplier);
755    else
756        return bbr->bbr_min_cwnd;
757}
758
759
760static uint64_t
761lsquic_bbr_get_cwnd (void *cong_ctl)
762{
763    struct lsquic_bbr *const bbr = cong_ctl;
764    uint64_t cwnd;
765
766    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
767        cwnd = get_probe_rtt_cwnd(bbr);
768    else if (in_recovery(bbr) &&
769                !((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP)
770                                    && bbr->bbr_mode == BBR_MODE_STARTUP))
771        cwnd = MIN(bbr->bbr_cwnd, bbr->bbr_recovery_window);
772    else
773        cwnd = bbr->bbr_cwnd;
774
775    return cwnd;
776}
777
778
/* See BbrSender::MaybeEnterOrExitProbeRtt */
/* Enter PROBE_RTT when the min-RTT estimate has expired.  Once in it,
 * wait for bytes in flight to fall to the small probe window, dwell there
 * for kProbeRttTime plus a full round, then return to STARTUP or
 * PROBE_BW depending on whether full bandwidth was reached.
 */
static void
maybe_enter_or_exit_probe_rtt (struct lsquic_bbr *bbr, lsquic_time_t now,
            int is_round_start, int min_rtt_expired, uint64_t bytes_in_flight)
{
    if (min_rtt_expired
            && !(bbr->bbr_flags & BBR_FLAG_EXITING_QUIESCENCE)
                && bbr->bbr_mode != BBR_MODE_PROBE_RTT)
    {
        if (in_slow_start(bbr))
            on_exit_startup(bbr, now);
        set_mode(bbr, BBR_MODE_PROBE_RTT);
        bbr->bbr_pacing_gain = 1;
        // Do not decide on the time to exit PROBE_RTT until the
        // |bytes_in_flight| is at the target small value.
        bbr->bbr_exit_probe_rtt_at = 0;
    }

    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
    {
        /* Everything sent during PROBE_RTT is app-limited by definition */
        lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler);
        LSQ_DEBUG("%s: exit probe at: %"PRIu64"; now: %"PRIu64
            "; round start: %d; round passed: %d; rtt: %"PRIu64" usec",
            __func__, bbr->bbr_exit_probe_rtt_at, now, is_round_start,
            !!(bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED),
            lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats));
        if (bbr->bbr_exit_probe_rtt_at == 0)
        {
            // If the window has reached the appropriate size, schedule exiting
            // PROBE_RTT.  The CWND during PROBE_RTT is
            // kMinimumCongestionWindow, but we allow an extra packet since QUIC
            // checks CWND before sending a packet.
            if (bytes_in_flight
                        < get_probe_rtt_cwnd(bbr) + kMaxOutgoingPacketSize)
            {
                bbr->bbr_exit_probe_rtt_at = now + kProbeRttTime;
                bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED;
            }
        }
        else
        {
            if (is_round_start)
                bbr->bbr_flags |= BBR_FLAG_PROBE_RTT_ROUND_PASSED;
            /* Exit requires both the dwell time and a full round to pass */
            if (now >= bbr->bbr_exit_probe_rtt_at
                        && (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED))
            {
                bbr->bbr_min_rtt_timestamp = now;
                if (!(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
                    enter_startup_mode(bbr, now);
                else
                    enter_probe_bw_mode(bbr, now);
            }
        }
    }

    bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE;
}
836
837
/* See BbrSender::CalculatePacingRate */
/* Recompute bbr_pacing_rate from the bandwidth estimate and the current
 * pacing gain.  Outside of STARTUP the target rate is used directly; a
 * number of STARTUP special cases (no rate yet, loss-based slowdown,
 * never-decrease rule) are handled in priority order below.
 */
static void
calculate_pacing_rate (struct lsquic_bbr *bbr)
{
    struct bandwidth bw, target_rate;

    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
    if (BW_IS_ZERO(&bw))
        /* No bandwidth samples yet: keep whatever rate is in place */
        return;

    LSQ_DEBUG("BW estimate: %"PRIu64, BW_VALUE(&bw));

    target_rate = BW_TIMES(&bw, bbr->bbr_pacing_gain);
    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
    {
        bbr->bbr_pacing_rate = target_rate;
        return;
    }

    // Pace at the rate of initial_window / RTT as soon as RTT measurements are
    // available.
    if (BW_IS_ZERO(&bbr->bbr_pacing_rate)
            && 0 != lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats))
    {
        bbr->bbr_pacing_rate = BW_FROM_BYTES_AND_DELTA(
            bbr->bbr_init_cwnd,
            lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats));
        return;
    }

    // Slow the pacing rate in STARTUP once loss has ever been detected.
    const int has_ever_detected_loss = bbr->bbr_end_recovery_at != 0;
    if (has_ever_detected_loss
            && (bbr->bbr_flags & (BBR_FLAG_SLOWER_STARTUP
                                  |BBR_FLAG_HAS_NON_APP_LIMITED))
                == (BBR_FLAG_SLOWER_STARTUP|BBR_FLAG_HAS_NON_APP_LIMITED))
    {
        bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupAfterLossGain);
        return;
    }

    // Slow the pacing rate in STARTUP by the bytes_lost / CWND.
    if (startup_rate_reduction_multiplier_ != 0
            && has_ever_detected_loss
                && (bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED))
    {
        bbr->bbr_pacing_rate = BW_TIMES(&target_rate,
            (1 - (bbr->bbr_startup_bytes_lost
                    * startup_rate_reduction_multiplier_ * 1.0f
                        / bbr->bbr_cwnd_gain)));
        // Ensure the pacing rate doesn't drop below the startup growth target
        // times  the bandwidth estimate.
        if (BW_VALUE(&bbr->bbr_pacing_rate)
                                        < BW_VALUE(&bw) * kStartupGrowthTarget)
            bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupGrowthTarget);
        return;
    }

    // Do not decrease the pacing rate during startup.
    if (BW_VALUE(&bbr->bbr_pacing_rate) < BW_VALUE(&target_rate))
        bbr->bbr_pacing_rate = target_rate;
}
900
901
902/* See BbrSender::CalculateCongestionWindow */
903static void
904calculate_cwnd (struct lsquic_bbr *bbr, uint64_t bytes_acked,
905                                                  uint64_t excess_acked)
906{
907    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
908        return;
909
910    uint64_t target_window = get_target_cwnd(bbr, bbr->bbr_cwnd_gain);
911    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
912        // Add the max recently measured ack aggregation to CWND.
913        target_window += minmax_get(&bbr->bbr_max_ack_height);
914    else if (bbr->bbr_flags & BBR_FLAG_ENABLE_ACK_AGG_IN_STARTUP)
915        // Add the most recent excess acked.  Because CWND never decreases in
916        // STARTUP, this will automatically create a very localized max filter.
917        target_window += excess_acked;
918
919    // Instead of immediately setting the target CWND as the new one, BBR grows
920    // the CWND towards |target_window| by only increasing it |bytes_acked| at a
921    // time.
922    const int add_bytes_acked =
923	!FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery || !in_recovery(bbr);
924    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
925        bbr->bbr_cwnd = MIN(target_window, bbr->bbr_cwnd + bytes_acked);
926    else if (add_bytes_acked &&
927             (bbr->bbr_cwnd_gain < target_window ||
928              lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler)
929                                                        < bbr->bbr_init_cwnd))
930        // If the connection is not yet out of startup phase, do not decrease
931        // the window.
932        bbr->bbr_cwnd += bytes_acked;
933
934    // Enforce the limits on the congestion window.
935    if (bbr->bbr_cwnd < bbr->bbr_min_cwnd)
936        bbr->bbr_cwnd = bbr->bbr_min_cwnd;
937    else if (bbr->bbr_cwnd > bbr->bbr_max_cwnd)
938    {
939        LSQ_DEBUG("exceed max cwnd");
940        bbr->bbr_cwnd = bbr->bbr_max_cwnd;
941    }
942}
943
944
945/* See BbrSender::CalculateRecoveryWindow */
946static void
947calculate_recovery_window (struct lsquic_bbr *bbr, uint64_t bytes_acked,
948                                uint64_t bytes_lost, uint64_t bytes_in_flight)
949{
950    if ((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP)
951                                        && bbr->bbr_mode == BBR_MODE_STARTUP)
952        return;
953
954    if (bbr->bbr_recovery_state == BBR_RS_NOT_IN_RECOVERY)
955        return;
956
957    // Set up the initial recovery window.
958    if (bbr->bbr_recovery_window == 0)
959    {
960        bbr->bbr_recovery_window = bytes_in_flight + bytes_acked;
961        bbr->bbr_recovery_window = MAX(bbr->bbr_min_cwnd,
962                                                    bbr->bbr_recovery_window);
963        return;
964    }
965
966    // Remove losses from the recovery window, while accounting for a potential
967    // integer underflow.
968    if (bbr->bbr_recovery_window >= bytes_lost)
969        bbr->bbr_recovery_window -= bytes_lost;
970    else
971        bbr->bbr_recovery_window = kMaxSegmentSize;
972
973    // In CONSERVATION mode, just subtracting losses is sufficient.  In GROWTH,
974    // release additional |bytes_acked| to achieve a slow-start-like behavior.
975    if (bbr->bbr_recovery_state == BBR_RS_GROWTH)
976        bbr->bbr_recovery_window += bytes_acked;
977
978    // Sanity checks.  Ensure that we always allow to send at least an MSS or
979    // |bytes_acked| in response, whichever is larger.
980    bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window,
981                                            bytes_in_flight + bytes_acked);
982    if (FLAG_quic_bbr_one_mss_conservation)
983        bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window,
984                                            bytes_in_flight + kMaxSegmentSize);
985    bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, bbr->bbr_min_cwnd);
986}
987
988
/* Finish processing one batch of ACK information: advance the round-trip
 * counter, refresh the bandwidth/min-RTT estimates and recovery state,
 * run the BBR mode transitions, and finally recompute the pacing rate,
 * congestion window, and recovery window.
 *
 * `in_flight' is the number of bytes still outstanding after this ACK.
 */
static void
lsquic_bbr_end_ack (void *cong_ctl, uint64_t in_flight)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    int is_round_start, min_rtt_expired;
    uint64_t bytes_acked, excess_acked, bytes_lost;

    /* Must be paired with a preceding cci_begin_ack() call, which sets
     * this flag.
     */
    assert(bbr->bbr_flags & BBR_FLAG_IN_ACK);
    bbr->bbr_flags &= ~BBR_FLAG_IN_ACK;

    LSQ_DEBUG("end_ack; mode: %s; in_flight: %"PRIu64, mode2str[bbr->bbr_mode],
                                                                    in_flight);

    /* Bytes newly acknowledged since the total recorded at begin_ack */
    bytes_acked = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler)
                            - bbr->bbr_ack_state.total_bytes_acked_before;
    if (bbr->bbr_ack_state.acked_bytes)
    {
        /* A round trip ends when a packet sent after the previous round's
         * end is acknowledged -- or when no valid round end has been
         * recorded yet.
         */
        is_round_start = bbr->bbr_ack_state.max_packno
                                    > bbr->bbr_current_round_trip_end
                    || !is_valid_packno(bbr->bbr_current_round_trip_end);
        if (is_round_start)
        {
            ++bbr->bbr_round_count;
            bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno;
            LSQ_DEBUG("up round count to %"PRIu64"; new rt end: %"PRIu64,
                        bbr->bbr_round_count, bbr->bbr_current_round_trip_end);
        }
        min_rtt_expired = update_bandwidth_and_min_rtt(bbr);
        update_recovery_state(bbr, is_round_start);
        excess_acked = update_ack_aggregation_bytes(bbr, bytes_acked);
    }
    else
    {
        /* Nothing was acknowledged: leave the model untouched */
        is_round_start = 0;
        min_rtt_expired = 0;
        excess_acked = 0;
    }

    /* Mode-machine transitions, in fixed order: */
    if (bbr->bbr_mode == BBR_MODE_PROBE_BW)
        update_gain_cycle_phase(bbr, in_flight);

    if (is_round_start && !(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
        check_if_full_bw_reached(bbr);

    maybe_exit_startup_or_drain(bbr, bbr->bbr_ack_state.ack_time, in_flight);

    maybe_enter_or_exit_probe_rtt(bbr, bbr->bbr_ack_state.ack_time,
                                is_round_start, min_rtt_expired, in_flight);

    // Calculate number of packets acked and lost.
    bytes_lost = bbr->bbr_ack_state.lost_bytes;

    // After the model is updated, recalculate the pacing rate and congestion
    // window.
    calculate_pacing_rate(bbr);
    calculate_cwnd(bbr, bytes_acked, excess_acked);
    calculate_recovery_window(bbr, bytes_acked, bytes_lost, in_flight);

    /* We don't need to clean up BW sampler */
}
1049
1050
1051static void
1052lsquic_bbr_cleanup (void *cong_ctl)
1053{
1054    struct lsquic_bbr *const bbr = cong_ctl;
1055
1056    lsquic_bw_sampler_cleanup(&bbr->bbr_bw_sampler);
1057    LSQ_DEBUG("cleanup");
1058}
1059
1060
/* Loss notification: intentionally a no-op -- loss information reaches
 * BBR through the ACK-processing path instead (see lsquic_bbr_end_ack).
 */
static void
lsquic_bbr_loss (void *cong_ctl)
{
}
1063
1064
/* Timeout notification: intentionally a no-op in this implementation. */
static void
lsquic_bbr_timeout (void *cong_ctl)
{
}
1067
1068
1069const struct cong_ctl_if lsquic_cong_bbr_if =
1070{
1071    .cci_ack           = lsquic_bbr_ack,
1072    .cci_begin_ack     = lsquic_bbr_begin_ack,
1073    .cci_end_ack       = lsquic_bbr_end_ack,
1074    .cci_cleanup       = lsquic_bbr_cleanup,
1075    .cci_get_cwnd      = lsquic_bbr_get_cwnd,
1076    .cci_init          = lsquic_bbr_init,
1077    .cci_pacing_rate   = lsquic_bbr_pacing_rate,
1078    .cci_loss          = lsquic_bbr_loss,
1079    .cci_lost          = lsquic_bbr_lost,
1080    .cci_reinit        = lsquic_bbr_reinit,
1081    .cci_timeout       = lsquic_bbr_timeout,
1082    .cci_sent          = lsquic_bbr_sent,
1083    .cci_was_quiet     = lsquic_bbr_was_quiet,
1084};
1085