/* Copyright (c) 2017 - 2022 LiteSpeed Technologies Inc.  See LICENSE. */
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE.chrome file.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

#include "lsquic.h"
#include "lsquic_int_types.h"
#include "lsquic_cong_ctl.h"
#include "lsquic_minmax.h"
#include "lsquic_packet_common.h"
#include "lsquic_packet_out.h"
#include "lsquic_bw_sampler.h"
#include "lsquic_bbr.h"
#include "lsquic_hash.h"
#include "lsquic_conn.h"
#include "lsquic_sfcw.h"
#include "lsquic_conn_flow.h"
#include "lsquic_varint.h"
#include "lsquic_hq.h"
#include "lsquic_stream.h"
#include "lsquic_rtt.h"
#include "lsquic_conn_public.h"
#include "lsquic_util.h"
#include "lsquic_malo.h"
#include "lsquic_crand.h"
#include "lsquic_mm.h"
#include "lsquic_engine_public.h"

#define LSQUIC_LOGGER_MODULE LSQLM_BBR
#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(bbr->bbr_conn_pub->lconn)
#include "lsquic_logger.h"

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

#define ms(val_) ((val_) * 1000)
#define sec(val_) ((val_) * 1000 * 1000)
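// Note: lsquic_time_t values in this file are in microseconds, so, for
// example, ms(200) expands to 200,000 usec and sec(10) to 10,000,000 usec.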

// Default maximum packet size used in the Linux TCP implementation.
// Used in QUIC for congestion window computations in bytes.
#define kDefaultTCPMSS 1460
#define kMaxSegmentSize kDefaultTCPMSS

// Constants based on TCP defaults.
// The minimum CWND to ensure delayed acks don't reduce bandwidth measurements.
// Does not inflate the pacing rate.
#define kDefaultMinimumCongestionWindow  (4 * kDefaultTCPMSS)

// The gain used for STARTUP, equal to 2/ln(2).
#define kDefaultHighGain 2.885f
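// (Roughly speaking, 2/ln(2) is the smallest pacing gain that still lets the
// sending rate double every round trip during STARTUP's exponential growth.)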

// The newly derived gain for STARTUP, equal to 4 * ln(2)
#define kDerivedHighGain 2.773f

// The newly derived CWND gain for STARTUP, 2.
#define kDerivedHighCWNDGain 2.0f

// The gain used in STARTUP after loss has been detected.
// 1.5 is enough to allow for 25% exogenous loss and still observe a 25% growth
// in measured bandwidth.
#define kStartupAfterLossGain 1.5f

// We match SPDY's use of 32 (since we'd compete with SPDY).
#define kInitialCongestionWindow 32

/* Taken from send_algorithm_interface.h */
#define kDefaultMaxCongestionWindowPackets 2000

// The time after which the current min_rtt value expires.
#define kMinRttExpiry sec(10)

// Coefficient to determine if a new RTT is sufficiently similar to min_rtt that
// we don't need to enter PROBE_RTT.
#define kSimilarMinRttThreshold 1.125f

// If the bandwidth does not increase by the factor of |kStartupGrowthTarget|
// within |kRoundTripsWithoutGrowthBeforeExitingStartup| rounds, the connection
// will exit the STARTUP mode.
#define kStartupGrowthTarget 1.25

#define kRoundTripsWithoutGrowthBeforeExitingStartup 3

#define startup_rate_reduction_multiplier_ 0

// The cycle of gains used during the PROBE_BW stage.
static const float kPacingGain[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1};

// The length of the gain cycle.
static const size_t kGainCycleLength = sizeof(kPacingGain)
                                                    / sizeof(kPacingGain[0]);
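// In other words, each cycle has one probing phase (gain 1.25), one draining
// phase (gain 0.75), and six cruising phases at unity gain.  Since
// update_gain_cycle_phase() below advances the phase roughly once per
// min_rtt, a full cycle takes on the order of eight min_rtt.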

// Coefficient of target congestion window to use when basing PROBE_RTT on BDP.
#define kModerateProbeRttMultiplier 0.75

// The maximum packet size of any QUIC packet over IPv6, based on ethernet's max
// size, minus the IP and UDP headers. IPv6 has a 40 byte header, UDP adds an
// additional 8 bytes.  This is a total overhead of 48 bytes.  Ethernet's
// max packet size is 1500 bytes,  1500 - 48 = 1452.
#define kMaxV6PacketSize 1452
// The maximum packet size of any QUIC packet over IPv4.
// 1500(Ethernet) - 20(IPv4 header) - 8(UDP header) = 1472.
#define kMaxV4PacketSize 1472
// The maximum incoming packet size allowed.
#define kMaxIncomingPacketSize kMaxV4PacketSize
// The maximum outgoing packet size allowed.
#define kMaxOutgoingPacketSize kMaxV6PacketSize

// The minimum time the connection can spend in PROBE_RTT mode.
#define kProbeRttTime ms(200)

/* FLAG* are from net/quic/quic_flags_list.h */

// When in STARTUP and recovery, do not add bytes_acked to QUIC BBR's CWND in
// CalculateCongestionWindow()
#define FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery 0

// When true, ensure BBR allows at least one MSS to be sent in response to an
// ACK in packet conservation.
#define FLAG_quic_bbr_one_mss_conservation 0

/* From net/quic/quic_flags_list.h */
#define kCwndGain 2.0


static uint64_t lsquic_bbr_get_cwnd (void *);


static const char *const mode2str[] =
{
    [BBR_MODE_STARTUP]   = "STARTUP",
    [BBR_MODE_DRAIN]     = "DRAIN",
    [BBR_MODE_PROBE_BW]  = "PROBE_BW",
    [BBR_MODE_PROBE_RTT] = "PROBE_RTT",
};


static void
set_mode (struct lsquic_bbr *bbr, enum bbr_mode mode)
{
    if (bbr->bbr_mode != mode)
    {
        LSQ_DEBUG("mode change %s -> %s", mode2str[bbr->bbr_mode],
                                                        mode2str[mode]);
        bbr->bbr_mode = mode;
    }
    else
        LSQ_DEBUG("mode remains %s", mode2str[mode]);
}


static void
set_startup_values (struct lsquic_bbr *bbr)
{
    bbr->bbr_pacing_gain = bbr->bbr_high_gain;
    bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain;
}


static void
init_bbr (struct lsquic_bbr *bbr)
{
    bbr->bbr_mode = BBR_MODE_STARTUP;
    bbr->bbr_round_count = 0;
    minmax_init(&bbr->bbr_max_bandwidth, 10);
    minmax_init(&bbr->bbr_max_ack_height, 10);
    bbr->bbr_aggregation_epoch_bytes = 0;
    bbr->bbr_aggregation_epoch_start_time = 0;
    bbr->bbr_min_rtt = 0;
    bbr->bbr_min_rtt_timestamp = 0;
    bbr->bbr_init_cwnd = kInitialCongestionWindow * kDefaultTCPMSS;
    bbr->bbr_cwnd = kInitialCongestionWindow * kDefaultTCPMSS;
    bbr->bbr_max_cwnd = kDefaultMaxCongestionWindowPackets * kDefaultTCPMSS;
    bbr->bbr_min_cwnd = kDefaultMinimumCongestionWindow;
    bbr->bbr_high_gain = kDefaultHighGain;
    bbr->bbr_high_cwnd_gain = kDefaultHighGain;
    bbr->bbr_drain_gain = 1.0f / kDefaultHighGain;
    bbr->bbr_pacing_rate = BW_ZERO();
    bbr->bbr_pacing_gain = 1.0;
    bbr->bbr_cwnd_gain = 1.0;
    bbr->bbr_num_startup_rtts = kRoundTripsWithoutGrowthBeforeExitingStartup;
    bbr->bbr_flags &= ~BBR_FLAG_EXIT_STARTUP_ON_LOSS;
    bbr->bbr_cycle_current_offset = 0;
    bbr->bbr_last_cycle_start = 0;
    bbr->bbr_flags &= ~BBR_FLAG_IS_AT_FULL_BANDWIDTH;
    bbr->bbr_round_wo_bw_gain = 0;
    bbr->bbr_bw_at_last_round = BW_ZERO();
    bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE;
    bbr->bbr_exit_probe_rtt_at = 0;
    bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED;
    bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
    bbr->bbr_flags &= ~BBR_FLAG_HAS_NON_APP_LIMITED;
    bbr->bbr_flags &= ~BBR_FLAG_FLEXIBLE_APP_LIMITED;
    set_startup_values(bbr);
}


static void
lsquic_bbr_init (void *cong_ctl, const struct lsquic_conn_public *conn_pub,
                                                enum quic_ft_bit retx_frames)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    bbr->bbr_conn_pub = conn_pub;
    lsquic_bw_sampler_init(&bbr->bbr_bw_sampler, conn_pub->lconn, retx_frames);
    bbr->bbr_rtt_stats = &conn_pub->rtt_stats;

    init_bbr(bbr);

    LSQ_DEBUG("initialized");
}


static void
lsquic_bbr_reinit (void *cong_ctl)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    init_bbr(bbr);

    LSQ_DEBUG("re-initialized");
}


static lsquic_time_t
get_min_rtt (const struct lsquic_bbr *bbr)
{
    lsquic_time_t min_rtt;

    if (bbr->bbr_min_rtt)
        return bbr->bbr_min_rtt;
    else
    {
        min_rtt = lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats);
        if (min_rtt == 0)
            min_rtt = 25000;
        return min_rtt;
    }
}


static uint64_t
lsquic_bbr_pacing_rate (void *cong_ctl, int in_recovery)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    lsquic_time_t min_rtt;
    struct bandwidth bw;

    if (!BW_IS_ZERO(&bbr->bbr_pacing_rate))
        bw = bbr->bbr_pacing_rate;
    else
    {
        min_rtt = get_min_rtt(bbr);
        bw = BW_FROM_BYTES_AND_DELTA(bbr->bbr_init_cwnd, min_rtt);
        bw = BW_TIMES(&bw, bbr->bbr_high_cwnd_gain);
    }

    return BW_TO_BYTES_PER_SEC(&bw);
}
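/* A rough illustration of the fallback branch above: with the default
 * initial CWND of 32 * 1460 = 46,720 bytes and the 25 ms fallback min RTT,
 * the base rate is about 46,720 / 0.025 ~= 1.87 MB/s, which the default high
 * gain of 2.885 raises to roughly 5.4 MB/s.
 */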


/* BbrSender::GetTargetCongestionWindow */
static uint64_t
get_target_cwnd (const struct lsquic_bbr *bbr, float gain)
{
    struct bandwidth bw;
    uint64_t bdp, cwnd;

    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
    bdp = get_min_rtt(bbr) * BW_TO_BYTES_PER_SEC(&bw) / 1000000;
    cwnd = gain * bdp;

    // BDP estimate will be zero if no bandwidth samples are available yet.
    if (cwnd == 0)
        cwnd = gain * bbr->bbr_init_cwnd;

    return MAX(cwnd, bbr->bbr_min_cwnd);
}
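/* For instance (illustration only): with a bandwidth estimate of 100 Mbps
 * (12,500,000 bytes/s) and a min RTT of 40,000 usec, the BDP is
 * 40,000 * 12,500,000 / 1,000,000 = 500,000 bytes, so a gain of 2.0 yields
 * a target window of 1,000,000 bytes (about 685 full-sized packets).
 */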


/* See BbrSender::IsPipeSufficientlyFull */
static int
is_pipe_sufficiently_full (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
{
    // See if we need more bytes in flight to see more bandwidth.
    if (bbr->bbr_mode == BBR_MODE_STARTUP)
        // STARTUP exits if it doesn't observe a 25% bandwidth increase, so
        // the CWND must be more than 25% above the target.
        return bytes_in_flight >= get_target_cwnd(bbr, 1.5);
    else if (bbr->bbr_pacing_gain > 1)
        // Super-unity PROBE_BW doesn't exit until 1.25 * BDP is achieved.
        return bytes_in_flight >= get_target_cwnd(bbr, bbr->bbr_pacing_gain);
    else
        // If bytes_in_flight are above the target congestion window, it should
        // be possible to observe the same or more bandwidth if it's available.
        return bytes_in_flight >= get_target_cwnd(bbr, 1.1f);
}


static void
lsquic_bbr_was_quiet (void *cong_ctl, lsquic_time_t now, uint64_t in_flight)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    LSQ_DEBUG("was quiet");         /* Do nothing */
}


/* See BbrSender::OnApplicationLimited */
static void
bbr_app_limited (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
{
    uint64_t cwnd;

    cwnd = lsquic_bbr_get_cwnd(bbr);
    if (bytes_in_flight >= cwnd)
        return;
    if ((bbr->bbr_flags & BBR_FLAG_FLEXIBLE_APP_LIMITED)
                            && is_pipe_sufficiently_full(bbr, bytes_in_flight))
        return;

    bbr->bbr_flags |= BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT;
    lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler);
    LSQ_DEBUG("becoming application-limited.  Last sent packet: %"PRIu64"; "
                            "CWND: %"PRIu64, bbr->bbr_last_sent_packno, cwnd);
}


static void
lsquic_bbr_ack (void *cong_ctl, struct lsquic_packet_out *packet_out,
                  unsigned packet_sz, lsquic_time_t now_time, int app_limited)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    struct bw_sample *sample;

    assert(bbr->bbr_flags & BBR_FLAG_IN_ACK);

    sample = lsquic_bw_sampler_packet_acked(&bbr->bbr_bw_sampler, packet_out,
                                                bbr->bbr_ack_state.ack_time);
    if (sample)
        TAILQ_INSERT_TAIL(&bbr->bbr_ack_state.samples, sample, next);

    if (!is_valid_packno(bbr->bbr_ack_state.max_packno)
                /* Packet ordering is checked for, and warned about, in
                 * lsquic_senhist_add().
                 */
            || packet_out->po_packno > bbr->bbr_ack_state.max_packno)
        bbr->bbr_ack_state.max_packno = packet_out->po_packno;
    bbr->bbr_ack_state.acked_bytes += packet_sz;
}


static void
lsquic_bbr_sent (void *cong_ctl, struct lsquic_packet_out *packet_out,
                                        uint64_t in_flight, int app_limited)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    if (!(packet_out->po_flags & PO_MINI))
        lsquic_bw_sampler_packet_sent(&bbr->bbr_bw_sampler, packet_out,
                                                                in_flight);

    /* We rely on the assumption that sent packet numbers are always
     * increasing.
     */
    bbr->bbr_last_sent_packno = packet_out->po_packno;

    if (app_limited)
        bbr_app_limited(bbr, in_flight);
}


static void
lsquic_bbr_lost (void *cong_ctl, struct lsquic_packet_out *packet_out,
                                                        unsigned packet_sz)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    lsquic_bw_sampler_packet_lost(&bbr->bbr_bw_sampler, packet_out);
    bbr->bbr_ack_state.has_losses = 1;
    bbr->bbr_ack_state.lost_bytes += packet_sz;
}


static void
lsquic_bbr_begin_ack (void *cong_ctl, lsquic_time_t ack_time, uint64_t in_flight)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    assert(!(bbr->bbr_flags & BBR_FLAG_IN_ACK));
    bbr->bbr_flags |= BBR_FLAG_IN_ACK;
    memset(&bbr->bbr_ack_state, 0, sizeof(bbr->bbr_ack_state));
    TAILQ_INIT(&bbr->bbr_ack_state.samples);
    bbr->bbr_ack_state.ack_time = ack_time;
    bbr->bbr_ack_state.max_packno = UINT64_MAX;
    bbr->bbr_ack_state.in_flight = in_flight;
    bbr->bbr_ack_state.total_bytes_acked_before
                        = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler);
}


/* Based on BbrSender::ShouldExtendMinRttExpiry() */
static int
should_extend_min_rtt_expiry (const struct lsquic_bbr *bbr)
{
    int increased_since_last_probe;

    if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
                          |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED))
            == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
               |BBR_FLAG_PROBE_RTT_DISABLED_IF_APP_LIMITED))
        // Extend the current min_rtt if we've been app limited recently.
        return 1;

    increased_since_last_probe = bbr->bbr_min_rtt_since_last_probe
                                > bbr->bbr_min_rtt * kSimilarMinRttThreshold;
    if ((bbr->bbr_flags & (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
                          |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT))
            == (BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT
               |BBR_FLAG_PROBE_RTT_SKIPPED_IF_SIMILAR_RTT)
            && !increased_since_last_probe)
        // Extend the current min_rtt if we've been app limited recently and an
        // rtt has been measured in that time that's less than 12.5% more than
        // the current min_rtt.
        return 1;

    return 0;
}


/* Based on BbrSender::UpdateBandwidthAndMinRtt */
/* Returns true if min RTT expired, false otherwise */
static int
update_bandwidth_and_min_rtt (struct lsquic_bbr *bbr)
{
    struct bw_sample *sample, *next_sample;
    uint64_t sample_min_rtt;
    int min_rtt_expired;

    sample_min_rtt = UINT64_MAX;
    for (sample = TAILQ_FIRST(&bbr->bbr_ack_state.samples); sample;
                                                        sample = next_sample)
    {
        next_sample = TAILQ_NEXT(sample, next);

        if (sample->is_app_limited)
            bbr->bbr_flags |= BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
        else
        {
            bbr->bbr_flags &= ~BBR_FLAG_LAST_SAMPLE_APP_LIMITED;
            bbr->bbr_flags |=  BBR_FLAG_HAS_NON_APP_LIMITED;
        }

        if (sample_min_rtt == UINT64_MAX || sample->rtt < sample_min_rtt)
            sample_min_rtt = sample->rtt;

        if (!sample->is_app_limited
                    || BW_VALUE(&sample->bandwidth)
                                    > minmax_get(&bbr->bbr_max_bandwidth))
            minmax_upmax(&bbr->bbr_max_bandwidth, bbr->bbr_round_count,
                                                BW_VALUE(&sample->bandwidth));

        lsquic_malo_put(sample);
    }

    if (sample_min_rtt == UINT64_MAX)
        return 0;

    bbr->bbr_min_rtt_since_last_probe
                    = MIN(bbr->bbr_min_rtt_since_last_probe, sample_min_rtt);

    min_rtt_expired = bbr->bbr_min_rtt != 0 && (bbr->bbr_ack_state.ack_time
                                > bbr->bbr_min_rtt_timestamp + kMinRttExpiry);
    if (min_rtt_expired || sample_min_rtt < bbr->bbr_min_rtt
                                                    || 0 == bbr->bbr_min_rtt)
    {
        if (min_rtt_expired && should_extend_min_rtt_expiry(bbr))
        {
            LSQ_DEBUG("min rtt expiration extended, stay at: %"PRIu64,
                bbr->bbr_min_rtt);
            min_rtt_expired = 0;
        }
        else
        {
            LSQ_DEBUG("min rtt updated: %"PRIu64" -> %"PRIu64,
                bbr->bbr_min_rtt, sample_min_rtt);
            bbr->bbr_min_rtt = sample_min_rtt;
        }
        bbr->bbr_min_rtt_timestamp = bbr->bbr_ack_state.ack_time;
        bbr->bbr_min_rtt_since_last_probe = UINT64_MAX;
        bbr->bbr_flags &= ~BBR_FLAG_APP_LIMITED_SINCE_LAST_PROBE_RTT;
    }

    return min_rtt_expired;
}


/* Based on BbrSender::UpdateRecoveryState() */
static void
update_recovery_state (struct lsquic_bbr *bbr, int is_round_start)
{
    // Exit recovery when there are no losses for a round.
    if (bbr->bbr_ack_state.has_losses)
        bbr->bbr_end_recovery_at = bbr->bbr_last_sent_packno;

    switch (bbr->bbr_recovery_state)
    {
    case BBR_RS_NOT_IN_RECOVERY:
        // Enter conservation on the first loss.
        if (bbr->bbr_ack_state.has_losses)
        {
            bbr->bbr_recovery_state = BBR_RS_CONSERVATION;
            // This will cause the |bbr_recovery_window| to be set to the
            // correct value in CalculateRecoveryWindow().
            bbr->bbr_recovery_window = 0;
            // Since the conservation phase is meant to last for a whole
            // round, extend the current round as if it were started right now.
            bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno;
        }
        break;
    case BBR_RS_CONSERVATION:
        if (is_round_start)
            bbr->bbr_recovery_state = BBR_RS_GROWTH;
        /* Fall-through */
    case BBR_RS_GROWTH:
        // Exit recovery if appropriate.
        if (!bbr->bbr_ack_state.has_losses
                && bbr->bbr_ack_state.max_packno > bbr->bbr_end_recovery_at)
            bbr->bbr_recovery_state = BBR_RS_NOT_IN_RECOVERY;
        break;
    }
}
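/* To summarize the state machine above: the first loss moves us from
 * NOT_IN_RECOVERY to CONSERVATION; the next round start promotes
 * CONSERVATION to GROWTH; and an ACK that reports no new losses and covers
 * a packet sent after bbr_end_recovery_at drops us back to NOT_IN_RECOVERY.
 */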


static uint64_t
update_ack_aggregation_bytes (struct lsquic_bbr *bbr,
                                                uint64_t newly_acked_bytes)
{
    const lsquic_time_t ack_time = bbr->bbr_ack_state.ack_time;
    uint64_t expected_bytes_acked, diff;

    // Compute how many bytes are expected to be delivered, assuming max
    // bandwidth is correct.
    expected_bytes_acked = minmax_get(&bbr->bbr_max_bandwidth)
                        * (ack_time - bbr->bbr_aggregation_epoch_start_time);

    // Reset the current aggregation epoch as soon as the ack arrival rate is
    // less than or equal to the max bandwidth.
    if (bbr->bbr_aggregation_epoch_bytes <= expected_bytes_acked)
    {
        // Reset to start measuring a new aggregation epoch.
        bbr->bbr_aggregation_epoch_bytes = newly_acked_bytes;
        bbr->bbr_aggregation_epoch_start_time = ack_time;
        return 0;
    }

    // Compute how many extra bytes were delivered vs max bandwidth.
    // Include the bytes most recently acknowledged to account for stretch acks.
    bbr->bbr_aggregation_epoch_bytes += newly_acked_bytes;
    diff = bbr->bbr_aggregation_epoch_bytes - expected_bytes_acked;
    minmax_upmax(&bbr->bbr_max_ack_height, bbr->bbr_round_count, diff);
    return diff;
}


/* See BbrSender::UpdateGainCyclePhase() */
static void
update_gain_cycle_phase (struct lsquic_bbr *bbr, uint64_t bytes_in_flight)
{
    const uint64_t prior_in_flight = bbr->bbr_ack_state.in_flight;
    const lsquic_time_t now = bbr->bbr_ack_state.ack_time;
    // In most cases, the cycle is advanced after an RTT passes.
    int should_advance_gain_cycling
        = now - bbr->bbr_last_cycle_start > get_min_rtt(bbr);

    // If the pacing gain is above 1.0, the connection is trying to probe the
    // bandwidth by increasing the number of bytes in flight to at least
    // pacing_gain * BDP.  Make sure that it actually reaches the target, as
    // long as there are no losses suggesting that the buffers are not able to
    // hold that much.
    if (bbr->bbr_pacing_gain > 1.0
            && !bbr->bbr_ack_state.has_losses
            && prior_in_flight < get_target_cwnd(bbr, bbr->bbr_pacing_gain))
        should_advance_gain_cycling = 0;

    /* Several optimizations are possible here: "else if" instead of "if", as
     * well as not calling get_target_cwnd() if `should_advance_gain_cycling'
     * is already set to the target value.
     */

    // If pacing gain is below 1.0, the connection is trying to drain the extra
    // queue which could have been incurred by probing prior to it.  If the
    // number of bytes in flight falls down to the estimated BDP value earlier,
    // conclude that the queue has been successfully drained and exit this cycle
    // early.
    if (bbr->bbr_pacing_gain < 1.0
                                && bytes_in_flight <= get_target_cwnd(bbr, 1))
        should_advance_gain_cycling = 1;

    if (should_advance_gain_cycling)
    {
        bbr->bbr_cycle_current_offset =
                        (bbr->bbr_cycle_current_offset + 1) % kGainCycleLength;
        bbr->bbr_last_cycle_start = now;
        // Stay in low gain mode until the target BDP is hit.  Low gain mode
        // will be exited immediately when the target BDP is achieved.
        if ((bbr->bbr_flags & BBR_FLAG_DRAIN_TO_TARGET)
                && bbr->bbr_pacing_gain < 1
                && kPacingGain[bbr->bbr_cycle_current_offset] == 1
                && bytes_in_flight > get_target_cwnd(bbr, 1))
              return;
        bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset];
        LSQ_DEBUG("advanced gain cycle, pacing gain set to %.2f",
                                                        bbr->bbr_pacing_gain);
    }
}


/* BbrSender::InRecovery() */
static int
in_recovery (const struct lsquic_bbr *bbr)
{
    return bbr->bbr_recovery_state != BBR_RS_NOT_IN_RECOVERY;
}


/* See BbrSender::CheckIfFullBandwidthReached() */
static void
check_if_full_bw_reached (struct lsquic_bbr *bbr)
{
    struct bandwidth target, bw;

    if (bbr->bbr_flags & BBR_FLAG_LAST_SAMPLE_APP_LIMITED)
    {
        LSQ_DEBUG("last sample app limited: full BW not reached");
        return;
    }

    target = BW_TIMES(&bbr->bbr_bw_at_last_round, kStartupGrowthTarget);
    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
    if (BW_VALUE(&bw) >= BW_VALUE(&target))
    {
        bbr->bbr_bw_at_last_round = bw;
        bbr->bbr_round_wo_bw_gain = 0;
        if (bbr->bbr_flags & BBR_FLAG_EXPIRE_ACK_AGG_IN_STARTUP)
            // Expire old excess delivery measurements now that bandwidth
            // increased.
            minmax_reset(&bbr->bbr_max_ack_height,
                        ((struct minmax_sample) { bbr->bbr_round_count, 0, }));
        LSQ_DEBUG("BW estimate %"PRIu64"bps greater than or equal to target "
            "%"PRIu64"bps: full BW not reached",
            BW_VALUE(&bw), BW_VALUE(&target));
        return;
    }

    ++bbr->bbr_round_wo_bw_gain;
    if ((bbr->bbr_round_wo_bw_gain >= bbr->bbr_num_startup_rtts)
            || ((bbr->bbr_flags & BBR_FLAG_EXIT_STARTUP_ON_LOSS)
                                                    && in_recovery(bbr)))
    {
        assert(bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED);  /* DCHECK */
        bbr->bbr_flags |= BBR_FLAG_IS_AT_FULL_BANDWIDTH;
        LSQ_DEBUG("reached full BW");
    }
    else
        LSQ_DEBUG("rounds w/o gain: %u, full BW not reached",
                                                bbr->bbr_round_wo_bw_gain);
}


/* See BbrSender::OnExitStartup */
static void
on_exit_startup (struct lsquic_bbr *bbr, lsquic_time_t now)
{
    assert(bbr->bbr_mode == BBR_MODE_STARTUP);
    /* Apparently this method is just to update stats, something that we
     * don't do yet.
     */
}


/* See BbrSender::EnterProbeBandwidthMode */
static void
enter_probe_bw_mode (struct lsquic_bbr *bbr, lsquic_time_t now)
{
    uint8_t rand;

    set_mode(bbr, BBR_MODE_PROBE_BW);
    bbr->bbr_cwnd_gain = kCwndGain;

    // Pick a random offset for the gain cycle out of {0, 2..7} range. 1 is
    // excluded because in that case increased gain and decreased gain would not
    // follow each other.
    rand = lsquic_crand_get_byte(bbr->bbr_conn_pub->enpub->enp_crand);
    bbr->bbr_cycle_current_offset = rand % (kGainCycleLength - 1);
    if (bbr->bbr_cycle_current_offset >= 1)
        ++bbr->bbr_cycle_current_offset;

    bbr->bbr_last_cycle_start = now;
    bbr->bbr_pacing_gain = kPacingGain[bbr->bbr_cycle_current_offset];
}
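/* The mapping above works as follows: rand % (kGainCycleLength - 1) yields a
 * value in {0..6}; every non-zero value is then shifted up by one, so the
 * starting offset is drawn from {0, 2, 3, 4, 5, 6, 7} and the draining phase
 * (offset 1, gain 0.75) is never the starting point.
 */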


/* See BbrSender::EnterStartupMode */
static void
enter_startup_mode (struct lsquic_bbr *bbr, lsquic_time_t now)
{
    set_mode(bbr, BBR_MODE_STARTUP);
    set_startup_values(bbr);
}


/* See BbrSender::MaybeExitStartupOrDrain() */
static void
maybe_exit_startup_or_drain (struct lsquic_bbr *bbr, lsquic_time_t now,
                                                    uint64_t bytes_in_flight)
{
    uint64_t target_cwnd;

    if (bbr->bbr_mode == BBR_MODE_STARTUP
                        && (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
    {
        on_exit_startup(bbr, now);
        set_mode(bbr, BBR_MODE_DRAIN);
        bbr->bbr_pacing_gain = bbr->bbr_drain_gain;
        bbr->bbr_cwnd_gain = bbr->bbr_high_cwnd_gain;
    }

    if (bbr->bbr_mode == BBR_MODE_DRAIN)
    {
        target_cwnd = get_target_cwnd(bbr, 1);
        LSQ_DEBUG("%s: bytes in flight: %"PRIu64"; target cwnd: %"PRIu64,
                                    __func__, bytes_in_flight, target_cwnd);
        if (bytes_in_flight <= target_cwnd)
            enter_probe_bw_mode(bbr, now);
    }
}


static int
in_slow_start (const struct lsquic_bbr *bbr)
{
    return bbr->bbr_mode == BBR_MODE_STARTUP;
}


/* See QuicByteCount BbrSender::ProbeRttCongestionWindow() */
static uint64_t
get_probe_rtt_cwnd (const struct lsquic_bbr *bbr)
{
    if (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_BASED_ON_BDP)
        return get_target_cwnd(bbr, kModerateProbeRttMultiplier);
    else
        return bbr->bbr_min_cwnd;
}


static uint64_t
lsquic_bbr_get_cwnd (void *cong_ctl)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    uint64_t cwnd;

    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
        cwnd = get_probe_rtt_cwnd(bbr);
    else if (in_recovery(bbr) &&
                !((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP)
                                    && bbr->bbr_mode == BBR_MODE_STARTUP))
        cwnd = MIN(bbr->bbr_cwnd, bbr->bbr_recovery_window);
    else
        cwnd = bbr->bbr_cwnd;

    return cwnd;
}


/* See BbrSender::MaybeEnterOrExitProbeRtt */
static void
maybe_enter_or_exit_probe_rtt (struct lsquic_bbr *bbr, lsquic_time_t now,
            int is_round_start, int min_rtt_expired, uint64_t bytes_in_flight)
{
    if (min_rtt_expired
            && !(bbr->bbr_flags & BBR_FLAG_EXITING_QUIESCENCE)
                && bbr->bbr_mode != BBR_MODE_PROBE_RTT)
    {
        if (in_slow_start(bbr))
            on_exit_startup(bbr, now);
        set_mode(bbr, BBR_MODE_PROBE_RTT);
        bbr->bbr_pacing_gain = 1;
        // Do not decide on the time to exit PROBE_RTT until the
        // |bytes_in_flight| is at the target small value.
        bbr->bbr_exit_probe_rtt_at = 0;
    }

    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
    {
        lsquic_bw_sampler_app_limited(&bbr->bbr_bw_sampler);
        LSQ_DEBUG("%s: exit probe at: %"PRIu64"; now: %"PRIu64
            "; round start: %d; round passed: %d; rtt: %"PRIu64" usec",
            __func__, bbr->bbr_exit_probe_rtt_at, now, is_round_start,
            !!(bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED),
            lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats));
        if (bbr->bbr_exit_probe_rtt_at == 0)
        {
            // If the window has reached the appropriate size, schedule exiting
            // PROBE_RTT.  The CWND during PROBE_RTT is
            // kMinimumCongestionWindow, but we allow an extra packet since QUIC
            // checks CWND before sending a packet.
            if (bytes_in_flight
                        < get_probe_rtt_cwnd(bbr) + kMaxOutgoingPacketSize)
            {
                bbr->bbr_exit_probe_rtt_at = now + kProbeRttTime;
                bbr->bbr_flags &= ~BBR_FLAG_PROBE_RTT_ROUND_PASSED;
            }
        }
        else
        {
            if (is_round_start)
                bbr->bbr_flags |= BBR_FLAG_PROBE_RTT_ROUND_PASSED;
            if (now >= bbr->bbr_exit_probe_rtt_at
                        && (bbr->bbr_flags & BBR_FLAG_PROBE_RTT_ROUND_PASSED))
            {
                bbr->bbr_min_rtt_timestamp = now;
                if (!(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
                    enter_startup_mode(bbr, now);
                else
                    enter_probe_bw_mode(bbr, now);
            }
        }
    }

    bbr->bbr_flags &= ~BBR_FLAG_EXITING_QUIESCENCE;
}


/* See BbrSender::CalculatePacingRate */
static void
calculate_pacing_rate (struct lsquic_bbr *bbr)
{
    struct bandwidth bw, target_rate;

    bw = BW(minmax_get(&bbr->bbr_max_bandwidth));
    if (BW_IS_ZERO(&bw))
        return;

    LSQ_DEBUG("BW estimate: %"PRIu64, BW_VALUE(&bw));

    target_rate = BW_TIMES(&bw, bbr->bbr_pacing_gain);
    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
    {
        bbr->bbr_pacing_rate = target_rate;
        return;
    }

    // Pace at the rate of initial_window / RTT as soon as RTT measurements are
    // available.
    if (BW_IS_ZERO(&bbr->bbr_pacing_rate)
            && 0 != lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats))
    {
        bbr->bbr_pacing_rate = BW_FROM_BYTES_AND_DELTA(
            bbr->bbr_init_cwnd,
            lsquic_rtt_stats_get_min_rtt(bbr->bbr_rtt_stats));
        return;
    }

    // Slow the pacing rate in STARTUP once loss has ever been detected.
    const int has_ever_detected_loss = bbr->bbr_end_recovery_at != 0;
    if (has_ever_detected_loss
            && (bbr->bbr_flags & (BBR_FLAG_SLOWER_STARTUP
                                  |BBR_FLAG_HAS_NON_APP_LIMITED))
                == (BBR_FLAG_SLOWER_STARTUP|BBR_FLAG_HAS_NON_APP_LIMITED))
    {
        bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupAfterLossGain);
        return;
    }

    // Slow the pacing rate in STARTUP by the bytes_lost / CWND.
    if (startup_rate_reduction_multiplier_ != 0
            && has_ever_detected_loss
                && (bbr->bbr_flags & BBR_FLAG_HAS_NON_APP_LIMITED))
    {
        bbr->bbr_pacing_rate = BW_TIMES(&target_rate,
            (1 - (bbr->bbr_startup_bytes_lost
                    * startup_rate_reduction_multiplier_ * 1.0f
                        / bbr->bbr_cwnd_gain)));
        // Ensure the pacing rate doesn't drop below the startup growth target
        // times the bandwidth estimate.
        if (BW_VALUE(&bbr->bbr_pacing_rate)
                                        < BW_VALUE(&bw) * kStartupGrowthTarget)
            bbr->bbr_pacing_rate = BW_TIMES(&bw, kStartupGrowthTarget);
        return;
    }

    // Do not decrease the pacing rate during startup.
    if (BW_VALUE(&bbr->bbr_pacing_rate) < BW_VALUE(&target_rate))
        bbr->bbr_pacing_rate = target_rate;
}


/* See BbrSender::CalculateCongestionWindow */
static void
calculate_cwnd (struct lsquic_bbr *bbr, uint64_t bytes_acked,
                                                  uint64_t excess_acked)
{
    if (bbr->bbr_mode == BBR_MODE_PROBE_RTT)
        return;

    uint64_t target_window = get_target_cwnd(bbr, bbr->bbr_cwnd_gain);
    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
        // Add the max recently measured ack aggregation to CWND.
        target_window += minmax_get(&bbr->bbr_max_ack_height);
    else if (bbr->bbr_flags & BBR_FLAG_ENABLE_ACK_AGG_IN_STARTUP)
        // Add the most recent excess acked.  Because CWND never decreases in
        // STARTUP, this will automatically create a very localized max filter.
        target_window += excess_acked;

    // Instead of immediately setting the target CWND as the new one, BBR grows
    // the CWND towards |target_window| by only increasing it |bytes_acked| at a
    // time.
    const int add_bytes_acked =
        !FLAGS_quic_bbr_no_bytes_acked_in_startup_recovery || !in_recovery(bbr);
    if (bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH)
        bbr->bbr_cwnd = MIN(target_window, bbr->bbr_cwnd + bytes_acked);
    else if (add_bytes_acked &&
             (bbr->bbr_cwnd < target_window ||
              lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler)
                                                        < bbr->bbr_init_cwnd))
        // If the connection is not yet out of startup phase, do not decrease
        // the window.
        bbr->bbr_cwnd += bytes_acked;

    // Enforce the limits on the congestion window.
    if (bbr->bbr_cwnd < bbr->bbr_min_cwnd)
        bbr->bbr_cwnd = bbr->bbr_min_cwnd;
    else if (bbr->bbr_cwnd > bbr->bbr_max_cwnd)
    {
        LSQ_DEBUG("exceed max cwnd");
        bbr->bbr_cwnd = bbr->bbr_max_cwnd;
    }
}
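/* For example (illustration only): if target_window is 1,000,000 bytes, the
 * current CWND is 800,000 bytes, and 14,600 bytes were just acked, the CWND
 * grows to 814,600 bytes; once at full bandwidth, the same growth would be
 * capped at target_window.
 */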


/* See BbrSender::CalculateRecoveryWindow */
static void
calculate_recovery_window (struct lsquic_bbr *bbr, uint64_t bytes_acked,
                                uint64_t bytes_lost, uint64_t bytes_in_flight)
{
    if ((bbr->bbr_flags & BBR_FLAG_RATE_BASED_STARTUP)
                                        && bbr->bbr_mode == BBR_MODE_STARTUP)
        return;

    if (bbr->bbr_recovery_state == BBR_RS_NOT_IN_RECOVERY)
        return;

    // Set up the initial recovery window.
    if (bbr->bbr_recovery_window == 0)
    {
        bbr->bbr_recovery_window = bytes_in_flight + bytes_acked;
        bbr->bbr_recovery_window = MAX(bbr->bbr_min_cwnd,
                                                    bbr->bbr_recovery_window);
        return;
    }

    // Remove losses from the recovery window, while accounting for a potential
    // integer underflow.
    if (bbr->bbr_recovery_window >= bytes_lost)
        bbr->bbr_recovery_window -= bytes_lost;
    else
        bbr->bbr_recovery_window = kMaxSegmentSize;

    // In CONSERVATION mode, just subtracting losses is sufficient.  In GROWTH,
    // release additional |bytes_acked| to achieve a slow-start-like behavior.
    if (bbr->bbr_recovery_state == BBR_RS_GROWTH)
        bbr->bbr_recovery_window += bytes_acked;

    // Sanity checks.  Ensure that we always allow to send at least an MSS or
    // |bytes_acked| in response, whichever is larger.
    bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window,
                                            bytes_in_flight + bytes_acked);
    if (FLAG_quic_bbr_one_mss_conservation)
        bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window,
                                            bytes_in_flight + kMaxSegmentSize);
    bbr->bbr_recovery_window = MAX(bbr->bbr_recovery_window, bbr->bbr_min_cwnd);
}


static void
lsquic_bbr_end_ack (void *cong_ctl, uint64_t in_flight)
{
    struct lsquic_bbr *const bbr = cong_ctl;
    int is_round_start, min_rtt_expired;
    uint64_t bytes_acked, excess_acked, bytes_lost;

    assert(bbr->bbr_flags & BBR_FLAG_IN_ACK);
    bbr->bbr_flags &= ~BBR_FLAG_IN_ACK;

    LSQ_DEBUG("end_ack; mode: %s; in_flight: %"PRIu64, mode2str[bbr->bbr_mode],
                                                                    in_flight);

    bytes_acked = lsquic_bw_sampler_total_acked(&bbr->bbr_bw_sampler)
                            - bbr->bbr_ack_state.total_bytes_acked_before;
    if (bbr->bbr_ack_state.acked_bytes)
    {
        is_round_start = bbr->bbr_ack_state.max_packno
                                    > bbr->bbr_current_round_trip_end
                    || !is_valid_packno(bbr->bbr_current_round_trip_end);
        if (is_round_start)
        {
            ++bbr->bbr_round_count;
            bbr->bbr_current_round_trip_end = bbr->bbr_last_sent_packno;
            LSQ_DEBUG("up round count to %"PRIu64"; new rt end: %"PRIu64,
                        bbr->bbr_round_count, bbr->bbr_current_round_trip_end);
        }
        min_rtt_expired = update_bandwidth_and_min_rtt(bbr);
        update_recovery_state(bbr, is_round_start);
        excess_acked = update_ack_aggregation_bytes(bbr, bytes_acked);
    }
    else
    {
        is_round_start = 0;
        min_rtt_expired = 0;
        excess_acked = 0;
    }

    if (bbr->bbr_mode == BBR_MODE_PROBE_BW)
        update_gain_cycle_phase(bbr, in_flight);

    if (is_round_start && !(bbr->bbr_flags & BBR_FLAG_IS_AT_FULL_BANDWIDTH))
        check_if_full_bw_reached(bbr);

    maybe_exit_startup_or_drain(bbr, bbr->bbr_ack_state.ack_time, in_flight);

    maybe_enter_or_exit_probe_rtt(bbr, bbr->bbr_ack_state.ack_time,
                                is_round_start, min_rtt_expired, in_flight);

    // Record the number of bytes lost during this ACK event.
    bytes_lost = bbr->bbr_ack_state.lost_bytes;

    // After the model is updated, recalculate the pacing rate and congestion
    // window.
    calculate_pacing_rate(bbr);
    calculate_cwnd(bbr, bytes_acked, excess_acked);
    calculate_recovery_window(bbr, bytes_acked, bytes_lost, in_flight);

    /* We don't need to clean up BW sampler */
}


static void
lsquic_bbr_cleanup (void *cong_ctl)
{
    struct lsquic_bbr *const bbr = cong_ctl;

    lsquic_bw_sampler_cleanup(&bbr->bbr_bw_sampler);
    LSQ_DEBUG("cleanup");
}


static void
lsquic_bbr_loss (void *cong_ctl) {   /* Noop */   }


static void
lsquic_bbr_timeout (void *cong_ctl) {   /* Noop */   }


const struct cong_ctl_if lsquic_cong_bbr_if =
{
    .cci_ack           = lsquic_bbr_ack,
    .cci_begin_ack     = lsquic_bbr_begin_ack,
    .cci_end_ack       = lsquic_bbr_end_ack,
    .cci_cleanup       = lsquic_bbr_cleanup,
    .cci_get_cwnd      = lsquic_bbr_get_cwnd,
    .cci_init          = lsquic_bbr_init,
    .cci_pacing_rate   = lsquic_bbr_pacing_rate,
    .cci_loss          = lsquic_bbr_loss,
    .cci_lost          = lsquic_bbr_lost,
    .cci_reinit        = lsquic_bbr_reinit,
    .cci_timeout       = lsquic_bbr_timeout,
    .cci_sent          = lsquic_bbr_sent,
    .cci_was_quiet     = lsquic_bbr_was_quiet,
};