1a74702c6SGeorge Wang/* Copyright (c) 2017 - 2022 LiteSpeed Technologies Inc.  See LICENSE. */
25392f7a3SLiteSpeed Tech#include <errno.h>
35392f7a3SLiteSpeed Tech#include <inttypes.h>
45392f7a3SLiteSpeed Tech#include <stddef.h>
55392f7a3SLiteSpeed Tech#include <stdint.h>
65392f7a3SLiteSpeed Tech#include <stdlib.h>
75392f7a3SLiteSpeed Tech#include <string.h>
85392f7a3SLiteSpeed Tech#include <sys/queue.h>
95392f7a3SLiteSpeed Tech
105392f7a3SLiteSpeed Tech#include "lsquic.h"
115392f7a3SLiteSpeed Tech#include "lsquic_types.h"
125392f7a3SLiteSpeed Tech#include "lsquic_int_types.h"
135392f7a3SLiteSpeed Tech#include "lsquic_xxhash.h"
145392f7a3SLiteSpeed Tech#include "lsquic_purga.h"
155392f7a3SLiteSpeed Tech
165392f7a3SLiteSpeed Tech#define LSQUIC_LOGGER_MODULE LSQLM_PURGA
175392f7a3SLiteSpeed Tech#include "lsquic_logger.h"
185392f7a3SLiteSpeed Tech
195392f7a3SLiteSpeed Tech
205392f7a3SLiteSpeed Tech/* To avoid scannig the whole list of CIDs, we use a Bloom filter.
215392f7a3SLiteSpeed Tech *
225392f7a3SLiteSpeed Tech * The Bloom filter is constructed using a 8192-bit bit field and 6 hash
235392f7a3SLiteSpeed Tech * functions.  With 273 elements per page, this gives us 0.004% possibility
245392f7a3SLiteSpeed Tech * of a false positive.  In other words, when we do have to search a page
255392f7a3SLiteSpeed Tech * for a particular CID, the chance of finding the CID is 99.99%.
265392f7a3SLiteSpeed Tech *
275392f7a3SLiteSpeed Tech * Quick calc:
285392f7a3SLiteSpeed Tech *   perl -E '$k=6;$m=1<<13;$n=273;printf("%f\n", (1-exp(1)**-($k*$n/$m))**$k)'
295392f7a3SLiteSpeed Tech *
305392f7a3SLiteSpeed Tech * To extract 6 13-bit values from a 64-bit integer, they are overlapped:
315392f7a3SLiteSpeed Tech *  0         10        20        30        40        50        60
325392f7a3SLiteSpeed Tech * +----------------------------------------------------------------+
335392f7a3SLiteSpeed Tech * |                                                                |
345392f7a3SLiteSpeed Tech * +----------------------------------------------------------------+
355392f7a3SLiteSpeed Tech *  1111111111111
365392f7a3SLiteSpeed Tech *            2222222222222
375392f7a3SLiteSpeed Tech *                      3333333333333
385392f7a3SLiteSpeed Tech *                                4444444444444
395392f7a3SLiteSpeed Tech *                                          5555555555555
405392f7a3SLiteSpeed Tech *                                                    6666666666666
415392f7a3SLiteSpeed Tech *
425392f7a3SLiteSpeed Tech * This is not 100% kosher, but having 6 functions gives a better guarantee
435392f7a3SLiteSpeed Tech * and it happens to work in practice.
445392f7a3SLiteSpeed Tech */
455392f7a3SLiteSpeed Tech
465392f7a3SLiteSpeed Tech#define BLOOM_N_FUNCS 6
475392f7a3SLiteSpeed Tech#define BLOOM_SHIFT 10
485392f7a3SLiteSpeed Tech#define BLOOM_N_BITS (1 << 13)
495392f7a3SLiteSpeed Techtypedef uint64_t bloom_mask_el_t;
505392f7a3SLiteSpeed Tech#define BLOOM_SET_SHIFT 6   /* log2(sizeof(bloom_mask_el_t)) */
515392f7a3SLiteSpeed Tech#define BLOOM_BITS_PER_EL (1 << BLOOM_SET_SHIFT)
525392f7a3SLiteSpeed Tech#define BLOOM_N_MASK_ELS (BLOOM_N_BITS / BLOOM_BITS_PER_EL)
535392f7a3SLiteSpeed Tech#define BLOOM_ONE 1ull
545392f7a3SLiteSpeed Tech
555392f7a3SLiteSpeed Tech#define PURGA_ELS_PER_PAGE 273
565392f7a3SLiteSpeed Tech
575392f7a3SLiteSpeed Techstruct purga_page
585392f7a3SLiteSpeed Tech{
595392f7a3SLiteSpeed Tech    TAILQ_ENTRY(purga_page)     pupa_next;
605392f7a3SLiteSpeed Tech    lsquic_time_t               pupa_last;
615392f7a3SLiteSpeed Tech    unsigned                    pupa_count;
625392f7a3SLiteSpeed Tech    bloom_mask_el_t             pupa_mask[BLOOM_N_MASK_ELS];
635392f7a3SLiteSpeed Tech    lsquic_cid_t                pupa_cids[PURGA_ELS_PER_PAGE];
645392f7a3SLiteSpeed Tech    void *                      pupa_peer_ctx[PURGA_ELS_PER_PAGE];
655392f7a3SLiteSpeed Tech    struct purga_el             pupa_els[PURGA_ELS_PER_PAGE];
665392f7a3SLiteSpeed Tech};
675392f7a3SLiteSpeed Tech
685392f7a3SLiteSpeed Tech#define PAGE_IS_FULL(page) ((page)->pupa_count >= PURGA_ELS_PER_PAGE)
695392f7a3SLiteSpeed Tech
705392f7a3SLiteSpeed TechTAILQ_HEAD(purga_pages, purga_page);
715392f7a3SLiteSpeed Tech
725392f7a3SLiteSpeed Techstruct lsquic_purga
735392f7a3SLiteSpeed Tech{
745392f7a3SLiteSpeed Tech    lsquic_time_t              pur_min_life;
755392f7a3SLiteSpeed Tech    lsquic_cids_update_f       pur_remove_cids;
765392f7a3SLiteSpeed Tech    void                      *pur_remove_ctx;
775392f7a3SLiteSpeed Tech    struct purga_pages         pur_pages;
785392f7a3SLiteSpeed Tech#ifndef NDEBUG
795392f7a3SLiteSpeed Tech    struct purga_bloom_stats   pur_stats;
805392f7a3SLiteSpeed Tech#endif
815392f7a3SLiteSpeed Tech};
825392f7a3SLiteSpeed Tech
835392f7a3SLiteSpeed Tech
845392f7a3SLiteSpeed Techstruct lsquic_purga *
855392f7a3SLiteSpeed Techlsquic_purga_new (lsquic_time_t min_life, lsquic_cids_update_f remove_cids,
865392f7a3SLiteSpeed Tech                                                                void *remove_ctx)
875392f7a3SLiteSpeed Tech{
885392f7a3SLiteSpeed Tech    struct lsquic_purga *purga;
895392f7a3SLiteSpeed Tech
905392f7a3SLiteSpeed Tech    purga = calloc(1, sizeof(*purga));
915392f7a3SLiteSpeed Tech    if (!purga)
925392f7a3SLiteSpeed Tech    {
935392f7a3SLiteSpeed Tech        LSQ_WARN("cannot create purgatory: malloc failed: %s", strerror(errno));
945392f7a3SLiteSpeed Tech        return NULL;
955392f7a3SLiteSpeed Tech    }
965392f7a3SLiteSpeed Tech
975392f7a3SLiteSpeed Tech    purga->pur_min_life = min_life;
985392f7a3SLiteSpeed Tech    purga->pur_remove_cids = remove_cids;
995392f7a3SLiteSpeed Tech    purga->pur_remove_ctx = remove_ctx;
1005392f7a3SLiteSpeed Tech    TAILQ_INIT(&purga->pur_pages);
1015392f7a3SLiteSpeed Tech    LSQ_INFO("create purgatory, min life %"PRIu64" usec", min_life);
1025392f7a3SLiteSpeed Tech
1035392f7a3SLiteSpeed Tech    return purga;
1045392f7a3SLiteSpeed Tech}
1055392f7a3SLiteSpeed Tech
1065392f7a3SLiteSpeed Tech
1075392f7a3SLiteSpeed Techstatic struct purga_page *
1085392f7a3SLiteSpeed Techpurga_get_page (struct lsquic_purga *purga)
1095392f7a3SLiteSpeed Tech{
1105392f7a3SLiteSpeed Tech    struct purga_page *page;
1115392f7a3SLiteSpeed Tech
1125392f7a3SLiteSpeed Tech    page = TAILQ_LAST(&purga->pur_pages, purga_pages);
1135392f7a3SLiteSpeed Tech    if (page && !PAGE_IS_FULL(page))
1145392f7a3SLiteSpeed Tech        return page;
1155392f7a3SLiteSpeed Tech
1165392f7a3SLiteSpeed Tech    page = malloc(sizeof(*page));
1175392f7a3SLiteSpeed Tech    if (!page)
1185392f7a3SLiteSpeed Tech    {
1195392f7a3SLiteSpeed Tech        LSQ_INFO("failed to allocate page: %s", strerror(errno));
1205392f7a3SLiteSpeed Tech        return NULL;
1215392f7a3SLiteSpeed Tech    }
1225392f7a3SLiteSpeed Tech
1235392f7a3SLiteSpeed Tech    page->pupa_count = 0;
1245392f7a3SLiteSpeed Tech    page->pupa_last  = 0;
1255392f7a3SLiteSpeed Tech    memset(page->pupa_mask, 0, sizeof(page->pupa_mask));
1265392f7a3SLiteSpeed Tech    TAILQ_INSERT_TAIL(&purga->pur_pages, page, pupa_next);
1275392f7a3SLiteSpeed Tech    LSQ_DEBUG("allocated new page");
1285392f7a3SLiteSpeed Tech    return page;
1295392f7a3SLiteSpeed Tech}
1305392f7a3SLiteSpeed Tech
1315392f7a3SLiteSpeed Tech
1325392f7a3SLiteSpeed Techstatic void
1335392f7a3SLiteSpeed Techpurga_remove_cids (struct lsquic_purga *purga, struct purga_page *page)
1345392f7a3SLiteSpeed Tech{
1355392f7a3SLiteSpeed Tech    LSQ_DEBUG("calling remove_cids with %u CID%.*s", page->pupa_count,
1365392f7a3SLiteSpeed Tech                                                page->pupa_count != 1, "s");
1375392f7a3SLiteSpeed Tech    /* XXX It is interesting that pur_remove_ctx is called with peer_ctx
1385392f7a3SLiteSpeed Tech     * as an argument, which should could vary (at least theoretically or
1395392f7a3SLiteSpeed Tech     * in the future) by path.
1405392f7a3SLiteSpeed Tech     */
1415392f7a3SLiteSpeed Tech    purga->pur_remove_cids(purga->pur_remove_ctx, page->pupa_peer_ctx,
1425392f7a3SLiteSpeed Tech                                        page->pupa_cids, page->pupa_count);
1435392f7a3SLiteSpeed Tech}
1445392f7a3SLiteSpeed Tech
1455392f7a3SLiteSpeed Tech
1465392f7a3SLiteSpeed Techstruct purga_el *
1475392f7a3SLiteSpeed Techlsquic_purga_add (struct lsquic_purga *purga, const lsquic_cid_t *cid,
1485392f7a3SLiteSpeed Tech                    void *peer_ctx, enum purga_type putype, lsquic_time_t now)
1495392f7a3SLiteSpeed Tech{
1505392f7a3SLiteSpeed Tech    struct purga_page *last_page, *page;
1515392f7a3SLiteSpeed Tech    uint64_t hash;
1525392f7a3SLiteSpeed Tech    unsigned i, idx, set, bit;
1535392f7a3SLiteSpeed Tech
1545392f7a3SLiteSpeed Tech    last_page = purga_get_page(purga);
1555392f7a3SLiteSpeed Tech    if (!last_page)
1565392f7a3SLiteSpeed Tech        return NULL;     /* We do best effort, nothing to do if malloc fails */
1575392f7a3SLiteSpeed Tech
1585392f7a3SLiteSpeed Tech    idx = last_page->pupa_count++;
1595392f7a3SLiteSpeed Tech    last_page->pupa_cids    [idx] = *cid;
1605392f7a3SLiteSpeed Tech    last_page->pupa_peer_ctx[idx] = peer_ctx;
1615392f7a3SLiteSpeed Tech    last_page->pupa_els     [idx] = (struct purga_el) {
1625392f7a3SLiteSpeed Tech        .puel_type      = putype,
1635392f7a3SLiteSpeed Tech    };
1645392f7a3SLiteSpeed Tech
1655392f7a3SLiteSpeed Tech    hash = XXH64(cid->idbuf, cid->len, 0);
1665392f7a3SLiteSpeed Tech    for (i = 0; i < BLOOM_N_FUNCS; ++i)
1675392f7a3SLiteSpeed Tech    {
1685392f7a3SLiteSpeed Tech        bit = (unsigned) hash & (BLOOM_BITS_PER_EL - 1);
1695392f7a3SLiteSpeed Tech        set = ((unsigned) hash >> BLOOM_SET_SHIFT) & (BLOOM_N_MASK_ELS - 1);
1705392f7a3SLiteSpeed Tech        last_page->pupa_mask[set] |= BLOOM_ONE << bit;
1715392f7a3SLiteSpeed Tech        hash >>= BLOOM_SHIFT;
1725392f7a3SLiteSpeed Tech    }
1735392f7a3SLiteSpeed Tech
1745392f7a3SLiteSpeed Tech    LSQ_DEBUGC("added %"CID_FMT" to the set", CID_BITS(cid));
1755392f7a3SLiteSpeed Tech    if (PAGE_IS_FULL(last_page))
1765392f7a3SLiteSpeed Tech    {
1775392f7a3SLiteSpeed Tech        LSQ_DEBUG("last page is full, set timestamp to %"PRIu64, now);
1785392f7a3SLiteSpeed Tech        last_page->pupa_last = now;
1795392f7a3SLiteSpeed Tech    }
1805392f7a3SLiteSpeed Tech
1815392f7a3SLiteSpeed Tech    while ((page = TAILQ_FIRST(&purga->pur_pages))
1825392f7a3SLiteSpeed Tech                && PAGE_IS_FULL(page)
1835392f7a3SLiteSpeed Tech                && page != last_page    /* This is theoretical, but still... */
1845392f7a3SLiteSpeed Tech                && page->pupa_last + purga->pur_min_life < now)
1855392f7a3SLiteSpeed Tech    {
1865392f7a3SLiteSpeed Tech        LSQ_DEBUG("page at timestamp %"PRIu64" expired; now is %"PRIu64,
1875392f7a3SLiteSpeed Tech            page->pupa_last, now);
1885392f7a3SLiteSpeed Tech        TAILQ_REMOVE(&purga->pur_pages, page, pupa_next);
1895392f7a3SLiteSpeed Tech        if (purga->pur_remove_cids && page->pupa_count)
1905392f7a3SLiteSpeed Tech            purga_remove_cids(purga, page);
1915392f7a3SLiteSpeed Tech        free(page);
1925392f7a3SLiteSpeed Tech    }
1935392f7a3SLiteSpeed Tech
1945392f7a3SLiteSpeed Tech    return &last_page->pupa_els[idx];
1955392f7a3SLiteSpeed Tech}
1965392f7a3SLiteSpeed Tech
1975392f7a3SLiteSpeed Tech
1985392f7a3SLiteSpeed Techstruct purga_el *
1995392f7a3SLiteSpeed Techlsquic_purga_contains (struct lsquic_purga *purga, const lsquic_cid_t *cid)
2005392f7a3SLiteSpeed Tech{
2015392f7a3SLiteSpeed Tech    struct purga_page *page;
2025392f7a3SLiteSpeed Tech    unsigned i, bit, set, hits;
2035392f7a3SLiteSpeed Tech    uint64_t cid_hash, hash;
2045392f7a3SLiteSpeed Tech
2055392f7a3SLiteSpeed Tech    page = TAILQ_FIRST(&purga->pur_pages);
2065392f7a3SLiteSpeed Tech    if (!page)
2075392f7a3SLiteSpeed Tech        goto end;
2085392f7a3SLiteSpeed Tech
2095392f7a3SLiteSpeed Tech    cid_hash = XXH64(cid->idbuf, cid->len, 0);
2105392f7a3SLiteSpeed Tech    do
2115392f7a3SLiteSpeed Tech    {
2125392f7a3SLiteSpeed Tech#ifndef NDEBUG
2135392f7a3SLiteSpeed Tech        ++purga->pur_stats.searches;
2145392f7a3SLiteSpeed Tech#endif
2155392f7a3SLiteSpeed Tech        hash = cid_hash;
2165392f7a3SLiteSpeed Tech        hits = 0;
2175392f7a3SLiteSpeed Tech        for (i = 0; i < BLOOM_N_FUNCS; ++i)
2185392f7a3SLiteSpeed Tech        {
2195392f7a3SLiteSpeed Tech            bit = (unsigned) hash & (BLOOM_BITS_PER_EL - 1);
2205392f7a3SLiteSpeed Tech            set = ((unsigned) hash >> BLOOM_SET_SHIFT) & (BLOOM_N_MASK_ELS - 1);
2215392f7a3SLiteSpeed Tech            hits += (page->pupa_mask[set] & (BLOOM_ONE << bit)) != 0;
2225392f7a3SLiteSpeed Tech            hash >>= BLOOM_SHIFT;
2235392f7a3SLiteSpeed Tech        }
2245392f7a3SLiteSpeed Tech        if (hits < BLOOM_N_FUNCS)
2255392f7a3SLiteSpeed Tech            goto next_page;
2265392f7a3SLiteSpeed Tech        for (i = 0; i < page->pupa_count; ++i)
2275392f7a3SLiteSpeed Tech            if (LSQUIC_CIDS_EQ(&page->pupa_cids[i], cid))
2285392f7a3SLiteSpeed Tech            {
2295392f7a3SLiteSpeed Tech                LSQ_DEBUGC("found %"CID_FMT, CID_BITS(cid));
2305392f7a3SLiteSpeed Tech                return &page->pupa_els[i];
2315392f7a3SLiteSpeed Tech            }
2325392f7a3SLiteSpeed Tech#ifndef NDEBUG
2335392f7a3SLiteSpeed Tech        ++purga->pur_stats.false_hits;
2345392f7a3SLiteSpeed Tech#endif
2355392f7a3SLiteSpeed Tech  next_page:
2365392f7a3SLiteSpeed Tech        page = TAILQ_NEXT(page, pupa_next);
2375392f7a3SLiteSpeed Tech    }
2385392f7a3SLiteSpeed Tech    while (page);
2395392f7a3SLiteSpeed Tech
2405392f7a3SLiteSpeed Tech  end:
2415392f7a3SLiteSpeed Tech    LSQ_DEBUGC("%"CID_FMT" not found", CID_BITS(cid));
2425392f7a3SLiteSpeed Tech    return NULL;
2435392f7a3SLiteSpeed Tech}
2445392f7a3SLiteSpeed Tech
2455392f7a3SLiteSpeed Tech
2465392f7a3SLiteSpeed Techvoid
2475392f7a3SLiteSpeed Techlsquic_purga_destroy (struct lsquic_purga *purga)
2485392f7a3SLiteSpeed Tech{
2495392f7a3SLiteSpeed Tech    struct purga_page *page;
2505392f7a3SLiteSpeed Tech
2515392f7a3SLiteSpeed Tech    while ((page = TAILQ_FIRST(&purga->pur_pages)))
2525392f7a3SLiteSpeed Tech    {
2535392f7a3SLiteSpeed Tech        TAILQ_REMOVE(&purga->pur_pages, page, pupa_next);
2545392f7a3SLiteSpeed Tech        if (purga->pur_remove_cids && page->pupa_count)
2555392f7a3SLiteSpeed Tech            purga_remove_cids(purga, page);
2565392f7a3SLiteSpeed Tech        free(page);
2575392f7a3SLiteSpeed Tech    }
2585392f7a3SLiteSpeed Tech    free(purga);
2595392f7a3SLiteSpeed Tech    LSQ_INFO("destroyed");
2605392f7a3SLiteSpeed Tech}
2615392f7a3SLiteSpeed Tech
2625392f7a3SLiteSpeed Tech
2635392f7a3SLiteSpeed Techunsigned
2645392f7a3SLiteSpeed Techlsquic_purga_cids_per_page (void)
2655392f7a3SLiteSpeed Tech{
2665392f7a3SLiteSpeed Tech    LSQ_DEBUG("CIDs per page: %u", PURGA_ELS_PER_PAGE);
2675392f7a3SLiteSpeed Tech    return PURGA_ELS_PER_PAGE;
2685392f7a3SLiteSpeed Tech}
2695392f7a3SLiteSpeed Tech
2705392f7a3SLiteSpeed Tech
2715392f7a3SLiteSpeed Tech#ifndef NDEBUG
2725392f7a3SLiteSpeed Techstruct purga_bloom_stats *
2735392f7a3SLiteSpeed Techlsquic_purga_get_bloom_stats (struct lsquic_purga *purga)
2745392f7a3SLiteSpeed Tech{
2755392f7a3SLiteSpeed Tech    return &purga->pur_stats;
2765392f7a3SLiteSpeed Tech}
2775392f7a3SLiteSpeed Tech#endif
278