1a74702c6SGeorge Wang/* Copyright (c) 2017 - 2022 LiteSpeed Technologies Inc. See LICENSE. */ 25392f7a3SLiteSpeed Tech#include <errno.h> 35392f7a3SLiteSpeed Tech#include <inttypes.h> 45392f7a3SLiteSpeed Tech#include <stddef.h> 55392f7a3SLiteSpeed Tech#include <stdint.h> 65392f7a3SLiteSpeed Tech#include <stdlib.h> 75392f7a3SLiteSpeed Tech#include <string.h> 85392f7a3SLiteSpeed Tech#include <sys/queue.h> 95392f7a3SLiteSpeed Tech 105392f7a3SLiteSpeed Tech#include "lsquic.h" 115392f7a3SLiteSpeed Tech#include "lsquic_types.h" 125392f7a3SLiteSpeed Tech#include "lsquic_int_types.h" 135392f7a3SLiteSpeed Tech#include "lsquic_xxhash.h" 145392f7a3SLiteSpeed Tech#include "lsquic_purga.h" 155392f7a3SLiteSpeed Tech 165392f7a3SLiteSpeed Tech#define LSQUIC_LOGGER_MODULE LSQLM_PURGA 175392f7a3SLiteSpeed Tech#include "lsquic_logger.h" 185392f7a3SLiteSpeed Tech 195392f7a3SLiteSpeed Tech 205392f7a3SLiteSpeed Tech/* To avoid scannig the whole list of CIDs, we use a Bloom filter. 215392f7a3SLiteSpeed Tech * 225392f7a3SLiteSpeed Tech * The Bloom filter is constructed using a 8192-bit bit field and 6 hash 235392f7a3SLiteSpeed Tech * functions. With 273 elements per page, this gives us 0.004% possibility 245392f7a3SLiteSpeed Tech * of a false positive. In other words, when we do have to search a page 255392f7a3SLiteSpeed Tech * for a particular CID, the chance of finding the CID is 99.99%. 265392f7a3SLiteSpeed Tech * 275392f7a3SLiteSpeed Tech * Quick calc: 285392f7a3SLiteSpeed Tech * perl -E '$k=6;$m=1<<13;$n=273;printf("%f\n", (1-exp(1)**-($k*$n/$m))**$k)' 295392f7a3SLiteSpeed Tech * 305392f7a3SLiteSpeed Tech * To extract 6 13-bit values from a 64-bit integer, they are overlapped: 315392f7a3SLiteSpeed Tech * 0 10 20 30 40 50 60 325392f7a3SLiteSpeed Tech * +----------------------------------------------------------------+ 335392f7a3SLiteSpeed Tech * | | 345392f7a3SLiteSpeed Tech * +----------------------------------------------------------------+ 355392f7a3SLiteSpeed Tech * 1111111111111 365392f7a3SLiteSpeed Tech * 2222222222222 375392f7a3SLiteSpeed Tech * 3333333333333 385392f7a3SLiteSpeed Tech * 4444444444444 395392f7a3SLiteSpeed Tech * 5555555555555 405392f7a3SLiteSpeed Tech * 6666666666666 415392f7a3SLiteSpeed Tech * 425392f7a3SLiteSpeed Tech * This is not 100% kosher, but having 6 functions gives a better guarantee 435392f7a3SLiteSpeed Tech * and it happens to work in practice. 445392f7a3SLiteSpeed Tech */ 455392f7a3SLiteSpeed Tech 465392f7a3SLiteSpeed Tech#define BLOOM_N_FUNCS 6 475392f7a3SLiteSpeed Tech#define BLOOM_SHIFT 10 485392f7a3SLiteSpeed Tech#define BLOOM_N_BITS (1 << 13) 495392f7a3SLiteSpeed Techtypedef uint64_t bloom_mask_el_t; 505392f7a3SLiteSpeed Tech#define BLOOM_SET_SHIFT 6 /* log2(sizeof(bloom_mask_el_t)) */ 515392f7a3SLiteSpeed Tech#define BLOOM_BITS_PER_EL (1 << BLOOM_SET_SHIFT) 525392f7a3SLiteSpeed Tech#define BLOOM_N_MASK_ELS (BLOOM_N_BITS / BLOOM_BITS_PER_EL) 535392f7a3SLiteSpeed Tech#define BLOOM_ONE 1ull 545392f7a3SLiteSpeed Tech 555392f7a3SLiteSpeed Tech#define PURGA_ELS_PER_PAGE 273 565392f7a3SLiteSpeed Tech 575392f7a3SLiteSpeed Techstruct purga_page 585392f7a3SLiteSpeed Tech{ 595392f7a3SLiteSpeed Tech TAILQ_ENTRY(purga_page) pupa_next; 605392f7a3SLiteSpeed Tech lsquic_time_t pupa_last; 615392f7a3SLiteSpeed Tech unsigned pupa_count; 625392f7a3SLiteSpeed Tech bloom_mask_el_t pupa_mask[BLOOM_N_MASK_ELS]; 635392f7a3SLiteSpeed Tech lsquic_cid_t pupa_cids[PURGA_ELS_PER_PAGE]; 645392f7a3SLiteSpeed Tech void * pupa_peer_ctx[PURGA_ELS_PER_PAGE]; 655392f7a3SLiteSpeed Tech struct purga_el pupa_els[PURGA_ELS_PER_PAGE]; 665392f7a3SLiteSpeed Tech}; 675392f7a3SLiteSpeed Tech 685392f7a3SLiteSpeed Tech#define PAGE_IS_FULL(page) ((page)->pupa_count >= PURGA_ELS_PER_PAGE) 695392f7a3SLiteSpeed Tech 705392f7a3SLiteSpeed TechTAILQ_HEAD(purga_pages, purga_page); 715392f7a3SLiteSpeed Tech 725392f7a3SLiteSpeed Techstruct lsquic_purga 735392f7a3SLiteSpeed Tech{ 745392f7a3SLiteSpeed Tech lsquic_time_t pur_min_life; 755392f7a3SLiteSpeed Tech lsquic_cids_update_f pur_remove_cids; 765392f7a3SLiteSpeed Tech void *pur_remove_ctx; 775392f7a3SLiteSpeed Tech struct purga_pages pur_pages; 785392f7a3SLiteSpeed Tech#ifndef NDEBUG 795392f7a3SLiteSpeed Tech struct purga_bloom_stats pur_stats; 805392f7a3SLiteSpeed Tech#endif 815392f7a3SLiteSpeed Tech}; 825392f7a3SLiteSpeed Tech 835392f7a3SLiteSpeed Tech 845392f7a3SLiteSpeed Techstruct lsquic_purga * 855392f7a3SLiteSpeed Techlsquic_purga_new (lsquic_time_t min_life, lsquic_cids_update_f remove_cids, 865392f7a3SLiteSpeed Tech void *remove_ctx) 875392f7a3SLiteSpeed Tech{ 885392f7a3SLiteSpeed Tech struct lsquic_purga *purga; 895392f7a3SLiteSpeed Tech 905392f7a3SLiteSpeed Tech purga = calloc(1, sizeof(*purga)); 915392f7a3SLiteSpeed Tech if (!purga) 925392f7a3SLiteSpeed Tech { 935392f7a3SLiteSpeed Tech LSQ_WARN("cannot create purgatory: malloc failed: %s", strerror(errno)); 945392f7a3SLiteSpeed Tech return NULL; 955392f7a3SLiteSpeed Tech } 965392f7a3SLiteSpeed Tech 975392f7a3SLiteSpeed Tech purga->pur_min_life = min_life; 985392f7a3SLiteSpeed Tech purga->pur_remove_cids = remove_cids; 995392f7a3SLiteSpeed Tech purga->pur_remove_ctx = remove_ctx; 1005392f7a3SLiteSpeed Tech TAILQ_INIT(&purga->pur_pages); 1015392f7a3SLiteSpeed Tech LSQ_INFO("create purgatory, min life %"PRIu64" usec", min_life); 1025392f7a3SLiteSpeed Tech 1035392f7a3SLiteSpeed Tech return purga; 1045392f7a3SLiteSpeed Tech} 1055392f7a3SLiteSpeed Tech 1065392f7a3SLiteSpeed Tech 1075392f7a3SLiteSpeed Techstatic struct purga_page * 1085392f7a3SLiteSpeed Techpurga_get_page (struct lsquic_purga *purga) 1095392f7a3SLiteSpeed Tech{ 1105392f7a3SLiteSpeed Tech struct purga_page *page; 1115392f7a3SLiteSpeed Tech 1125392f7a3SLiteSpeed Tech page = TAILQ_LAST(&purga->pur_pages, purga_pages); 1135392f7a3SLiteSpeed Tech if (page && !PAGE_IS_FULL(page)) 1145392f7a3SLiteSpeed Tech return page; 1155392f7a3SLiteSpeed Tech 1165392f7a3SLiteSpeed Tech page = malloc(sizeof(*page)); 1175392f7a3SLiteSpeed Tech if (!page) 1185392f7a3SLiteSpeed Tech { 1195392f7a3SLiteSpeed Tech LSQ_INFO("failed to allocate page: %s", strerror(errno)); 1205392f7a3SLiteSpeed Tech return NULL; 1215392f7a3SLiteSpeed Tech } 1225392f7a3SLiteSpeed Tech 1235392f7a3SLiteSpeed Tech page->pupa_count = 0; 1245392f7a3SLiteSpeed Tech page->pupa_last = 0; 1255392f7a3SLiteSpeed Tech memset(page->pupa_mask, 0, sizeof(page->pupa_mask)); 1265392f7a3SLiteSpeed Tech TAILQ_INSERT_TAIL(&purga->pur_pages, page, pupa_next); 1275392f7a3SLiteSpeed Tech LSQ_DEBUG("allocated new page"); 1285392f7a3SLiteSpeed Tech return page; 1295392f7a3SLiteSpeed Tech} 1305392f7a3SLiteSpeed Tech 1315392f7a3SLiteSpeed Tech 1325392f7a3SLiteSpeed Techstatic void 1335392f7a3SLiteSpeed Techpurga_remove_cids (struct lsquic_purga *purga, struct purga_page *page) 1345392f7a3SLiteSpeed Tech{ 1355392f7a3SLiteSpeed Tech LSQ_DEBUG("calling remove_cids with %u CID%.*s", page->pupa_count, 1365392f7a3SLiteSpeed Tech page->pupa_count != 1, "s"); 1375392f7a3SLiteSpeed Tech /* XXX It is interesting that pur_remove_ctx is called with peer_ctx 1385392f7a3SLiteSpeed Tech * as an argument, which should could vary (at least theoretically or 1395392f7a3SLiteSpeed Tech * in the future) by path. 1405392f7a3SLiteSpeed Tech */ 1415392f7a3SLiteSpeed Tech purga->pur_remove_cids(purga->pur_remove_ctx, page->pupa_peer_ctx, 1425392f7a3SLiteSpeed Tech page->pupa_cids, page->pupa_count); 1435392f7a3SLiteSpeed Tech} 1445392f7a3SLiteSpeed Tech 1455392f7a3SLiteSpeed Tech 1465392f7a3SLiteSpeed Techstruct purga_el * 1475392f7a3SLiteSpeed Techlsquic_purga_add (struct lsquic_purga *purga, const lsquic_cid_t *cid, 1485392f7a3SLiteSpeed Tech void *peer_ctx, enum purga_type putype, lsquic_time_t now) 1495392f7a3SLiteSpeed Tech{ 1505392f7a3SLiteSpeed Tech struct purga_page *last_page, *page; 1515392f7a3SLiteSpeed Tech uint64_t hash; 1525392f7a3SLiteSpeed Tech unsigned i, idx, set, bit; 1535392f7a3SLiteSpeed Tech 1545392f7a3SLiteSpeed Tech last_page = purga_get_page(purga); 1555392f7a3SLiteSpeed Tech if (!last_page) 1565392f7a3SLiteSpeed Tech return NULL; /* We do best effort, nothing to do if malloc fails */ 1575392f7a3SLiteSpeed Tech 1585392f7a3SLiteSpeed Tech idx = last_page->pupa_count++; 1595392f7a3SLiteSpeed Tech last_page->pupa_cids [idx] = *cid; 1605392f7a3SLiteSpeed Tech last_page->pupa_peer_ctx[idx] = peer_ctx; 1615392f7a3SLiteSpeed Tech last_page->pupa_els [idx] = (struct purga_el) { 1625392f7a3SLiteSpeed Tech .puel_type = putype, 1635392f7a3SLiteSpeed Tech }; 1645392f7a3SLiteSpeed Tech 1655392f7a3SLiteSpeed Tech hash = XXH64(cid->idbuf, cid->len, 0); 1665392f7a3SLiteSpeed Tech for (i = 0; i < BLOOM_N_FUNCS; ++i) 1675392f7a3SLiteSpeed Tech { 1685392f7a3SLiteSpeed Tech bit = (unsigned) hash & (BLOOM_BITS_PER_EL - 1); 1695392f7a3SLiteSpeed Tech set = ((unsigned) hash >> BLOOM_SET_SHIFT) & (BLOOM_N_MASK_ELS - 1); 1705392f7a3SLiteSpeed Tech last_page->pupa_mask[set] |= BLOOM_ONE << bit; 1715392f7a3SLiteSpeed Tech hash >>= BLOOM_SHIFT; 1725392f7a3SLiteSpeed Tech } 1735392f7a3SLiteSpeed Tech 1745392f7a3SLiteSpeed Tech LSQ_DEBUGC("added %"CID_FMT" to the set", CID_BITS(cid)); 1755392f7a3SLiteSpeed Tech if (PAGE_IS_FULL(last_page)) 1765392f7a3SLiteSpeed Tech { 1775392f7a3SLiteSpeed Tech LSQ_DEBUG("last page is full, set timestamp to %"PRIu64, now); 1785392f7a3SLiteSpeed Tech last_page->pupa_last = now; 1795392f7a3SLiteSpeed Tech } 1805392f7a3SLiteSpeed Tech 1815392f7a3SLiteSpeed Tech while ((page = TAILQ_FIRST(&purga->pur_pages)) 1825392f7a3SLiteSpeed Tech && PAGE_IS_FULL(page) 1835392f7a3SLiteSpeed Tech && page != last_page /* This is theoretical, but still... */ 1845392f7a3SLiteSpeed Tech && page->pupa_last + purga->pur_min_life < now) 1855392f7a3SLiteSpeed Tech { 1865392f7a3SLiteSpeed Tech LSQ_DEBUG("page at timestamp %"PRIu64" expired; now is %"PRIu64, 1875392f7a3SLiteSpeed Tech page->pupa_last, now); 1885392f7a3SLiteSpeed Tech TAILQ_REMOVE(&purga->pur_pages, page, pupa_next); 1895392f7a3SLiteSpeed Tech if (purga->pur_remove_cids && page->pupa_count) 1905392f7a3SLiteSpeed Tech purga_remove_cids(purga, page); 1915392f7a3SLiteSpeed Tech free(page); 1925392f7a3SLiteSpeed Tech } 1935392f7a3SLiteSpeed Tech 1945392f7a3SLiteSpeed Tech return &last_page->pupa_els[idx]; 1955392f7a3SLiteSpeed Tech} 1965392f7a3SLiteSpeed Tech 1975392f7a3SLiteSpeed Tech 1985392f7a3SLiteSpeed Techstruct purga_el * 1995392f7a3SLiteSpeed Techlsquic_purga_contains (struct lsquic_purga *purga, const lsquic_cid_t *cid) 2005392f7a3SLiteSpeed Tech{ 2015392f7a3SLiteSpeed Tech struct purga_page *page; 2025392f7a3SLiteSpeed Tech unsigned i, bit, set, hits; 2035392f7a3SLiteSpeed Tech uint64_t cid_hash, hash; 2045392f7a3SLiteSpeed Tech 2055392f7a3SLiteSpeed Tech page = TAILQ_FIRST(&purga->pur_pages); 2065392f7a3SLiteSpeed Tech if (!page) 2075392f7a3SLiteSpeed Tech goto end; 2085392f7a3SLiteSpeed Tech 2095392f7a3SLiteSpeed Tech cid_hash = XXH64(cid->idbuf, cid->len, 0); 2105392f7a3SLiteSpeed Tech do 2115392f7a3SLiteSpeed Tech { 2125392f7a3SLiteSpeed Tech#ifndef NDEBUG 2135392f7a3SLiteSpeed Tech ++purga->pur_stats.searches; 2145392f7a3SLiteSpeed Tech#endif 2155392f7a3SLiteSpeed Tech hash = cid_hash; 2165392f7a3SLiteSpeed Tech hits = 0; 2175392f7a3SLiteSpeed Tech for (i = 0; i < BLOOM_N_FUNCS; ++i) 2185392f7a3SLiteSpeed Tech { 2195392f7a3SLiteSpeed Tech bit = (unsigned) hash & (BLOOM_BITS_PER_EL - 1); 2205392f7a3SLiteSpeed Tech set = ((unsigned) hash >> BLOOM_SET_SHIFT) & (BLOOM_N_MASK_ELS - 1); 2215392f7a3SLiteSpeed Tech hits += (page->pupa_mask[set] & (BLOOM_ONE << bit)) != 0; 2225392f7a3SLiteSpeed Tech hash >>= BLOOM_SHIFT; 2235392f7a3SLiteSpeed Tech } 2245392f7a3SLiteSpeed Tech if (hits < BLOOM_N_FUNCS) 2255392f7a3SLiteSpeed Tech goto next_page; 2265392f7a3SLiteSpeed Tech for (i = 0; i < page->pupa_count; ++i) 2275392f7a3SLiteSpeed Tech if (LSQUIC_CIDS_EQ(&page->pupa_cids[i], cid)) 2285392f7a3SLiteSpeed Tech { 2295392f7a3SLiteSpeed Tech LSQ_DEBUGC("found %"CID_FMT, CID_BITS(cid)); 2305392f7a3SLiteSpeed Tech return &page->pupa_els[i]; 2315392f7a3SLiteSpeed Tech } 2325392f7a3SLiteSpeed Tech#ifndef NDEBUG 2335392f7a3SLiteSpeed Tech ++purga->pur_stats.false_hits; 2345392f7a3SLiteSpeed Tech#endif 2355392f7a3SLiteSpeed Tech next_page: 2365392f7a3SLiteSpeed Tech page = TAILQ_NEXT(page, pupa_next); 2375392f7a3SLiteSpeed Tech } 2385392f7a3SLiteSpeed Tech while (page); 2395392f7a3SLiteSpeed Tech 2405392f7a3SLiteSpeed Tech end: 2415392f7a3SLiteSpeed Tech LSQ_DEBUGC("%"CID_FMT" not found", CID_BITS(cid)); 2425392f7a3SLiteSpeed Tech return NULL; 2435392f7a3SLiteSpeed Tech} 2445392f7a3SLiteSpeed Tech 2455392f7a3SLiteSpeed Tech 2465392f7a3SLiteSpeed Techvoid 2475392f7a3SLiteSpeed Techlsquic_purga_destroy (struct lsquic_purga *purga) 2485392f7a3SLiteSpeed Tech{ 2495392f7a3SLiteSpeed Tech struct purga_page *page; 2505392f7a3SLiteSpeed Tech 2515392f7a3SLiteSpeed Tech while ((page = TAILQ_FIRST(&purga->pur_pages))) 2525392f7a3SLiteSpeed Tech { 2535392f7a3SLiteSpeed Tech TAILQ_REMOVE(&purga->pur_pages, page, pupa_next); 2545392f7a3SLiteSpeed Tech if (purga->pur_remove_cids && page->pupa_count) 2555392f7a3SLiteSpeed Tech purga_remove_cids(purga, page); 2565392f7a3SLiteSpeed Tech free(page); 2575392f7a3SLiteSpeed Tech } 2585392f7a3SLiteSpeed Tech free(purga); 2595392f7a3SLiteSpeed Tech LSQ_INFO("destroyed"); 2605392f7a3SLiteSpeed Tech} 2615392f7a3SLiteSpeed Tech 2625392f7a3SLiteSpeed Tech 2635392f7a3SLiteSpeed Techunsigned 2645392f7a3SLiteSpeed Techlsquic_purga_cids_per_page (void) 2655392f7a3SLiteSpeed Tech{ 2665392f7a3SLiteSpeed Tech LSQ_DEBUG("CIDs per page: %u", PURGA_ELS_PER_PAGE); 2675392f7a3SLiteSpeed Tech return PURGA_ELS_PER_PAGE; 2685392f7a3SLiteSpeed Tech} 2695392f7a3SLiteSpeed Tech 2705392f7a3SLiteSpeed Tech 2715392f7a3SLiteSpeed Tech#ifndef NDEBUG 2725392f7a3SLiteSpeed Techstruct purga_bloom_stats * 2735392f7a3SLiteSpeed Techlsquic_purga_get_bloom_stats (struct lsquic_purga *purga) 2745392f7a3SLiteSpeed Tech{ 2755392f7a3SLiteSpeed Tech return &purga->pur_stats; 2765392f7a3SLiteSpeed Tech} 2775392f7a3SLiteSpeed Tech#endif 278