1/* find-xxh -- find XXH seed common for ls-hpack and ls-qpack
2 *
3 * To speed up decoding/encoding process in a proxy, we need to use the
4 * same name and nameval XXH hashes in ls-hpack and ls-qpack libraries.
5 * This program finds an XXH seed common to both and shifts and offsets
6 * for construction of name and nameval XXH tables in both libraries.
7 */
8
9#include <inttypes.h>
10#include <limits.h>
11#include <stdint.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <unistd.h>
16
17#include XXH_HEADER_NAME
18
19#define NAME_VAL(a, b) sizeof(a) - 1, sizeof(b) - 1, (a), (b)
20
21struct table_elem
22{
23    unsigned          name_len;
24    unsigned          val_len;
25    const char       *name;
26    const char       *val;
27};
28
29
30static const struct table_elem hpack_table[] =
31{
32    { NAME_VAL(":authority",                    "") },
33    { NAME_VAL(":method",                       "GET") },
34    { NAME_VAL(":method",                       "POST") },
35    { NAME_VAL(":path",                         "/") },
36    { NAME_VAL(":path",                         "/index.html") },
37    { NAME_VAL(":scheme",                       "http") },
38    { NAME_VAL(":scheme",                       "https") },
39    { NAME_VAL(":status",                       "200") },
40    { NAME_VAL(":status",                       "204") },
41    { NAME_VAL(":status",                       "206") },
42    { NAME_VAL(":status",                       "304") },
43    { NAME_VAL(":status",                       "400") },
44    { NAME_VAL(":status",                       "404") },
45    { NAME_VAL(":status",                       "500") },
46    { NAME_VAL("accept-charset",                "") },
47    { NAME_VAL("accept-encoding",               "gzip, deflate") },
48    { NAME_VAL("accept-language",               "") },
49    { NAME_VAL("accept-ranges",                 "") },
50    { NAME_VAL("accept",                        "") },
51    { NAME_VAL("access-control-allow-origin",   "") },
52    { NAME_VAL("age",                           "") },
53    { NAME_VAL("allow",                         "") },
54    { NAME_VAL("authorization",                 "") },
55    { NAME_VAL("cache-control",                 "") },
56    { NAME_VAL("content-disposition",           "") },
57    { NAME_VAL("content-encoding",              "") },
58    { NAME_VAL("content-language",              "") },
59    { NAME_VAL("content-length",                "") },
60    { NAME_VAL("content-location",              "") },
61    { NAME_VAL("content-range",                 "") },
62    { NAME_VAL("content-type",                  "") },
63    { NAME_VAL("cookie",                        "") },
64    { NAME_VAL("date",                          "") },
65    { NAME_VAL("etag",                          "") },
66    { NAME_VAL("expect",                        "") },
67    { NAME_VAL("expires",                       "") },
68    { NAME_VAL("from",                          "") },
69    { NAME_VAL("host",                          "") },
70    { NAME_VAL("if-match",                      "") },
71    { NAME_VAL("if-modified-since",             "") },
72    { NAME_VAL("if-none-match",                 "") },
73    { NAME_VAL("if-range",                      "") },
74    { NAME_VAL("if-unmodified-since",           "") },
75    { NAME_VAL("last-modified",                 "") },
76    { NAME_VAL("link",                          "") },
77    { NAME_VAL("location",                      "") },
78    { NAME_VAL("max-forwards",                  "") },
79    { NAME_VAL("proxy-authenticate",            "") },
80    { NAME_VAL("proxy-authorization",           "") },
81    { NAME_VAL("range",                         "") },
82    { NAME_VAL("referer",                       "") },
83    { NAME_VAL("refresh",                       "") },
84    { NAME_VAL("retry-after",                   "") },
85    { NAME_VAL("server",                        "") },
86    { NAME_VAL("set-cookie",                    "") },
87    { NAME_VAL("strict-transport-security",     "") },
88    { NAME_VAL("transfer-encoding",             "") },
89    { NAME_VAL("user-agent",                    "") },
90    { NAME_VAL("vary",                          "") },
91    { NAME_VAL("via",                           "") },
92    { NAME_VAL("www-authenticate",              "") }
93};
94#define HPACK_STATIC_TABLE_SIZE (sizeof(hpack_table) / sizeof(hpack_table[0]))
95
96static const unsigned hpack_name_indexes[] = {
97    0, 1, 3, 5, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
98    27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
99    44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
100};
101#define HPACK_NAME_SIZE (sizeof(hpack_name_indexes) / sizeof(hpack_name_indexes[0]))
102
103/* [draft-ietf-quic-qpack-03] Appendix A */
104static const struct table_elem qpack_table[] =
105{
106    { NAME_VAL(":authority", "") },
107    { NAME_VAL(":path", "/") },
108    { NAME_VAL("age", "0") },
109    { NAME_VAL("content-disposition", "") },
110    { NAME_VAL("content-length", "0") },
111    { NAME_VAL("cookie", "") },
112    { NAME_VAL("date", "") },
113    { NAME_VAL("etag", "") },
114    { NAME_VAL("if-modified-since", "") },
115    { NAME_VAL("if-none-match", "") },
116    { NAME_VAL("last-modified", "") },
117    { NAME_VAL("link", "") },
118    { NAME_VAL("location", "") },
119    { NAME_VAL("referer", "") },
120    { NAME_VAL("set-cookie", "") },
121    { NAME_VAL(":method", "CONNECT") },
122    { NAME_VAL(":method", "DELETE") },
123    { NAME_VAL(":method", "GET") },
124    { NAME_VAL(":method", "HEAD") },
125    { NAME_VAL(":method", "OPTIONS") },
126    { NAME_VAL(":method", "POST") },
127    { NAME_VAL(":method", "PUT") },
128    { NAME_VAL(":scheme", "http") },
129    { NAME_VAL(":scheme", "https") },
130    { NAME_VAL(":status", "103") },
131    { NAME_VAL(":status", "200") },
132    { NAME_VAL(":status", "304") },
133    { NAME_VAL(":status", "404") },
134    { NAME_VAL(":status", "503") },
135    { NAME_VAL("accept", "*/*") },
136    { NAME_VAL("accept", "application/dns-message") },
137    { NAME_VAL("accept-encoding", "gzip, deflate, br") },
138    { NAME_VAL("accept-ranges", "bytes") },
139    { NAME_VAL("access-control-allow-headers", "cache-control") },
140    { NAME_VAL("access-control-allow-headers", "content-type") },
141    { NAME_VAL("access-control-allow-origin", "*") },
142    { NAME_VAL("cache-control", "max-age=0") },
143    { NAME_VAL("cache-control", "max-age=2592000") },
144    { NAME_VAL("cache-control", "max-age=604800") },
145    { NAME_VAL("cache-control", "no-cache") },
146    { NAME_VAL("cache-control", "no-store") },
147    { NAME_VAL("cache-control", "public, max-age=31536000") },
148    { NAME_VAL("content-encoding", "br") },
149    { NAME_VAL("content-encoding", "gzip") },
150    { NAME_VAL("content-type", "application/dns-message") },
151    { NAME_VAL("content-type", "application/javascript") },
152    { NAME_VAL("content-type", "application/json") },
153    { NAME_VAL("content-type", "application/x-www-form-urlencoded") },
154    { NAME_VAL("content-type", "image/gif") },
155    { NAME_VAL("content-type", "image/jpeg") },
156    { NAME_VAL("content-type", "image/png") },
157    { NAME_VAL("content-type", "text/css") },
158    { NAME_VAL("content-type", "text/html; charset=utf-8") },
159    { NAME_VAL("content-type", "text/plain") },
160    { NAME_VAL("content-type", "text/plain;charset=utf-8") },
161    { NAME_VAL("range", "bytes=0-") },
162    { NAME_VAL("strict-transport-security", "max-age=31536000") },
163    { NAME_VAL("strict-transport-security", "max-age=31536000; includesubdomains") },
164    { NAME_VAL("strict-transport-security", "max-age=31536000; includesubdomains; preload") },
165    { NAME_VAL("vary", "accept-encoding") },
166    { NAME_VAL("vary", "origin") },
167    { NAME_VAL("x-content-type-options", "nosniff") },
168    { NAME_VAL("x-xss-protection", "1; mode=block") },
169    { NAME_VAL(":status", "100") },
170    { NAME_VAL(":status", "204") },
171    { NAME_VAL(":status", "206") },
172    { NAME_VAL(":status", "302") },
173    { NAME_VAL(":status", "400") },
174    { NAME_VAL(":status", "403") },
175    { NAME_VAL(":status", "421") },
176    { NAME_VAL(":status", "425") },
177    { NAME_VAL(":status", "500") },
178    { NAME_VAL("accept-language", "") },
179    { NAME_VAL("access-control-allow-credentials", "FALSE") },
180    { NAME_VAL("access-control-allow-credentials", "TRUE") },
181    { NAME_VAL("access-control-allow-headers", "*") },
182    { NAME_VAL("access-control-allow-methods", "get") },
183    { NAME_VAL("access-control-allow-methods", "get, post, options") },
184    { NAME_VAL("access-control-allow-methods", "options") },
185    { NAME_VAL("access-control-expose-headers", "content-length") },
186    { NAME_VAL("access-control-request-headers", "content-type") },
187    { NAME_VAL("access-control-request-method", "get") },
188    { NAME_VAL("access-control-request-method", "post") },
189    { NAME_VAL("alt-svc", "clear") },
190    { NAME_VAL("authorization", "") },
191    { NAME_VAL("content-security-policy", "script-src 'none'; object-src 'none'; base-uri 'none'") },
192    { NAME_VAL("early-data", "1") },
193    { NAME_VAL("expect-ct", "") },
194    { NAME_VAL("forwarded", "") },
195    { NAME_VAL("if-range", "") },
196    { NAME_VAL("origin", "") },
197    { NAME_VAL("purpose", "prefetch") },
198    { NAME_VAL("server", "") },
199    { NAME_VAL("timing-allow-origin", "*") },
200    { NAME_VAL("upgrade-insecure-requests", "1") },
201    { NAME_VAL("user-agent", "") },
202    { NAME_VAL("x-forwarded-for", "") },
203    { NAME_VAL("x-frame-options", "deny") },
204    { NAME_VAL("x-frame-options", "sameorigin") },
205};
206#define QPACK_STATIC_TABLE_SIZE (sizeof(qpack_table) / sizeof(qpack_table[0]))
207
208/* This is calculated at runtime */
209static unsigned qpack_name_indexes[QPACK_STATIC_TABLE_SIZE];
210static unsigned n_qpack_name_indexes;
211
212/* QPACK static table has an interesting property that headers names are
213 * not all placed in contiguous sequences.  For example, :status appears
214 * in two places in the table.
215 */
216static void
217calculate_qpack_name_indexes (void)
218{
219    unsigned i, n;
220
221    qpack_name_indexes[0] = 0;
222    n_qpack_name_indexes = 1;
223    for (n = 1; n < QPACK_STATIC_TABLE_SIZE; ++n)
224    {
225        for (i = 0; i < n
226                && !(qpack_table[i].name_len == qpack_table[n].name_len
227                && 0 == memcmp(qpack_table[i].name,
228                            qpack_table[n].name, qpack_table[n].name_len)); ++i)
229            ;
230        if (i == n)
231            qpack_name_indexes[n_qpack_name_indexes++] = n;
232    }
233}
234
235
236/* Need to strike some balance here.  A small start width will result in
237 * potentially long search time and -- more importantly -- in a hpack_table that
238 * is very small, which may require lookup code to perform more comparisons.
239 * On the other hand, a large width will result in a hpack_table that may be
240 * slower to use.
241 */
242static unsigned MIN_WIDTH = 9;
243static unsigned MAX_WIDTH = 9;
244
245static unsigned MIN_SHIFT = 0;
246static unsigned MAX_SHIFT = 31;
247
248/* Return true if acceptable shift and width were found, false otherwise */
249static int
250find_shift_and_width (const uint32_t *hashes, const unsigned n_hashes,
251                                        unsigned *shift_p, unsigned *width_p)
252{
253    unsigned shift, width, hash, i, j;
254
255    for (width = MIN_WIDTH; width <= MAX_WIDTH; ++width)
256    {
257        for (shift = MIN_SHIFT; shift <= MAX_SHIFT
258                                            && shift < 32 - width; ++shift)
259        {
260            for (i = 1; i < n_hashes; ++i)
261            {
262                hash = hashes[i] & (((1u << width) - 1) << shift);
263                for (j = 0; j < i; ++j)
264                    if ((hashes[j] & (((1u << width) - 1) << shift)) == hash)
265                        goto check;
266            }
267  check:    if (i >= n_hashes)
268            {
269                *shift_p = shift;
270                *width_p = width;
271                return 1;
272            }
273        }
274    }
275
276    return 0;
277}
278
279
280int
281main (int argc, char **argv)
282{
283    uint32_t hpack_name_hashes[HPACK_NAME_SIZE];
284    uint32_t hpack_nameval_hashes[HPACK_STATIC_TABLE_SIZE];
285    uint32_t qpack_nameval_hashes[QPACK_STATIC_TABLE_SIZE];
286    uint32_t qpack_name_hashes[QPACK_STATIC_TABLE_SIZE];
287    uint32_t seed, init_seed = 0;
288    unsigned n, idx;
289    unsigned hpack_nameval_shift, hpack_name_width, hpack_nameval_width,
290                hpack_name_shift;
291    unsigned qpack_nameval_shift, qpack_name_width, qpack_nameval_width,
292                qpack_name_shift;
293    int opt, dont_stop = 0, print_tables = 0;
294
295    while (-1 != (opt = getopt(argc, argv, "i:w:W:s:S:phN")))
296    {
297        switch (opt)
298        {
299        case 'i':
300            init_seed = atoi(optarg);
301            break;
302        case 'w':
303            MIN_WIDTH = atoi(optarg);
304            break;
305        case 'W':
306            MAX_WIDTH = atoi(optarg);
307            break;
308        case 's':
309            MIN_SHIFT = atoi(optarg);
310            break;
311        case 'S':
312            MAX_SHIFT = atoi(optarg);
313            break;
314        case 'p':
315            print_tables = 1;
316            break;
317        case 'N':
318            dont_stop = 1;
319            break;
320        case 'h': printf(
321"Usage: %s [options]\n"
322"\n"
323"   -i seed     Initial seed (defaults to 0)\n"
324"   -w width    Minimum width (defaults to %u)\n"
325"   -W width    Maximum width (defaults to %u)\n"
326"   -s shift    Minimum shift (defaults to %u)\n"
327"   -S shift    Maximum shift (defaults to %u)\n"
328"   -N          Don't stop after finding a match, keep searching\n"
329"   -p          Print resulting HPACK and QPACK tables\n"
330"   -h          Print this help screen and exit\n"
331            , argv[0], MIN_WIDTH, MAX_WIDTH, MIN_SHIFT, MAX_SHIFT);
332            return 0;
333        }
334    }
335
336    seed = init_seed;
337    calculate_qpack_name_indexes();
338
339  again:
340    for (n = 0; n < HPACK_NAME_SIZE; ++n)
341    {
342        idx = hpack_name_indexes[n];
343        hpack_name_hashes[n] = XXH32(hpack_table[idx].name,
344                                        hpack_table[idx].name_len, seed);
345    }
346    if (!find_shift_and_width(hpack_name_hashes, HPACK_NAME_SIZE,
347                                    &hpack_name_shift, &hpack_name_width))
348        goto incr_seed;
349
350    for (n = 0; n < HPACK_STATIC_TABLE_SIZE; ++n)
351    {
352        hpack_nameval_hashes[n] = XXH32(hpack_table[n].name,
353                                            hpack_table[n].name_len, seed);
354        hpack_nameval_hashes[n] = XXH32(hpack_table[n].val,
355                            hpack_table[n].val_len, hpack_nameval_hashes[n]);
356    }
357    if (!find_shift_and_width(hpack_nameval_hashes, HPACK_STATIC_TABLE_SIZE,
358                                &hpack_nameval_shift, &hpack_nameval_width))
359        goto incr_seed;
360
361    for (n = 0; n < n_qpack_name_indexes; ++n)
362    {
363        idx = qpack_name_indexes[n];
364        qpack_name_hashes[n] = XXH32(qpack_table[idx].name,
365                                            qpack_table[idx].name_len, seed);
366    }
367    if (!find_shift_and_width(qpack_name_hashes, n_qpack_name_indexes,
368                                    &qpack_name_shift, &qpack_name_width))
369        goto incr_seed;
370
371    for (n = 0; n < QPACK_STATIC_TABLE_SIZE; ++n)
372    {
373        qpack_nameval_hashes[n] = XXH32(qpack_table[n].name,
374                                                qpack_table[n].name_len, seed);
375        qpack_nameval_hashes[n] = XXH32(qpack_table[n].val,
376                            qpack_table[n].val_len, qpack_nameval_hashes[n]);
377    }
378    if (!find_shift_and_width(qpack_nameval_hashes, QPACK_STATIC_TABLE_SIZE,
379                                &qpack_nameval_shift, &qpack_nameval_width))
380        goto incr_seed;
381
382    printf("unique set: seed %u\n"
383           "  hpack:\n"
384           "    name shift: %u; width: %u\n"
385           "    nameval shift: %u; width: %u\n"
386           "  qpack:\n"
387           "    name shift: %u; width: %u\n"
388           "    nameval shift: %u; width: %u\n"
389           , seed
390           , hpack_name_shift, hpack_name_width
391           , hpack_nameval_shift, hpack_nameval_width
392           , qpack_name_shift, qpack_name_width
393           , qpack_nameval_shift, qpack_nameval_width
394           );
395
396    if (print_tables)
397    {
398        printf("#define XXH_SEED %"PRIu32"\n", seed);
399
400        printf("#define XXH_HPACK_NAME_WIDTH %"PRIu32"\n", hpack_name_width);
401        printf("#define XXH_HPACK_NAME_SHIFT %"PRIu32"\n", hpack_name_shift);
402        printf("static const unsigned char hpack_name2id[ 1 << XXH_HPACK_NAME_WIDTH ] =\n{\n");
403        for (n = 0; n < HPACK_NAME_SIZE; ++n)
404            printf("[%u] = %u, ", (hpack_name_hashes[n] >> hpack_name_shift)
405                    & ((1 << hpack_name_width) - 1), hpack_name_indexes[n] + 1);
406        printf("\n};\n");
407
408        printf("#define XXH_HPACK_NAMEVAL_WIDTH %"PRIu32"\n", hpack_nameval_width);
409        printf("#define XXH_HPACK_NAMEVAL_SHIFT %"PRIu32"\n", hpack_nameval_shift);
410        printf("static const unsigned char hpack_nameval2id[ 1 << XXH_HPACK_NAMEVAL_WIDTH ] =\n{\n");
411        for (n = 0; n < HPACK_STATIC_TABLE_SIZE; ++n)
412            printf("[%u] = %u, ", (hpack_nameval_hashes[n] >> hpack_nameval_shift)
413                    & ((1 << hpack_nameval_width) - 1), n + 1);
414        printf("\n};\n");
415
416        printf("#define XXH_QPACK_NAME_WIDTH %"PRIu32"\n", qpack_name_width);
417        printf("#define XXH_QPACK_NAME_SHIFT %"PRIu32"\n", qpack_name_shift);
418        printf("static const unsigned char qpack_name2id[ 1 << XXH_QPACK_NAME_WIDTH ] =\n{\n");
419        for (n = 0; n < n_qpack_name_indexes; ++n)
420            printf("[%u] = %u, ", (qpack_name_hashes[n] >> qpack_name_shift)
421                    & ((1 << qpack_name_width) - 1), qpack_name_indexes[n] + 1);
422        printf("\n};\n");
423
424        printf("#define XXH_QPACK_NAMEVAL_WIDTH %"PRIu32"\n", qpack_nameval_width);
425        printf("#define XXH_QPACK_NAMEVAL_SHIFT %"PRIu32"\n", qpack_nameval_shift);
426        printf("static const unsigned char qpack_nameval2id[ 1 << XXH_QPACK_NAMEVAL_WIDTH ] =\n{\n");
427        for (n = 0; n < QPACK_STATIC_TABLE_SIZE; ++n)
428            printf("[%u] = %u, ", (qpack_nameval_hashes[n] >> qpack_nameval_shift)
429                    & ((1 << qpack_nameval_width) - 1), n + 1);
430        printf("\n};\n");
431    }
432
433    if (dont_stop)
434    {
435        fflush(stdout);
436        goto incr_seed;
437    }
438
439#if 0   /* TODO */
440    for (i = 0; i < TABLE_SIZE; ++i)
441#if NAMEVAL_SEARCH
442        printf("[%u] = %u,\n", (hashes[i] >> min_shift) & ((1 << min_width) - 1), nameval_indexes[i] + 1);
443#else
444        printf("[%u] = %u,\n", (hashes[i] >> min_shift) & ((1 << min_width) - 1), name_indexes[i] + 1);
445#endif
446#endif
447
448    return 0;
449
450  incr_seed:
451    ++seed;
452    if ((seed - init_seed) % 100000 == 0)
453        fprintf(stderr, "....  seed: %u\n", seed);
454    goto again;
455}
456