/* Copyright (c) 2017 - 2022 LiteSpeed Technologies Inc.  See LICENSE. */
/*
xxHash - Fast Hash algorithm
Copyright (C) 2012-2014, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

You can contact the author at :
- xxHash source repository : http://code.google.com/p/xxhash/
- public discussion board : https://groups.google.com/forum/#!forum/lz4c
*/


//**************************************
// Tuning parameters
//**************************************
// Unaligned memory access is automatically enabled for "common" CPUs, such as x86.
// For other CPUs, the compiler is more cautious and inserts extra code to ensure aligned access is respected.
// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance.
// You can also enable this parameter if you know your input data will always be aligned (on 4-byte boundaries, for U32).
#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
#  define XXH_USE_UNALIGNED_ACCESS 1
#endif

// XXH_ACCEPT_NULL_INPUT_POINTER :
// If the input pointer is a null pointer, xxHash's default behavior is to trigger a memory access error, since it is a bad pointer.
// When this option is enabled, xxHash output for a null input pointer is the same as for a zero-length input.
// This option has a very small performance cost (only measurable on small inputs).
// By default, this option is disabled. To enable it, uncomment the define below :
// #define XXH_ACCEPT_NULL_INPUT_POINTER 1

// XXH_FORCE_NATIVE_FORMAT :
// By default, the xxHash library provides endian-independent hash values, based on the little-endian convention.
// Results are therefore identical for little-endian and big-endian CPUs.
// This comes at a performance cost for big-endian CPUs, since some swapping is required to emulate the little-endian format.
// Should endian-independence be of no importance for your application, you may set the #define below to 1.
// It will improve speed for big-endian CPUs.
// This option has no impact on little-endian CPUs.
#define XXH_FORCE_NATIVE_FORMAT 0

//**************************************
// Compiler Specific Options
//**************************************
// Disable some Visual warning messages
#ifdef _MSC_VER  // Visual Studio
#  pragma warning(disable : 4127)      // disable: C4127: conditional expression is constant
#endif

#ifdef _MSC_VER    // Visual Studio
#  define FORCE_INLINE static __forceinline
#else
#  ifdef __GNUC__
#    define FORCE_INLINE static inline __attribute__((always_inline))
#  else
#    define FORCE_INLINE static inline
#  endif
#endif

//**************************************
// Includes & Memory related functions
//**************************************
#include "lsquic_xxhash.h"
// Modify the local functions below should you wish to use some other memory routines
// for malloc(), free()
#include <stdlib.h>
static void *XXH_malloc(size_t s) { return malloc(s); }
static void  XXH_free(void *p)  { free(p); }
// for memcpy()
#include <string.h>
static void *XXH_memcpy(void *dest, const void *src, size_t size)
{
    return memcpy(dest, src, size);
}


//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
# include <stdint.h>
typedef uint8_t  BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef  int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char      BYTE;
typedef unsigned short     U16;
typedef unsigned int       U32;
typedef   signed int       S32;
typedef unsigned long long U64;
#endif

#if defined(__GNUC__)  && !defined(XXH_USE_UNALIGNED_ACCESS)
#  define _PACKED __attribute__ ((packed))
#else
#  define _PACKED
#endif

#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  ifdef __IBMC__
#    pragma pack(1)
#  else
#    pragma pack(push, 1)
#  endif
#endif

typedef struct _U32_S
{
    U32 v;
} _PACKED U32_S;
typedef struct _U64_S
{
    U64 v;
} _PACKED U64_S;

#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(pop)
#endif

#define A32(x) (((U32_S *)(x))->v)
#define A64(x) (((U64_S *)(x))->v)
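// Note: A32()/A64() read a 32- or 64-bit value through the wrapper structs
// above; when _PACKED expands to __attribute__((packed)) (or the pack pragmas
// apply), the compiler treats the pointer as potentially unaligned and emits
// safe accesses.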


//***************************************
// Compiler-specific Functions and Macros
//***************************************
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

// Note : although _rotl exists for MinGW (GCC under Windows), performance seems poor
#if defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif

#if defined(_MSC_VER)     // Visual Studio
#  define XXH_swap32 _byteswap_ulong
#  define XXH_swap64 _byteswap_uint64
#elif GCC_VERSION >= 403
#  define XXH_swap32 __builtin_bswap32
#  define XXH_swap64 __builtin_bswap64
#else
static inline U32 XXH_swap32(U32 x)
{
    return ((x << 24) & 0xff000000) |
           ((x <<  8) & 0x00ff0000) |
           ((x >>  8) & 0x0000ff00) |
           ((x >> 24) & 0x000000ff);
}
static inline U64 XXH_swap64(U64 x)
{
    return ((x << 56) & 0xff00000000000000ULL) |
           ((x << 40) & 0x00ff000000000000ULL) |
           ((x << 24) & 0x0000ff0000000000ULL) |
           ((x << 8)  & 0x000000ff00000000ULL) |
           ((x >> 8)  & 0x00000000ff000000ULL) |
           ((x >> 24) & 0x0000000000ff0000ULL) |
           ((x >> 40) & 0x000000000000ff00ULL) |
           ((x >> 56) & 0x00000000000000ffULL);
}
#endif


//**************************************
// Constants
//**************************************
#define PRIME32_1   2654435761U
#define PRIME32_2   2246822519U
#define PRIME32_3   3266489917U
#define PRIME32_4    668265263U
#define PRIME32_5    374761393U

#define PRIME64_1 11400714785074694791ULL
#define PRIME64_2 14029467366897019727ULL
#define PRIME64_3  1609587929392839161ULL
#define PRIME64_4  9650029242287828579ULL
#define PRIME64_5  2870177450012600261ULL

//**************************************
// Architecture Macros
//**************************************
typedef enum { XXH_bigEndian = 0, XXH_littleEndian = 1 } XXH_endianess;
#ifndef XXH_CPU_LITTLE_ENDIAN   // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
static const int one = 1;
#   define XXH_CPU_LITTLE_ENDIAN   (*(char*)(&one))
#endif


//**************************************
// Macros
//**************************************
#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    // use only *after* variable declarations
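// (When the condition is false, 1/(!!(c)) divides by zero inside a constant
// expression, which forces a compile-time error.)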


//****************************
// Memory reads
//****************************
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;

FORCE_INLINE U32 XXH_readLE32_align(const void *ptr, XXH_endianess endian,
                                    XXH_alignment align)
{
    if (align == XXH_unaligned)
        return endian == XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
    else
        return endian == XXH_littleEndian ? *(U32 *)ptr : XXH_swap32(*(U32 *)ptr);
}

FORCE_INLINE U32 XXH_readLE32(const void *ptr, XXH_endianess endian)
{
    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
}

FORCE_INLINE U64 XXH_readLE64_align(const void *ptr, XXH_endianess endian,
                                    XXH_alignment align)
{
    if (align == XXH_unaligned)
        return endian == XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr));
    else
        return endian == XXH_littleEndian ? *(U64 *)ptr : XXH_swap64(*(U64 *)ptr);
}

FORCE_INLINE U64 XXH_readLE64(const void *ptr, XXH_endianess endian)
{
    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
}


//****************************
// Simple Hash Functions
//****************************
FORCE_INLINE U32 XXH32_endian_align(const void *input, size_t len,
                                    U32 seed, XXH_endianess endian, XXH_alignment align)
{
    const BYTE *p = (const BYTE *)input;
    const BYTE *bEnd = p + len;
    U32 h32;
#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (p == NULL)
    {
        len = 0;
        bEnd = p = (const BYTE *)(size_t)16;
    }
#endif

    if (len >= 16)
    {
        const BYTE *const limit = bEnd - 16;
        U32 v1 = seed + PRIME32_1 + PRIME32_2;
        U32 v2 = seed + PRIME32_2;
        U32 v3 = seed + 0;
        U32 v4 = seed - PRIME32_1;

        do
        {
            v1 += XXH_get32bits(p) * PRIME32_2;
            v1 = XXH_rotl32(v1, 13);
            v1 *= PRIME32_1;
            p += 4;
            v2 += XXH_get32bits(p) * PRIME32_2;
            v2 = XXH_rotl32(v2, 13);
            v2 *= PRIME32_1;
            p += 4;
            v3 += XXH_get32bits(p) * PRIME32_2;
            v3 = XXH_rotl32(v3, 13);
            v3 *= PRIME32_1;
            p += 4;
            v4 += XXH_get32bits(p) * PRIME32_2;
            v4 = XXH_rotl32(v4, 13);
            v4 *= PRIME32_1;
            p += 4;
        }
        while (p <= limit);

        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3,
                12) + XXH_rotl32(v4, 18);
    }
    else
        h32  = seed + PRIME32_5;

    h32 += (U32) len;

    while (p + 4 <= bEnd)
    {
        h32 += XXH_get32bits(p) * PRIME32_3;
        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
        p += 4;
    }

    while (p < bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
        p++;
    }

    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}


unsigned int XXH32(const void *input, size_t len, unsigned seed)
{
#if 0
    // Simple version, good for code maintenance, but unfortunately slow for small inputs
    XXH32_state_t state;
    XXH32_reset(&state, seed);
    XXH32_update(&state, input, len);
    return XXH32_digest(&state);
#else
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

#  if !defined(XXH_USE_UNALIGNED_ACCESS)
    if ((((size_t)input) & 3) == 0)   // Input is aligned, let's leverage the speed advantage
    {
        if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
        else
            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
#  endif

    if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_endian_align(input, len, seed, XXH_littleEndian,
                                  XXH_unaligned);
    else
        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}

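/* Illustrative one-shot usage (a sketch for readers, not compiled as part of
 * this library; the key/seed names below are made up for the example):
 *
 *     static unsigned hash_key(const char *key, size_t key_len)
 *     {
 *         return XXH32(key, key_len, 0);   // any fixed seed works; 0 is common
 *     }
 */
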
FORCE_INLINE U64 XXH64_endian_align(const void *input, size_t len,
                                    U64 seed, XXH_endianess endian, XXH_alignment align)
{
    const BYTE *p = (const BYTE *)input;
    const BYTE *bEnd = p + len;
    U64 h64;
#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (p == NULL)
    {
        len = 0;
        bEnd = p = (const BYTE *)(size_t)32;
    }
#endif

    if (len >= 32)
    {
        const BYTE *const limit = bEnd - 32;
        U64 v1 = seed + PRIME64_1 + PRIME64_2;
        U64 v2 = seed + PRIME64_2;
        U64 v3 = seed + 0;
        U64 v4 = seed - PRIME64_1;

        do
        {
            v1 += XXH_get64bits(p) * PRIME64_2;
            p += 8;
            v1 = XXH_rotl64(v1, 31);
            v1 *= PRIME64_1;
            v2 += XXH_get64bits(p) * PRIME64_2;
            p += 8;
            v2 = XXH_rotl64(v2, 31);
            v2 *= PRIME64_1;
            v3 += XXH_get64bits(p) * PRIME64_2;
            p += 8;
            v3 = XXH_rotl64(v3, 31);
            v3 *= PRIME64_1;
            v4 += XXH_get64bits(p) * PRIME64_2;
            p += 8;
            v4 = XXH_rotl64(v4, 31);
            v4 *= PRIME64_1;
        }
        while (p <= limit);

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3,
                12) + XXH_rotl64(v4, 18);

        v1 *= PRIME64_2;
        v1 = XXH_rotl64(v1, 31);
        v1 *= PRIME64_1;
        h64 ^= v1;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v2 *= PRIME64_2;
        v2 = XXH_rotl64(v2, 31);
        v2 *= PRIME64_1;
        h64 ^= v2;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v3 *= PRIME64_2;
        v3 = XXH_rotl64(v3, 31);
        v3 *= PRIME64_1;
        h64 ^= v3;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v4 *= PRIME64_2;
        v4 = XXH_rotl64(v4, 31);
        v4 *= PRIME64_1;
        h64 ^= v4;
        h64 = h64 * PRIME64_1 + PRIME64_4;
    }
    else
        h64  = seed + PRIME64_5;

    h64 += (U64) len;

    while (p + 8 <= bEnd)
    {
        U64 k1 = XXH_get64bits(p);
        k1 *= PRIME64_2;
        k1 = XXH_rotl64(k1, 31);
        k1 *= PRIME64_1;
        h64 ^= k1;
        h64 = XXH_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
        p += 8;
    }

    if (p + 4 <= bEnd)
    {
        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
        p += 4;
    }

    while (p < bEnd)
    {
        h64 ^= (*p) * PRIME64_5;
        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
        p++;
    }

    h64 ^= h64 >> 33;
    h64 *= PRIME64_2;
    h64 ^= h64 >> 29;
    h64 *= PRIME64_3;
    h64 ^= h64 >> 32;

    return h64;
}


unsigned long long XXH64(const void *input, size_t len,
                         unsigned long long seed)
{
#if 0
    // Simple version, good for code maintenance, but unfortunately slow for small inputs
    XXH64_state_t state;
    XXH64_reset(&state, seed);
    XXH64_update(&state, input, len);
    return XXH64_digest(&state);
#else
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

#  if !defined(XXH_USE_UNALIGNED_ACCESS)
    if ((((size_t)input) & 7) == 0) // Input is aligned, let's leverage the speed advantage
    {
        if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
            return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
        else
            return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
#  endif

    if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH64_endian_align(input, len, seed, XXH_littleEndian,
                                  XXH_unaligned);
    else
        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}

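/* Illustrative 64-bit one-shot usage (sketch only; the struct below is
 * hypothetical and not part of lsquic):
 *
 *     struct blob { const void *data; size_t len; };
 *
 *     static unsigned long long blob_hash(const struct blob *b,
 *                                         unsigned long long seed)
 *     {
 *         // Same data and seed give the same value on any host endianness.
 *         return XXH64(b->data, b->len, seed);
 *     }
 */
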
/****************************************************
 *  Advanced Hash Functions
****************************************************/

/*** Allocation ***/
typedef struct
{
    U64 total_len;
    U32 seed;
    U32 v1;
    U32 v2;
    U32 v3;
    U32 v4;
    U32 mem32[4];   /* defined as U32 for alignment */
    U32 memsize;
} XXH_istate32_t;

typedef struct
{
    U64 total_len;
    U64 seed;
    U64 v1;
    U64 v2;
    U64 v3;
    U64 v4;
    U64 mem64[4];   /* defined as U64 for alignment */
    U32 memsize;
} XXH_istate64_t;


XXH32_state_t *XXH32_createState(void)
{
    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   // A compilation error here means XXH32_state_t is not large enough
    return (XXH32_state_t *)XXH_malloc(sizeof(XXH32_state_t));
}
XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
}

XXH64_state_t *XXH64_createState(void)
{
    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   // A compilation error here means XXH64_state_t is not large enough
    return (XXH64_state_t *)XXH_malloc(sizeof(XXH64_state_t));
}
XXH_errorcode XXH64_freeState(XXH64_state_t *statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
}


/*** Hash feed ***/

XXH_errorcode XXH32_reset(XXH32_state_t *state_in, U32 seed)
{
    XXH_istate32_t *state = (XXH_istate32_t *) state_in;
    state->seed = seed;
    state->v1 = seed + PRIME32_1 + PRIME32_2;
    state->v2 = seed + PRIME32_2;
    state->v3 = seed + 0;
    state->v4 = seed - PRIME32_1;
    state->total_len = 0;
    state->memsize = 0;
    return XXH_OK;
}

XXH_errorcode XXH64_reset(XXH64_state_t *state_in, unsigned long long seed)
{
    XXH_istate64_t *state = (XXH_istate64_t *) state_in;
    state->seed = seed;
    state->v1 = seed + PRIME64_1 + PRIME64_2;
    state->v2 = seed + PRIME64_2;
    state->v3 = seed + 0;
    state->v4 = seed - PRIME64_1;
    state->total_len = 0;
    state->memsize = 0;
    return XXH_OK;
}


FORCE_INLINE XXH_errorcode XXH32_update_endian(XXH32_state_t *state_in,
        const void *input, size_t len, XXH_endianess endian)
{
    XXH_istate32_t *state = (XXH_istate32_t *) state_in;
    const BYTE *p = (const BYTE *)input;
    const BYTE *const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input == NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 16)   // fill in tmp buffer
    {
        XXH_memcpy((BYTE *)(state->mem32) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize)   // some data left from previous update
    {
        XXH_memcpy((BYTE *)(state->mem32) + state->memsize, input,
                   16 - state->memsize);
        {
            const U32 *p32 = state->mem32;
            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v1 = XXH_rotl32(state->v1, 13);
            state->v1 *= PRIME32_1;
            p32++;
            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v2 = XXH_rotl32(state->v2, 13);
            state->v2 *= PRIME32_1;
            p32++;
            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v3 = XXH_rotl32(state->v3, 13);
            state->v3 *= PRIME32_1;
            p32++;
            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v4 = XXH_rotl32(state->v4, 13);
            state->v4 *= PRIME32_1;
            p32++;
        }
        p += 16 - state->memsize;
        state->memsize = 0;
    }

    if (p <= bEnd - 16)
    {
        const BYTE *const limit = bEnd - 16;
        U32 v1 = state->v1;
        U32 v2 = state->v2;
        U32 v3 = state->v3;
        U32 v4 = state->v4;

        do
        {
            v1 += XXH_readLE32(p, endian) * PRIME32_2;
            v1 = XXH_rotl32(v1, 13);
            v1 *= PRIME32_1;
            p += 4;
            v2 += XXH_readLE32(p, endian) * PRIME32_2;
            v2 = XXH_rotl32(v2, 13);
            v2 *= PRIME32_1;
            p += 4;
            v3 += XXH_readLE32(p, endian) * PRIME32_2;
            v3 = XXH_rotl32(v3, 13);
            v3 *= PRIME32_1;
            p += 4;
            v4 += XXH_readLE32(p, endian) * PRIME32_2;
            v4 = XXH_rotl32(v4, 13);
            v4 *= PRIME32_1;
            p += 4;
        }
        while (p <= limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)
    {
        XXH_memcpy(state->mem32, p, bEnd - p);
        state->memsize = (int)(bEnd - p);
    }

    return XXH_OK;
}

XXH_errorcode XXH32_update(XXH32_state_t *state_in, const void *input,
                           size_t len)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
    else
        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
}



FORCE_INLINE U32 XXH32_digest_endian(const XXH32_state_t *state_in,
                                     XXH_endianess endian)
{
    XXH_istate32_t *state = (XXH_istate32_t *) state_in;
    const BYTE *p = (const BYTE *)state->mem32;
    BYTE *bEnd = (BYTE *)(state->mem32) + state->memsize;
    U32 h32;

    if (state->total_len >= 16)
        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2,
                7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
    else
        h32  = state->seed + PRIME32_5;

    h32 += (U32) state->total_len;

    while (p + 4 <= bEnd)
    {
        h32 += XXH_readLE32(p, endian) * PRIME32_3;
        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
        p += 4;
    }

    while (p < bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
        p++;
    }

    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}


U32 XXH32_digest(const XXH32_state_t *state_in)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_digest_endian(state_in, XXH_littleEndian);
    else
        return XXH32_digest_endian(state_in, XXH_bigEndian);
}

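/* Illustrative streaming usage (sketch only; buffer names and lengths are
 * hypothetical).  Feeding data in pieces yields the same digest as a single
 * XXH32() call over the concatenated input:
 *
 *     XXH32_state_t *st = XXH32_createState();
 *     if (st)
 *     {
 *         unsigned h;
 *         XXH32_reset(st, 0);                  // seed 0
 *         XXH32_update(st, part1, part1_len);  // first chunk
 *         XXH32_update(st, part2, part2_len);  // second chunk
 *         h = XXH32_digest(st);                // digest of part1 followed by part2
 *         XXH32_freeState(st);
 *     }
 */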

FORCE_INLINE XXH_errorcode XXH64_update_endian(XXH64_state_t *state_in,
        const void *input, size_t len, XXH_endianess endian)
{
    XXH_istate64_t *state = (XXH_istate64_t *) state_in;
    const BYTE *p = (const BYTE *)input;
    const BYTE *const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input == NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 32)   // fill in tmp buffer
    {
        XXH_memcpy(((BYTE *)state->mem64) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize)   // some data left from previous update
    {
        XXH_memcpy(((BYTE *)state->mem64) + state->memsize, input,
                   32 - state->memsize);
        {
            const U64 *p64 = state->mem64;
            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v1 = XXH_rotl64(state->v1, 31);
            state->v1 *= PRIME64_1;
            p64++;
            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v2 = XXH_rotl64(state->v2, 31);
            state->v2 *= PRIME64_1;
            p64++;
            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v3 = XXH_rotl64(state->v3, 31);
            state->v3 *= PRIME64_1;
            p64++;
            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v4 = XXH_rotl64(state->v4, 31);
            state->v4 *= PRIME64_1;
            p64++;
        }
        p += 32 - state->memsize;
        state->memsize = 0;
    }

    if (p + 32 <= bEnd)
    {
        const BYTE *const limit = bEnd - 32;
        U64 v1 = state->v1;
        U64 v2 = state->v2;
        U64 v3 = state->v3;
        U64 v4 = state->v4;

        do
        {
            v1 += XXH_readLE64(p, endian) * PRIME64_2;
            v1 = XXH_rotl64(v1, 31);
            v1 *= PRIME64_1;
            p += 8;
            v2 += XXH_readLE64(p, endian) * PRIME64_2;
            v2 = XXH_rotl64(v2, 31);
            v2 *= PRIME64_1;
            p += 8;
            v3 += XXH_readLE64(p, endian) * PRIME64_2;
            v3 = XXH_rotl64(v3, 31);
            v3 *= PRIME64_1;
            p += 8;
            v4 += XXH_readLE64(p, endian) * PRIME64_2;
            v4 = XXH_rotl64(v4, 31);
            v4 *= PRIME64_1;
            p += 8;
        }
        while (p <= limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)
    {
        XXH_memcpy(state->mem64, p, bEnd - p);
        state->memsize = (int)(bEnd - p);
    }

    return XXH_OK;
}

XXH_errorcode XXH64_update(XXH64_state_t *state_in, const void *input,
                           size_t len)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
    else
        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
}



FORCE_INLINE U64 XXH64_digest_endian(const XXH64_state_t *state_in,
                                     XXH_endianess endian)
{
    XXH_istate64_t *state = (XXH_istate64_t *) state_in;
    const BYTE *p = (const BYTE *)state->mem64;
    BYTE *bEnd = (BYTE *)state->mem64 + state->memsize;
    U64 h64;

    if (state->total_len >= 32)
    {
        U64 v1 = state->v1;
        U64 v2 = state->v2;
        U64 v3 = state->v3;
        U64 v4 = state->v4;

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3,
                12) + XXH_rotl64(v4, 18);

        v1 *= PRIME64_2;
        v1 = XXH_rotl64(v1, 31);
        v1 *= PRIME64_1;
        h64 ^= v1;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v2 *= PRIME64_2;
        v2 = XXH_rotl64(v2, 31);
        v2 *= PRIME64_1;
        h64 ^= v2;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v3 *= PRIME64_2;
        v3 = XXH_rotl64(v3, 31);
        v3 *= PRIME64_1;
        h64 ^= v3;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v4 *= PRIME64_2;
        v4 = XXH_rotl64(v4, 31);
        v4 *= PRIME64_1;
        h64 ^= v4;
        h64 = h64 * PRIME64_1 + PRIME64_4;
    }
    else
        h64  = state->seed + PRIME64_5;

    h64 += (U64) state->total_len;

    while (p + 8 <= bEnd)
    {
        U64 k1 = XXH_readLE64(p, endian);
        k1 *= PRIME64_2;
        k1 = XXH_rotl64(k1, 31);
        k1 *= PRIME64_1;
        h64 ^= k1;
        h64 = XXH_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
        p += 8;
    }

    if (p + 4 <= bEnd)
    {
        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
        p += 4;
    }

    while (p < bEnd)
    {
        h64 ^= (*p) * PRIME64_5;
        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
        p++;
    }

    h64 ^= h64 >> 33;
    h64 *= PRIME64_2;
    h64 ^= h64 >> 29;
    h64 *= PRIME64_3;
    h64 ^= h64 >> 32;

    return h64;
}


unsigned long long XXH64_digest(const XXH64_state_t *state_in)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH64_digest_endian(state_in, XXH_littleEndian);
    else
        return XXH64_digest_endian(state_in, XXH_bigEndian);
}

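/* Illustrative use of the 64-bit streaming API to hash input of unknown
 * length (sketch only; error handling is abbreviated):
 *
 *     #include <stdio.h>
 *
 *     static unsigned long long hash_stream(FILE *f)
 *     {
 *         char buf[4096];
 *         size_t n;
 *         unsigned long long h;
 *         XXH64_state_t *st = XXH64_createState();
 *         if (!st)
 *             return 0;
 *         XXH64_reset(st, 0);
 *         while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
 *             XXH64_update(st, buf, n);
 *         h = XXH64_digest(st);
 *         XXH64_freeState(st);
 *         return h;
 *     }
 */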