1 /*
2 xxHash - Extremely Fast Hash algorithm
3 Header File
4 Copyright (C) 2012-2016, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - xxHash source repository : https://github.com/Cyan4973/xxHash
33 */
34
35 /* Notice extracted from xxHash homepage :
36
37 xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38 It also successfully passes all tests from the SMHasher suite.
39
40 Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41
42 Name Speed Q.Score Author
43 xxHash 5.4 GB/s 10
44 CrapWow 3.2 GB/s 2 Andrew
45   MurmurHash 3a           2.7 GB/s     10       Austin Appleby
46 SpookyHash 2.0 GB/s 10 Bob Jenkins
47 SBox 1.4 GB/s 9 Bret Mulvey
48 Lookup3 1.2 GB/s 9 Bob Jenkins
49 SuperFastHash 1.2 GB/s 1 Paul Hsieh
50 CityHash64 1.05 GB/s 10 Pike & Alakuijala
51 FNV 0.55 GB/s 5 Fowler, Noll, Vo
52 CRC32 0.43 GB/s 9
53 MD5-32 0.33 GB/s 10 Ronald L. Rivest
54 SHA1-32 0.28 GB/s 10
55
56 Q.Score is a measure of quality of the hash function.
57 It depends on successfully passing SMHasher test set.
58 10 is a perfect score.
59
60 Note : SMHasher's CRC32 implementation is not the fastest one.
61 Other speed-oriented implementations can be faster,
62 especially in combination with PCLMUL instruction :
63 http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
64
65 A 64-bit version, named XXH64, is available since r35.
66 It offers much better speed, but for 64-bit applications only.
67 Name Speed on 64 bits Speed on 32 bits
68 XXH64 13.8 GB/s 1.9 GB/s
69 XXH32 6.8 GB/s 6.0 GB/s
70 */
71
72 /* Mesa leaves strict aliasing enabled in the compiler, and this code likes to
73  * dereference the passed-in data as a u32*, which means that the compiler is
74  * free to move the u32 read before the write of the struct members being
75  * hashed, and in practice it did so in freedreno.  Forcing the two settings
76  * below to 0 prevents that reordering.
77  */
78 #define XXH_FORCE_ALIGN_CHECK 0
79 #define XXH_FORCE_MEMORY_ACCESS 0
80
81 #if defined (__cplusplus)
82 extern "C" {
83 #endif
84
85
86 #ifndef XXHASH_H_5627135585666179
87 #define XXHASH_H_5627135585666179 1
88
89 /* ****************************
90 * API modifier
91 ******************************/
92 /** XXH_INLINE_ALL (and XXH_PRIVATE_API)
93 * This build macro includes xxhash functions in `static` mode
94 * in order to inline them, and remove their symbol from the public list.
95 * Inlining offers great performance improvement on small keys,
96 * and dramatic ones when length is expressed as a compile-time constant.
97 * See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html .
98 * Methodology :
99 * #define XXH_INLINE_ALL
100 * #include "xxhash.h"
101 * `xxhash.c` is automatically included.
102 * It's not useful to compile and link it as a separate object.
103 */
104 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
105 # ifndef XXH_STATIC_LINKING_ONLY
106 # define XXH_STATIC_LINKING_ONLY
107 # endif
108 # if defined(__GNUC__)
109 # define XXH_PUBLIC_API static __inline __attribute__((unused))
110 # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
111 # define XXH_PUBLIC_API static inline
112 # elif defined(_MSC_VER)
113 # define XXH_PUBLIC_API static __inline
114 # else
115 /* this version may generate warnings for unused static functions */
116 # define XXH_PUBLIC_API static
117 # endif
118 #else
119 # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
120 # ifdef XXH_EXPORT
121 # define XXH_PUBLIC_API __declspec(dllexport)
122 # elif XXH_IMPORT
123 # define XXH_PUBLIC_API __declspec(dllimport)
124 # endif
125 # else
126 # define XXH_PUBLIC_API /* do nothing */
127 # endif
128 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
129
130 /*! XXH_NAMESPACE, aka Namespace Emulation :
131 *
132 * If you want to include _and expose_ xxHash functions from within your own library,
133 * but also want to avoid symbol collisions with other libraries which may also include xxHash,
134 *
135 * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
136 * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
137 *
138 * Note that no change is required within the calling program as long as it includes `xxhash.h` :
139  * regular symbol names will be automatically translated by this header.
140 */
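/* As an illustrative sketch (the "MYLIB_" prefix and the `data`/`size` variables are arbitrary
 * examples, not part of this header): building with `-DXXH_NAMESPACE=MYLIB_` renames the exported
 * symbols to MYLIB_XXH32, MYLIB_XXH64, etc., while calling code keeps using the regular names:
 *
 *     #include "xxhash.h"
 *     XXH32_hash_t h = XXH32(data, size, 0);   // translated to MYLIB_XXH32() by this header
 */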
141 #ifdef XXH_NAMESPACE
142 # define XXH_CAT(A,B) A##B
143 # define XXH_NAME2(A,B) XXH_CAT(A,B)
144 # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
145 # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
146 # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
147 # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
148 # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
149 # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
150 # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
151 # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
152 # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
153 # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
154 # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
155 # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
156 # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
157 # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
158 # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
159 # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
160 # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
161 # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
162 # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
163 #endif
164
165
166 /* *************************************
167 * Version
168 ***************************************/
169 #define XXH_VERSION_MAJOR 0
170 #define XXH_VERSION_MINOR 7
171 #define XXH_VERSION_RELEASE 2
172 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
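/* As a worked example with the values above: 0*10000 + 7*100 + 2 = 702,
 * i.e. XXH_VERSION_NUMBER is 702 for v0.7.2. */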
173 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
174
175
176 /* ****************************
177 * Definitions
178 ******************************/
179 #include <stddef.h> /* size_t */
180 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
181
182
183 /*-**********************************************************************
184 * 32-bit hash
185 ************************************************************************/
186 #if !defined (__VMS) \
187 && (defined (__cplusplus) \
188 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
189 # include <stdint.h>
190 typedef uint32_t XXH32_hash_t;
191 #else
192 # include <limits.h>
193 # if UINT_MAX == 0xFFFFFFFFUL
194 typedef unsigned int XXH32_hash_t;
195 # else
196 # if ULONG_MAX == 0xFFFFFFFFUL
197 typedef unsigned long XXH32_hash_t;
198 # else
199 # error "unsupported platform : need a 32-bit type"
200 # endif
201 # endif
202 #endif
203
204 /*! XXH32() :
205 Calculate the 32-bit hash of a sequence of "length" bytes stored at memory address "input".
206 The memory between input & input+length must be valid (allocated and read-accessible).
207 "seed" can be used to alter the result predictably.
208 Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
209 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
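/* Minimal one-shot usage (illustrative sketch; the buffer and the 0 seed are arbitrary):
 *
 *     const char msg[] = "data to hash";
 *     XXH32_hash_t h = XXH32(msg, sizeof(msg) - 1, 0);   // hash the bytes, seed = 0
 */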
210
211 /******* Streaming *******/
212
213 /*
214  * Streaming functions generate the xxHash value from an incremental input.
215 * This method is slower than single-call functions, due to state management.
216 * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
217 *
218 * XXH state must first be allocated, using XXH*_createState() .
219 *
220 * Start a new hash by initializing state with a seed, using XXH*_reset().
221 *
222 * Then, feed the hash state by calling XXH*_update() as many times as necessary.
223 * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
224 *
225 * Finally, a hash value can be produced anytime, by using XXH*_digest().
226  * This function returns the nn-bit hash value as an unsigned 32-bit or 64-bit integer.
227  *
228  * It's still possible to continue feeding input into the hash state after a digest,
229  * and to generate new hash values later on by calling XXH*_digest() again.
230 *
231 * When done, release the state, using XXH*_freeState().
232 */
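/* Illustrative sketch of the sequence described above (error handling kept minimal;
 * `part1`, `part1_len`, `part2`, `part2_len` are placeholder inputs, not part of this header):
 *
 *     XXH32_state_t* const state = XXH32_createState();
 *     XXH32_reset(state, 0);                          // start a new hash, seed = 0
 *     XXH32_update(state, part1, part1_len);          // returns XXH_ERROR on failure
 *     XXH32_update(state, part2, part2_len);
 *     XXH32_hash_t const hash = XXH32_digest(state);  // can be called at any point
 *     XXH32_freeState(state);
 */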
233
234 typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
235 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
236 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
237 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
238
239 XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
240 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
241 XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
242
243 /******* Canonical representation *******/
244
245 /* Default return values from XXH functions are basic unsigned 32 and 64 bits.
246  * This is the simplest and fastest format for further post-processing.
247  * However, this leaves open the question of byte order,
248 * since little and big endian conventions will write the same number differently.
249 *
250 * The canonical representation settles this issue,
251 * by mandating big-endian convention,
252 * aka, the same convention as human-readable numbers (large digits first).
253 * When writing hash values to storage, sending them over a network, or printing them,
254 * it's highly recommended to use the canonical representation,
255 * to ensure portability across a wider range of systems, present and future.
256 *
257 * The following functions allow transformation of hash values into and from canonical format.
258 */
259
260 typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
261 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
262 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
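/* Illustrative sketch of round-tripping a hash through its canonical (big-endian) form;
 * `f` is a placeholder FILE* and error checking is omitted:
 *
 *     XXH32_canonical_t canon;
 *     XXH32_canonicalFromHash(&canon, hash);               // portable byte order
 *     fwrite(canon.digest, sizeof(canon.digest), 1, f);
 *     ...
 *     fread(canon.digest, sizeof(canon.digest), 1, f);
 *     XXH32_hash_t restored = XXH32_hashFromCanonical(&canon);
 */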
263
264
265 #ifndef XXH_NO_LONG_LONG
266 /*-**********************************************************************
267 * 64-bit hash
268 ************************************************************************/
269 #if !defined (__VMS) \
270 && (defined (__cplusplus) \
271 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
272 # include <stdint.h>
273 typedef uint64_t XXH64_hash_t;
274 #else
275 /* the following type must have a width of 64-bit */
276 typedef unsigned long long XXH64_hash_t;
277 #endif
278
279 /*! XXH64() :
280  * Returns the 64-bit hash of the sequence of @length bytes stored at memory address @input.
281 * @seed can be used to alter the result predictably.
282 * This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
283 */
284 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed);
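/* One-shot usage mirrors XXH32 (illustrative sketch; `msg`/`msg_len` and the seed are arbitrary):
 *
 *     XXH64_hash_t h64 = XXH64(msg, msg_len, 0);
 */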
285
286 /******* Streaming *******/
287 typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
288 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
289 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
290 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
291
292 XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed);
293 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
294 XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
295
296 /******* Canonical representation *******/
297 typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
298 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
299 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
300
301
302 #endif /* XXH_NO_LONG_LONG */
303
304 #endif /* XXHASH_H_5627135585666179 */
305
306
307
308 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
309 #define XXHASH_H_STATIC_13879238742
310 /* ************************************************************************************************
311 This section contains declarations which are not guaranteed to remain stable.
312 They may change in future versions, becoming incompatible with a different version of the library.
313 These declarations should only be used with static linking.
314 Never use them in association with dynamic linking !
315 *************************************************************************************************** */
316
317 /* These definitions are only present to allow
318 * static allocation of XXH state, on stack or in a struct for example.
319  * Never **ever** access their members directly. */
320
321 struct XXH32_state_s {
322 XXH32_hash_t total_len_32;
323 XXH32_hash_t large_len;
324 XXH32_hash_t v1;
325 XXH32_hash_t v2;
326 XXH32_hash_t v3;
327 XXH32_hash_t v4;
328 XXH32_hash_t mem32[4];
329 XXH32_hash_t memsize;
330    XXH32_hash_t reserved;   /* never read nor written, might be removed in a future version */
331 }; /* typedef'd to XXH32_state_t */
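/* Illustrative sketch of the stack allocation these definitions enable
 * (assumes XXH_STATIC_LINKING_ONLY was defined before including this header;
 * `buf`/`buf_len` are placeholders):
 *
 *     XXH32_state_t state;                   // no XXH32_createState()/XXH32_freeState() needed
 *     XXH32_reset(&state, 0);
 *     XXH32_update(&state, buf, buf_len);
 *     XXH32_hash_t h = XXH32_digest(&state);
 */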
332
333
334 #ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
335
336 struct XXH64_state_s {
337 XXH64_hash_t total_len;
338 XXH64_hash_t v1;
339 XXH64_hash_t v2;
340 XXH64_hash_t v3;
341 XXH64_hash_t v4;
342 XXH64_hash_t mem64[4];
343 XXH32_hash_t memsize;
344 XXH32_hash_t reserved32; /* required for padding anyway */
345    XXH64_hash_t reserved64;  /* never read nor written, might be removed in a future version */
346 }; /* typedef'd to XXH64_state_t */
347
348 #endif /* XXH_NO_LONG_LONG */
349
350 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
351 # define XXH_IMPLEMENTATION
352 #endif
353
354 #endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
355
356
357
358 /*-**********************************************************************
359 * xxHash implementation
360  * The function implementations used to be hosted in xxhash.c .
361  * However, code inlining requires the implementation to be placed in the header file.
362  * As a consequence, xxhash.c used to be included within xxhash.h .
363  * But some build systems don't like *.c inclusions.
364  * So the implementation is now integrated directly into xxhash.h .
365 * Another small advantage is that xxhash.c is no longer required in /includes .
366 ************************************************************************/
367
368 #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
369 || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
370 # define XXH_IMPLEM_13a8737387
371
372 /* *************************************
373 * Tuning parameters
374 ***************************************/
375 /*!XXH_FORCE_MEMORY_ACCESS :
376 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
377 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
378  * The switch below allows selecting a different access method for improved performance.
379  * Method 0 (default) : use `memcpy()`. Safe and portable.
380  * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
381  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
382  * Method 2 : direct access. This method doesn't depend on the compiler but violates the C standard.
383  *            It can generate buggy code on targets which do not support unaligned memory accesses.
384  *            But in some circumstances, it's the only known way to get the most performance (e.g. GCC + ARMv6)
385 * See http://stackoverflow.com/a/32095106/646947 for details.
386 * Prefer these methods in priority order (0 > 1 > 2)
387 */
388 #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
389 # if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
390 # define XXH_FORCE_MEMORY_ACCESS 2
391 # elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
392 (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
393 # define XXH_FORCE_MEMORY_ACCESS 1
394 # endif
395 #endif
396
397 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
398  * If the input pointer is NULL, xxHash's default behavior is to dereference it, triggering a segfault.
399  * When this macro is enabled, xxHash actively checks the input for a null pointer.
400  * If it is NULL, the result is the same as for a zero-length input.
401 */
402 #ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */
403 # define XXH_ACCEPT_NULL_INPUT_POINTER 0
404 #endif
405
406 /*!XXH_FORCE_ALIGN_CHECK :
407 * This is a minor performance trick, only useful with lots of very small keys.
408 * It means : check for aligned/unaligned input.
409 * The check costs one initial branch per hash;
410 * set it to 0 when the input is guaranteed to be aligned,
411 * or when alignment doesn't matter for performance.
412 */
413 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
414 # if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
415 # define XXH_FORCE_ALIGN_CHECK 0
416 # else
417 # define XXH_FORCE_ALIGN_CHECK 1
418 # endif
419 #endif
420
421 /*!XXH_REROLL:
422 * Whether to reroll XXH32_finalize, and XXH64_finalize,
423 * instead of using an unrolled jump table/if statement loop.
424 *
425 * This is automatically defined on -Os/-Oz on GCC and Clang. */
426 #ifndef XXH_REROLL
427 # if defined(__OPTIMIZE_SIZE__)
428 # define XXH_REROLL 1
429 # else
430 # define XXH_REROLL 0
431 # endif
432 #endif
433
434
435 /* *************************************
436 * Includes & Memory related functions
437 ***************************************/
438 /*! Modify the local functions below should you wish to use some other memory routines
439 * for malloc(), free() */
440 #include <stdlib.h>
441 static void* XXH_malloc(size_t s) { return malloc(s); }
442 static void XXH_free (void* p) { free(p); }
443 /*! and for memcpy() */
444 #include <string.h>
445 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
446
447 #include <limits.h> /* ULLONG_MAX */
448
449
450 /* *************************************
451 * Compiler Specific Options
452 ***************************************/
453 #ifdef _MSC_VER /* Visual Studio */
454 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
455 # define XXH_FORCE_INLINE static __forceinline
456 # define XXH_NO_INLINE static __declspec(noinline)
457 #else
458 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
459 # ifdef __GNUC__
460 # define XXH_FORCE_INLINE static inline __attribute__((always_inline))
461 # define XXH_NO_INLINE static __attribute__((noinline))
462 # else
463 # define XXH_FORCE_INLINE static inline
464 # define XXH_NO_INLINE static
465 # endif
466 # else
467 # define XXH_FORCE_INLINE static
468 # define XXH_NO_INLINE static
469 # endif /* __STDC_VERSION__ */
470 #endif
471
472
473
474 /* *************************************
475 * Debug
476 ***************************************/
477 /* DEBUGLEVEL is expected to be defined externally,
478 * typically through compiler command line.
479 * Value must be a number. */
480 #ifndef DEBUGLEVEL
481 # define DEBUGLEVEL 0
482 #endif
483
484 #if (DEBUGLEVEL>=1)
485 # include <assert.h> /* note : can still be disabled with NDEBUG */
486 # define XXH_ASSERT(c) assert(c)
487 #else
488 # define XXH_ASSERT(c) ((void)0)
489 #endif
490
491 /* note : use after variable declarations. If the condition is false, !!(c) is 0 and the 1/0 below is a compile-time error. */
492 #define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; }
493
494
495 /* *************************************
496 * Basic Types
497 ***************************************/
498 #if !defined (__VMS) \
499 && (defined (__cplusplus) \
500 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
501 # include <stdint.h>
502 typedef uint8_t xxh_u8;
503 #else
504 typedef unsigned char xxh_u8;
505 #endif
506 typedef XXH32_hash_t xxh_u32;
507
508
509 /* *** Memory access *** */
510
511 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
512
513 /* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
514 static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
515
516 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
517
518 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
519 /* currently only defined for gcc and icc */
520 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
521 static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
522
523 #else
524
525 /* portable and safe solution. Generally efficient.
526 * see : http://stackoverflow.com/a/32095106/646947
527 */
528 static xxh_u32 XXH_read32(const void* memPtr)
529 {
530 xxh_u32 val;
531 memcpy(&val, memPtr, sizeof(val));
532 return val;
533 }
534
535 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
536
537
538 /* ***   Endianness   *** */
539 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
540
541 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
542 #ifndef XXH_CPU_LITTLE_ENDIAN
543 # if defined(_WIN32) /* Windows is always little endian */ \
544 || defined(__LITTLE_ENDIAN__) \
545 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
546 # define XXH_CPU_LITTLE_ENDIAN 1
547 # elif defined(__BIG_ENDIAN__) \
548 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
549 # define XXH_CPU_LITTLE_ENDIAN 0
550 # else
551 static int XXH_isLittleEndian(void)
552 {
553 const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; /* don't use static : performance detrimental */
554 return one.c[0];
555 }
556 # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
557 # endif
558 #endif
559
560
561
562
563 /* ****************************************
564 * Compiler-specific Functions and Macros
565 ******************************************/
566 #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
567
568 #ifndef __has_builtin
569 # define __has_builtin(x) 0
570 #endif
571
572 #if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64)
573 # define XXH_rotl32 __builtin_rotateleft32
574 # define XXH_rotl64 __builtin_rotateleft64
575 /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
576 #elif defined(_MSC_VER)
577 # define XXH_rotl32(x,r) _rotl(x,r)
578 # define XXH_rotl64(x,r) _rotl64(x,r)
579 #else
580 # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
581 # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
582 #endif
583
584 #if defined(_MSC_VER) /* Visual Studio */
585 # define XXH_swap32 _byteswap_ulong
586 #elif XXH_GCC_VERSION >= 403
587 # define XXH_swap32 __builtin_bswap32
588 #else
589 static xxh_u32 XXH_swap32 (xxh_u32 x)
590 {
591 return ((x << 24) & 0xff000000 ) |
592 ((x << 8) & 0x00ff0000 ) |
593 ((x >> 8) & 0x0000ff00 ) |
594 ((x >> 24) & 0x000000ff );
595 }
596 #endif
597
598
599 /* ***************************
600 * Memory reads
601 *****************************/
602 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
603
604 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
605 {
606 return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
607 }
608
609 static xxh_u32 XXH_readBE32(const void* ptr)
610 {
611 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
612 }
613
614 XXH_FORCE_INLINE xxh_u32
615 XXH_readLE32_align(const void* ptr, XXH_alignment align)
616 {
617 if (align==XXH_unaligned) {
618 return XXH_readLE32(ptr);
619 } else {
620 return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
621 }
622 }
623
624
625 /* *************************************
626 * Misc
627 ***************************************/
628 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
629
630
631 /* *******************************************************************
632 * 32-bit hash functions
633 *********************************************************************/
634 static const xxh_u32 PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */
635 static const xxh_u32 PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */
636 static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */
637 static const xxh_u32 PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */
638 static const xxh_u32 PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */
639
640 static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
641 {
642 acc += input * PRIME32_2;
643 acc = XXH_rotl32(acc, 13);
644 acc *= PRIME32_1;
645 #if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
646 /* UGLY HACK:
647 * This inline assembly hack forces acc into a normal register. This is the
648 * only thing that prevents GCC and Clang from autovectorizing the XXH32 loop
649  * (pragmas and attributes don't work for some reason) without globally
650 * disabling SSE4.1.
651 *
652 * The reason we want to avoid vectorization is because despite working on
653 * 4 integers at a time, there are multiple factors slowing XXH32 down on
654 * SSE4:
655 * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer chips!)
656 * making it slightly slower to multiply four integers at once compared to four
657 * integers independently. Even when pmulld was fastest, Sandy/Ivy Bridge, it is
658 * still not worth it to go into SSE just to multiply unless doing a long operation.
659 *
660 * - Four instructions are required to rotate,
661  *   movdqa tmp, v // not required with VEX encoding
662 * pslld tmp, 13 // tmp <<= 13
663 * psrld v, 19 // x >>= 19
664 * por v, tmp // x |= tmp
665 * compared to one for scalar:
666 * roll v, 13 // reliably fast across the board
667 * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
668 *
669 * - Instruction level parallelism is actually more beneficial here because the
670 * SIMD actually serializes this operation: While v1 is rotating, v2 can load data,
671 * while v3 can multiply. SSE forces them to operate together.
672 *
673 * How this hack works:
674 * __asm__("" // Declare an assembly block but don't declare any instructions
675 * : // However, as an Input/Output Operand,
676 * "+r" // constrain a read/write operand (+) as a general purpose register (r).
677 * (acc) // and set acc as the operand
678 * );
679 *
680  * Because of the 'r', the compiler has promised that acc will be in a
681 * general purpose register and the '+' says that it will be 'read/write',
682 * so it has to assume it has changed. It is like volatile without all the
683 * loads and stores.
684 *
685 * Since the argument has to be in a normal register (not an SSE register),
686 * each time XXH32_round is called, it is impossible to vectorize. */
687 __asm__("" : "+r" (acc));
688 #endif
689 return acc;
690 }
691
692 /* mix all bits */
693 static xxh_u32 XXH32_avalanche(xxh_u32 h32)
694 {
695 h32 ^= h32 >> 15;
696 h32 *= PRIME32_2;
697 h32 ^= h32 >> 13;
698 h32 *= PRIME32_3;
699 h32 ^= h32 >> 16;
700 return(h32);
701 }
702
703 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
704
705 static xxh_u32
706 XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
707 {
708 #define PROCESS1 \
709 h32 += (*ptr++) * PRIME32_5; \
710 h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
711
712 #define PROCESS4 \
713 h32 += XXH_get32bits(ptr) * PRIME32_3; \
714 ptr+=4; \
715 h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
716
717 /* Compact rerolled version */
718 if (XXH_REROLL) {
719 len &= 15;
720 while (len >= 4) {
721 PROCESS4;
722 len -= 4;
723 }
724 while (len > 0) {
725 PROCESS1;
726 --len;
727 }
728 return XXH32_avalanche(h32);
729 } else {
730 switch(len&15) /* or switch(bEnd - p) */ {
731 case 12: PROCESS4;
732 /* fallthrough */
733 case 8: PROCESS4;
734 /* fallthrough */
735 case 4: PROCESS4;
736 return XXH32_avalanche(h32);
737
738 case 13: PROCESS4;
739 /* fallthrough */
740 case 9: PROCESS4;
741 /* fallthrough */
742 case 5: PROCESS4;
743 PROCESS1;
744 return XXH32_avalanche(h32);
745
746 case 14: PROCESS4;
747 /* fallthrough */
748 case 10: PROCESS4;
749 /* fallthrough */
750 case 6: PROCESS4;
751 PROCESS1;
752 PROCESS1;
753 return XXH32_avalanche(h32);
754
755 case 15: PROCESS4;
756 /* fallthrough */
757 case 11: PROCESS4;
758 /* fallthrough */
759 case 7: PROCESS4;
760 /* fallthrough */
761 case 3: PROCESS1;
762 /* fallthrough */
763 case 2: PROCESS1;
764 /* fallthrough */
765 case 1: PROCESS1;
766 /* fallthrough */
767 case 0: return XXH32_avalanche(h32);
768 }
769 XXH_ASSERT(0);
770 return h32; /* reaching this point is deemed impossible */
771 }
772 }
773
774 XXH_FORCE_INLINE xxh_u32
775 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
776 {
777 const xxh_u8* bEnd = input + len;
778 xxh_u32 h32;
779
780 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
781 if (input==NULL) {
782 len=0;
783 bEnd=input=(const xxh_u8*)(size_t)16;
784 }
785 #endif
786
787 if (len>=16) {
788 const xxh_u8* const limit = bEnd - 15;
789 xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2;
790 xxh_u32 v2 = seed + PRIME32_2;
791 xxh_u32 v3 = seed + 0;
792 xxh_u32 v4 = seed - PRIME32_1;
793
794 do {
795 v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
796 v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
797 v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
798 v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
799 } while (input < limit);
800
801 h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
802 + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
803 } else {
804 h32 = seed + PRIME32_5;
805 }
806
807 h32 += (xxh_u32)len;
808
809 return XXH32_finalize(h32, input, len&15, align);
810 }
811
812
813 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
814 {
815 #if 0
816 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
817 XXH32_state_t state;
818 XXH32_reset(&state, seed);
819 XXH32_update(&state, (const xxh_u8*)input, len);
820 return XXH32_digest(&state);
821
822 #else
823
824 if (XXH_FORCE_ALIGN_CHECK) {
825 if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
826 return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
827 } }
828
829 return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
830 #endif
831 }
832
833
834
835 /******* Hash streaming *******/
836
837 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
838 {
839 return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
840 }
841 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
842 {
843 XXH_free(statePtr);
844 return XXH_OK;
845 }
846
847 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
848 {
849 memcpy(dstState, srcState, sizeof(*dstState));
850 }
851
852 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
853 {
854 XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
855 memset(&state, 0, sizeof(state));
856 state.v1 = seed + PRIME32_1 + PRIME32_2;
857 state.v2 = seed + PRIME32_2;
858 state.v3 = seed + 0;
859 state.v4 = seed - PRIME32_1;
860 /* do not write into reserved, planned to be removed in a future version */
861 memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
862 return XXH_OK;
863 }
864
865
866 XXH_PUBLIC_API XXH_errorcode
867 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
868 {
869 if (input==NULL)
870 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
871 return XXH_OK;
872 #else
873 return XXH_ERROR;
874 #endif
875
876 { const xxh_u8* p = (const xxh_u8*)input;
877 const xxh_u8* const bEnd = p + len;
878
879 state->total_len_32 += (XXH32_hash_t)len;
880 state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
881
882 if (state->memsize + len < 16) { /* fill in tmp buffer */
883 XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
884 state->memsize += (XXH32_hash_t)len;
885 return XXH_OK;
886 }
887
888 if (state->memsize) { /* some data left from previous update */
889 XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
890 { const xxh_u32* p32 = state->mem32;
891 state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
892 state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
893 state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
894 state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
895 }
896 p += 16-state->memsize;
897 state->memsize = 0;
898 }
899
900 if (p <= bEnd-16) {
901 const xxh_u8* const limit = bEnd - 16;
902 xxh_u32 v1 = state->v1;
903 xxh_u32 v2 = state->v2;
904 xxh_u32 v3 = state->v3;
905 xxh_u32 v4 = state->v4;
906
907 do {
908 v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
909 v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
910 v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
911 v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
912 } while (p<=limit);
913
914 state->v1 = v1;
915 state->v2 = v2;
916 state->v3 = v3;
917 state->v4 = v4;
918 }
919
920 if (p < bEnd) {
921 XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
922 state->memsize = (unsigned)(bEnd-p);
923 }
924 }
925
926 return XXH_OK;
927 }
928
929
930 XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state)
931 {
932 xxh_u32 h32;
933
934 if (state->large_len) {
935 h32 = XXH_rotl32(state->v1, 1)
936 + XXH_rotl32(state->v2, 7)
937 + XXH_rotl32(state->v3, 12)
938 + XXH_rotl32(state->v4, 18);
939 } else {
940 h32 = state->v3 /* == seed */ + PRIME32_5;
941 }
942
943 h32 += state->total_len_32;
944
945 return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
946 }
947
948
949 /******* Canonical representation *******/
950
951 /*! Default XXH result types are basic unsigned 32 and 64 bits.
952 * The canonical representation follows human-readable write convention, aka big-endian (large digits first).
953 * These functions allow transformation of hash result into and from its canonical format.
954 * This way, hash values can be written into a file or buffer, remaining comparable across different systems.
955 */
956
957 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
958 {
959 XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
960 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
961 memcpy(dst, &hash, sizeof(*dst));
962 }
963
964 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
965 {
966 return XXH_readBE32(src);
967 }
968
969
970 #ifndef XXH_NO_LONG_LONG
971
972 /* *******************************************************************
973 * 64-bit hash functions
974 *********************************************************************/
975
976 /******* Memory access *******/
977
978 typedef XXH64_hash_t xxh_u64;
979
980
981 /*! XXH_REROLL_XXH64:
982 * Whether to reroll the XXH64_finalize() loop.
983 *
984 * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a performance gain
985 * on 64-bit hosts, as only one jump is required.
986 *
987 * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit registers,
988 * and 64-bit arithmetic needs to be simulated, it isn't beneficial to unroll. The code becomes
989 * ridiculously large (the largest function in the binary on i386!), and rerolling it saves
990 * anywhere from 3kB to 20kB. It is also slightly faster because it fits into cache better
991 * and is more likely to be inlined by the compiler.
992 *
993  * If XXH_REROLL is set to 1, this is ignored and the loop is always rerolled. */
994 #ifndef XXH_REROLL_XXH64
995 # if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \
996 || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \
997 || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \
998 || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \
999 || defined(__mips64__) || defined(__mips64)) /* mips64 */ \
1000 || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */
1001 # define XXH_REROLL_XXH64 1
1002 # else
1003 # define XXH_REROLL_XXH64 0
1004 # endif
1005 #endif /* !defined(XXH_REROLL_XXH64) */
1006
1007 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
1008
1009 /* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
1010 static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; }
1011
1012 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1013
1014 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
1015 /* currently only defined for gcc and icc */
1016 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
1017 static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
1018
1019 #else
1020
1021 /* portable and safe solution. Generally efficient.
1022 * see : http://stackoverflow.com/a/32095106/646947
1023 */
1024
1025 static xxh_u64 XXH_read64(const void* memPtr)
1026 {
1027 xxh_u64 val;
1028 memcpy(&val, memPtr, sizeof(val));
1029 return val;
1030 }
1031
1032 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
1033
1034 #if defined(_MSC_VER) /* Visual Studio */
1035 # define XXH_swap64 _byteswap_uint64
1036 #elif XXH_GCC_VERSION >= 403
1037 # define XXH_swap64 __builtin_bswap64
1038 #else
1039 static xxh_u64 XXH_swap64 (xxh_u64 x)
1040 {
1041 return ((x << 56) & 0xff00000000000000ULL) |
1042 ((x << 40) & 0x00ff000000000000ULL) |
1043 ((x << 24) & 0x0000ff0000000000ULL) |
1044 ((x << 8) & 0x000000ff00000000ULL) |
1045 ((x >> 8) & 0x00000000ff000000ULL) |
1046 ((x >> 24) & 0x0000000000ff0000ULL) |
1047 ((x >> 40) & 0x000000000000ff00ULL) |
1048 ((x >> 56) & 0x00000000000000ffULL);
1049 }
1050 #endif
1051
1052 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
1053 {
1054 return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
1055 }
1056
1057 static xxh_u64 XXH_readBE64(const void* ptr)
1058 {
1059 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
1060 }
1061
1062 XXH_FORCE_INLINE xxh_u64
1063 XXH_readLE64_align(const void* ptr, XXH_alignment align)
1064 {
1065 if (align==XXH_unaligned)
1066 return XXH_readLE64(ptr);
1067 else
1068 return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
1069 }
1070
1071
1072 /******* xxh64 *******/
1073
1074 static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
1075 static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
1076 static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
1077 static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
1078 static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
1079
1080 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
1081 {
1082 acc += input * PRIME64_2;
1083 acc = XXH_rotl64(acc, 31);
1084 acc *= PRIME64_1;
1085 return acc;
1086 }
1087
1088 static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
1089 {
1090 val = XXH64_round(0, val);
1091 acc ^= val;
1092 acc = acc * PRIME64_1 + PRIME64_4;
1093 return acc;
1094 }
1095
1096 static xxh_u64 XXH64_avalanche(xxh_u64 h64)
1097 {
1098 h64 ^= h64 >> 33;
1099 h64 *= PRIME64_2;
1100 h64 ^= h64 >> 29;
1101 h64 *= PRIME64_3;
1102 h64 ^= h64 >> 32;
1103 return h64;
1104 }
1105
1106
1107 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
1108
1109 static xxh_u64
1110 XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
1111 {
1112 #define PROCESS1_64 \
1113 h64 ^= (*ptr++) * PRIME64_5; \
1114 h64 = XXH_rotl64(h64, 11) * PRIME64_1;
1115
1116 #define PROCESS4_64 \
1117 h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1; \
1118 ptr+=4; \
1119 h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
1120
1121 #define PROCESS8_64 { \
1122 xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
1123 ptr+=8; \
1124 h64 ^= k1; \
1125 h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
1126 }
1127
1128 /* Rerolled version for 32-bit targets is faster and much smaller. */
1129 if (XXH_REROLL || XXH_REROLL_XXH64) {
1130 len &= 31;
1131 while (len >= 8) {
1132 PROCESS8_64;
1133 len -= 8;
1134 }
1135 if (len >= 4) {
1136 PROCESS4_64;
1137 len -= 4;
1138 }
1139 while (len > 0) {
1140 PROCESS1_64;
1141 --len;
1142 }
1143 return XXH64_avalanche(h64);
1144 } else {
1145 switch(len & 31) {
1146 case 24: PROCESS8_64;
1147 /* fallthrough */
1148 case 16: PROCESS8_64;
1149 /* fallthrough */
1150 case 8: PROCESS8_64;
1151 return XXH64_avalanche(h64);
1152
1153 case 28: PROCESS8_64;
1154 /* fallthrough */
1155 case 20: PROCESS8_64;
1156 /* fallthrough */
1157 case 12: PROCESS8_64;
1158 /* fallthrough */
1159 case 4: PROCESS4_64;
1160 return XXH64_avalanche(h64);
1161
1162 case 25: PROCESS8_64;
1163 /* fallthrough */
1164 case 17: PROCESS8_64;
1165 /* fallthrough */
1166 case 9: PROCESS8_64;
1167 PROCESS1_64;
1168 return XXH64_avalanche(h64);
1169
1170 case 29: PROCESS8_64;
1171 /* fallthrough */
1172 case 21: PROCESS8_64;
1173 /* fallthrough */
1174 case 13: PROCESS8_64;
1175 /* fallthrough */
1176 case 5: PROCESS4_64;
1177 PROCESS1_64;
1178 return XXH64_avalanche(h64);
1179
1180 case 26: PROCESS8_64;
1181 /* fallthrough */
1182 case 18: PROCESS8_64;
1183 /* fallthrough */
1184 case 10: PROCESS8_64;
1185 PROCESS1_64;
1186 PROCESS1_64;
1187 return XXH64_avalanche(h64);
1188
1189 case 30: PROCESS8_64;
1190 /* fallthrough */
1191 case 22: PROCESS8_64;
1192 /* fallthrough */
1193 case 14: PROCESS8_64;
1194 /* fallthrough */
1195 case 6: PROCESS4_64;
1196 PROCESS1_64;
1197 PROCESS1_64;
1198 return XXH64_avalanche(h64);
1199
1200 case 27: PROCESS8_64;
1201 /* fallthrough */
1202 case 19: PROCESS8_64;
1203 /* fallthrough */
1204 case 11: PROCESS8_64;
1205 PROCESS1_64;
1206 PROCESS1_64;
1207 PROCESS1_64;
1208 return XXH64_avalanche(h64);
1209
1210 case 31: PROCESS8_64;
1211 /* fallthrough */
1212 case 23: PROCESS8_64;
1213 /* fallthrough */
1214 case 15: PROCESS8_64;
1215 /* fallthrough */
1216 case 7: PROCESS4_64;
1217 /* fallthrough */
1218 case 3: PROCESS1_64;
1219 /* fallthrough */
1220 case 2: PROCESS1_64;
1221 /* fallthrough */
1222 case 1: PROCESS1_64;
1223 /* fallthrough */
1224 case 0: return XXH64_avalanche(h64);
1225 }
1226 }
1227 /* impossible to reach */
1228 XXH_ASSERT(0);
1229 return 0; /* unreachable, but some compilers complain without it */
1230 }
1231
1232 XXH_FORCE_INLINE xxh_u64
1233 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
1234 {
1235 const xxh_u8* bEnd = input + len;
1236 xxh_u64 h64;
1237
1238 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
1239 if (input==NULL) {
1240 len=0;
1241 bEnd=input=(const xxh_u8*)(size_t)32;
1242 }
1243 #endif
1244
1245 if (len>=32) {
1246 const xxh_u8* const limit = bEnd - 32;
1247 xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2;
1248 xxh_u64 v2 = seed + PRIME64_2;
1249 xxh_u64 v3 = seed + 0;
1250 xxh_u64 v4 = seed - PRIME64_1;
1251
1252 do {
1253 v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
1254 v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
1255 v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
1256 v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
1257 } while (input<=limit);
1258
1259 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1260 h64 = XXH64_mergeRound(h64, v1);
1261 h64 = XXH64_mergeRound(h64, v2);
1262 h64 = XXH64_mergeRound(h64, v3);
1263 h64 = XXH64_mergeRound(h64, v4);
1264
1265 } else {
1266 h64 = seed + PRIME64_5;
1267 }
1268
1269 h64 += (xxh_u64) len;
1270
1271 return XXH64_finalize(h64, input, len, align);
1272 }
1273
1274
1275 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
1276 {
1277 #if 0
1278 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
1279 XXH64_state_t state;
1280 XXH64_reset(&state, seed);
1281 XXH64_update(&state, (const xxh_u8*)input, len);
1282 return XXH64_digest(&state);
1283
1284 #else
1285
1286 if (XXH_FORCE_ALIGN_CHECK) {
1287 if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
1288 return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
1289 } }
1290
1291 return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
1292
1293 #endif
1294 }
1295
1296 /******* Hash Streaming *******/
1297
1298 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
1299 {
1300 return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
1301 }
1302 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
1303 {
1304 XXH_free(statePtr);
1305 return XXH_OK;
1306 }
1307
1308 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
1309 {
1310 memcpy(dstState, srcState, sizeof(*dstState));
1311 }
1312
1313 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
1314 {
1315 XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
1316 memset(&state, 0, sizeof(state));
1317 state.v1 = seed + PRIME64_1 + PRIME64_2;
1318 state.v2 = seed + PRIME64_2;
1319 state.v3 = seed + 0;
1320 state.v4 = seed - PRIME64_1;
1321 /* do not write into reserved64, might be removed in a future version */
1322 memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
1323 return XXH_OK;
1324 }
1325
1326 XXH_PUBLIC_API XXH_errorcode
1327 XXH64_update (XXH64_state_t* state, const void* input, size_t len)
1328 {
1329 if (input==NULL)
1330 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
1331 return XXH_OK;
1332 #else
1333 return XXH_ERROR;
1334 #endif
1335
1336 { const xxh_u8* p = (const xxh_u8*)input;
1337 const xxh_u8* const bEnd = p + len;
1338
1339 state->total_len += len;
1340
1341 if (state->memsize + len < 32) { /* fill in tmp buffer */
1342 XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
1343 state->memsize += (xxh_u32)len;
1344 return XXH_OK;
1345 }
1346
1347 if (state->memsize) { /* tmp buffer is full */
1348 XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
1349 state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
1350 state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
1351 state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
1352 state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
1353 p += 32-state->memsize;
1354 state->memsize = 0;
1355 }
1356
1357 if (p+32 <= bEnd) {
1358 const xxh_u8* const limit = bEnd - 32;
1359 xxh_u64 v1 = state->v1;
1360 xxh_u64 v2 = state->v2;
1361 xxh_u64 v3 = state->v3;
1362 xxh_u64 v4 = state->v4;
1363
1364 do {
1365 v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
1366 v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
1367 v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
1368 v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
1369 } while (p<=limit);
1370
1371 state->v1 = v1;
1372 state->v2 = v2;
1373 state->v3 = v3;
1374 state->v4 = v4;
1375 }
1376
1377 if (p < bEnd) {
1378 XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
1379 state->memsize = (unsigned)(bEnd-p);
1380 }
1381 }
1382
1383 return XXH_OK;
1384 }
1385
1386
1387 XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state)
1388 {
1389 xxh_u64 h64;
1390
1391 if (state->total_len >= 32) {
1392 xxh_u64 const v1 = state->v1;
1393 xxh_u64 const v2 = state->v2;
1394 xxh_u64 const v3 = state->v3;
1395 xxh_u64 const v4 = state->v4;
1396
1397 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1398 h64 = XXH64_mergeRound(h64, v1);
1399 h64 = XXH64_mergeRound(h64, v2);
1400 h64 = XXH64_mergeRound(h64, v3);
1401 h64 = XXH64_mergeRound(h64, v4);
1402 } else {
1403 h64 = state->v3 /*seed*/ + PRIME64_5;
1404 }
1405
1406 h64 += (xxh_u64) state->total_len;
1407
1408 return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
1409 }
1410
1411
1412 /******* Canonical representation *******/
1413
1414 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
1415 {
1416 XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
1417 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
1418 memcpy(dst, &hash, sizeof(*dst));
1419 }
1420
1421 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
1422 {
1423 return XXH_readBE64(src);
1424 }
1425
1426
1427
1428 /* *********************************************************************
1429 * XXH3
1430 * New generation hash designed for speed on small keys and vectorization
1431 ************************************************************************ */
1432
1433 /* #include "xxh3.h" */
1434
1435
1436 #endif /* XXH_NO_LONG_LONG */
1437
1438
1439 #endif /* XXH_IMPLEMENTATION */
1440
1441
1442 #if defined (__cplusplus)
1443 }
1444 #endif