Ninja
rapidhash.h
Go to the documentation of this file.
1 /*
2  * rapidhash - Very fast, high quality, platform-independent hashing algorithm.
3  * Copyright (C) 2024 Nicolas De Carli
4  *
5  * Based on 'wyhash', by Wang Yi <godspeed_china@yeah.net>
6  *
7  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are
11  * met:
12  *
13  * * Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * * Redistributions in binary form must reproduce the above
16  * copyright notice, this list of conditions and the following disclaimer
17  * in the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  * You can contact the author at:
33  * - rapidhash source repository: https://github.com/Nicoshev/rapidhash
34  */
35 
36 /*
37  * Includes.
38  */
39 #include <stdint.h>
40 #include <string.h>
41 #if defined(_MSC_VER)
42  #include <intrin.h>
43  #if defined(_M_X64) && !defined(_M_ARM64EC)
44  #pragma intrinsic(_umul128)
45  #endif
46 #endif
47 
48 /*
49  * C++ macros.
50  *
51  * RAPIDHASH_INLINE can be overridden to be stronger than a hint, i.e. by adding __attribute__((always_inline)).
52  */
53 #ifdef __cplusplus
54  #define RAPIDHASH_NOEXCEPT noexcept
55  #define RAPIDHASH_CONSTEXPR constexpr
56  #ifndef RAPIDHASH_INLINE
57  #define RAPIDHASH_INLINE inline
58  #endif
59 #else
60  #define RAPIDHASH_NOEXCEPT
61  #define RAPIDHASH_CONSTEXPR static const
62  #ifndef RAPIDHASH_INLINE
63  #define RAPIDHASH_INLINE static inline
64  #endif
65 #endif
66 
67 /*
68  * Protection macro, alters behaviour of rapid_mum multiplication function.
69  *
70  * RAPIDHASH_FAST: Normal behavior, max speed.
71  * RAPIDHASH_PROTECTED: Extra protection against entropy loss.
72  */
73 #ifndef RAPIDHASH_PROTECTED
74  #define RAPIDHASH_FAST
75 #elif defined(RAPIDHASH_FAST)
76  #error "cannot define RAPIDHASH_PROTECTED and RAPIDHASH_FAST simultaneously."
77 #endif
78 
79 /*
80  * Unrolling macros, changes code definition for main hash function.
81  *
82  * RAPIDHASH_COMPACT: Legacy variant, each loop process 48 bytes.
83  * RAPIDHASH_UNROLLED: Unrolled variant, each loop process 96 bytes.
84  *
85  * Most modern CPUs should benefit from having RAPIDHASH_UNROLLED.
86  *
87  * These macros do not alter the output hash.
88  */
89 #ifndef RAPIDHASH_COMPACT
90  #define RAPIDHASH_UNROLLED
91 #elif defined(RAPIDHASH_UNROLLED)
92  #error "cannot define RAPIDHASH_COMPACT and RAPIDHASH_UNROLLED simultaneously."
93 #endif
94 
95 /*
96  * Likely and unlikely macros.
97  */
98 #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
99  #define _likely_(x) __builtin_expect(x,1)
100  #define _unlikely_(x) __builtin_expect(x,0)
101 #else
102  #define _likely_(x) (x)
103  #define _unlikely_(x) (x)
104 #endif
105 
106 /*
107  * Endianness macros.
108  */
109 #ifndef RAPIDHASH_LITTLE_ENDIAN
110  #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
111  #define RAPIDHASH_LITTLE_ENDIAN
112  #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
113  #define RAPIDHASH_BIG_ENDIAN
114  #else
115  #warning "could not determine endianness! Falling back to little endian."
116  #define RAPIDHASH_LITTLE_ENDIAN
117  #endif
118 #endif
119 
120 /*
121  * Default seed.
122  */
123 #define RAPID_SEED (0xbdd89aa982704029ull)
124 
125 /*
126  * Default secret parameters.
127  */
128 RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull};
129 
130 /*
131  * 64*64 -> 128bit multiply function.
132  *
133  * @param A Address of 64-bit number.
134  * @param B Address of 64-bit number.
135  *
136  * Calculates 128-bit C = *A * *B.
137  *
138  * When RAPIDHASH_FAST is defined:
139  * Overwrites A contents with C's low 64 bits.
140  * Overwrites B contents with C's high 64 bits.
141  *
142  * When RAPIDHASH_PROTECTED is defined:
143  * Xors and overwrites A contents with C's low 64 bits.
144  * Xors and overwrites B contents with C's high 64 bits.
145  */
147 #if defined(__SIZEOF_INT128__)
148  __uint128_t r=*A; r*=*B;
149  #ifdef RAPIDHASH_PROTECTED
150  *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
151  #else
152  *A=(uint64_t)r; *B=(uint64_t)(r>>64);
153  #endif
154 #elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64))
155  #if defined(_M_X64)
156  #ifdef RAPIDHASH_PROTECTED
157  uint64_t a, b;
158  a=_umul128(*A,*B,&b);
159  *A^=a; *B^=b;
160  #else
161  *A=_umul128(*A,*B,B);
162  #endif
163  #else
164  #ifdef RAPIDHASH_PROTECTED
165  uint64_t a, b;
166  b = __umulh(*A, *B);
167  a = *A * *B;
168  *A^=a; *B^=b;
169  #else
170  uint64_t c = __umulh(*A, *B);
171  *A = *A * *B;
172  *B = c;
173  #endif
174  #endif
175 #else
176  uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
177  uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
178  lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
179  #ifdef RAPIDHASH_PROTECTED
180  *A^=lo; *B^=hi;
181  #else
182  *A=lo; *B=hi;
183  #endif
184 #endif
185 }
186 
187 /*
188  * Multiply and xor mix function.
189  *
190  * @param A 64-bit number.
191  * @param B 64-bit number.
192  *
193  * Calculates 128-bit C = A * B.
194  * Returns 64-bit xor between high and low 64 bits of C.
195  */
197 
198 /*
199  * Read functions.
200  */
201 #ifdef RAPIDHASH_LITTLE_ENDIAN
202 RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return v;}
203 RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return v;}
204 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
205 RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return __builtin_bswap64(v);}
206 RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return __builtin_bswap32(v);}
207 #elif defined(_MSC_VER)
208 RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return _byteswap_uint64(v);}
209 RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return _byteswap_ulong(v);}
210 #else
212  uint64_t v; memcpy(&v, p, 8);
213  return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000));
214 }
216  uint32_t v; memcpy(&v, p, 4);
217  return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000));
218 }
219 #endif
220 
221 /*
222  * Reads and combines 3 bytes of input.
223  *
224  * @param p Buffer to read from.
225  * @param k Length of @p, in bytes.
226  *
227  * Always reads and combines 3 bytes from memory.
228  * Guarantees to read each buffer position at least once.
229  *
230  * Returns a 64-bit value containing all three bytes read.
231  */
232 RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT { return (((uint64_t)p[0])<<56)|(((uint64_t)p[k>>1])<<32)|p[k-1];}
233 
234 /*
235  * rapidhash main function.
236  *
237  * @param key Buffer to be hashed.
238  * @param len @key length, in bytes.
239  * @param seed 64-bit seed used to alter the hash result predictably.
240  * @param secret Triplet of 64-bit secrets used to alter hash result predictably.
241  *
242  * Returns a 64-bit hash.
243  */
244 RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t* secret) RAPIDHASH_NOEXCEPT {
245  const uint8_t *p=(const uint8_t *)key; seed^=rapid_mix(seed^secret[0],secret[1])^len; uint64_t a, b;
246  if(_likely_(len<=16)){
247  if(_likely_(len>=4)){
248  const uint8_t * plast = p + len - 4;
249  a = (rapid_read32(p) << 32) | rapid_read32(plast);
250  const uint64_t delta = ((len&24)>>(len>>3));
251  b = ((rapid_read32(p + delta) << 32) | rapid_read32(plast - delta)); }
252  else if(_likely_(len>0)){ a=rapid_readSmall(p,len); b=0;}
253  else a=b=0;
254  }
255  else{
256  size_t i=len;
257  if(_unlikely_(i>48)){
258  uint64_t see1=seed, see2=seed;
259 #ifdef RAPIDHASH_UNROLLED
260  while(_likely_(i>=96)){
261  seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
262  see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
263  see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
264  seed=rapid_mix(rapid_read64(p+48)^secret[0],rapid_read64(p+56)^seed);
265  see1=rapid_mix(rapid_read64(p+64)^secret[1],rapid_read64(p+72)^see1);
266  see2=rapid_mix(rapid_read64(p+80)^secret[2],rapid_read64(p+88)^see2);
267  p+=96; i-=96;
268  }
269  if(_unlikely_(i>=48)){
270  seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
271  see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
272  see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
273  p+=48; i-=48;
274  }
275 #else
276  do {
277  seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
278  see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
279  see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
280  p+=48; i-=48;
281  } while (_likely_(i>=48));
282 #endif
283  seed^=see1^see2;
284  }
285  if(i>16){
286  seed=rapid_mix(rapid_read64(p)^secret[2],rapid_read64(p+8)^seed^secret[1]);
287  if(i>32)
288  seed=rapid_mix(rapid_read64(p+16)^secret[2],rapid_read64(p+24)^seed);
289  }
290  a=rapid_read64(p+i-16); b=rapid_read64(p+i-8);
291  }
292  a^=secret[1]; b^=seed; rapid_mum(&a,&b);
293  return rapid_mix(a^secret[0]^len,b^secret[1]);
294 }
295 
296 /*
297  * rapidhash default seeded hash function.
298  *
299  * @param key Buffer to be hashed.
300  * @param len @key length, in bytes.
301  * @param seed 64-bit seed used to alter the hash result predictably.
302  *
303  * Calls rapidhash_internal using provided parameters and default secrets.
304  *
305  * Returns a 64-bit hash.
306  */
308  return rapidhash_internal(key, len, seed, rapid_secret);
309 }
310 
311 /*
312  * rapidhash default hash function.
313  *
314  * @param key Buffer to be hashed.
315  * @param len @key length, in bytes.
316  *
317  * Calls rapidhash_withSeed using provided parameters and the default seed.
318  *
319  * Returns a 64-bit hash.
320  */
322  return rapidhash_withSeed(key, len, RAPID_SEED);
323 }
#define _unlikely_(x)
Definition: rapidhash.h:103
#define RAPIDHASH_CONSTEXPR
Definition: rapidhash.h:61
RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT
Definition: rapidhash.h:232
RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT
Definition: rapidhash.h:202
#define _likely_(x)
Definition: rapidhash.h:102
RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t *secret) RAPIDHASH_NOEXCEPT
Definition: rapidhash.h:244
RAPIDHASH_INLINE uint64_t rapid_mix(uint64_t A, uint64_t B) RAPIDHASH_NOEXCEPT
Definition: rapidhash.h:196
#define RAPIDHASH_NOEXCEPT
Definition: rapidhash.h:60
#define RAPIDHASH_INLINE
Definition: rapidhash.h:63
RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT
Definition: rapidhash.h:203
RAPIDHASH_INLINE void rapid_mum(uint64_t *A, uint64_t *B) RAPIDHASH_NOEXCEPT
Definition: rapidhash.h:146
RAPIDHASH_INLINE uint64_t rapidhash(const void *key, size_t len) RAPIDHASH_NOEXCEPT
Definition: rapidhash.h:321
#define RAPID_SEED
Definition: rapidhash.h:123
RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3]
Definition: rapidhash.h:128
RAPIDHASH_INLINE uint64_t rapidhash_withSeed(const void *key, size_t len, uint64_t seed) RAPIDHASH_NOEXCEPT
Definition: rapidhash.h:307
unsigned long long uint64_t
Definition: win32port.h:29