doc
c_jhash.h
Go to the documentation of this file.
1/*
2 * c_jhash.c Jenkins Hash
3 *
4 * Copyright (c) 1997 Bob Jenkins <bob_jenkins@burtleburtle.net>
5 *
6 * lookup8.c, by Bob Jenkins, January 4 1997, Public Domain.
7 * hash(), hash2(), hash3, and _c_mix() are externally useful functions.
8 * Routines to test the hash are included if SELF_TEST is defined.
9 * You can use this free for any purpose. It has no warranty.
10 *
11 * See http://burtleburtle.net/bob/hash/evahash.html
12 */
13
14/**
15 * @file c_jhash.h
16 *
17 * @brief Interface of the cynapses jhash implementation
18 *
19 * @defgroup cynJHashInternals cynapses libc jhash function
20 * @ingroup cynLibraryAPI
21 *
22 * @{
23 */
24#ifndef _C_JHASH_H
25#define _C_JHASH_H
26
27#include <stdint.h>
28
29#define c_hashsize(n) ((uint8_t) 1 << (n))
30#define c_hashmask(n) (xhashsize(n) - 1)
31
32/**
33 * _c_mix -- Mix 3 32-bit values reversibly.
34 *
35 * For every delta with one or two bit set, and the deltas of all three
36 * high bits or all three low bits, whether the original value of a,b,c
37 * is almost all zero or is uniformly distributed,
38 * If _c_mix() is run forward or backward, at least 32 bits in a,b,c
39 * have at least 1/4 probability of changing.
40 * If _c_mix() is run forward, every bit of c will change between 1/3 and
41 * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
42 * _c_mix() was built out of 36 single-cycle latency instructions in a
43 * structure that could supported 2x parallelism, like so:
44 * a -= b;
45 * a -= c; x = (c>>13);
46 * b -= c; a ^= x;
47 * b -= a; x = (a<<8);
48 * c -= a; b ^= x;
49 * c -= b; x = (b>>13);
50 * ...
51 *
52 * Unfortunately, superscalar Pentiums and Sparcs can't take advantage
53 * of that parallelism. They've also turned some of those single-cycle
54 * latency instructions into multi-cycle latency instructions. Still,
55 * this is the fastest good hash I could find. There were about 2^^68
56 * to choose from. I only looked at a billion or so.
57 */
58#define _c_mix(a,b,c) \
59{ \
60 a -= b; a -= c; a ^= (c>>13); \
61 b -= c; b -= a; b ^= (a<<8); \
62 c -= a; c -= b; c ^= (b>>13); \
63 a -= b; a -= c; a ^= (c>>12); \
64 b -= c; b -= a; b ^= (a<<16); \
65 c -= a; c -= b; c ^= (b>>5); \
66 a -= b; a -= c; a ^= (c>>3); \
67 b -= c; b -= a; b ^= (a<<10); \
68 c -= a; c -= b; c ^= (b>>15); \
69}
70
71/**
72 * _c_mix64 -- Mix 3 64-bit values reversibly.
73 *
74 * _c_mix64() takes 48 machine instructions, but only 24 cycles on a superscalar
75 * machine (like Intel's new MMX architecture). It requires 4 64-bit
76 * registers for 4::2 parallelism.
77 * All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of
78 * (a,b,c), and all deltas of bottom bits were tested. All deltas were
79 * tested both on random keys and on keys that were nearly all zero.
80 * These deltas all cause every bit of c to change between 1/3 and 2/3
81 * of the time (well, only 113/400 to 287/400 of the time for some
82 * 2-bit delta). These deltas all cause at least 80 bits to change
83 * among (a,b,c) when the _c_mix is run either forward or backward (yes it
84 * is reversible).
85 * This implies that a hash using _c_mix64 has no funnels. There may be
86 * characteristics with 3-bit deltas or bigger, I didn't test for
87 * those.
88 */
89#define _c_mix64(a,b,c) \
90{ \
91 a -= b; a -= c; a ^= (c>>43); \
92 b -= c; b -= a; b ^= (a<<9); \
93 c -= a; c -= b; c ^= (b>>8); \
94 a -= b; a -= c; a ^= (c>>38); \
95 b -= c; b -= a; b ^= (a<<23); \
96 c -= a; c -= b; c ^= (b>>5); \
97 a -= b; a -= c; a ^= (c>>35); \
98 b -= c; b -= a; b ^= (a<<49); \
99 c -= a; c -= b; c ^= (b>>11); \
100 a -= b; a -= c; a ^= (c>>12); \
101 b -= c; b -= a; b ^= (a<<18); \
102 c -= a; c -= b; c ^= (b>>22); \
103}
104
105/**
106 * @brief hash a variable-length key into a 32-bit value
107 *
108 * The best hash table sizes are powers of 2. There is no need to do
109 * mod a prime (mod is sooo slow!). If you need less than 32 bits,
110 * use a bitmask. For example, if you need only 10 bits, do
111 * h = (h & hashmask(10));
112 * In which case, the hash table should have hashsize(10) elements.
113 *
114 * Use for hash table lookup, or anything where one collision in 2^32 is
115 * acceptable. Do NOT use for cryptographic purposes.
116 *
117 * @param k The key (the unaligned variable-length array of bytes).
118 *
119 * @param length The length of the key, counting by bytes.
120 *
121 * @param initval Initial value, can be any 4-byte value.
122 *
123 * @return Returns a 32-bit value. Every bit of the key affects every bit
124 * of the return value. Every 1-bit and 2-bit delta achieves
125 * avalanche. About 36+6len instructions.
126 */
127static inline uint32_t c_jhash(const uint8_t *k, uint32_t length, uint32_t initval) {
128 uint32_t a,b,c,len;
129
130 /* Set up the internal state */
131 len = length;
132 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
133 c = initval; /* the previous hash value */
134
135 while (len >= 12) {
136 a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24));
137 b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24));
138 c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24));
139 _c_mix(a,b,c);
140 k += 12; len -= 12;
141 }
142
143 /* handle the last 11 bytes */
144 c += length;
145 /* all the case statements fall through */
146 switch(len) {
147 case 11: c+=((uint32_t)k[10]<<24);
148 case 10: c+=((uint32_t)k[9]<<16);
149 case 9 : c+=((uint32_t)k[8]<<8);
150 /* the first byte of c is reserved for the length */
151 case 8 : b+=((uint32_t)k[7]<<24);
152 case 7 : b+=((uint32_t)k[6]<<16);
153 case 6 : b+=((uint32_t)k[5]<<8);
154 case 5 : b+=k[4];
155 case 4 : a+=((uint32_t)k[3]<<24);
156 case 3 : a+=((uint32_t)k[2]<<16);
157 case 2 : a+=((uint32_t)k[1]<<8);
158 case 1 : a+=k[0];
159 /* case 0: nothing left to add */
160 }
161 _c_mix(a,b,c);
162
163 return c;
164}
165
166/**
167 * @brief hash a variable-length key into a 64-bit value
168 *
169 * The best hash table sizes are powers of 2. There is no need to do
170 * mod a prime (mod is sooo slow!). If you need less than 64 bits,
171 * use a bitmask. For example, if you need only 10 bits, do
172 * h = (h & hashmask(10));
173 * In which case, the hash table should have hashsize(10) elements.
174 *
175 * Use for hash table lookup, or anything where one collision in 2^^64
176 * is acceptable. Do NOT use for cryptographic purposes.
177 *
178 * @param k The key (the unaligned variable-length array of bytes).
179 * @param length The length of the key, counting by bytes.
180 * @param intval Initial value, can be any 8-byte value.
181 *
182 * @return A 64-bit value. Every bit of the key affects every bit of
183 * the return value. No funnels. Every 1-bit and 2-bit delta
184 * achieves avalanche. About 41+5len instructions.
185 */
186static inline uint64_t c_jhash64(const uint8_t *k, uint64_t length, uint64_t intval) {
187 uint64_t a,b,c,len;
188
189 /* Set up the internal state */
190 len = length;
191 a = b = intval; /* the previous hash value */
192 c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
193
194 /* handle most of the key */
195 while (len >= 24)
196 {
197 a += (k[0] +((uint64_t)k[ 1]<< 8)+((uint64_t)k[ 2]<<16)+((uint64_t)k[ 3]<<24)
198 +((uint64_t)k[4 ]<<32)+((uint64_t)k[ 5]<<40)+((uint64_t)k[ 6]<<48)+((uint64_t)k[ 7]<<56));
199 b += (k[8] +((uint64_t)k[ 9]<< 8)+((uint64_t)k[10]<<16)+((uint64_t)k[11]<<24)
200 +((uint64_t)k[12]<<32)+((uint64_t)k[13]<<40)+((uint64_t)k[14]<<48)+((uint64_t)k[15]<<56));
201 c += (k[16] +((uint64_t)k[17]<< 8)+((uint64_t)k[18]<<16)+((uint64_t)k[19]<<24)
202 +((uint64_t)k[20]<<32)+((uint64_t)k[21]<<40)+((uint64_t)k[22]<<48)+((uint64_t)k[23]<<56));
203 _c_mix64(a,b,c);
204 k += 24; len -= 24;
205 }
206
207 /* handle the last 23 bytes */
208 c += length;
209 switch(len) {
210 case 23: c+=((uint64_t)k[22]<<56);
211 case 22: c+=((uint64_t)k[21]<<48);
212 case 21: c+=((uint64_t)k[20]<<40);
213 case 20: c+=((uint64_t)k[19]<<32);
214 case 19: c+=((uint64_t)k[18]<<24);
215 case 18: c+=((uint64_t)k[17]<<16);
216 case 17: c+=((uint64_t)k[16]<<8);
217 /* the first byte of c is reserved for the length */
218 case 16: b+=((uint64_t)k[15]<<56);
219 case 15: b+=((uint64_t)k[14]<<48);
220 case 14: b+=((uint64_t)k[13]<<40);
221 case 13: b+=((uint64_t)k[12]<<32);
222 case 12: b+=((uint64_t)k[11]<<24);
223 case 11: b+=((uint64_t)k[10]<<16);
224 case 10: b+=((uint64_t)k[ 9]<<8);
225 case 9: b+=((uint64_t)k[ 8]);
226 case 8: a+=((uint64_t)k[ 7]<<56);
227 case 7: a+=((uint64_t)k[ 6]<<48);
228 case 6: a+=((uint64_t)k[ 5]<<40);
229 case 5: a+=((uint64_t)k[ 4]<<32);
230 case 4: a+=((uint64_t)k[ 3]<<24);
231 case 3: a+=((uint64_t)k[ 2]<<16);
232 case 2: a+=((uint64_t)k[ 1]<<8);
233 case 1: a+=((uint64_t)k[ 0]);
234 /* case 0: nothing left to add */
235 }
236 _c_mix64(a,b,c);
237
238 return c;
239}
240
241/**
242 * }@
243 */
244#endif /* _C_JHASH_H */
245
#define _c_mix(a, b, c)
_c_mix – Mix 3 32-bit values reversibly.
Definition: c_jhash.h:58
static uint64_t c_jhash64(const uint8_t *k, uint64_t length, uint64_t intval)
hash a variable-length key into a 64-bit value
Definition: c_jhash.h:186
#define _c_mix64(a, b, c)
_c_mix64 – Mix 3 64-bit values reversibly.
Definition: c_jhash.h:89
static uint32_t c_jhash(const uint8_t *k, uint32_t length, uint32_t initval)
hash a variable-length key into a 32-bit value
Definition: c_jhash.h:127