1 : /*-------------------------------------------------------------------------
2 : *
3 : * hashfunc.c
4 : * Support functions for hash access method.
5 : *
6 : * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.54 2007/11/15 21:14:32 momjian Exp $
12 : *
13 : * NOTES
14 : * These functions are stored in pg_amproc. For each operator class
15 : * defined for hash indexes, they compute the hash value of the argument.
16 : *
17 : * Additional hash functions appear in /utils/adt/ files for various
18 : * specialized datatypes.
19 : *
20 : * It is expected that every bit of a hash function's 32-bit result is
21 : * as random as every other; failure to ensure this is likely to lead
22 : * to poor performance of hash joins, for example. In most cases a hash
23 : * function should use hash_any() or its variant hash_uint32().
24 : *-------------------------------------------------------------------------
25 : */
26 :
#include "postgres.h"

#include <math.h>

#include "access/hash.h"
30 :
31 :
32 : /* Note: this is used for both "char" and boolean datatypes */
33 : Datum
34 : hashchar(PG_FUNCTION_ARGS)
35 5 : {
36 5 : return hash_uint32((int32) PG_GETARG_CHAR(0));
37 : }
38 :
39 : Datum
40 : hashint2(PG_FUNCTION_ARGS)
41 55835 : {
42 55835 : return hash_uint32((int32) PG_GETARG_INT16(0));
43 : }
44 :
45 : Datum
46 : hashint4(PG_FUNCTION_ARGS)
47 90762 : {
48 90762 : return hash_uint32(PG_GETARG_INT32(0));
49 : }
50 :
51 : Datum
52 : hashint8(PG_FUNCTION_ARGS)
53 49 : {
54 : /*
55 : * The idea here is to produce a hash value compatible with the values
56 : * produced by hashint4 and hashint2 for logically equal inputs; this is
57 : * necessary to support cross-type hash joins across these input types.
58 : * Since all three types are signed, we can xor the high half of the int8
59 : * value if the sign is positive, or the complement of the high half when
60 : * the sign is negative.
61 : */
62 : #ifndef INT64_IS_BUSTED
63 49 : int64 val = PG_GETARG_INT64(0);
64 49 : uint32 lohalf = (uint32) val;
65 49 : uint32 hihalf = (uint32) (val >> 32);
66 :
67 49 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
68 :
69 49 : return hash_uint32(lohalf);
70 : #else
71 : /* here if we can't count on "x >> 32" to work sanely */
72 : return hash_uint32((int32) PG_GETARG_INT64(0));
73 : #endif
74 : }
75 :
76 : Datum
77 : hashoid(PG_FUNCTION_ARGS)
78 631253 : {
79 631253 : return hash_uint32((uint32) PG_GETARG_OID(0));
80 : }
81 :
82 : Datum
83 : hashenum(PG_FUNCTION_ARGS)
84 7 : {
85 7 : return hash_uint32((uint32) PG_GETARG_OID(0));
86 : }
87 :
88 : Datum
89 : hashfloat4(PG_FUNCTION_ARGS)
90 0 : {
91 0 : float4 key = PG_GETARG_FLOAT4(0);
92 : float8 key8;
93 :
94 : /*
95 : * On IEEE-float machines, minus zero and zero have different bit patterns
96 : * but should compare as equal. We must ensure that they have the same
97 : * hash value, which is most reliably done this way:
98 : */
99 0 : if (key == (float4) 0)
100 0 : PG_RETURN_UINT32(0);
101 :
102 : /*
103 : * To support cross-type hashing of float8 and float4, we want to return
104 : * the same hash value hashfloat8 would produce for an equal float8 value.
105 : * So, widen the value to float8 and hash that. (We must do this rather
106 : * than have hashfloat8 try to narrow its value to float4; that could fail
107 : * on overflow.)
108 : */
109 0 : key8 = key;
110 :
111 0 : return hash_any((unsigned char *) &key8, sizeof(key8));
112 : }
113 :
114 : Datum
115 : hashfloat8(PG_FUNCTION_ARGS)
116 23578 : {
117 23578 : float8 key = PG_GETARG_FLOAT8(0);
118 :
119 : /*
120 : * On IEEE-float machines, minus zero and zero have different bit patterns
121 : * but should compare as equal. We must ensure that they have the same
122 : * hash value, which is most reliably done this way:
123 : */
124 23578 : if (key == (float8) 0)
125 4 : PG_RETURN_UINT32(0);
126 :
127 23574 : return hash_any((unsigned char *) &key, sizeof(key));
128 : }
129 :
130 : Datum
131 : hashoidvector(PG_FUNCTION_ARGS)
132 3123 : {
133 3123 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
134 :
135 3123 : return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
136 : }
137 :
138 : Datum
139 : hashint2vector(PG_FUNCTION_ARGS)
140 0 : {
141 0 : int2vector *key = (int2vector *) PG_GETARG_POINTER(0);
142 :
143 0 : return hash_any((unsigned char *) key->values, key->dim1 * sizeof(int2));
144 : }
145 :
146 : Datum
147 : hashname(PG_FUNCTION_ARGS)
148 110916 : {
149 110916 : char *key = NameStr(*PG_GETARG_NAME(0));
150 110916 : int keylen = strlen(key);
151 :
152 : Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */
153 :
154 110916 : return hash_any((unsigned char *) key, keylen);
155 : }
156 :
157 : Datum
158 : hashtext(PG_FUNCTION_ARGS)
159 24024 : {
160 24024 : text *key = PG_GETARG_TEXT_PP(0);
161 : Datum result;
162 :
163 : /*
164 : * Note: this is currently identical in behavior to hashvarlena, but keep
165 : * it as a separate function in case we someday want to do something
166 : * different in non-C locales. (See also hashbpchar, if so.)
167 : */
168 24024 : result = hash_any((unsigned char *) VARDATA_ANY(key),
169 : VARSIZE_ANY_EXHDR(key));
170 :
171 : /* Avoid leaking memory for toasted inputs */
172 24024 : PG_FREE_IF_COPY(key, 0);
173 :
174 24024 : return result;
175 : }
176 :
177 : /*
178 : * hashvarlena() can be used for any varlena datatype in which there are
179 : * no non-significant bits, ie, distinct bitpatterns never compare as equal.
180 : */
181 : Datum
182 : hashvarlena(PG_FUNCTION_ARGS)
183 0 : {
184 0 : struct varlena *key = PG_GETARG_VARLENA_PP(0);
185 : Datum result;
186 :
187 0 : result = hash_any((unsigned char *) VARDATA_ANY(key),
188 : VARSIZE_ANY_EXHDR(key));
189 :
190 : /* Avoid leaking memory for toasted inputs */
191 0 : PG_FREE_IF_COPY(key, 0);
192 :
193 0 : return result;
194 : }
195 :
196 : /*
197 : * This hash function was written by Bob Jenkins
198 : * (bob_jenkins@burtleburtle.net), and superficially adapted
199 : * for PostgreSQL by Neil Conway. For more information on this
200 : * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
201 : * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
202 : */
203 :
/*----------
 * mix -- mix 3 32-bit values reversibly.
 * For every delta with one or two bits set, and the deltas of all three
 * high bits or all three low bits, whether the original value of a,b,c
 * is almost all zero or is uniformly distributed,
 * - If mix() is run forward or backward, at least 32 bits in a,b,c
 *	 have at least 1/4 probability of changing.
 * - If mix() is run forward, every bit of c will change between 1/3 and
 *	 2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
 *
 * All three arguments are updated in place, so each must be a modifiable
 * lvalue of an unsigned 32-bit type.  Each argument is evaluated many times;
 * do not pass expressions with side effects.
 *
 * The body is wrapped in do { ... } while (0) so that "mix(a, b, c);" acts
 * as a single statement and can safely be used as the unbraced body of an
 * if/else.
 *----------
 */
#define mix(a,b,c) \
do { \
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13); \
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 8); \
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13); \
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12); \
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 16); \
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5); \
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3); \
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 10); \
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15); \
} while (0)
227 :
228 : /*
229 : * hash_any() -- hash a variable-length key into a 32-bit value
230 : * k : the key (the unaligned variable-length array of bytes)
231 : * len : the length of the key, counting by bytes
232 : *
233 : * Returns a uint32 value. Every bit of the key affects every bit of
234 : * the return value. Every 1-bit and 2-bit delta achieves avalanche.
235 : * About 6*len+35 instructions. The best hash table sizes are powers
236 : * of 2. There is no need to do mod a prime (mod is sooo slow!).
237 : * If you need less than 32 bits, use a bitmask.
238 : */
239 : Datum
240 : hash_any(register const unsigned char *k, register int keylen)
241 2616447 : {
242 : register uint32 a,
243 : b,
244 : c,
245 : len;
246 :
247 : /* Set up the internal state */
248 2616447 : len = keylen;
249 2616447 : a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
250 2616447 : c = 3923095; /* initialize with an arbitrary value */
251 :
252 : /* handle most of the key */
253 7797726 : while (len >= 12)
254 : {
255 2564832 : a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
256 2564832 : b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
257 2564832 : c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
258 2564832 : mix(a, b, c);
259 2564832 : k += 12;
260 2564832 : len -= 12;
261 : }
262 :
263 : /* handle the last 11 bytes */
264 2616447 : c += keylen;
265 2616447 : switch (len) /* all the case statements fall through */
266 : {
267 : case 11:
268 3778 : c += ((uint32) k[10] << 24);
269 : case 10:
270 37302 : c += ((uint32) k[9] << 16);
271 : case 9:
272 62118 : c += ((uint32) k[8] << 8);
273 : /* the first byte of c is reserved for the length */
274 : case 8:
275 1031282 : b += ((uint32) k[7] << 24);
276 : case 7:
277 1042561 : b += ((uint32) k[6] << 16);
278 : case 6:
279 1050377 : b += ((uint32) k[5] << 8);
280 : case 5:
281 1054675 : b += k[4];
282 : case 4:
283 2508009 : a += ((uint32) k[3] << 24);
284 : case 3:
285 2512832 : a += ((uint32) k[2] << 16);
286 : case 2:
287 2521874 : a += ((uint32) k[1] << 8);
288 : case 1:
289 2530942 : a += k[0];
290 : /* case 0: nothing left to add */
291 : }
292 2616447 : mix(a, b, c);
293 :
294 : /* report the result */
295 2616447 : return UInt32GetDatum(c);
296 : }
297 :
298 : /*
299 : * hash_uint32() -- hash a 32-bit value
300 : *
301 : * This has the same result (at least on little-endian machines) as
302 : * hash_any(&k, sizeof(uint32))
303 : * but is faster and doesn't force the caller to store k into memory.
304 : */
305 : Datum
306 : hash_uint32(uint32 k)
307 1217048 : {
308 : register uint32 a,
309 : b,
310 : c;
311 :
312 1217048 : a = 0x9e3779b9 + k;
313 1217048 : b = 0x9e3779b9;
314 1217048 : c = 3923095 + (uint32) sizeof(uint32);
315 :
316 1217048 : mix(a, b, c);
317 :
318 : /* report the result */
319 1217048 : return UInt32GetDatum(c);
320 : }
|