LTP GCOV extension - code coverage report
Current view: directory - access/hash - hashfunc.c
Test: unnamed
Date: 2008-07-03 Instrumented lines: 79
Code covered: 82.3 % Executed lines: 65
Legend: not executed executed

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * hashfunc.c
       4                 :  *        Support functions for hash access method.
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *        $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.54 2007/11/15 21:14:32 momjian Exp $
      12                 :  *
      13                 :  * NOTES
      14                 :  *        These functions are stored in pg_amproc.  For each operator class
      15                 :  *        defined for hash indexes, they compute the hash value of the argument.
      16                 :  *
      17                 :  *        Additional hash functions appear in /utils/adt/ files for various
      18                 :  *        specialized datatypes.
      19                 :  *
      20                 :  *        It is expected that every bit of a hash function's 32-bit result is
      21                 :  *        as random as every other; failure to ensure this is likely to lead
      22                 :  *        to poor performance of hash joins, for example.  In most cases a hash
      23                 :  *        function should use hash_any() or its variant hash_uint32().
      24                 :  *-------------------------------------------------------------------------
      25                 :  */
      26                 : 
      27                 : #include "postgres.h"
      28                 : 
      29                 : #include "access/hash.h"
      30                 : 
      31                 : 
      32                 : /* hashchar: hash a one-byte argument.  Note: this is used for both "char" and boolean datatypes */
      33                 : Datum
      34                 : hashchar(PG_FUNCTION_ARGS)
      35               5 : {
      36               5 :         return hash_uint32((int32) PG_GETARG_CHAR(0));  /* cast to int32 and hash it as a single 32-bit word */
      37                 : }
      38                 : 
      39                 : Datum
      40                 : hashint2(PG_FUNCTION_ARGS)      /* hash a 16-bit integer argument */
      41           55835 : {
      42           55835 :         return hash_uint32((int32) PG_GETARG_INT16(0)); /* cast to int32 first, so the same 32-bit hash as hashint4 is applied */
      43                 : }
      44                 : 
      45                 : Datum
      46                 : hashint4(PG_FUNCTION_ARGS)      /* hash a 32-bit integer argument */
      47           90762 : {
      48           90762 :         return hash_uint32(PG_GETARG_INT32(0)); /* the argument is passed to hash_uint32 unchanged */
      49                 : }
      50                 : 
      51                 : Datum
      52                 : hashint8(PG_FUNCTION_ARGS)      /* hash a 64-bit integer argument down to a 32-bit hash */
      53              49 : {
      54                 :         /*
      55                 :          * The idea here is to produce a hash value compatible with the values
      56                 :          * produced by hashint4 and hashint2 for logically equal inputs; this is
      57                 :          * necessary to support cross-type hash joins across these input types.
      58                 :          * Since all three types are signed, we can xor the high half of the int8
      59                 :          * value if the sign is positive, or the complement of the high half when
      60                 :          * the sign is negative.
      61                 :          */
      62                 : #ifndef INT64_IS_BUSTED
      63              49 :         int64           val = PG_GETARG_INT64(0);
      64              49 :         uint32          lohalf = (uint32) val;
      65              49 :         uint32          hihalf = (uint32) (val >> 32);
      66                 : 
      67              49 :         lohalf ^= (val >= 0) ? hihalf : ~hihalf;        /* for a value that fits in 32 bits the xor operand is 0 (assuming >> sign-extends a negative int64), leaving lohalf as hashint4 would see it */
      68                 : 
      69              49 :         return hash_uint32(lohalf);
      70                 : #else
      71                 :         /* here if we can't count on "x >> 32" to work sanely */
      72                 :         return hash_uint32((int32) PG_GETARG_INT64(0)); /* the high half is not folded in on this path */
      73                 : #endif
      74                 : }
      75                 : 
      76                 : Datum
      77                 : hashoid(PG_FUNCTION_ARGS)       /* hash an Oid argument */
      78          631253 : {
      79          631253 :         return hash_uint32((uint32) PG_GETARG_OID(0));  /* the Oid is cast to uint32 and hashed as one word */
      80                 : }
      81                 : 
      82                 : Datum
      83                 : hashenum(PG_FUNCTION_ARGS)      /* hash an enum argument; the body is identical to hashoid's */
      84               7 : {
      85               7 :         return hash_uint32((uint32) PG_GETARG_OID(0));  /* the argument is fetched as an Oid and hashed as one word */
      86                 : }
      87                 : 
      88                 : Datum
      89                 : hashfloat4(PG_FUNCTION_ARGS)    /* hash a float4 argument, compatibly with hashfloat8 */
      90               0 : {
      91               0 :         float4          key = PG_GETARG_FLOAT4(0);
      92                 :         float8          key8;
      93                 : 
      94                 :         /*
      95                 :          * On IEEE-float machines, minus zero and zero have different bit patterns
      96                 :          * but should compare as equal.  We must ensure that they have the same
      97                 :          * hash value, which is most reliably done this way:
      98                 :          */
      99               0 :         if (key == (float4) 0)
     100               0 :                 PG_RETURN_UINT32(0);
     101                 : 
     102                 :         /*
     103                 :          * To support cross-type hashing of float8 and float4, we want to return
     104                 :          * the same hash value hashfloat8 would produce for an equal float8 value.
     105                 :          * So, widen the value to float8 and hash that.  (We must do this rather
     106                 :          * than have hashfloat8 try to narrow its value to float4; that could fail
     107                 :          * on overflow.)
     108                 :          */
     109               0 :         key8 = key;
     110                 : 
     111               0 :         return hash_any((unsigned char *) &key8, sizeof(key8)); /* NOTE(review): the raw bytes are hashed, so NaNs with different bit patterns would hash differently -- confirm whether that matters */
     112                 : }
     113                 : 
     114                 : Datum
     115                 : hashfloat8(PG_FUNCTION_ARGS)    /* hash a float8 argument by its in-memory bytes */
     116           23578 : {
     117           23578 :         float8          key = PG_GETARG_FLOAT8(0);
     118                 : 
     119                 :         /*
     120                 :          * On IEEE-float machines, minus zero and zero have different bit patterns
     121                 :          * but should compare as equal.  We must ensure that they have the same
     122                 :          * hash value, which is most reliably done this way:
     123                 :          */
     124           23578 :         if (key == (float8) 0)
     125               4 :                 PG_RETURN_UINT32(0);
     126                 : 
     127           23574 :         return hash_any((unsigned char *) &key, sizeof(key));   /* NOTE(review): the raw bytes are hashed, so NaNs with different bit patterns would hash differently -- confirm whether that matters */
     128                 : }
     129                 : 
     130                 : Datum
     131                 : hashoidvector(PG_FUNCTION_ARGS) /* hash an oidvector argument */
     132            3123 : {
     133            3123 :         oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     134                 : 
     135            3123 :         return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));        /* hash the dim1 Oid-sized elements of values[] as one byte string */
     136                 : }
     137                 : 
     138                 : Datum
     139                 : hashint2vector(PG_FUNCTION_ARGS)        /* hash an int2vector argument */
     140               0 : {
     141               0 :         int2vector *key = (int2vector *) PG_GETARG_POINTER(0);
     142                 : 
     143               0 :         return hash_any((unsigned char *) key->values, key->dim1 * sizeof(int2));       /* hash the dim1 int2-sized elements of values[] as one byte string */
     144                 : }
     145                 : 
     146                 : Datum
     147                 : hashname(PG_FUNCTION_ARGS)      /* hash a Name argument as a NUL-terminated string */
     148          110916 : {
     149          110916 :         char       *key = NameStr(*PG_GETARG_NAME(0));
     150          110916 :         int                     keylen = strlen(key);   /* only the bytes before the terminating NUL are hashed */
     151                 : 
     152                 :         Assert(keylen < NAMEDATALEN);                /* else it's not truncated correctly */
     153                 : 
     154          110916 :         return hash_any((unsigned char *) key, keylen);
     155                 : }
     156                 : 
     157                 : Datum
     158                 : hashtext(PG_FUNCTION_ARGS)      /* hash the data bytes of a text argument */
     159           24024 : {
     160           24024 :         text       *key = PG_GETARG_TEXT_PP(0);
     161                 :         Datum           result;
     162                 : 
     163                 :         /*
     164                 :          * Note: this is currently identical in behavior to hashvarlena, but keep
     165                 :          * it as a separate function in case we someday want to do something
     166                 :          * different in non-C locales.  (See also hashbpchar, if so.)
     167                 :          */
     168           24024 :         result = hash_any((unsigned char *) VARDATA_ANY(key),
     169                 :                                           VARSIZE_ANY_EXHDR(key));      /* length presumably excludes the varlena header -- confirm against the macro */
     170                 : 
     171                 :         /* Avoid leaking memory for toasted inputs */
     172           24024 :         PG_FREE_IF_COPY(key, 0);        /* the hash is computed above, before the possible copy is freed */
     173                 : 
     174           24024 :         return result;
     175                 : }
     176                 : 
     177                 : /*
     178                 :  * hashvarlena() can be used for any varlena datatype in which there are
     179                 :  * no non-significant bits, i.e., distinct bit patterns never compare as equal.
     180                 :  */
     181                 : Datum
     182                 : hashvarlena(PG_FUNCTION_ARGS)
     183               0 : {
     184               0 :         struct varlena *key = PG_GETARG_VARLENA_PP(0);
     185                 :         Datum           result;
     186                 : 
     187               0 :         result = hash_any((unsigned char *) VARDATA_ANY(key),
     188                 :                                           VARSIZE_ANY_EXHDR(key));      /* length presumably excludes the varlena header -- confirm against the macro */
     189                 : 
     190                 :         /* Avoid leaking memory for toasted inputs */
     191               0 :         PG_FREE_IF_COPY(key, 0);        /* the hash is computed above, before the possible copy is freed */
     192                 : 
     193               0 :         return result;
     194                 : }
     195                 : 
     196                 : /*
     197                 :  * This hash function was written by Bob Jenkins
     198                 :  * (bob_jenkins@burtleburtle.net), and superficially adapted
     199                 :  * for PostgreSQL by Neil Conway. For more information on this
     200                 :  * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
     201                 :  * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
     202                 :  */
     203                 : 
     204                 : /*----------
     205                 :  * mix -- mix 3 32-bit values reversibly, updating a, b and c in place.
     206                 :  * For every delta with one or two bits set, and the deltas of all three
     207                 :  * high bits or all three low bits, whether the original value of a,b,c
     208                 :  * is almost all zero or is uniformly distributed,
     209                 :  * - If mix() is run forward or backward, at least 32 bits in a,b,c
     210                 :  *       have at least 1/4 probability of changing.
     211                 :  * - If mix() is run forward, every bit of c will change between 1/3 and
     212                 :  *       2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
     213                 :  * Each argument is assigned to and evaluated many times, so pass plain variables only.
     214                 :  */
     215                 : #define mix(a,b,c) \
     216                 : { \
     217                 :   a -= b; a -= c; a ^= ((c)>>13); \
     218                 :   b -= c; b -= a; b ^= ((a)<<8); \
     219                 :   c -= a; c -= b; c ^= ((b)>>13); \
     220                 :   a -= b; a -= c; a ^= ((c)>>12);  \
     221                 :   b -= c; b -= a; b ^= ((a)<<16); \
     222                 :   c -= a; c -= b; c ^= ((b)>>5); \
     223                 :   a -= b; a -= c; a ^= ((c)>>3);  \
     224                 :   b -= c; b -= a; b ^= ((a)<<10); \
     225                 :   c -= a; c -= b; c ^= ((b)>>15); \
     226                 : }
     227                 : 
     228                 : /*
     229                 :  * hash_any() -- hash a variable-length key into a 32-bit value
     230                 :  *              k               : the key (the unaligned variable-length array of bytes)
     231                 :  *              keylen          : the length of the key, counting by bytes
     232                 :  *
     233                 :  * Returns a uint32 value (as a Datum).  Every bit of the key affects every bit of
     234                 :  * the return value.  Every 1-bit and 2-bit delta achieves avalanche.
     235                 :  * About 6*keylen+35 instructions.  The best hash table sizes are powers
     236                 :  * of 2.  There is no need to do mod a prime (mod is sooo slow!).
     237                 :  * If you need less than 32 bits, use a bitmask.
     238                 :  */
     239                 : Datum
     240                 : hash_any(register const unsigned char *k, register int keylen)
     241         2616447 : {
     242                 :         register uint32 a,
     243                 :                                 b,
     244                 :                                 c,
     245                 :                                 len;
     246                 : 
     247                 :         /* Set up the internal state */
     248         2616447 :         len = keylen;                           /* NB: len is uint32, so a negative keylen would wrap to a huge byte count */
     249         2616447 :         a = b = 0x9e3779b9;                     /* the golden ratio; an arbitrary value */
     250         2616447 :         c = 3923095;                            /* initialize with an arbitrary value */
     251                 : 
     252                 :         /* handle most of the key, 12 bytes per round, assembling each word low byte first */
     253         7797726 :         while (len >= 12)
     254                 :         {
     255         2564832 :                 a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
     256         2564832 :                 b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
     257         2564832 :                 c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
     258         2564832 :                 mix(a, b, c);
     259         2564832 :                 k += 12;
     260         2564832 :                 len -= 12;
     261                 :         }
     262                 : 
     263                 :         /* handle the last 0 to 11 bytes */
     264         2616447 :         c += keylen;                            /* the full key length, not the remaining len */
     265         2616447 :         switch (len)                            /* all the case statements fall through */
     266                 :         {
     267                 :                 case 11:
     268            3778 :                         c += ((uint32) k[10] << 24);
     269                 :                 case 10:
     270           37302 :                         c += ((uint32) k[9] << 16);
     271                 :                 case 9:
     272           62118 :                         c += ((uint32) k[8] << 8);
     273                 :                         /* the first byte of c is reserved for the length */
     274                 :                 case 8:
     275         1031282 :                         b += ((uint32) k[7] << 24);
     276                 :                 case 7:
     277         1042561 :                         b += ((uint32) k[6] << 16);
     278                 :                 case 6:
     279         1050377 :                         b += ((uint32) k[5] << 8);
     280                 :                 case 5:
     281         1054675 :                         b += k[4];
     282                 :                 case 4:
     283         2508009 :                         a += ((uint32) k[3] << 24);
     284                 :                 case 3:
     285         2512832 :                         a += ((uint32) k[2] << 16);
     286                 :                 case 2:
     287         2521874 :                         a += ((uint32) k[1] << 8);
     288                 :                 case 1:
     289         2530942 :                         a += k[0];
     290                 :                         /* case 0: nothing left to add */
     291                 :         }
     292         2616447 :         mix(a, b, c);
     293                 : 
     294                 :         /* report the result: only c is returned; a and b are discarded */
     295         2616447 :         return UInt32GetDatum(c);
     296                 : }
     297                 : 
     298                 : /*
     299                 :  * hash_uint32() -- hash a 32-bit value
     300                 :  *
     301                 :  * This has the same result (at least on little-endian machines) as
     302                 :  *              hash_any(&k, sizeof(uint32))
     303                 :  * but is faster and doesn't force the caller to store k into memory.
     304                 :  * Returns the uint32 hash as a Datum.
     305                 :  */
     306                 : Datum
     307                 : hash_uint32(uint32 k)
     308         1217048 : {
     309                 :         register uint32 a,
     310                 :                                 b,
     311                 :                                 c;
     312                 : 
     313         1217048 :         a = 0x9e3779b9 + k;                     /* hash_any's initial a with the whole key word added at once */
     314         1217048 :         b = 0x9e3779b9;                         /* hash_any's initial b; no key bytes reach it for a 4-byte key */
     315         1217048 :         c = 3923095 + (uint32) sizeof(uint32);  /* hash_any's initial c plus the key length */
     316                 : 
     317         1217048 :         mix(a, b, c);
     318                 : 
     319                 :         /* report the result */
     320         1217048 :         return UInt32GetDatum(c);
     321                 : }

Generated by: LTP GCOV extension version 1.5