LTP GCOV extension - code coverage report
Current view: access/hash/hashpage.c
Test: unnamed
Date: 2008-07-03
Instrumented lines: 223
Executed lines: 198
Code covered: 88.8% (198 of 223 instrumented lines)
Legend: the middle column gives each line's execution count; a count of 0 marks
an instrumented line that was never executed, and a blank count marks a line
that was not instrumented.

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * hashpage.c
       4                 :  *        Hash table page management code for the Postgres hash access method
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *        $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.71 2007/11/15 21:14:32 momjian Exp $
      12                 :  *
      13                 :  * NOTES
      14                 :  *        Postgres hash pages look like ordinary relation pages.  The opaque
      15                 :  *        data at high addresses includes information about the page including
      16                 :  *        whether a page is an overflow page or a true bucket, the bucket
      17                 :  *        number, and the block numbers of the preceding and following pages
      18                 :  *        in the same bucket.
      19                 :  *
      20                 :  *        The first page in a hash relation, page zero, is special -- it stores
      21                 :  *        information describing the hash table; it is referred to as the
      22                 :  *        "meta page." Pages one and higher store the actual data.
      23                 :  *
      24                 :  *        There are also bitmap pages, which are not manipulated here;
      25                 :  *        see hashovfl.c.
      26                 :  *
      27                 :  *-------------------------------------------------------------------------
      28                 :  */
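
For orientation: the "opaque data at high addresses" that the NOTES describe is
the per-page HashPageOpaqueData struct.  A sketch of it, reconstructed from the
hasho_* fields this file sets (the authoritative definition lives in
access/hash.h):

        typedef struct HashPageOpaqueData
        {
            BlockNumber hasho_prevblkno;  /* previous page in this bucket's chain */
            BlockNumber hasho_nextblkno;  /* next page in this bucket's chain */
            Bucket      hasho_bucket;     /* bucket number this page belongs to */
            uint16      hasho_flag;       /* page type: LH_META_PAGE, LH_BUCKET_PAGE, ... */
            uint16      hasho_page_id;    /* constant HASHO_PAGE_ID, identifies hash pages */
        } HashPageOpaqueData;
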
      29                 : #include "postgres.h"
      30                 : 
      31                 : #include "access/genam.h"
      32                 : #include "access/hash.h"
      33                 : #include "miscadmin.h"
      34                 : #include "storage/lmgr.h"
      35                 : #include "storage/smgr.h"
      36                 : #include "utils/lsyscache.h"
      37                 : 
      38                 : 
      39                 : static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock,
      40                 :                                         uint32 nblocks);
      41                 : static void _hash_splitbucket(Relation rel, Buffer metabuf,
      42                 :                                   Bucket obucket, Bucket nbucket,
      43                 :                                   BlockNumber start_oblkno,
      44                 :                                   BlockNumber start_nblkno,
      45                 :                                   uint32 maxbucket,
      46                 :                                   uint32 highmask, uint32 lowmask);
      47                 : 
      48                 : 
      49                 : /*
      50                 :  * We use high-concurrency locking on hash indexes (see README for an overview
      51                 :  * of the locking rules).  However, we can skip taking lmgr locks when the
      52                 :  * index is local to the current backend (ie, either temp or new in the
      53                 :  * current transaction).  No one else can see it, so there's no reason to
      54                 :  * take locks.  We still take buffer-level locks, but not lmgr locks.
      55                 :  */
      56                 : #define USELOCKING(rel)         (!RELATION_IS_LOCAL(rel))
      57                 : 
      58                 : 
      59                 : /*
      60                 :  * _hash_getlock() -- Acquire an lmgr lock.
      61                 :  *
      62                 :  * 'whichlock' should be zero to acquire the split-control lock, or the
      63                 :  * block number of a bucket's primary bucket page to acquire the per-bucket
      64                 :  * lock.  (See README for details of the use of these locks.)
      65                 :  *
      66                 :  * 'access' must be HASH_SHARE or HASH_EXCLUSIVE.
      67                 :  */
      68                 : void
      69                 : _hash_getlock(Relation rel, BlockNumber whichlock, int access)
      70           80328 : {
      71           80328 :         if (USELOCKING(rel))
      72              61 :                 LockPage(rel, whichlock, access);
      73           80328 : }
      74                 : 
      75                 : /*
      76                 :  * _hash_try_getlock() -- Acquire an lmgr lock, but only if it's free.
      77                 :  *
       78                 :  * Same as above, except that we return FALSE without blocking if the lock isn't free.
      79                 :  */
      80                 : bool
      81                 : _hash_try_getlock(Relation rel, BlockNumber whichlock, int access)
      82             500 : {
      83             500 :         if (USELOCKING(rel))
      84               2 :                 return ConditionalLockPage(rel, whichlock, access);
      85                 :         else
      86             498 :                 return true;
      87                 : }
      88                 : 
      89                 : /*
      90                 :  * _hash_droplock() -- Release an lmgr lock.
      91                 :  */
      92                 : void
      93                 : _hash_droplock(Relation rel, BlockNumber whichlock, int access)
      94           80828 : {
      95           80828 :         if (USELOCKING(rel))
      96              63 :                 UnlockPage(rel, whichlock, access);
      97           80828 : }
      98                 : 
      99                 : /*
     100                 :  *      _hash_getbuf() -- Get a buffer by block number for read or write.
     101                 :  *
     102                 :  *              'access' must be HASH_READ, HASH_WRITE, or HASH_NOLOCK.
     103                 :  *              'flags' is a bitwise OR of the allowed page types.
     104                 :  *
     105                 :  *              This must be used only to fetch pages that are expected to be valid
     106                 :  *              already.  _hash_checkpage() is applied using the given flags.
     107                 :  *
     108                 :  *              When this routine returns, the appropriate lock is set on the
     109                 :  *              requested buffer and its reference count has been incremented
     110                 :  *              (ie, the buffer is "locked and pinned").
     111                 :  *
     112                 :  *              P_NEW is disallowed because this routine can only be used
     113                 :  *              to access pages that are known to be before the filesystem EOF.
     114                 :  *              Extending the index should be done with _hash_getnewbuf.
     115                 :  */
     116                 : Buffer
     117                 : _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
     118           86438 : {
     119                 :         Buffer          buf;
     120                 : 
     121           86438 :         if (blkno == P_NEW)
     122               0 :                 elog(ERROR, "hash AM does not use P_NEW");
     123                 : 
     124           86438 :         buf = ReadBuffer(rel, blkno);
     125                 : 
     126           86438 :         if (access != HASH_NOLOCK)
     127           86438 :                 LockBuffer(buf, access);
     128                 : 
     129                 :         /* ref count and lock type are correct */
     130                 : 
     131           86438 :         _hash_checkpage(rel, buf, flags);
     132                 : 
     133           86438 :         return buf;
     134                 : }
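
A minimal usage sketch (hypothetical caller; _hash_splitbucket below fetches
and releases bucket pages with this same pattern):

        Buffer buf  = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
        Page   page = BufferGetPage(buf);
        /* ... examine tuples on 'page' while the share lock is held ... */
        _hash_relbuf(rel, buf);     /* drops both the lock and the pin */
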
     135                 : 
     136                 : /*
     137                 :  *      _hash_getinitbuf() -- Get and initialize a buffer by block number.
     138                 :  *
     139                 :  *              This must be used only to fetch pages that are known to be before
     140                 :  *              the index's filesystem EOF, but are to be filled from scratch.
     141                 :  *              _hash_pageinit() is applied automatically.      Otherwise it has
     142                 :  *              effects similar to _hash_getbuf() with access = HASH_WRITE.
     143                 :  *
     144                 :  *              When this routine returns, a write lock is set on the
     145                 :  *              requested buffer and its reference count has been incremented
     146                 :  *              (ie, the buffer is "locked and pinned").
     147                 :  *
     148                 :  *              P_NEW is disallowed because this routine can only be used
     149                 :  *              to access pages that are known to be before the filesystem EOF.
     150                 :  *              Extending the index should be done with _hash_getnewbuf.
     151                 :  */
     152                 : Buffer
     153                 : _hash_getinitbuf(Relation rel, BlockNumber blkno)
     154              68 : {
     155                 :         Buffer          buf;
     156                 : 
     157              68 :         if (blkno == P_NEW)
     158               0 :                 elog(ERROR, "hash AM does not use P_NEW");
     159                 : 
     160              68 :         buf = ReadOrZeroBuffer(rel, blkno);
     161                 : 
     162              68 :         LockBuffer(buf, HASH_WRITE);
     163                 : 
     164                 :         /* ref count and lock type are correct */
     165                 : 
     166                 :         /* initialize the page */
     167              68 :         _hash_pageinit(BufferGetPage(buf), BufferGetPageSize(buf));
     168                 : 
     169              68 :         return buf;
     170                 : }
     171                 : 
     172                 : /*
     173                 :  *      _hash_getnewbuf() -- Get a new page at the end of the index.
     174                 :  *
     175                 :  *              This has the same API as _hash_getinitbuf, except that we are adding
     176                 :  *              a page to the index, and hence expect the page to be past the
     177                 :  *              logical EOF.  (However, we have to support the case where it isn't,
     178                 :  *              since a prior try might have crashed after extending the filesystem
     179                 :  *              EOF but before updating the metapage to reflect the added page.)
     180                 :  *
      181                 :  *              It is the caller's responsibility to ensure that only one process can
     182                 :  *              extend the index at a time.
     183                 :  */
     184                 : Buffer
     185                 : _hash_getnewbuf(Relation rel, BlockNumber blkno)
     186             323 : {
     187             323 :         BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
     188                 :         Buffer          buf;
     189                 : 
     190             323 :         if (blkno == P_NEW)
     191               0 :                 elog(ERROR, "hash AM does not use P_NEW");
     192             323 :         if (blkno > nblocks)
     193               0 :                 elog(ERROR, "access to noncontiguous page in hash index \"%s\"",
     194                 :                          RelationGetRelationName(rel));
     195                 : 
     196                 :         /* smgr insists we use P_NEW to extend the relation */
     197             323 :         if (blkno == nblocks)
     198                 :         {
     199              73 :                 buf = ReadBuffer(rel, P_NEW);
     200              73 :                 if (BufferGetBlockNumber(buf) != blkno)
     201               0 :                         elog(ERROR, "unexpected hash relation size: %u, should be %u",
     202                 :                                  BufferGetBlockNumber(buf), blkno);
     203                 :         }
     204                 :         else
     205             250 :                 buf = ReadOrZeroBuffer(rel, blkno);
     206                 : 
     207             323 :         LockBuffer(buf, HASH_WRITE);
     208                 : 
     209                 :         /* ref count and lock type are correct */
     210                 : 
     211                 :         /* initialize the page */
     212             323 :         _hash_pageinit(BufferGetPage(buf), BufferGetPageSize(buf));
     213                 : 
     214             323 :         return buf;
     215                 : }
     216                 : 
     217                 : /*
     218                 :  *      _hash_getbuf_with_strategy() -- Get a buffer with nondefault strategy.
     219                 :  *
     220                 :  *              This is identical to _hash_getbuf() but also allows a buffer access
     221                 :  *              strategy to be specified.  We use this for VACUUM operations.
     222                 :  */
     223                 : Buffer
     224                 : _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
     225                 :                                                    int access, int flags,
     226                 :                                                    BufferAccessStrategy bstrategy)
     227             470 : {
     228                 :         Buffer          buf;
     229                 : 
     230             470 :         if (blkno == P_NEW)
     231               0 :                 elog(ERROR, "hash AM does not use P_NEW");
     232                 : 
     233             470 :         buf = ReadBufferWithStrategy(rel, blkno, bstrategy);
     234                 : 
     235             470 :         if (access != HASH_NOLOCK)
     236             470 :                 LockBuffer(buf, access);
     237                 : 
     238                 :         /* ref count and lock type are correct */
     239                 : 
     240             470 :         _hash_checkpage(rel, buf, flags);
     241                 : 
     242             470 :         return buf;
     243                 : }
     244                 : 
     245                 : /*
     246                 :  *      _hash_relbuf() -- release a locked buffer.
     247                 :  *
     248                 :  * Lock and pin (refcount) are both dropped.
     249                 :  */
     250                 : void
     251                 : _hash_relbuf(Relation rel, Buffer buf)
     252            5953 : {
     253            5953 :         UnlockReleaseBuffer(buf);
     254            5953 : }
     255                 : 
     256                 : /*
     257                 :  *      _hash_dropbuf() -- release an unlocked buffer.
     258                 :  *
     259                 :  * This is used to unpin a buffer on which we hold no lock.
     260                 :  */
     261                 : void
     262                 : _hash_dropbuf(Relation rel, Buffer buf)
     263           40018 : {
     264           40018 :         ReleaseBuffer(buf);
     265           40018 : }
     266                 : 
     267                 : /*
     268                 :  *      _hash_wrtbuf() -- write a hash page to disk.
     269                 :  *
     270                 :  *              This routine releases the lock held on the buffer and our refcount
     271                 :  *              for it.  It is an error to call _hash_wrtbuf() without a write lock
     272                 :  *              and a pin on the buffer.
     273                 :  *
     274                 :  * NOTE: this routine should go away when/if hash indexes are WAL-ified.
     275                 :  * The correct sequence of operations is to mark the buffer dirty, then
     276                 :  * write the WAL record, then release the lock and pin; so marking dirty
     277                 :  * can't be combined with releasing.
     278                 :  */
     279                 : void
     280                 : _hash_wrtbuf(Relation rel, Buffer buf)
     281           41328 : {
     282           41328 :         MarkBufferDirty(buf);
     283           41328 :         UnlockReleaseBuffer(buf);
     284           41328 : }
     285                 : 
     286                 : /*
     287                 :  * _hash_chgbufaccess() -- Change the lock type on a buffer, without
     288                 :  *                      dropping our pin on it.
     289                 :  *
     290                 :  * from_access and to_access may be HASH_READ, HASH_WRITE, or HASH_NOLOCK,
     291                 :  * the last indicating that no buffer-level lock is held or wanted.
     292                 :  *
     293                 :  * When from_access == HASH_WRITE, we assume the buffer is dirty and tell
     294                 :  * bufmgr it must be written out.  If the caller wants to release a write
     295                 :  * lock on a page that's not been modified, it's okay to pass from_access
     296                 :  * as HASH_READ (a bit ugly, but handy in some places).
     297                 :  */
     298                 : void
     299                 : _hash_chgbufaccess(Relation rel,
     300                 :                                    Buffer buf,
     301                 :                                    int from_access,
     302                 :                                    int to_access)
     303          121478 : {
     304          121478 :         if (from_access == HASH_WRITE)
     305           40385 :                 MarkBufferDirty(buf);
     306          121478 :         if (from_access != HASH_NOLOCK)
     307           80748 :                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     308          121478 :         if (to_access != HASH_NOLOCK)
     309           40730 :                 LockBuffer(buf, to_access);
     310          121478 : }
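
The typical call sequence, as _hash_expandtable below uses it on the metapage
(the pin is held throughout; only the lock comes and goes):

        /* caller holds a pin, but no lock, on metabuf */
        _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
        /* ... modify the metapage ... */
        _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
        /* page is marked dirty, lock released, pin still held */
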
     311                 : 
     312                 : 
     313                 : /*
     314                 :  *      _hash_metapinit() -- Initialize the metadata page of a hash index,
     315                 :  *                              the two buckets that we begin with and the initial
     316                 :  *                              bitmap page.
     317                 :  *
     318                 :  * We are fairly cavalier about locking here, since we know that no one else
     319                 :  * could be accessing this index.  In particular the rule about not holding
     320                 :  * multiple buffer locks is ignored.
     321                 :  */
     322                 : void
     323                 : _hash_metapinit(Relation rel)
     324               6 : {
     325                 :         HashMetaPage metap;
     326                 :         HashPageOpaque pageopaque;
     327                 :         Buffer          metabuf;
     328                 :         Buffer          buf;
     329                 :         Page            pg;
     330                 :         int32           data_width;
     331                 :         int32           item_width;
     332                 :         int32           ffactor;
     333                 :         uint16          i;
     334                 : 
     335                 :         /* safety check */
     336               6 :         if (RelationGetNumberOfBlocks(rel) != 0)
     337               0 :                 elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
     338                 :                          RelationGetRelationName(rel));
     339                 : 
     340                 :         /*
     341                 :          * Determine the target fill factor (in tuples per bucket) for this index.
     342                 :          * The idea is to make the fill factor correspond to pages about as full
     343                 :          * as the user-settable fillfactor parameter says.      We can compute it
     344                 :          * exactly if the index datatype is fixed-width, but for var-width there's
     345                 :          * some guessing involved.
     346                 :          */
     347               6 :         data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid,
     348                 :                                                                  RelationGetDescr(rel)->attrs[0]->atttypmod);
     349               6 :         item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
     350                 :                 sizeof(ItemIdData);             /* include the line pointer */
     351               6 :         ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width;
     352                 :         /* keep to a sane range */
     353               6 :         if (ffactor < 10)
     354               0 :                 ffactor = 10;
     355                 : 
     356                 :         /*
     357                 :          * We initialize the metapage, the first two bucket pages, and the first
     358                 :          * bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
     359                 :          * calls to occur.      This ensures that the smgr level has the right idea of
     360                 :          * the physical index length.
     361                 :          */
     362               6 :         metabuf = _hash_getnewbuf(rel, HASH_METAPAGE);
     363               6 :         pg = BufferGetPage(metabuf);
     364                 : 
     365               6 :         pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
     366               6 :         pageopaque->hasho_prevblkno = InvalidBlockNumber;
     367               6 :         pageopaque->hasho_nextblkno = InvalidBlockNumber;
     368               6 :         pageopaque->hasho_bucket = -1;
     369               6 :         pageopaque->hasho_flag = LH_META_PAGE;
     370               6 :         pageopaque->hasho_page_id = HASHO_PAGE_ID;
     371                 : 
     372               6 :         metap = (HashMetaPage) pg;
     373                 : 
     374               6 :         metap->hashm_magic = HASH_MAGIC;
     375               6 :         metap->hashm_version = HASH_VERSION;
     376               6 :         metap->hashm_ntuples = 0;
     377               6 :         metap->hashm_nmaps = 0;
     378               6 :         metap->hashm_ffactor = ffactor;
     379               6 :         metap->hashm_bsize = BufferGetPageSize(metabuf);
     380                 :         /* find largest bitmap array size that will fit in page size */
     381              12 :         for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
     382                 :         {
     383              12 :                 if ((1 << i) <= (metap->hashm_bsize -
     384                 :                                                  (MAXALIGN(sizeof(PageHeaderData)) +
     385                 :                                                   MAXALIGN(sizeof(HashPageOpaqueData)))))
     386               6 :                         break;
     387                 :         }
     388                 :         Assert(i > 0);
     389               6 :         metap->hashm_bmsize = 1 << i;
     390               6 :         metap->hashm_bmshift = i + BYTE_TO_BIT;
     391                 :         Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));
     392                 : 
     393                 :         /*
     394                 :          * Label the index with its primary hash support function's OID.  This is
     395                 :          * pretty useless for normal operation (in fact, hashm_procid is not used
     396                 :          * anywhere), but it might be handy for forensic purposes so we keep it.
     397                 :          */
     398               6 :         metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
     399                 : 
     400                 :         /*
     401                 :          * We initialize the index with two buckets, 0 and 1, occupying physical
     402                 :          * blocks 1 and 2.      The first freespace bitmap page is in block 3.
     403                 :          */
     404               6 :         metap->hashm_maxbucket = metap->hashm_lowmask = 1;        /* nbuckets - 1 */
     405               6 :         metap->hashm_highmask = 3;   /* (nbuckets << 1) - 1 */
     406                 : 
     407               6 :         MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
     408               6 :         MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
     409                 : 
      410               6 :         metap->hashm_spares[1] = 1; /* the first bitmap page is the only spare */
     411               6 :         metap->hashm_ovflpoint = 1;
     412               6 :         metap->hashm_firstfree = 0;
     413                 : 
     414                 :         /*
     415                 :          * Initialize the first two buckets
     416                 :          */
     417              18 :         for (i = 0; i <= 1; i++)
     418                 :         {
     419              12 :                 buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i));
     420              12 :                 pg = BufferGetPage(buf);
     421              12 :                 pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
     422              12 :                 pageopaque->hasho_prevblkno = InvalidBlockNumber;
     423              12 :                 pageopaque->hasho_nextblkno = InvalidBlockNumber;
     424              12 :                 pageopaque->hasho_bucket = i;
     425              12 :                 pageopaque->hasho_flag = LH_BUCKET_PAGE;
     426              12 :                 pageopaque->hasho_page_id = HASHO_PAGE_ID;
     427              12 :                 _hash_wrtbuf(rel, buf);
     428                 :         }
     429                 : 
     430                 :         /*
     431                 :          * Initialize first bitmap page
     432                 :          */
     433               6 :         _hash_initbitmap(rel, metap, 3);
     434                 : 
     435                 :         /* all done */
     436               6 :         _hash_wrtbuf(rel, metabuf);
     437               6 : }
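
To make the initial layout concrete, here is how the bucket-to-block mapping
works out for the state established above (a worked sketch; a bucket's block is
its bucket number, plus one for the metapage, plus the overflow/bitmap pages
recorded in hashm_spares[] for earlier splitpoints -- see the BUCKET_TO_BLKNO
macro in access/hash.h for the exact formula):

        block 0:  meta page
        block 1:  bucket 0            (no spare pages precede it)
        block 2:  bucket 1            (hashm_spares[0] = 0)
        block 3:  first bitmap page   (counted by hashm_spares[1] = 1)
        block 4:  bucket 2            (2 + hashm_spares[1] + 1, once splitpoint 2 is allocated)
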
     438                 : 
     439                 : /*
     440                 :  *      _hash_pageinit() -- Initialize a new hash index page.
     441                 :  */
     442                 : void
     443                 : _hash_pageinit(Page page, Size size)
     444             391 : {
     445                 :         Assert(PageIsNew(page));
     446             391 :         PageInit(page, size, sizeof(HashPageOpaqueData));
     447             391 : }
     448                 : 
     449                 : /*
     450                 :  * Attempt to expand the hash table by creating one new bucket.
     451                 :  *
     452                 :  * This will silently do nothing if it cannot get the needed locks.
     453                 :  *
     454                 :  * The caller should hold no locks on the hash index.
     455                 :  *
     456                 :  * The caller must hold a pin, but no lock, on the metapage buffer.
     457                 :  * The buffer is returned in the same state.
     458                 :  */
     459                 : void
     460                 : _hash_expandtable(Relation rel, Buffer metabuf)
     461             250 : {
     462                 :         HashMetaPage metap;
     463                 :         Bucket          old_bucket;
     464                 :         Bucket          new_bucket;
     465                 :         uint32          spare_ndx;
     466                 :         BlockNumber start_oblkno;
     467                 :         BlockNumber start_nblkno;
     468                 :         uint32          maxbucket;
     469                 :         uint32          highmask;
     470                 :         uint32          lowmask;
     471                 : 
     472                 :         /*
     473                 :          * Obtain the page-zero lock to assert the right to begin a split (see
     474                 :          * README).
     475                 :          *
     476                 :          * Note: deadlock should be impossible here. Our own backend could only be
     477                 :          * holding bucket sharelocks due to stopped indexscans; those will not
     478                 :          * block other holders of the page-zero lock, who are only interested in
     479                 :          * acquiring bucket sharelocks themselves.      Exclusive bucket locks are
     480                 :          * only taken here and in hashbulkdelete, and neither of these operations
     481                 :          * needs any additional locks to complete.      (If, due to some flaw in this
     482                 :          * reasoning, we manage to deadlock anyway, it's okay to error out; the
     483                 :          * index will be left in a consistent state.)
     484                 :          */
     485             250 :         _hash_getlock(rel, 0, HASH_EXCLUSIVE);
     486                 : 
     487                 :         /* Write-lock the meta page */
     488             250 :         _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
     489                 : 
     490             250 :         _hash_checkpage(rel, metabuf, LH_META_PAGE);
     491             250 :         metap = (HashMetaPage) BufferGetPage(metabuf);
     492                 : 
     493                 :         /*
     494                 :          * Check to see if split is still needed; someone else might have already
     495                 :          * done one while we waited for the lock.
     496                 :          *
     497                 :          * Make sure this stays in sync with _hash_doinsert()
     498                 :          */
     499             250 :         if (metap->hashm_ntuples <=
     500                 :                 (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1))
     501               0 :                 goto fail;
     502                 : 
     503                 :         /*
     504                 :          * Can't split anymore if maxbucket has reached its maximum possible
     505                 :          * value.
     506                 :          *
     507                 :          * Ideally we'd allow bucket numbers up to UINT_MAX-1 (no higher because
     508                 :          * the calculation maxbucket+1 mustn't overflow).  Currently we restrict
     509                 :          * to half that because of overflow looping in _hash_log2() and
     510                 :          * insufficient space in hashm_spares[].  It's moot anyway because an
     511                 :          * index with 2^32 buckets would certainly overflow BlockNumber and hence
     512                 :          * _hash_alloc_buckets() would fail, but if we supported buckets smaller
     513                 :          * than a disk block then this would be an independent constraint.
     514                 :          */
     515             250 :         if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
     516               0 :                 goto fail;
     517                 : 
     518                 :         /*
     519                 :          * Determine which bucket is to be split, and attempt to lock the old
     520                 :          * bucket.      If we can't get the lock, give up.
     521                 :          *
     522                 :          * The lock protects us against other backends, but not against our own
     523                 :          * backend.  Must check for active scans separately.
     524                 :          */
     525             250 :         new_bucket = metap->hashm_maxbucket + 1;
     526                 : 
     527             250 :         old_bucket = (new_bucket & metap->hashm_lowmask);
     528                 : 
     529             250 :         start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);
     530                 : 
     531             250 :         if (_hash_has_active_scan(rel, old_bucket))
     532               0 :                 goto fail;
     533                 : 
     534             250 :         if (!_hash_try_getlock(rel, start_oblkno, HASH_EXCLUSIVE))
     535               0 :                 goto fail;
     536                 : 
     537                 :         /*
     538                 :          * Likewise lock the new bucket (should never fail).
     539                 :          *
     540                 :          * Note: it is safe to compute the new bucket's blkno here, even though we
     541                 :          * may still need to update the BUCKET_TO_BLKNO mapping.  This is because
     542                 :          * the current value of hashm_spares[hashm_ovflpoint] correctly shows
     543                 :          * where we are going to put a new splitpoint's worth of buckets.
     544                 :          */
     545             250 :         start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket);
     546                 : 
     547             250 :         if (_hash_has_active_scan(rel, new_bucket))
     548               0 :                 elog(ERROR, "scan in progress on supposedly new bucket");
     549                 : 
     550             250 :         if (!_hash_try_getlock(rel, start_nblkno, HASH_EXCLUSIVE))
     551               0 :                 elog(ERROR, "could not get lock on supposedly new bucket");
     552                 : 
     553                 :         /*
     554                 :          * If the split point is increasing (hashm_maxbucket's log base 2
     555                 :          * increases), we need to allocate a new batch of bucket pages.
     556                 :          */
     557             250 :         spare_ndx = _hash_log2(new_bucket + 1);
     558             250 :         if (spare_ndx > metap->hashm_ovflpoint)
     559                 :         {
     560                 :                 Assert(spare_ndx == metap->hashm_ovflpoint + 1);
     561                 : 
     562                 :                 /*
     563                 :                  * The number of buckets in the new splitpoint is equal to the total
     564                 :                  * number already in existence, i.e. new_bucket.  Currently this maps
     565                 :                  * one-to-one to blocks required, but someday we may need a more
     566                 :                  * complicated calculation here.
     567                 :                  */
     568              21 :                 if (!_hash_alloc_buckets(rel, start_nblkno, new_bucket))
     569                 :                 {
     570                 :                         /* can't split due to BlockNumber overflow */
     571               0 :                         _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
     572               0 :                         _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
     573               0 :                         goto fail;
     574                 :                 }
     575                 :         }
     576                 : 
     577                 :         /*
     578                 :          * Okay to proceed with split.  Update the metapage bucket mapping info.
     579                 :          *
     580                 :          * Since we are scribbling on the metapage data right in the shared
     581                 :          * buffer, any failure in this next little bit leaves us with a big
     582                 :          * problem: the metapage is effectively corrupt but could get written back
     583                 :          * to disk.  We don't really expect any failure, but just to be sure,
     584                 :          * establish a critical section.
     585                 :          */
     586             250 :         START_CRIT_SECTION();
     587                 : 
     588             250 :         metap->hashm_maxbucket = new_bucket;
     589                 : 
     590             250 :         if (new_bucket > metap->hashm_highmask)
     591                 :         {
     592                 :                 /* Starting a new doubling */
     593              17 :                 metap->hashm_lowmask = metap->hashm_highmask;
     594              17 :                 metap->hashm_highmask = new_bucket | metap->hashm_lowmask;
     595                 :         }
     596                 : 
     597                 :         /*
     598                 :          * If the split point is increasing (hashm_maxbucket's log base 2
     599                 :          * increases), we need to adjust the hashm_spares[] array and
     600                 :          * hashm_ovflpoint so that future overflow pages will be created beyond
     601                 :          * this new batch of bucket pages.
     602                 :          */
     603             250 :         if (spare_ndx > metap->hashm_ovflpoint)
     604                 :         {
     605              21 :                 metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];
     606              21 :                 metap->hashm_ovflpoint = spare_ndx;
     607                 :         }
     608                 : 
     609                 :         /* Done mucking with metapage */
     610             250 :         END_CRIT_SECTION();
     611                 : 
     612                 :         /*
     613                 :          * Copy bucket mapping info now; this saves re-accessing the meta page
     614                 :          * inside _hash_splitbucket's inner loop.  Note that once we drop the
     615                 :          * split lock, other splits could begin, so these values might be out of
     616                 :          * date before _hash_splitbucket finishes.      That's okay, since all it
     617                 :          * needs is to tell which of these two buckets to map hashkeys into.
     618                 :          */
     619             250 :         maxbucket = metap->hashm_maxbucket;
     620             250 :         highmask = metap->hashm_highmask;
     621             250 :         lowmask = metap->hashm_lowmask;
     622                 : 
     623                 :         /* Write out the metapage and drop lock, but keep pin */
     624             250 :         _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
     625                 : 
     626                 :         /* Release split lock; okay for other splits to occur now */
     627             250 :         _hash_droplock(rel, 0, HASH_EXCLUSIVE);
     628                 : 
     629                 :         /* Relocate records to the new bucket */
     630             250 :         _hash_splitbucket(rel, metabuf, old_bucket, new_bucket,
     631                 :                                           start_oblkno, start_nblkno,
     632                 :                                           maxbucket, highmask, lowmask);
     633                 : 
     634                 :         /* Release bucket locks, allowing others to access them */
     635             250 :         _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
     636             250 :         _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
     637                 : 
     638             250 :         return;
     639                 : 
      640                 :         /* Here if we decided not to split, or failed to acquire the old bucket lock */
     641               0 : fail:
     642                 : 
     643                 :         /* We didn't write the metapage, so just drop lock */
     644               0 :         _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
     645                 : 
     646                 :         /* Release split lock */
     647               0 :         _hash_droplock(rel, 0, HASH_EXCLUSIVE);
     648                 : }
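
A worked example of the mask arithmetic above: suppose the index has grown to
four buckets (hashm_maxbucket = 3, highmask = 3, lowmask = 1).  The next split
creates new_bucket = 4, which exceeds highmask, so a new doubling starts:

        old_bucket = 4 & 1 = 0              /* computed before the masks change */
        lowmask    = old highmask      = 3
        highmask   = new_bucket | lowmask = 7

The remaining splits of this doubling then pair new_bucket 5 with old bucket
5 & 3 = 1, 6 with 2, and 7 with 3.
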
     649                 : 
     650                 : 
     651                 : /*
     652                 :  * _hash_alloc_buckets -- allocate a new splitpoint's worth of bucket pages
     653                 :  *
     654                 :  * This does not need to initialize the new bucket pages; we'll do that as
     655                 :  * each one is used by _hash_expandtable().  But we have to extend the logical
     656                 :  * EOF to the end of the splitpoint; this keeps smgr's idea of the EOF in
     657                 :  * sync with ours, so that we don't get complaints from smgr.
     658                 :  *
     659                 :  * We do this by writing a page of zeroes at the end of the splitpoint range.
     660                 :  * We expect that the filesystem will ensure that the intervening pages read
     661                 :  * as zeroes too.  On many filesystems this "hole" will not be allocated
     662                 :  * immediately, which means that the index file may end up more fragmented
     663                 :  * than if we forced it all to be allocated now; but since we don't scan
     664                 :  * hash indexes sequentially anyway, that probably doesn't matter.
     665                 :  *
     666                 :  * XXX It's annoying that this code is executed with the metapage lock held.
     667                 :  * We need to interlock against _hash_getovflpage() adding a new overflow page
     668                 :  * concurrently, but it'd likely be better to use LockRelationForExtension
     669                 :  * for the purpose.  OTOH, adding a splitpoint is a very infrequent operation,
     670                 :  * so it may not be worth worrying about.
     671                 :  *
     672                 :  * Returns TRUE if successful, or FALSE if allocation failed due to
     673                 :  * BlockNumber overflow.
     674                 :  */
     675                 : static bool
     676                 : _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
     677              21 : {
     678                 :         BlockNumber lastblock;
     679                 :         char            zerobuf[BLCKSZ];
     680                 : 
     681              21 :         lastblock = firstblock + nblocks - 1;
     682                 : 
     683                 :         /*
     684                 :          * Check for overflow in block number calculation; if so, we cannot extend
     685                 :          * the index anymore.
     686                 :          */
     687              21 :         if (lastblock < firstblock || lastblock == InvalidBlockNumber)
     688               0 :                 return false;
     689                 : 
     690              21 :         MemSet(zerobuf, 0, sizeof(zerobuf));
     691                 : 
     692              21 :         RelationOpenSmgr(rel);
     693              21 :         smgrextend(rel->rd_smgr, lastblock, zerobuf, rel->rd_istemp);
     694                 : 
     695              21 :         return true;
     696                 : }
     697                 : 
     698                 : 
     699                 : /*
     700                 :  * _hash_splitbucket -- split 'obucket' into 'obucket' and 'nbucket'
     701                 :  *
     702                 :  * We are splitting a bucket that consists of a base bucket page and zero
     703                 :  * or more overflow (bucket chain) pages.  We must relocate tuples that
     704                 :  * belong in the new bucket, and compress out any free space in the old
     705                 :  * bucket.
     706                 :  *
     707                 :  * The caller must hold exclusive locks on both buckets to ensure that
     708                 :  * no one else is trying to access them (see README).
     709                 :  *
     710                 :  * The caller must hold a pin, but no lock, on the metapage buffer.
     711                 :  * The buffer is returned in the same state.  (The metapage is only
     712                 :  * touched if it becomes necessary to add or remove overflow pages.)
     713                 :  */
     714                 : static void
     715                 : _hash_splitbucket(Relation rel,
     716                 :                                   Buffer metabuf,
     717                 :                                   Bucket obucket,
     718                 :                                   Bucket nbucket,
     719                 :                                   BlockNumber start_oblkno,
     720                 :                                   BlockNumber start_nblkno,
     721                 :                                   uint32 maxbucket,
     722                 :                                   uint32 highmask,
     723                 :                                   uint32 lowmask)
     724             250 : {
     725                 :         Bucket          bucket;
     726                 :         Buffer          obuf;
     727                 :         Buffer          nbuf;
     728                 :         BlockNumber oblkno;
     729                 :         BlockNumber nblkno;
     730                 :         bool            null;
     731                 :         Datum           datum;
     732                 :         HashPageOpaque oopaque;
     733                 :         HashPageOpaque nopaque;
     734                 :         IndexTuple      itup;
     735                 :         Size            itemsz;
     736                 :         OffsetNumber ooffnum;
     737                 :         OffsetNumber noffnum;
     738                 :         OffsetNumber omaxoffnum;
     739                 :         Page            opage;
     740                 :         Page            npage;
     741             250 :         TupleDesc       itupdesc = RelationGetDescr(rel);
     742                 : 
     743                 :         /*
     744                 :          * It should be okay to simultaneously write-lock pages from each bucket,
     745                 :          * since no one else can be trying to acquire buffer lock on pages of
     746                 :          * either bucket.
     747                 :          */
     748             250 :         oblkno = start_oblkno;
     749             250 :         obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_BUCKET_PAGE);
     750             250 :         opage = BufferGetPage(obuf);
     751             250 :         oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
     752                 : 
     753             250 :         nblkno = start_nblkno;
     754             250 :         nbuf = _hash_getnewbuf(rel, nblkno);
     755             250 :         npage = BufferGetPage(nbuf);
     756                 : 
     757                 :         /* initialize the new bucket's primary page */
     758             250 :         nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
     759             250 :         nopaque->hasho_prevblkno = InvalidBlockNumber;
     760             250 :         nopaque->hasho_nextblkno = InvalidBlockNumber;
     761             250 :         nopaque->hasho_bucket = nbucket;
     762             250 :         nopaque->hasho_flag = LH_BUCKET_PAGE;
     763             250 :         nopaque->hasho_page_id = HASHO_PAGE_ID;
     764                 : 
     765                 :         /*
     766                 :          * Partition the tuples in the old bucket between the old bucket and the
     767                 :          * new bucket, advancing along the old bucket's overflow bucket chain and
     768                 :          * adding overflow pages to the new bucket as needed.
     769                 :          */
     770             250 :         ooffnum = FirstOffsetNumber;
     771             250 :         omaxoffnum = PageGetMaxOffsetNumber(opage);
     772                 :         for (;;)
     773                 :         {
     774                 :                 /*
     775                 :                  * at each iteration through this loop, each of these variables should
     776                 :                  * be up-to-date: obuf opage oopaque ooffnum omaxoffnum
     777                 :                  */
     778                 : 
     779                 :                 /* check if we're at the end of the page */
     780           55376 :                 if (ooffnum > omaxoffnum)
     781                 :                 {
     782                 :                         /* at end of page, but check for an(other) overflow page */
     783             360 :                         oblkno = oopaque->hasho_nextblkno;
     784             360 :                         if (!BlockNumberIsValid(oblkno))
     785             250 :                                 break;
     786                 : 
     787                 :                         /*
     788                 :                          * we ran out of tuples on this particular page, but we have more
     789                 :                          * overflow pages; advance to next page.
     790                 :                          */
     791             110 :                         _hash_wrtbuf(rel, obuf);
     792                 : 
     793             110 :                         obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
     794             110 :                         opage = BufferGetPage(obuf);
     795             110 :                         oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
     796             110 :                         ooffnum = FirstOffsetNumber;
     797             110 :                         omaxoffnum = PageGetMaxOffsetNumber(opage);
     798             110 :                         continue;
     799                 :                 }
     800                 : 
     801                 :                 /*
     802                 :                  * Re-hash the tuple to determine which bucket it now belongs in.
     803                 :                  *
     804                 :                  * It is annoying to call the hash function while holding locks, but
     805                 :                  * releasing and relocking the page for each tuple is unappealing too.
     806                 :                  */
     807           55016 :                 itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
     808           55016 :                 datum = index_getattr(itup, 1, itupdesc, &null);
     809                 :                 Assert(!null);
     810                 : 
     811           55016 :                 bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum),
     812                 :                                                                           maxbucket, highmask, lowmask);
     813                 : 
     814           55016 :                 if (bucket == nbucket)
     815                 :                 {
     816                 :                         /*
     817                 :                          * insert the tuple into the new bucket.  if it doesn't fit on the
     818                 :                          * current page in the new bucket, we must allocate a new overflow
     819                 :                          * page and place the tuple on that page instead.
     820                 :                          */
     821           27502 :                         itemsz = IndexTupleDSize(*itup);
     822           27502 :                         itemsz = MAXALIGN(itemsz);
     823                 : 
     824           27502 :                         if (PageGetFreeSpace(npage) < itemsz)
     825                 :                         {
     826                 :                                 /* write out nbuf and drop lock, but keep pin */
     827               0 :                                 _hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);
     828                 :                                 /* chain to a new overflow page */
     829               0 :                                 nbuf = _hash_addovflpage(rel, metabuf, nbuf);
     830               0 :                                 npage = BufferGetPage(nbuf);
     831                 :                                 /* we don't need nopaque within the loop */
     832                 :                         }
     833                 : 
     834           27502 :                         noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));
     835           27502 :                         if (PageAddItem(npage, (Item) itup, itemsz, noffnum, false, false)
     836                 :                                 == InvalidOffsetNumber)
     837               0 :                                 elog(ERROR, "failed to add index item to \"%s\"",
     838                 :                                          RelationGetRelationName(rel));
     839                 : 
     840                 :                         /*
     841                 :                          * now delete the tuple from the old bucket.  after this section
     842                 :                          * of code, 'ooffnum' will actually point to the ItemId to which
     843                 :                          * we would point if we had advanced it before the deletion
     844                 :                          * (PageIndexTupleDelete repacks the ItemId array).  this also
     845                 :                          * means that 'omaxoffnum' is exactly one less than it used to be,
     846                 :                          * so we really can just decrement it instead of calling
     847                 :                          * PageGetMaxOffsetNumber.
     848                 :                          */
     849           27502 :                         PageIndexTupleDelete(opage, ooffnum);
     850           27502 :                         omaxoffnum = OffsetNumberPrev(omaxoffnum);
     851                 :                 }
     852                 :                 else
     853                 :                 {
     854                 :                         /*
     855                 :                          * the tuple stays on this page.  we didn't move anything, so we
     856                 :                          * didn't delete anything and therefore we don't have to change
     857                 :                          * 'omaxoffnum'.
     858                 :                          */
     859                 :                         Assert(bucket == obucket);
     860           27514 :                         ooffnum = OffsetNumberNext(ooffnum);
     861                 :                 }
     862                 :         }
     863                 : 
     864                 :         /*
     865                 :          * We're at the end of the old bucket chain, so we're done partitioning
     866                 :          * the tuples.  Before quitting, call _hash_squeezebucket to ensure the
     867                 :          * tuples remaining in the old bucket (including the overflow pages) are
     868                 :          * packed as tightly as possible.  The new bucket is already tight.
     869                 :          */
     870             250 :         _hash_wrtbuf(rel, obuf);
     871             250 :         _hash_wrtbuf(rel, nbuf);
     872                 : 
     873             250 :         _hash_squeezebucket(rel, obucket, start_oblkno, NULL);
     874             250 : }
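
The rehash step above relies on _hash_hashkey2bucket() (defined in hashutil.c).
A sketch of its logic, which is what guarantees each tuple lands in either
obucket or nbucket and nowhere else:

        bucket = hashkey & highmask;
        if (bucket > maxbucket)
            bucket = bucket & lowmask;  /* bucket not split yet; use smaller table */
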

Generated by: LTP GCOV extension version 1.5