LTP GCOV extension - code coverage report
Current view: directory - access/heap - heapam.c
Test: unnamed
Date: 2008-07-03 Instrumented lines: 1493
Code covered: 53.6 % Executed lines: 800
Legend: not executed executed

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * heapam.c
       4                 :  *        heap access method code
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *        $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.248 2008/01/14 01:39:09 tgl Exp $
      12                 :  *
      13                 :  *
      14                 :  * INTERFACE ROUTINES
      15                 :  *              relation_open   - open any relation by relation OID
      16                 :  *              relation_openrv - open any relation specified by a RangeVar
      17                 :  *              relation_close  - close any relation
      18                 :  *              heap_open               - open a heap relation by relation OID
      19                 :  *              heap_openrv             - open a heap relation specified by a RangeVar
      20                 :  *              heap_close              - (now just a macro for relation_close)
      21                 :  *              heap_beginscan  - begin relation scan
      22                 :  *              heap_rescan             - restart a relation scan
      23                 :  *              heap_endscan    - end relation scan
      24                 :  *              heap_getnext    - retrieve next tuple in scan
      25                 :  *              heap_fetch              - retrieve tuple with given tid
      26                 :  *              heap_insert             - insert tuple into a relation
      27                 :  *              heap_delete             - delete a tuple from a relation
      28                 :  *              heap_update             - replace a tuple in a relation with another tuple
      29                 :  *              heap_markpos    - mark scan position
      30                 :  *              heap_restrpos   - restore position to marked location
      31                 :  *              heap_sync               - sync heap, for when no WAL has been written
      32                 :  *
      33                 :  * NOTES
      34                 :  *        This file contains the heap_ routines which implement
      35                 :  *        the POSTGRES heap access method used for all POSTGRES
      36                 :  *        relations.
      37                 :  *
      38                 :  *-------------------------------------------------------------------------
      39                 :  */
      40                 : #include "postgres.h"
      41                 : 
      42                 : #include "access/heapam.h"
      43                 : #include "access/hio.h"
      44                 : #include "access/multixact.h"
      45                 : #include "access/transam.h"
      46                 : #include "access/tuptoaster.h"
      47                 : #include "access/valid.h"
      48                 : #include "access/xact.h"
      49                 : #include "catalog/catalog.h"
      50                 : #include "catalog/namespace.h"
      51                 : #include "miscadmin.h"
      52                 : #include "pgstat.h"
      53                 : #include "storage/procarray.h"
      54                 : #include "storage/smgr.h"
      55                 : #include "utils/datum.h"
      56                 : #include "utils/inval.h"
      57                 : #include "utils/lsyscache.h"
      58                 : #include "utils/relcache.h"
      59                 : #include "utils/syscache.h"
      60                 : 
      61                 : 
      62                 : /* GUC variable */
      63                 : bool    synchronize_seqscans = true;
      64                 : 
      65                 : 
      66                 : static HeapScanDesc heap_beginscan_internal(Relation relation,
      67                 :                                                 Snapshot snapshot,
      68                 :                                                 int nkeys, ScanKey key,
      69                 :                                                 bool allow_strat, bool allow_sync,
      70                 :                                                 bool is_bitmapscan);
      71                 : static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
      72                 :                    ItemPointerData from, Buffer newbuf, HeapTuple newtup, bool move);
      73                 : static bool HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs,
      74                 :                                            HeapTuple oldtup, HeapTuple newtup);
      75                 : 
      76                 : 
      77                 : /* ----------------------------------------------------------------
      78                 :  *                                               heap support routines
      79                 :  * ----------------------------------------------------------------
      80                 :  */
      81                 : 
      82                 : /* ----------------
      83                 :  *              initscan - scan code common to heap_beginscan and heap_rescan
      84                 :  * ----------------
      85                 :  */
      86                 : static void
      87                 : initscan(HeapScanDesc scan, ScanKey key)
      88           26662 : {
      89                 :         bool            allow_strat;
      90                 :         bool            allow_sync;
      91                 : 
      92                 :         /*
      93                 :          * Determine the number of blocks we have to scan.
      94                 :          *
      95                 :          * It is sufficient to do this once at scan start, since any tuples added
      96                 :          * while the scan is in progress will be invisible to my snapshot anyway.
      97                 :          * (That is not true when using a non-MVCC snapshot.  However, we couldn't
      98                 :          * guarantee to return tuples added after scan start anyway, since they
      99                 :          * might go into pages we already scanned.      To guarantee consistent
     100                 :          * results for a non-MVCC snapshot, the caller must hold some higher-level
     101                 :          * lock that ensures the interesting tuple(s) won't change.)
     102                 :          */
     103           26662 :         scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
     104                 : 
     105                 :         /*
     106                 :          * If the table is large relative to NBuffers, use a bulk-read access
     107                 :          * strategy and enable synchronized scanning (see syncscan.c).  Although
     108                 :          * the thresholds for these features could be different, we make them the
     109                 :          * same so that there are only two behaviors to tune rather than four.
     110                 :          * (However, some callers need to be able to disable one or both of
     111                 :          * these behaviors, independently of the size of the table; also there
     112                 :          * is a GUC variable that can disable synchronized scanning.)
     113                 :          *
     114                 :          * During a rescan, don't make a new strategy object if we don't have to.
     115                 :          */
     116           26662 :         if (!scan->rs_rd->rd_istemp &&
     117                 :                 scan->rs_nblocks > NBuffers / 4)
     118                 :         {
     119               0 :                 allow_strat = scan->rs_allow_strat;
     120               0 :                 allow_sync = scan->rs_allow_sync;
     121                 :         }
     122                 :         else
     123           26662 :                 allow_strat = allow_sync = false;
     124                 : 
     125           26662 :         if (allow_strat)
     126                 :         {
     127               0 :                 if (scan->rs_strategy == NULL)
     128               0 :                         scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
     129                 :         }
     130                 :         else
     131                 :         {
     132           26662 :                 if (scan->rs_strategy != NULL)
     133               0 :                         FreeAccessStrategy(scan->rs_strategy);
     134           26662 :                 scan->rs_strategy = NULL;
     135                 :         }
     136                 : 
     137           26662 :         if (allow_sync && synchronize_seqscans)
     138                 :         {
     139               0 :                 scan->rs_syncscan = true;
     140               0 :                 scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks);
     141                 :         }
     142                 :         else
     143                 :         {
     144           26662 :                 scan->rs_syncscan = false;
     145           26662 :                 scan->rs_startblock = 0;
     146                 :         }
     147                 : 
     148           26662 :         scan->rs_inited = false;
     149           26662 :         scan->rs_ctup.t_data = NULL;
     150           26662 :         ItemPointerSetInvalid(&scan->rs_ctup.t_self);
     151           26662 :         scan->rs_cbuf = InvalidBuffer;
     152           26662 :         scan->rs_cblock = InvalidBlockNumber;
     153                 : 
     154                 :         /* we don't have a marked position... */
     155           26662 :         ItemPointerSetInvalid(&(scan->rs_mctid));
     156                 : 
     157                 :         /* page-at-a-time fields are always invalid when not rs_inited */
     158                 : 
     159                 :         /*
     160                 :          * copy the scan key, if appropriate
     161                 :          */
     162           26662 :         if (key != NULL)
     163            5898 :                 memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData));
     164                 : 
     165                 :         /*
     166                 :          * Currently, we don't have a stats counter for bitmap heap scans (but the
     167                 :          * underlying bitmap index scans will be counted).
     168                 :          */
     169           26662 :         if (!scan->rs_bitmapscan)
     170           26308 :                 pgstat_count_heap_scan(scan->rs_rd);
     171           26662 : }
     172                 : 
     173                 : /*
     174                 :  * heapgetpage - subroutine for heapgettup()
     175                 :  *
     176                 :  * This routine reads and pins the specified page of the relation.
     177                 :  * In page-at-a-time mode it performs additional work, namely determining
     178                 :  * which tuples on the page are visible.
     179                 :  */
     180                 : static void
     181                 : heapgetpage(HeapScanDesc scan, BlockNumber page)
     182          183680 : {
     183                 :         Buffer          buffer;
     184                 :         Snapshot        snapshot;
     185                 :         Page            dp;
     186                 :         int                     lines;
     187                 :         int                     ntup;
     188                 :         OffsetNumber lineoff;
     189                 :         ItemId          lpp;
     190                 : 
     191                 :         Assert(page < scan->rs_nblocks);
     192                 : 
     193                 :         /* release previous scan buffer, if any */
     194          183680 :         if (BufferIsValid(scan->rs_cbuf))
     195                 :         {
     196          161003 :                 ReleaseBuffer(scan->rs_cbuf);
     197          161003 :                 scan->rs_cbuf = InvalidBuffer;
     198                 :         }
     199                 : 
     200                 :         /* read page using selected strategy */
     201          183680 :         scan->rs_cbuf = ReadBufferWithStrategy(scan->rs_rd,
     202                 :                                                                                    page,
     203                 :                                                                                    scan->rs_strategy);
     204          183680 :         scan->rs_cblock = page;
     205                 : 
     206          183680 :         if (!scan->rs_pageatatime)
     207          149235 :                 return;
     208                 : 
     209           34445 :         buffer = scan->rs_cbuf;
     210           34445 :         snapshot = scan->rs_snapshot;
     211                 : 
     212                 :         /*
     213                 :          * Prune and repair fragmentation for the whole page, if possible.
     214                 :          */
     215           34445 :         heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin);
     216                 : 
     217                 :         /*
     218                 :          * We must hold share lock on the buffer content while examining tuple
     219                 :          * visibility.  Afterwards, however, the tuples we have found to be
     220                 :          * visible are guaranteed good as long as we hold the buffer pin.
     221                 :          */
     222           34445 :         LockBuffer(buffer, BUFFER_LOCK_SHARE);
     223                 : 
     224           34445 :         dp = (Page) BufferGetPage(buffer);
     225           34445 :         lines = PageGetMaxOffsetNumber(dp);
     226           34445 :         ntup = 0;
     227                 : 
     228           34445 :         for (lineoff = FirstOffsetNumber, lpp = PageGetItemId(dp, lineoff);
     229         1092949 :                  lineoff <= lines;
     230         1024059 :                  lineoff++, lpp++)
     231                 :         {
     232         1024059 :                 if (ItemIdIsNormal(lpp))
     233                 :                 {
     234                 :                         HeapTupleData loctup;
     235                 :                         bool            valid;
     236                 : 
     237          920976 :                         loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
     238          920976 :                         loctup.t_len = ItemIdGetLength(lpp);
     239          920976 :                         ItemPointerSet(&(loctup.t_self), page, lineoff);
     240                 : 
     241          920976 :                         valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
     242          920976 :                         if (valid)
     243          906232 :                                 scan->rs_vistuples[ntup++] = lineoff;
     244                 :                 }
     245                 :         }
     246                 : 
     247           34445 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     248                 : 
     249                 :         Assert(ntup <= MaxHeapTuplesPerPage);
     250           34445 :         scan->rs_ntuples = ntup;
     251                 : }
     252                 : 
     253                 : /* ----------------
     254                 :  *              heapgettup - fetch next heap tuple
     255                 :  *
     256                 :  *              Initialize the scan if not already done; then advance to the next
     257                 :  *              tuple as indicated by "dir"; return the next tuple in scan->rs_ctup,
     258                 :  *              or set scan->rs_ctup.t_data = NULL if no more tuples.
     259                 :  *
     260                 :  * dir == NoMovementScanDirection means "re-fetch the tuple indicated
     261                 :  * by scan->rs_ctup".
     262                 :  *
     263                 :  * Note: the reason nkeys/key are passed separately, even though they are
     264                 :  * kept in the scan descriptor, is that the caller may not want us to check
     265                 :  * the scankeys.
     266                 :  *
     267                 :  * Note: when we fall off the end of the scan in either direction, we
     268                 :  * reset rs_inited.  This means that a further request with the same
     269                 :  * scan direction will restart the scan, which is a bit odd, but a
     270                 :  * request with the opposite scan direction will start a fresh scan
     271                 :  * in the proper direction.  The latter is required behavior for cursors,
     272                 :  * while the former case is generally undefined behavior in Postgres
     273                 :  * so we don't care too much.
     274                 :  * ----------------
     275                 :  */
     276                 : static void
     277                 : heapgettup(HeapScanDesc scan,
     278                 :                    ScanDirection dir,
     279                 :                    int nkeys,
     280                 :                    ScanKey key)
     281          254224 : {
     282          254224 :         HeapTuple       tuple = &(scan->rs_ctup);
     283          254224 :         Snapshot        snapshot = scan->rs_snapshot;
     284          254224 :         bool            backward = ScanDirectionIsBackward(dir);
     285                 :         BlockNumber page;
     286                 :         bool            finished;
     287                 :         Page            dp;
     288                 :         int                     lines;
     289                 :         OffsetNumber lineoff;
     290                 :         int                     linesleft;
     291                 :         ItemId          lpp;
     292                 : 
     293                 :         /*
     294                 :          * calculate next starting lineoff, given scan direction
     295                 :          */
     296          254224 :         if (ScanDirectionIsForward(dir))
     297                 :         {
     298          254224 :                 if (!scan->rs_inited)
     299                 :                 {
     300                 :                         /*
     301                 :                          * return null immediately if relation is empty
     302                 :                          */
     303            6769 :                         if (scan->rs_nblocks == 0)
     304                 :                         {
     305                 :                                 Assert(!BufferIsValid(scan->rs_cbuf));
     306             481 :                                 tuple->t_data = NULL;
     307             481 :                                 return;
     308                 :                         }
     309            6288 :                         page = scan->rs_startblock; /* first page */
     310            6288 :                         heapgetpage(scan, page);
     311            6288 :                         lineoff = FirstOffsetNumber;            /* first offnum */
     312            6288 :                         scan->rs_inited = true;
     313                 :                 }
     314                 :                 else
     315                 :                 {
     316                 :                         /* continue from previously returned page/tuple */
     317          247455 :                         page = scan->rs_cblock;              /* current page */
     318          247455 :                         lineoff =                       /* next offnum */
     319                 :                                 OffsetNumberNext(ItemPointerGetOffsetNumber(&(tuple->t_self)));
     320                 :                 }
     321                 : 
     322          253743 :                 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
     323                 : 
     324          253743 :                 dp = (Page) BufferGetPage(scan->rs_cbuf);
     325          253743 :                 lines = PageGetMaxOffsetNumber(dp);
     326                 :                 /* page and lineoff now reference the physically next tid */
     327                 : 
     328          253743 :                 linesleft = lines - lineoff + 1;
     329                 :         }
     330               0 :         else if (backward)
     331                 :         {
     332               0 :                 if (!scan->rs_inited)
     333                 :                 {
     334                 :                         /*
     335                 :                          * return null immediately if relation is empty
     336                 :                          */
     337               0 :                         if (scan->rs_nblocks == 0)
     338                 :                         {
     339                 :                                 Assert(!BufferIsValid(scan->rs_cbuf));
     340               0 :                                 tuple->t_data = NULL;
     341               0 :                                 return;
     342                 :                         }
     343                 : 
     344                 :                         /*
     345                 :                          * Disable reporting to syncscan logic in a backwards scan; it's
     346                 :                          * not very likely anyone else is doing the same thing at the same
     347                 :                          * time, and much more likely that we'll just bollix things for
     348                 :                          * forward scanners.
     349                 :                          */
     350               0 :                         scan->rs_syncscan = false;
     351                 :                         /* start from last page of the scan */
     352               0 :                         if (scan->rs_startblock > 0)
     353               0 :                                 page = scan->rs_startblock - 1;
     354                 :                         else
     355               0 :                                 page = scan->rs_nblocks - 1;
     356               0 :                         heapgetpage(scan, page);
     357                 :                 }
     358                 :                 else
     359                 :                 {
     360                 :                         /* continue from previously returned page/tuple */
     361               0 :                         page = scan->rs_cblock;              /* current page */
     362                 :                 }
     363                 : 
     364               0 :                 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
     365                 : 
     366               0 :                 dp = (Page) BufferGetPage(scan->rs_cbuf);
     367               0 :                 lines = PageGetMaxOffsetNumber(dp);
     368                 : 
     369               0 :                 if (!scan->rs_inited)
     370                 :                 {
     371               0 :                         lineoff = lines;        /* final offnum */
     372               0 :                         scan->rs_inited = true;
     373                 :                 }
     374                 :                 else
     375                 :                 {
     376               0 :                         lineoff =                       /* previous offnum */
     377                 :                                 OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self)));
     378                 :                 }
     379                 :                 /* page and lineoff now reference the physically previous tid */
     380                 : 
     381               0 :                 linesleft = lineoff;
     382                 :         }
     383                 :         else
     384                 :         {
     385                 :                 /*
     386                 :                  * ``no movement'' scan direction: refetch prior tuple
     387                 :                  */
     388               0 :                 if (!scan->rs_inited)
     389                 :                 {
     390                 :                         Assert(!BufferIsValid(scan->rs_cbuf));
     391               0 :                         tuple->t_data = NULL;
     392               0 :                         return;
     393                 :                 }
     394                 : 
     395               0 :                 page = ItemPointerGetBlockNumber(&(tuple->t_self));
     396               0 :                 if (page != scan->rs_cblock)
     397               0 :                         heapgetpage(scan, page);
     398                 : 
     399                 :                 /* Since the tuple was previously fetched, needn't lock page here */
     400               0 :                 dp = (Page) BufferGetPage(scan->rs_cbuf);
     401               0 :                 lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
     402               0 :                 lpp = PageGetItemId(dp, lineoff);
     403                 :                 Assert(ItemIdIsNormal(lpp));
     404                 : 
     405               0 :                 tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
     406               0 :                 tuple->t_len = ItemIdGetLength(lpp);
     407                 : 
     408               0 :                 return;
     409                 :         }
     410                 : 
     411                 :         /*
     412                 :          * advance the scan until we find a qualifying tuple or run out of stuff
     413                 :          * to scan
     414                 :          */
     415          253743 :         lpp = PageGetItemId(dp, lineoff);
     416                 :         for (;;)
     417                 :         {
     418         6833004 :                 while (linesleft > 0)
     419                 :                 {
     420         6686787 :                         if (ItemIdIsNormal(lpp))
     421                 :                         {
     422                 :                                 bool            valid;
     423                 : 
     424         6660318 :                                 tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
     425         6660318 :                                 tuple->t_len = ItemIdGetLength(lpp);
     426         6660318 :                                 ItemPointerSet(&(tuple->t_self), page, lineoff);
     427                 : 
     428                 :                                 /*
     429                 :                                  * if current tuple qualifies, return it.
     430                 :                                  */
     431         6660318 :                                 valid = HeapTupleSatisfiesVisibility(tuple,
     432                 :                                                                                                          snapshot,
     433                 :                                                                                                          scan->rs_cbuf);
     434                 : 
     435         6660318 :                                 if (valid && key != NULL)
     436         6412102 :                                         HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd),
     437                 :                                                                 nkeys, key, valid);
     438                 : 
     439         6660318 :                                 if (valid)
     440                 :                                 {
     441          250473 :                                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
     442          250473 :                                         return;
     443                 :                                 }
     444                 :                         }
     445                 : 
     446                 :                         /*
     447                 :                          * otherwise move to the next item on the page
     448                 :                          */
     449         6436314 :                         --linesleft;
     450         6436314 :                         if (backward)
     451                 :                         {
     452               0 :                                 --lpp;                  /* move back in this page's ItemId array */
     453               0 :                                 --lineoff;
     454                 :                         }
     455                 :                         else
     456                 :                         {
     457         6436314 :                                 ++lpp;                  /* move forward in this page's ItemId array */
     458         6436314 :                                 ++lineoff;
     459                 :                         }
     460                 :                 }
     461                 : 
     462                 :                 /*
     463                 :                  * if we get here, it means we've exhausted the items on this page and
     464                 :                  * it's time to move to the next.
     465                 :                  */
     466          146217 :                 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
     467                 : 
     468                 :                 /*
     469                 :                  * advance to next/prior page and detect end of scan
     470                 :                  */
     471          146217 :                 if (backward)
     472                 :                 {
     473               0 :                         finished = (page == scan->rs_startblock);
     474               0 :                         if (page == 0)
     475               0 :                                 page = scan->rs_nblocks;
     476               0 :                         page--;
     477                 :                 }
     478                 :                 else
     479                 :                 {
     480          146217 :                         page++;
     481          146217 :                         if (page >= scan->rs_nblocks)
     482            3270 :                                 page = 0;
     483          146217 :                         finished = (page == scan->rs_startblock);
     484                 : 
     485                 :                         /*
     486                 :                          * Report our new scan position for synchronization purposes. We
     487                 :                          * don't do that when moving backwards, however. That would just
     488                 :                          * mess up any other forward-moving scanners.
     489                 :                          *
     490                 :                          * Note: we do this before checking for end of scan so that the
     491                 :                          * final state of the position hint is back at the start of the
     492                 :                          * rel.  That's not strictly necessary, but otherwise when you run
     493                 :                          * the same query multiple times the starting position would shift
     494                 :                          * a little bit backwards on every invocation, which is confusing.
     495                 :                          * We don't guarantee any specific ordering in general, though.
     496                 :                          */
     497          146217 :                         if (scan->rs_syncscan)
     498               0 :                                 ss_report_location(scan->rs_rd, page);
     499                 :                 }
     500                 : 
     501                 :                 /*
     502                 :                  * return NULL if we've exhausted all the pages
     503                 :                  */
     504          146217 :                 if (finished)
     505                 :                 {
     506            3270 :                         if (BufferIsValid(scan->rs_cbuf))
     507            3270 :                                 ReleaseBuffer(scan->rs_cbuf);
     508            3270 :                         scan->rs_cbuf = InvalidBuffer;
     509            3270 :                         scan->rs_cblock = InvalidBlockNumber;
     510            3270 :                         tuple->t_data = NULL;
     511            3270 :                         scan->rs_inited = false;
     512            3270 :                         return;
     513                 :                 }
     514                 : 
     515          142947 :                 heapgetpage(scan, page);
     516                 : 
     517          142947 :                 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
     518                 : 
     519          142947 :                 dp = (Page) BufferGetPage(scan->rs_cbuf);
     520          142947 :                 lines = PageGetMaxOffsetNumber((Page) dp);
     521          142947 :                 linesleft = lines;
     522          142947 :                 if (backward)
     523                 :                 {
     524               0 :                         lineoff = lines;
     525               0 :                         lpp = PageGetItemId(dp, lines);
     526                 :                 }
     527                 :                 else
     528                 :                 {
     529          142947 :                         lineoff = FirstOffsetNumber;
     530          142947 :                         lpp = PageGetItemId(dp, FirstOffsetNumber);
     531                 :                 }
     532                 :         }
     533                 : }
     534                 : 
     535                 : /* ----------------
     536                 :  *              heapgettup_pagemode - fetch next heap tuple in page-at-a-time mode
     537                 :  *
     538                 :  *              Same API as heapgettup, but used in page-at-a-time mode
     539                 :  *
     540                 :  * The internal logic is much the same as heapgettup's too, but there are some
     541                 :  * differences: we do not take the buffer content lock (that only needs to
     542                 :  * happen inside heapgetpage), and we iterate through just the tuples listed
     543                 :  * in rs_vistuples[] rather than all tuples on the page.  Notice that
     544                 :  * lineindex is 0-based, where the corresponding loop variable lineoff in
     545                 :  * heapgettup is 1-based.
     546                 :  * ----------------
     547                 :  */
     548                 : static void
     549                 : heapgettup_pagemode(HeapScanDesc scan,
     550                 :                                         ScanDirection dir,
     551                 :                                         int nkeys,
     552                 :                                         ScanKey key)
     553          840201 : {
     554          840201 :         HeapTuple       tuple = &(scan->rs_ctup);
     555          840201 :         bool            backward = ScanDirectionIsBackward(dir);
     556                 :         BlockNumber page;
     557                 :         bool            finished;
     558                 :         Page            dp;
     559                 :         int                     lines;
     560                 :         int                     lineindex;
     561                 :         OffsetNumber lineoff;
     562                 :         int                     linesleft;
     563                 :         ItemId          lpp;
     564                 : 
     565                 :         /*
     566                 :          * calculate next starting lineindex, given scan direction
     567                 :          */
     568          840201 :         if (ScanDirectionIsForward(dir))
     569                 :         {
     570          840109 :                 if (!scan->rs_inited)
     571                 :                 {
     572                 :                         /*
     573                 :                          * return null immediately if relation is empty
     574                 :                          */
     575           16470 :                         if (scan->rs_nblocks == 0)
     576                 :                         {
     577                 :                                 Assert(!BufferIsValid(scan->rs_cbuf));
     578              84 :                                 tuple->t_data = NULL;
     579              84 :                                 return;
     580                 :                         }
     581           16386 :                         page = scan->rs_startblock; /* first page */
     582           16386 :                         heapgetpage(scan, page);
     583           16386 :                         lineindex = 0;
     584           16386 :                         scan->rs_inited = true;
     585                 :                 }
     586                 :                 else
     587                 :                 {
     588                 :                         /* continue from previously returned page/tuple */
     589          823639 :                         page = scan->rs_cblock;              /* current page */
     590          823639 :                         lineindex = scan->rs_cindex + 1;
     591                 :                 }
     592                 : 
     593          840025 :                 dp = (Page) BufferGetPage(scan->rs_cbuf);
     594          840025 :                 lines = scan->rs_ntuples;
     595                 :                 /* page and lineindex now reference the next visible tid */
     596                 : 
     597          840025 :                 linesleft = lines - lineindex;
     598                 :         }
     599              92 :         else if (backward)
     600                 :         {
     601              92 :                 if (!scan->rs_inited)
     602                 :                 {
     603                 :                         /*
     604                 :                          * return null immediately if relation is empty
     605                 :                          */
     606               3 :                         if (scan->rs_nblocks == 0)
     607                 :                         {
     608                 :                                 Assert(!BufferIsValid(scan->rs_cbuf));
     609               0 :                                 tuple->t_data = NULL;
     610               0 :                                 return;
     611                 :                         }
     612                 : 
     613                 :                         /*
     614                 :                          * Disable reporting to syncscan logic in a backwards scan; it's
     615                 :                          * not very likely anyone else is doing the same thing at the same
     616                 :                          * time, and much more likely that we'll just bollix things for
     617                 :                          * forward scanners.
     618                 :                          */
     619               3 :                         scan->rs_syncscan = false;
     620                 :                         /* start from last page of the scan */
     621               3 :                         if (scan->rs_startblock > 0)
     622               0 :                                 page = scan->rs_startblock - 1;
     623                 :                         else
     624               3 :                                 page = scan->rs_nblocks - 1;
     625               3 :                         heapgetpage(scan, page);
     626                 :                 }
     627                 :                 else
     628                 :                 {
     629                 :                         /* continue from previously returned page/tuple */
     630              89 :                         page = scan->rs_cblock;              /* current page */
     631                 :                 }
     632                 : 
     633              92 :                 dp = (Page) BufferGetPage(scan->rs_cbuf);
     634              92 :                 lines = scan->rs_ntuples;
     635                 : 
     636              92 :                 if (!scan->rs_inited)
     637                 :                 {
     638               3 :                         lineindex = lines - 1;
     639               3 :                         scan->rs_inited = true;
     640                 :                 }
     641                 :                 else
     642                 :                 {
     643              89 :                         lineindex = scan->rs_cindex - 1;
     644                 :                 }
     645                 :                 /* page and lineindex now reference the previous visible tid */
     646                 : 
     647              92 :                 linesleft = lineindex + 1;
     648                 :         }
     649                 :         else
     650                 :         {
     651                 :                 /*
     652                 :                  * ``no movement'' scan direction: refetch prior tuple
     653                 :                  */
     654               0 :                 if (!scan->rs_inited)
     655                 :                 {
     656                 :                         Assert(!BufferIsValid(scan->rs_cbuf));
     657               0 :                         tuple->t_data = NULL;
     658               0 :                         return;
     659                 :                 }
     660                 : 
     661               0 :                 page = ItemPointerGetBlockNumber(&(tuple->t_self));
     662               0 :                 if (page != scan->rs_cblock)
     663               0 :                         heapgetpage(scan, page);
     664                 : 
     665                 :                 /* Since the tuple was previously fetched, needn't lock page here */
     666               0 :                 dp = (Page) BufferGetPage(scan->rs_cbuf);
     667               0 :                 lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
     668               0 :                 lpp = PageGetItemId(dp, lineoff);
     669                 :                 Assert(ItemIdIsNormal(lpp));
     670                 : 
     671               0 :                 tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
     672               0 :                 tuple->t_len = ItemIdGetLength(lpp);
     673                 : 
     674                 :                 /* check that rs_cindex is in sync */
     675                 :                 Assert(scan->rs_cindex < scan->rs_ntuples);
     676                 :                 Assert(lineoff == scan->rs_vistuples[scan->rs_cindex]);
     677                 : 
     678               0 :                 return;
     679                 :         }
     680                 : 
     681                 :         /*
     682                 :          * advance the scan until we find a qualifying tuple or run out of stuff
     683                 :          * to scan
     684                 :          */
     685                 :         for (;;)
     686                 :         {
     687          858173 :                 while (linesleft > 0)
     688                 :                 {
     689          835898 :                         lineoff = scan->rs_vistuples[lineindex];
     690          835898 :                         lpp = PageGetItemId(dp, lineoff);
     691                 :                         Assert(ItemIdIsNormal(lpp));
     692                 : 
     693          835898 :                         tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
     694          835898 :                         tuple->t_len = ItemIdGetLength(lpp);
     695          835898 :                         ItemPointerSet(&(tuple->t_self), page, lineoff);
     696                 : 
     697                 :                         /*
     698                 :                          * if current tuple qualifies, return it.
     699                 :                          */
     700          835898 :                         if (key != NULL)
     701                 :                         {
     702                 :                                 bool            valid;
     703                 : 
     704               0 :                                 HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd),
     705                 :                                                         nkeys, key, valid);
     706               0 :                                 if (valid)
     707                 :                                 {
     708               0 :                                         scan->rs_cindex = lineindex;
     709               0 :                                         return;
     710                 :                                 }
     711                 :                         }
     712                 :                         else
     713                 :                         {
     714          835898 :                                 scan->rs_cindex = lineindex;
     715          835898 :                                 return;
     716                 :                         }
     717                 : 
     718                 :                         /*
     719                 :                          * otherwise move to the next item on the page
     720                 :                          */
     721               0 :                         --linesleft;
     722               0 :                         if (backward)
     723               0 :                                 --lineindex;
     724                 :                         else
     725               0 :                                 ++lineindex;
     726                 :                 }
     727                 : 
     728                 :                 /*
     729                 :                  * if we get here, it means we've exhausted the items on this page and
     730                 :                  * it's time to move to the next.
     731                 :                  */
     732           22275 :                 if (backward)
     733                 :                 {
     734              11 :                         finished = (page == scan->rs_startblock);
     735              11 :                         if (page == 0)
     736              11 :                                 page = scan->rs_nblocks;
     737              11 :                         page--;
     738                 :                 }
     739                 :                 else
     740                 :                 {
     741           22264 :                         page++;
     742           22264 :                         if (page >= scan->rs_nblocks)
     743            4208 :                                 page = 0;
     744           22264 :                         finished = (page == scan->rs_startblock);
     745                 : 
     746                 :                         /*
     747                 :                          * Report our new scan position for synchronization purposes. We
     748                 :                          * don't do that when moving backwards, however. That would just
     749                 :                          * mess up any other forward-moving scanners.
     750                 :                          *
     751                 :                          * Note: we do this before checking for end of scan so that the
     752                 :                          * final state of the position hint is back at the start of the
     753                 :                          * rel.  That's not strictly necessary, but otherwise when you run
     754                 :                          * the same query multiple times the starting position would shift
     755                 :                          * a little bit backwards on every invocation, which is confusing.
     756                 :                          * We don't guarantee any specific ordering in general, though.
     757                 :                          */
     758           22264 :                         if (scan->rs_syncscan)
     759               0 :                                 ss_report_location(scan->rs_rd, page);
     760                 :                 }
     761                 : 
     762                 :                 /*
     763                 :                  * return NULL if we've exhausted all the pages
     764                 :                  */
     765           22275 :                 if (finished)
     766                 :                 {
     767            4219 :                         if (BufferIsValid(scan->rs_cbuf))
     768            4219 :                                 ReleaseBuffer(scan->rs_cbuf);
     769            4219 :                         scan->rs_cbuf = InvalidBuffer;
     770            4219 :                         scan->rs_cblock = InvalidBlockNumber;
     771            4219 :                         tuple->t_data = NULL;
     772            4219 :                         scan->rs_inited = false;
     773            4219 :                         return;
     774                 :                 }
     775                 : 
     776           18056 :                 heapgetpage(scan, page);
     777                 : 
     778           18056 :                 dp = (Page) BufferGetPage(scan->rs_cbuf);
     779           18056 :                 lines = scan->rs_ntuples;
     780           18056 :                 linesleft = lines;
     781           18056 :                 if (backward)
     782               0 :                         lineindex = lines - 1;
     783                 :                 else
     784           18056 :                         lineindex = 0;
     785                 :         }
     786                 : }
     787                 : 
     788                 : 
#if defined(DISABLE_COMPLEX_MACRO)
/*
 * fastgetattr - out-of-line version of the fastgetattr() macro
 *
 * Extracts attribute number 'attnum' (1-based) from tuple 'tup' using the
 * descriptor 'tupleDesc', setting *isnull (if isnull is not NULL) to report
 * whether the attribute is null.  Compiled only when DISABLE_COMPLEX_MACRO
 * is defined, i.e. for compilers that cannot handle the macro form.
 *
 * This is formatted so oddly so that the correspondence to the macro
 * definition in access/heapam.h is maintained.
 */
Datum
fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc,
			bool *isnull)
{
	return (
			(attnum) > 0 ?
			(
			 /* positive attnum: assume not null until proven otherwise */
			 ((isnull) ? (*(isnull) = false) : (dummyret) NULL),
			 HeapTupleNoNulls(tup) ?
			 (
			  /* no nulls in tuple: can use cached offset if it is valid */
			  (tupleDesc)->attrs[(attnum) - 1]->attcacheoff >= 0 ?
			  (
			   fetchatt((tupleDesc)->attrs[(attnum) - 1],
						(char *) (tup)->t_data + (tup)->t_data->t_hoff +
						(tupleDesc)->attrs[(attnum) - 1]->attcacheoff)
			   )
			  :
			  nocachegetattr((tup), (attnum), (tupleDesc), (isnull))
			  )
			 :
			 (
			  /* tuple has nulls: consult the null bitmap first */
			  att_isnull((attnum) - 1, (tup)->t_data->t_bits) ?
			  (
			   ((isnull) ? (*(isnull) = true) : (dummyret) NULL),
			   (Datum) NULL
			   )
			  :
			  (
			   nocachegetattr((tup), (attnum), (tupleDesc), (isnull))
			   )
			  )
			 )
			:
			(
			 /* attnum <= 0 (system attributes are not handled here) */
			 (Datum) NULL
			 )
		);
}
#endif   /* defined(DISABLE_COMPLEX_MACRO) */
     833                 : 
     834                 : 
     835                 : /* ----------------------------------------------------------------
     836                 :  *                                       heap access method interface
     837                 :  * ----------------------------------------------------------------
     838                 :  */
     839                 : 
     840                 : /* ----------------
     841                 :  *              relation_open - open any relation by relation OID
     842                 :  *
     843                 :  *              If lockmode is not "NoLock", the specified kind of lock is
     844                 :  *              obtained on the relation.  (Generally, NoLock should only be
     845                 :  *              used if the caller knows it has some appropriate lock on the
     846                 :  *              relation already.)
     847                 :  *
     848                 :  *              An error is raised if the relation does not exist.
     849                 :  *
     850                 :  *              NB: a "relation" is anything with a pg_class entry.  The caller is
     851                 :  *              expected to check whether the relkind is something it can handle.
     852                 :  * ----------------
     853                 :  */
     854                 : Relation
     855                 : relation_open(Oid relationId, LOCKMODE lockmode)
     856          333304 : {
     857                 :         Relation        r;
     858                 : 
     859                 :         Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
     860                 : 
     861                 :         /* Get the lock before trying to open the relcache entry */
     862          333304 :         if (lockmode != NoLock)
     863          313172 :                 LockRelationOid(relationId, lockmode);
     864                 : 
     865                 :         /* The relcache does all the real work... */
     866          333302 :         r = RelationIdGetRelation(relationId);
     867                 : 
     868          333302 :         if (!RelationIsValid(r))
     869               0 :                 elog(ERROR, "could not open relation with OID %u", relationId);
     870                 : 
     871          333302 :         pgstat_initstats(r);
     872                 : 
     873          333302 :         return r;
     874                 : }
     875                 : 
     876                 : /* ----------------
     877                 :  *              try_relation_open - open any relation by relation OID
     878                 :  *
     879                 :  *              Same as relation_open, except return NULL instead of failing
     880                 :  *              if the relation does not exist.
     881                 :  * ----------------
     882                 :  */
     883                 : Relation
     884                 : try_relation_open(Oid relationId, LOCKMODE lockmode)
     885             406 : {
     886                 :         Relation        r;
     887                 : 
     888                 :         Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
     889                 : 
     890                 :         /* Get the lock first */
     891             406 :         if (lockmode != NoLock)
     892             406 :                 LockRelationOid(relationId, lockmode);
     893                 : 
     894                 :         /*
     895                 :          * Now that we have the lock, probe to see if the relation really exists
     896                 :          * or not.
     897                 :          */
     898             406 :         if (!SearchSysCacheExists(RELOID,
     899                 :                                                           ObjectIdGetDatum(relationId),
     900                 :                                                           0, 0, 0))
     901                 :         {
     902                 :                 /* Release useless lock */
     903               0 :                 if (lockmode != NoLock)
     904               0 :                         UnlockRelationOid(relationId, lockmode);
     905                 : 
     906               0 :                 return NULL;
     907                 :         }
     908                 : 
     909                 :         /* Should be safe to do a relcache load */
     910             406 :         r = RelationIdGetRelation(relationId);
     911                 : 
     912             406 :         if (!RelationIsValid(r))
     913               0 :                 elog(ERROR, "could not open relation with OID %u", relationId);
     914                 : 
     915             406 :         pgstat_initstats(r);
     916                 : 
     917             406 :         return r;
     918                 : }
     919                 : 
     920                 : /* ----------------
     921                 :  *              relation_open_nowait - open but don't wait for lock
     922                 :  *
     923                 :  *              Same as relation_open, except throw an error instead of waiting
     924                 :  *              when the requested lock is not immediately obtainable.
     925                 :  * ----------------
     926                 :  */
     927                 : Relation
     928                 : relation_open_nowait(Oid relationId, LOCKMODE lockmode)
     929               0 : {
     930                 :         Relation        r;
     931                 : 
     932                 :         Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
     933                 : 
     934                 :         /* Get the lock before trying to open the relcache entry */
     935               0 :         if (lockmode != NoLock)
     936                 :         {
     937               0 :                 if (!ConditionalLockRelationOid(relationId, lockmode))
     938                 :                 {
     939                 :                         /* try to throw error by name; relation could be deleted... */
     940               0 :                         char       *relname = get_rel_name(relationId);
     941                 : 
     942               0 :                         if (relname)
     943               0 :                                 ereport(ERROR,
     944                 :                                                 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     945                 :                                                  errmsg("could not obtain lock on relation \"%s\"",
     946                 :                                                                 relname)));
     947                 :                         else
     948               0 :                                 ereport(ERROR,
     949                 :                                                 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     950                 :                                           errmsg("could not obtain lock on relation with OID %u",
     951                 :                                                          relationId)));
     952                 :                 }
     953                 :         }
     954                 : 
     955                 :         /* The relcache does all the real work... */
     956               0 :         r = RelationIdGetRelation(relationId);
     957                 : 
     958               0 :         if (!RelationIsValid(r))
     959               0 :                 elog(ERROR, "could not open relation with OID %u", relationId);
     960                 : 
     961               0 :         pgstat_initstats(r);
     962                 : 
     963               0 :         return r;
     964                 : }
     965                 : 
     966                 : /* ----------------
     967                 :  *              relation_openrv - open any relation specified by a RangeVar
     968                 :  *
     969                 :  *              Same as relation_open, but the relation is specified by a RangeVar.
     970                 :  * ----------------
     971                 :  */
     972                 : Relation
     973                 : relation_openrv(const RangeVar *relation, LOCKMODE lockmode)
     974            8653 : {
     975                 :         Oid                     relOid;
     976                 : 
     977                 :         /*
     978                 :          * Check for shared-cache-inval messages before trying to open the
     979                 :          * relation.  This is needed to cover the case where the name identifies a
     980                 :          * rel that has been dropped and recreated since the start of our
     981                 :          * transaction: if we don't flush the old syscache entry then we'll latch
     982                 :          * onto that entry and suffer an error when we do RelationIdGetRelation.
     983                 :          * Note that relation_open does not need to do this, since a relation's
     984                 :          * OID never changes.
     985                 :          *
     986                 :          * We skip this if asked for NoLock, on the assumption that the caller has
     987                 :          * already ensured some appropriate lock is held.
     988                 :          */
     989            8653 :         if (lockmode != NoLock)
     990            8617 :                 AcceptInvalidationMessages();
     991                 : 
     992                 :         /* Look up the appropriate relation using namespace search */
     993            8653 :         relOid = RangeVarGetRelid(relation, false);
     994                 : 
     995                 :         /* Let relation_open do the rest */
     996            8634 :         return relation_open(relOid, lockmode);
     997                 : }
     998                 : 
     999                 : /* ----------------
    1000                 :  *              relation_close - close any relation
    1001                 :  *
    1002                 :  *              If lockmode is not "NoLock", we then release the specified lock.
    1003                 :  *
    1004                 :  *              Note that it is often sensible to hold a lock beyond relation_close;
    1005                 :  *              in that case, the lock is released automatically at xact end.
    1006                 :  * ----------------
    1007                 :  */
    1008                 : void
    1009                 : relation_close(Relation relation, LOCKMODE lockmode)
    1010          178170 : {
    1011          178170 :         LockRelId       relid = relation->rd_lockInfo.lockRelId;
    1012                 : 
    1013                 :         Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
    1014                 : 
    1015                 :         /* The relcache does the real work... */
    1016          178170 :         RelationClose(relation);
    1017                 : 
    1018          178170 :         if (lockmode != NoLock)
    1019          128657 :                 UnlockRelationId(&relid, lockmode);
    1020          178170 : }
    1021                 : 
    1022                 : 
    1023                 : /* ----------------
    1024                 :  *              heap_open - open a heap relation by relation OID
    1025                 :  *
    1026                 :  *              This is essentially relation_open plus check that the relation
    1027                 :  *              is not an index nor a composite type.  (The caller should also
    1028                 :  *              check that it's not a view before assuming it has storage.)
    1029                 :  * ----------------
    1030                 :  */
    1031                 : Relation
    1032                 : heap_open(Oid relationId, LOCKMODE lockmode)
    1033          165008 : {
    1034                 :         Relation        r;
    1035                 : 
    1036          165008 :         r = relation_open(relationId, lockmode);
    1037                 : 
    1038          165008 :         if (r->rd_rel->relkind == RELKIND_INDEX)
    1039               0 :                 ereport(ERROR,
    1040                 :                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
    1041                 :                                  errmsg("\"%s\" is an index",
    1042                 :                                                 RelationGetRelationName(r))));
    1043          165008 :         else if (r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
    1044               0 :                 ereport(ERROR,
    1045                 :                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
    1046                 :                                  errmsg("\"%s\" is a composite type",
    1047                 :                                                 RelationGetRelationName(r))));
    1048                 : 
    1049          165008 :         return r;
    1050                 : }
    1051                 : 
    1052                 : /* ----------------
    1053                 :  *              heap_openrv - open a heap relation specified
    1054                 :  *              by a RangeVar node
    1055                 :  *
    1056                 :  *              As above, but relation is specified by a RangeVar.
    1057                 :  * ----------------
    1058                 :  */
    1059                 : Relation
    1060                 : heap_openrv(const RangeVar *relation, LOCKMODE lockmode)
    1061            7979 : {
    1062                 :         Relation        r;
    1063                 : 
    1064            7979 :         r = relation_openrv(relation, lockmode);
    1065                 : 
    1066            7965 :         if (r->rd_rel->relkind == RELKIND_INDEX)
    1067               0 :                 ereport(ERROR,
    1068                 :                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
    1069                 :                                  errmsg("\"%s\" is an index",
    1070                 :                                                 RelationGetRelationName(r))));
    1071            7965 :         else if (r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
    1072               0 :                 ereport(ERROR,
    1073                 :                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
    1074                 :                                  errmsg("\"%s\" is a composite type",
    1075                 :                                                 RelationGetRelationName(r))));
    1076                 : 
    1077            7965 :         return r;
    1078                 : }
    1079                 : 
    1080                 : 
/* ----------------
 *		heap_beginscan	- begin relation scan
 *
 * heap_beginscan_strat offers an extended API that lets the caller control
 * whether a nondefault buffer access strategy can be used, and whether
 * syncscan can be chosen (possibly resulting in the scan not starting from
 * block zero).  Both of these default to TRUE with plain heap_beginscan.
 *
 * heap_beginscan_bm is an alternative entry point for setting up a
 * HeapScanDesc for a bitmap heap scan.  Although that scan technology is
 * really quite unlike a standard seqscan, there is just enough commonality
 * to make it worth using the same data structure.
 * ----------------
 */
HeapScanDesc
heap_beginscan(Relation relation, Snapshot snapshot,
			   int nkeys, ScanKey key)
{
	/* Plain entry point: allow strategy and syncscan; not a bitmap scan */
	return heap_beginscan_internal(relation, snapshot, nkeys, key,
								   true, true, false);
}
    1102                 : 
/* Extended entry point: caller controls allow_strat/allow_sync (see above) */
HeapScanDesc
heap_beginscan_strat(Relation relation, Snapshot snapshot,
					 int nkeys, ScanKey key,
					 bool allow_strat, bool allow_sync)
{
	return heap_beginscan_internal(relation, snapshot, nkeys, key,
								   allow_strat, allow_sync, false);
}
    1111                 : 
/* Bitmap heap scan entry point: no buffer strategy, no syncscan (see above) */
HeapScanDesc
heap_beginscan_bm(Relation relation, Snapshot snapshot,
				  int nkeys, ScanKey key)
{
	return heap_beginscan_internal(relation, snapshot, nkeys, key,
								   false, false, true);
}
    1119                 : 
    1120                 : static HeapScanDesc
    1121                 : heap_beginscan_internal(Relation relation, Snapshot snapshot,
    1122                 :                                                 int nkeys, ScanKey key,
    1123                 :                                                 bool allow_strat, bool allow_sync,
    1124                 :                                                 bool is_bitmapscan)
    1125           14077 : {
    1126                 :         HeapScanDesc scan;
    1127                 : 
    1128                 :         /*
    1129                 :          * increment relation ref count while scanning relation
    1130                 :          *
    1131                 :          * This is just to make really sure the relcache entry won't go away while
    1132                 :          * the scan has a pointer to it.  Caller should be holding the rel open
    1133                 :          * anyway, so this is redundant in all normal scenarios...
    1134                 :          */
    1135           14077 :         RelationIncrementReferenceCount(relation);
    1136                 : 
    1137                 :         /*
    1138                 :          * allocate and initialize scan descriptor
    1139                 :          */
    1140           14077 :         scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
    1141                 : 
    1142           14077 :         scan->rs_rd = relation;
    1143           14077 :         scan->rs_snapshot = snapshot;
    1144           14077 :         scan->rs_nkeys = nkeys;
    1145           14077 :         scan->rs_bitmapscan = is_bitmapscan;
    1146           14077 :         scan->rs_strategy = NULL;    /* set in initscan */
    1147           14077 :         scan->rs_allow_strat = allow_strat;
    1148           14077 :         scan->rs_allow_sync = allow_sync;
    1149                 : 
    1150                 :         /*
    1151                 :          * we can use page-at-a-time mode if it's an MVCC-safe snapshot
    1152                 :          */
    1153           14077 :         scan->rs_pageatatime = IsMVCCSnapshot(snapshot);
    1154                 : 
    1155                 :         /* we only need to set this up once */
    1156           14077 :         scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
    1157                 : 
    1158                 :         /*
    1159                 :          * we do this here instead of in initscan() because heap_rescan also calls
    1160                 :          * initscan() and we don't want to allocate memory again
    1161                 :          */
    1162           14077 :         if (nkeys > 0)
    1163            5898 :                 scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
    1164                 :         else
    1165            8179 :                 scan->rs_key = NULL;
    1166                 : 
    1167           14077 :         initscan(scan, key);
    1168                 : 
    1169           14077 :         return scan;
    1170                 : }
    1171                 : 
/* ----------------
 *		heap_rescan		- restart a relation scan
 * ----------------
 */
void
heap_rescan(HeapScanDesc scan,
			ScanKey key)
{
	/*
	 * unpin scan buffers
	 */
	if (BufferIsValid(scan->rs_cbuf))
		ReleaseBuffer(scan->rs_cbuf);

	/*
	 * reinitialize scan descriptor (the rs_key array is reused, not
	 * reallocated — see heap_beginscan_internal)
	 */
	initscan(scan, key);
}
    1191                 : 
/* ----------------
 *		heap_endscan	- end relation scan
 *
 *		Releases all scan resources: unpins the current buffer (if any),
 *		drops the relcache reference taken at heap_beginscan time, and
 *		frees the scan key array, access strategy, and the descriptor
 *		itself.
 * ----------------
 */
void
heap_endscan(HeapScanDesc scan)
{
	/* Note: no locking manipulations needed */

	/*
	 * unpin scan buffers
	 */
	if (BufferIsValid(scan->rs_cbuf))
		ReleaseBuffer(scan->rs_cbuf);

	/*
	 * decrement relation reference count and free scan descriptor storage
	 */
	RelationDecrementReferenceCount(scan->rs_rd);

	if (scan->rs_key)
		pfree(scan->rs_key);

	if (scan->rs_strategy != NULL)
		FreeAccessStrategy(scan->rs_strategy);

	pfree(scan);
}
    1223                 : 
/* ----------------
 *		heap_getnext	- retrieve next tuple in scan
 *
 *		Returns the next tuple satisfying the scan keys, or NULL when the
 *		scan is exhausted.  We don't return the buffer anymore, but you
 *		can get it from the returned HeapTuple.
 * ----------------
 */

#ifdef HEAPDEBUGALL
#define HEAPDEBUG_1 \
	elog(DEBUG2, "heap_getnext([%s,nkeys=%d],dir=%d) called", \
		 RelationGetRelationName(scan->rs_rd), scan->rs_nkeys, (int) direction)
#define HEAPDEBUG_2 \
	elog(DEBUG2, "heap_getnext returning EOS")
#define HEAPDEBUG_3 \
	elog(DEBUG2, "heap_getnext returning tuple")
#else
#define HEAPDEBUG_1
#define HEAPDEBUG_2
#define HEAPDEBUG_3
#endif   /* !defined(HEAPDEBUGALL) */


HeapTuple
heap_getnext(HeapScanDesc scan, ScanDirection direction)
{
	/* Note: no locking manipulations needed */

	HEAPDEBUG_1;				/* heap_getnext( info ) */

	/* Advance the scan, using the faster page-at-a-time path if possible */
	if (scan->rs_pageatatime)
		heapgettup_pagemode(scan, direction,
							scan->rs_nkeys, scan->rs_key);
	else
		heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key);

	if (scan->rs_ctup.t_data == NULL)
	{
		HEAPDEBUG_2;			/* heap_getnext returning EOS */
		return NULL;
	}

	/*
	 * if we get here it means we have a new current scan tuple, so point to
	 * the proper return buffer and return the tuple.
	 */
	HEAPDEBUG_3;				/* heap_getnext returning tuple */

	pgstat_count_heap_getnext(scan->rs_rd);

	return &(scan->rs_ctup);
}
    1277                 : 
    1278                 : /*
    1279                 :  *      heap_fetch              - retrieve tuple with given tid
    1280                 :  *
    1281                 :  * On entry, tuple->t_self is the TID to fetch.  We pin the buffer holding
    1282                 :  * the tuple, fill in the remaining fields of *tuple, and check the tuple
    1283                 :  * against the specified snapshot.
    1284                 :  *
    1285                 :  * If successful (tuple found and passes snapshot time qual), then *userbuf
    1286                 :  * is set to the buffer holding the tuple and TRUE is returned.  The caller
    1287                 :  * must unpin the buffer when done with the tuple.
    1288                 :  *
    1289                 :  * If the tuple is not found (ie, item number references a deleted slot),
    1290                 :  * then tuple->t_data is set to NULL and FALSE is returned.
    1291                 :  *
    1292                 :  * If the tuple is found but fails the time qual check, then FALSE is returned
    1293                 :  * but tuple->t_data is left pointing to the tuple.
    1294                 :  *
    1295                 :  * keep_buf determines what is done with the buffer in the FALSE-result cases.
    1296                 :  * When the caller specifies keep_buf = true, we retain the pin on the buffer
    1297                 :  * and return it in *userbuf (so the caller must eventually unpin it); when
    1298                 :  * keep_buf = false, the pin is released and *userbuf is set to InvalidBuffer.
    1299                 :  *
    1300                 :  * stats_relation is the relation to charge the heap_fetch operation against
    1301                 :  * for statistical purposes.  (This could be the heap rel itself, an
    1302                 :  * associated index, or NULL to not count the fetch at all.)
    1303                 :  *
    1304                 :  * heap_fetch does not follow HOT chains: only the exact TID requested will
    1305                 :  * be fetched.
    1306                 :  *
    1307                 :  * It is somewhat inconsistent that we ereport() on invalid block number but
    1308                 :  * return false on invalid item number.  There are a couple of reasons though.
    1309                 :  * One is that the caller can relatively easily check the block number for
    1310                 :  * validity, but cannot check the item number without reading the page
    1311                 :  * himself.  Another is that when we are following a t_ctid link, we can be
    1312                 :  * reasonably confident that the page number is valid (since VACUUM shouldn't
    1313                 :  * truncate off the destination page without having killed the referencing
    1314                 :  * tuple first), but the item number might well not be good.
    1315                 :  */
bool
heap_fetch(Relation relation,
		   Snapshot snapshot,
		   HeapTuple tuple,
		   Buffer *userbuf,
		   bool keep_buf,
		   Relation stats_relation)
{
	/* Assume *userbuf is undefined on entry */
	*userbuf = InvalidBuffer;
	/* heap_release_fetch does the real work; see the contract comment above */
	return heap_release_fetch(relation, snapshot, tuple,
							  userbuf, keep_buf, stats_relation);
}
    1329                 : 
/*
 *	heap_release_fetch		- retrieve tuple with given tid
 *
 * This has the same API as heap_fetch except that if *userbuf is not
 * InvalidBuffer on entry, that buffer will be released before reading
 * the new page.  This saves a separate ReleaseBuffer step and hence
 * one entry into the bufmgr when looping through multiple fetches.
 * Also, if *userbuf is the same buffer that holds the target tuple,
 * we avoid bufmgr manipulation altogether.
 */
bool
heap_release_fetch(Relation relation,
				   Snapshot snapshot,
				   HeapTuple tuple,
				   Buffer *userbuf,
				   bool keep_buf,
				   Relation stats_relation)
{
	ItemPointer tid = &(tuple->t_self);
	ItemId		lp;
	Buffer		buffer;
	PageHeader	dp;
	OffsetNumber offnum;
	bool		valid;

	/*
	 * get the buffer from the relation descriptor. Note that this does a
	 * buffer pin, and releases the old *userbuf if not InvalidBuffer.
	 */
	buffer = ReleaseAndReadBuffer(*userbuf, relation,
								  ItemPointerGetBlockNumber(tid));

	/*
	 * Need share lock on buffer to examine tuple commit status.
	 */
	LockBuffer(buffer, BUFFER_LOCK_SHARE);
	dp = (PageHeader) BufferGetPage(buffer);

	/*
	 * We'd better check for out-of-range offnum in case of VACUUM since the
	 * TID was obtained.
	 */
	offnum = ItemPointerGetOffsetNumber(tid);
	if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
	{
		/* Bogus item number: report "tuple not found" per the API contract */
		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		if (keep_buf)
			*userbuf = buffer;
		else
		{
			ReleaseBuffer(buffer);
			*userbuf = InvalidBuffer;
		}
		tuple->t_data = NULL;
		return false;
	}

	/*
	 * get the item line pointer corresponding to the requested tid
	 */
	lp = PageGetItemId(dp, offnum);

	/*
	 * Must check for deleted tuple.
	 */
	if (!ItemIdIsNormal(lp))
	{
		/* Deleted/unused slot: same "not found" handling as above */
		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		if (keep_buf)
			*userbuf = buffer;
		else
		{
			ReleaseBuffer(buffer);
			*userbuf = InvalidBuffer;
		}
		tuple->t_data = NULL;
		return false;
	}

	/*
	 * fill in *tuple fields
	 */
	tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
	tuple->t_len = ItemIdGetLength(lp);
	tuple->t_tableOid = RelationGetRelid(relation);

	/*
	 * check time qualification of tuple, then release lock
	 */
	valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);

	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

	if (valid)
	{
		/*
		 * All checks passed, so return the tuple as valid. Caller is now
		 * responsible for releasing the buffer.
		 */
		*userbuf = buffer;

		/* Count the successful fetch against appropriate rel, if any */
		if (stats_relation != NULL)
			pgstat_count_heap_fetch(stats_relation);

		return true;
	}

	/* Tuple failed time qual, but maybe caller wants to see it anyway. */
	if (keep_buf)
		*userbuf = buffer;
	else
	{
		ReleaseBuffer(buffer);
		*userbuf = InvalidBuffer;
	}

	return false;
}
    1449                 : 
    1450                 : /*
    1451                 :  *      heap_hot_search_buffer  - search HOT chain for tuple satisfying snapshot
    1452                 :  *
    1453                 :  * On entry, *tid is the TID of a tuple (either a simple tuple, or the root
    1454                 :  * of a HOT chain), and buffer is the buffer holding this tuple.  We search
    1455                 :  * for the first chain member satisfying the given snapshot.  If one is
    1456                 :  * found, we update *tid to reference that tuple's offset number, and
    1457                 :  * return TRUE.  If no match, return FALSE without modifying *tid.
    1458                 :  *
    1459                 :  * If all_dead is not NULL, we check non-visible tuples to see if they are
    1460                 :  * globally dead; *all_dead is set TRUE if all members of the HOT chain
    1461                 :  * are vacuumable, FALSE if not.
    1462                 :  *
    1463                 :  * Unlike heap_fetch, the caller must already have pin and (at least) share
    1464                 :  * lock on the buffer; it is still pinned/locked at exit.  Also unlike
    1465                 :  * heap_fetch, we do not report any pgstats count; caller may do so if wanted.
    1466                 :  */
bool
heap_hot_search_buffer(ItemPointer tid, Buffer buffer, Snapshot snapshot,
					   bool *all_dead)
{
	Page		dp = (Page) BufferGetPage(buffer);
	TransactionId prev_xmax = InvalidTransactionId;
	OffsetNumber offnum;
	bool		at_chain_start;

	/* Start out assuming the whole chain is dead; cleared once disproved */
	if (all_dead)
		*all_dead = true;

	Assert(ItemPointerGetBlockNumber(tid) == BufferGetBlockNumber(buffer));
	offnum = ItemPointerGetOffsetNumber(tid);
	at_chain_start = true;

	/* Scan through possible multiple members of HOT-chain */
	for (;;)
	{
		ItemId		lp;
		HeapTupleData heapTuple;

		/* check for bogus TID */
		if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
			break;

		lp = PageGetItemId(dp, offnum);

		/* check for unused, dead, or redirected items */
		if (!ItemIdIsNormal(lp))
		{
			/* We should only see a redirect at start of chain */
			if (ItemIdIsRedirected(lp) && at_chain_start)
			{
				/* Follow the redirect */
				offnum = ItemIdGetRedirect(lp);
				at_chain_start = false;
				continue;
			}
			/* else must be end of chain */
			break;
		}

		heapTuple.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
		heapTuple.t_len = ItemIdGetLength(lp);

		/*
		 * Shouldn't see a HEAP_ONLY tuple at chain start.
		 */
		if (at_chain_start && HeapTupleIsHeapOnly(&heapTuple))
			break;

		/*
		 * The xmin should match the previous xmax value, else chain is
		 * broken.  (prev_xmax is invalid on the first iteration, so the
		 * first member is never rejected by this test.)
		 */
		if (TransactionIdIsValid(prev_xmax) &&
			!TransactionIdEquals(prev_xmax,
								 HeapTupleHeaderGetXmin(heapTuple.t_data)))
			break;

		/* If it's visible per the snapshot, we must return it */
		if (HeapTupleSatisfiesVisibility(&heapTuple, snapshot, buffer))
		{
			/* Report the chain member actually found, not the chain root */
			ItemPointerSetOffsetNumber(tid, offnum);
			if (all_dead)
				*all_dead = false;
			return true;
		}

		/*
		 * If we can't see it, maybe no one else can either.  At caller
		 * request, check whether all chain members are dead to all
		 * transactions.  (The *all_dead test skips the relatively expensive
		 * HeapTupleSatisfiesVacuum call once the answer is already false.)
		 */
		if (all_dead && *all_dead &&
			HeapTupleSatisfiesVacuum(heapTuple.t_data, RecentGlobalXmin,
									 buffer) != HEAPTUPLE_DEAD)
			*all_dead = false;

		/*
		 * Check to see if HOT chain continues past this tuple; if so fetch
		 * the next offnum and loop around.
		 */
		if (HeapTupleIsHotUpdated(&heapTuple))
		{
			/* HOT updates never leave the page, so only the offset changes */
			Assert(ItemPointerGetBlockNumber(&heapTuple.t_data->t_ctid) ==
				   ItemPointerGetBlockNumber(tid));
			offnum = ItemPointerGetOffsetNumber(&heapTuple.t_data->t_ctid);
			at_chain_start = false;
			prev_xmax = HeapTupleHeaderGetXmax(heapTuple.t_data);
		}
		else
			break;				/* end of chain */
	}

	/* No visible member found in the chain */
	return false;
}
    1565                 : 
    1566                 : /*
    1567                 :  *      heap_hot_search         - search HOT chain for tuple satisfying snapshot
    1568                 :  *
    1569                 :  * This has the same API as heap_hot_search_buffer, except that the caller
    1570                 :  * does not provide the buffer containing the page, rather we access it
    1571                 :  * locally.
    1572                 :  */
    1573                 : bool
    1574                 : heap_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot,
    1575                 :                                 bool *all_dead)
    1576            1323 : {
    1577                 :         bool            result;
    1578                 :         Buffer          buffer;
    1579                 : 
    1580            1323 :         buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
    1581            1323 :         LockBuffer(buffer, BUFFER_LOCK_SHARE);
    1582            1323 :         result = heap_hot_search_buffer(tid, buffer, snapshot, all_dead);
    1583            1323 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    1584            1323 :         ReleaseBuffer(buffer);
    1585            1323 :         return result;
    1586                 : }
    1587                 : 
/*
 *	heap_get_latest_tid -  get the latest tid of a specified tuple
 *
 * Actually, this gets the latest version that is visible according to
 * the passed snapshot.  You can pass SnapshotDirty to get the very latest,
 * possibly uncommitted version.
 *
 * *tid is both an input and an output parameter: it is updated to
 * show the latest version of the row.  Note that it will not be changed
 * if no version of the row passes the snapshot test.
 */
void
heap_get_latest_tid(Relation relation,
					Snapshot snapshot,
					ItemPointer tid)
{
	BlockNumber blk;
	ItemPointerData ctid;
	TransactionId priorXmax;

	/* this is to avoid Assert failures on bad input */
	if (!ItemPointerIsValid(tid))
		return;

	/*
	 * Since this can be called with user-supplied TID, don't trust the input
	 * too much.  (RelationGetNumberOfBlocks is an expensive check, so we
	 * don't check t_ctid links again this way.  Note that it would not do to
	 * call it just once and save the result, either.)
	 */
	blk = ItemPointerGetBlockNumber(tid);
	if (blk >= RelationGetNumberOfBlocks(relation))
		elog(ERROR, "block number %u is out of range for relation \"%s\"",
			 blk, RelationGetRelationName(relation));

	/*
	 * Loop to chase down t_ctid links.  At top of loop, ctid is the tuple we
	 * need to examine, and *tid is the TID we will return if ctid turns out
	 * to be bogus.
	 *
	 * Note that we will loop until we reach the end of the t_ctid chain.
	 * Depending on the snapshot passed, there might be at most one visible
	 * version of the row, but we don't try to optimize for that.
	 */
	ctid = *tid;
	priorXmax = InvalidTransactionId;	/* cannot check first XMIN */
	for (;;)
	{
		Buffer		buffer;
		PageHeader	dp;
		OffsetNumber offnum;
		ItemId		lp;
		HeapTupleData tp;
		bool		valid;

		/*
		 * Read, pin, and lock the page.
		 */
		buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
		LockBuffer(buffer, BUFFER_LOCK_SHARE);
		dp = (PageHeader) BufferGetPage(buffer);

		/*
		 * Check for bogus item number.  This is not treated as an error
		 * condition because it can happen while following a t_ctid link. We
		 * just assume that the prior tid is OK and return it unchanged.
		 */
		offnum = ItemPointerGetOffsetNumber(&ctid);
		if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
		{
			UnlockReleaseBuffer(buffer);
			break;
		}
		lp = PageGetItemId(dp, offnum);
		if (!ItemIdIsNormal(lp))
		{
			UnlockReleaseBuffer(buffer);
			break;
		}

		/* OK to access the tuple */
		tp.t_self = ctid;
		tp.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
		tp.t_len = ItemIdGetLength(lp);

		/*
		 * After following a t_ctid link, we might arrive at an unrelated
		 * tuple.  Check for XMIN match.  (priorXmax is invalid on the first
		 * iteration, so the caller-supplied TID is never rejected here.)
		 */
		if (TransactionIdIsValid(priorXmax) &&
		  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
		{
			UnlockReleaseBuffer(buffer);
			break;
		}

		/*
		 * Check time qualification of tuple; if visible, set it as the new
		 * result candidate.
		 */
		valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
		if (valid)
			*tid = ctid;

		/*
		 * If there's a valid t_ctid link, follow it, else we're done.
		 * A tuple whose xmax is invalid, or only a locker, or whose t_ctid
		 * points at itself, ends the chain.
		 */
		if ((tp.t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED)) ||
			ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
		{
			UnlockReleaseBuffer(buffer);
			break;
		}

		/* Save the link and the updater's xid before dropping the lock */
		ctid = tp.t_data->t_ctid;
		priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
		UnlockReleaseBuffer(buffer);
	}									/* end of loop */
}
    1707                 : 
    1708                 : 
    1709                 : /*
    1710                 :  * UpdateXmaxHintBits - update tuple hint bits after xmax transaction ends
    1711                 :  *
    1712                 :  * This is called after we have waited for the XMAX transaction to terminate.
    1713                 :  * If the transaction aborted, we guarantee the XMAX_INVALID hint bit will
    1714                 :  * be set on exit.      If the transaction committed, we set the XMAX_COMMITTED
    1715                 :  * hint bit if possible --- but beware that that may not yet be possible,
    1716                 :  * if the transaction committed asynchronously.  Hence callers should look
    1717                 :  * only at XMAX_INVALID.
    1718                 :  */
    1719                 : static void
    1720                 : UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
    1721               0 : {
    1722                 :         Assert(TransactionIdEquals(HeapTupleHeaderGetXmax(tuple), xid));
    1723                 : 
    1724               0 :         if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID)))
    1725                 :         {
    1726               0 :                 if (TransactionIdDidCommit(xid))
    1727               0 :                         HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
    1728                 :                                                                  xid);
    1729                 :                 else
    1730               0 :                         HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
    1731                 :                                                                  InvalidTransactionId);
    1732                 :         }
    1733               0 : }
    1734                 : 
    1735                 : 
/*
 *	heap_insert		- insert tuple into a heap
 *
 * The new tuple is stamped with current transaction ID and the specified
 * command ID.
 *
 * If use_wal is false, the new tuple is not logged in WAL, even for a
 * non-temp relation.  Safe usage of this behavior requires that we arrange
 * that all new tuples go into new pages not containing any tuples from other
 * transactions, and that the relation gets fsync'd before commit.
 * (See also heap_sync() comments)
 *
 * use_fsm is passed directly to RelationGetBufferForTuple, which see for
 * more info.
 *
 * Note that use_wal and use_fsm will be applied when inserting into the
 * heap's TOAST table, too, if the tuple requires any out-of-line data.
 *
 * The return value is the OID assigned to the tuple (either here or by the
 * caller), or InvalidOid if no OID.  The header fields of *tup are updated
 * to match the stored tuple; in particular tup->t_self receives the actual
 * TID where the tuple was stored.  But note that any toasting of fields
 * within the tuple data is NOT reflected into *tup.
 */
Oid
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
			bool use_wal, bool use_fsm)
{
	TransactionId xid = GetCurrentTransactionId();
	HeapTuple	heaptup;
	Buffer		buffer;

	if (relation->rd_rel->relhasoids)
	{
#ifdef NOT_USED
		/* this is redundant with an Assert in HeapTupleSetOid */
		Assert(tup->t_data->t_infomask & HEAP_HASOID);
#endif

		/*
		 * If the object id of this tuple has already been assigned, trust the
		 * caller.  There are a couple of ways this can happen.  At initial db
		 * creation, the backend program sets oids for tuples. When we define
		 * an index, we set the oid.  Finally, in the future, we may allow
		 * users to set their own object ids in order to support a persistent
		 * object store (objects need to contain pointers to one another).
		 */
		if (!OidIsValid(HeapTupleGetOid(tup)))
			HeapTupleSetOid(tup, GetNewOid(relation));
	}
	else
	{
		/* check there is not space for an OID */
		Assert(!(tup->t_data->t_infomask & HEAP_HASOID));
	}

	/* Stamp the header: clear old transaction bits, set xmin/cmin for us */
	tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
	tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
	tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
	HeapTupleHeaderSetXmin(tup->t_data, xid);
	HeapTupleHeaderSetCmin(tup->t_data, cid);
	HeapTupleHeaderSetXmax(tup->t_data, 0);		/* for cleanliness */
	tup->t_tableOid = RelationGetRelid(relation);

	/*
	 * If the new tuple is too big for storage or contains already toasted
	 * out-of-line attributes from some other relation, invoke the toaster.
	 *
	 * Note: below this point, heaptup is the data we actually intend to store
	 * into the relation; tup is the caller's original untoasted data.
	 */
	if (relation->rd_rel->relkind != RELKIND_RELATION)
	{
		/* toast table entries should never be recursively toasted */
		Assert(!HeapTupleHasExternal(tup));
		heaptup = tup;
	}
	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
		heaptup = toast_insert_or_update(relation, tup, NULL,
										 use_wal, use_fsm);
	else
		heaptup = tup;

	/* Find buffer to insert this tuple into */
	buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
									   InvalidBuffer, use_fsm);

	/* NO EREPORT(ERROR) from here till changes are logged */
	START_CRIT_SECTION();

	RelationPutHeapTuple(relation, buffer, heaptup);

	/*
	 * XXX Should we set PageSetPrunable on this page ?
	 *
	 * The inserting transaction may eventually abort thus making this tuple
	 * DEAD and hence available for pruning. Though we don't want to optimize
	 * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
	 * aborted tuple will never be pruned until next vacuum is triggered.
	 *
	 * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
	 */

	MarkBufferDirty(buffer);

	/* XLOG stuff: temp relations are never WAL-logged */
	if (use_wal && !relation->rd_istemp)
	{
		xl_heap_insert xlrec;
		xl_heap_header xlhdr;
		XLogRecPtr	recptr;
		XLogRecData rdata[3];
		Page		page = BufferGetPage(buffer);
		uint8		info = XLOG_HEAP_INSERT;

		xlrec.target.node = relation->rd_node;
		xlrec.target.tid = heaptup->t_self;
		rdata[0].data = (char *) &xlrec;
		rdata[0].len = SizeOfHeapInsert;
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = &(rdata[1]);

		xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
		xlhdr.t_infomask = heaptup->t_data->t_infomask;
		xlhdr.t_hoff = heaptup->t_data->t_hoff;

		/*
		 * note we mark rdata[1] as belonging to buffer; if XLogInsert decides
		 * to write the whole page to the xlog, we don't need to store
		 * xl_heap_header in the xlog.
		 */
		rdata[1].data = (char *) &xlhdr;
		rdata[1].len = SizeOfHeapHeader;
		rdata[1].buffer = buffer;
		rdata[1].buffer_std = true;
		rdata[1].next = &(rdata[2]);

		/* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
		rdata[2].data = (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits);
		rdata[2].len = heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits);
		rdata[2].buffer = buffer;
		rdata[2].buffer_std = true;
		rdata[2].next = NULL;

		/*
		 * If this is the single and first tuple on page, we can reinit the
		 * page instead of restoring the whole thing.  Set flag, and hide
		 * buffer references from XLogInsert.
		 */
		if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
			PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
		{
			info |= XLOG_HEAP_INIT_PAGE;
			rdata[1].buffer = rdata[2].buffer = InvalidBuffer;
		}

		recptr = XLogInsert(RM_HEAP_ID, info, rdata);

		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buffer);

	/*
	 * If tuple is cachable, mark it for invalidation from the caches in case
	 * we abort.  Note it is OK to do this after releasing the buffer, because
	 * the heaptup data structure is all in local memory, not in the shared
	 * buffer.
	 */
	CacheInvalidateHeapTuple(relation, heaptup);

	pgstat_count_heap_insert(relation);

	/*
	 * If heaptup is a private copy (made by the toaster), release it.  Don't
	 * forget to copy t_self back to the caller's image, too.
	 */
	if (heaptup != tup)
	{
		tup->t_self = heaptup->t_self;
		heap_freetuple(heaptup);
	}

	return HeapTupleGetOid(tup);
}
    1924                 : 
    1925                 : /*
    1926                 :  *      simple_heap_insert - insert a tuple
    1927                 :  *
    1928                 :  * Currently, this routine differs from heap_insert only in supplying
    1929                 :  * a default command ID and not allowing access to the speedup options.
    1930                 :  *
    1931                 :  * This should be used rather than using heap_insert directly in most places
    1932                 :  * where we are modifying system catalogs.
    1933                 :  */
    1934                 : Oid
    1935                 : simple_heap_insert(Relation relation, HeapTuple tup)
    1936           28679 : {
    1937           28679 :         return heap_insert(relation, tup, GetCurrentCommandId(true), true, true);
    1938                 : }
    1939                 : 
    1940                 : /*
    1941                 :  *      heap_delete - delete a tuple
    1942                 :  *
    1943                 :  * NB: do not call this directly unless you are prepared to deal with
    1944                 :  * concurrent-update conditions.  Use simple_heap_delete instead.
    1945                 :  *
    1946                 :  *      relation - table to be modified (caller must hold suitable lock)
    1947                 :  *      tid - TID of tuple to be deleted
    1948                 :  *      ctid - output parameter, used only for failure case (see below)
    1949                 :  *      update_xmax - output parameter, used only for failure case (see below)
    1950                 :  *      cid - delete command ID (used for visibility test, and stored into
    1951                 :  *              cmax if successful)
    1952                 :  *      crosscheck - if not InvalidSnapshot, also check tuple against this
    1953                 :  *      wait - true if should wait for any conflicting update to commit/abort
    1954                 :  *
    1955                 :  * Normal, successful return value is HeapTupleMayBeUpdated, which
    1956                 :  * actually means we did delete it.  Failure return codes are
    1957                 :  * HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated
    1958                 :  * (the last only possible if wait == false).
    1959                 :  *
    1960                 :  * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
    1961                 :  * If t_ctid is the same as tid, the tuple was deleted; if different, the
    1962                 :  * tuple was updated, and t_ctid is the location of the replacement tuple.
    1963                 :  * (t_xmax is needed to verify that the replacement tuple matches.)
    1964                 :  */
     1965                 : HTSU_Result
     1966                 : heap_delete(Relation relation, ItemPointer tid,
     1967                 :                         ItemPointer ctid, TransactionId *update_xmax,
     1968                 :                         CommandId cid, Snapshot crosscheck, bool wait)
     1969           21381 : {
     1970                 :         HTSU_Result result;
     1971           21381 :         TransactionId xid = GetCurrentTransactionId();
     1972                 :         ItemId          lp;
     1973                 :         HeapTupleData tp;
     1974                 :         PageHeader      dp;
     1975                 :         Buffer          buffer;
     1976           21381 :         bool            have_tuple_lock = false;
     1977                 :         bool            iscombo;
     1978                 : 
     1979                 :         Assert(ItemPointerIsValid(tid));
     1980                 : 
     1981           21381 :         buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
     1982           21381 :         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
     1983                 : 
     1984           21381 :         dp = (PageHeader) BufferGetPage(buffer);
     1985           21381 :         lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid));
     1986                 :         Assert(ItemIdIsNormal(lp));
     1987                 : 
     1988           21381 :         tp.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
     1989           21381 :         tp.t_len = ItemIdGetLength(lp);
     1990           21381 :         tp.t_self = *tid;
     1991                 : 
                          :         /*
                          :          * l1: re-check entry point.  We jump back here whenever we slept
                          :          * waiting for a concurrent locker and found, on waking, that the
                          :          * tuple's xmax had changed underneath us, so its update status
                          :          * must be evaluated afresh (buffer lock is re-held at that point).
                          :          */
     1992           21381 : l1:
     1993           21381 :         result = HeapTupleSatisfiesUpdate(tp.t_data, cid, buffer);
     1994                 : 
     1995           21381 :         if (result == HeapTupleInvisible)
     1996                 :         {
     1997               0 :                 UnlockReleaseBuffer(buffer);
     1998               0 :                 elog(ERROR, "attempted to delete invisible tuple");
     1999                 :         }
     2000           21381 :         else if (result == HeapTupleBeingUpdated && wait)
     2001                 :         {
     2002                 :                 TransactionId xwait;
     2003                 :                 uint16          infomask;
     2004                 : 
     2005                 :                 /* must copy state data before unlocking buffer */
     2006               0 :                 xwait = HeapTupleHeaderGetXmax(tp.t_data);
     2007               0 :                 infomask = tp.t_data->t_infomask;
     2008                 : 
     2009               0 :                 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     2010                 : 
     2011                 :                 /*
     2012                 :                  * Acquire tuple lock to establish our priority for the tuple (see
     2013                 :                  * heap_lock_tuple).  LockTuple will release us when we are
     2014                 :                  * next-in-line for the tuple.
     2015                 :                  *
     2016                 :                  * If we are forced to "start over" below, we keep the tuple lock;
     2017                 :                  * this arranges that we stay at the head of the line while rechecking
     2018                 :                  * tuple state.
     2019                 :                  */
     2020               0 :                 if (!have_tuple_lock)
     2021                 :                 {
     2022               0 :                         LockTuple(relation, &(tp.t_self), ExclusiveLock);
     2023               0 :                         have_tuple_lock = true;
     2024                 :                 }
     2025                 : 
     2026                 :                 /*
     2027                 :                  * Sleep until concurrent transaction ends.  Note that we don't care
     2028                 :                  * if the locker has an exclusive or shared lock, because we need
     2029                 :                  * exclusive.
     2030                 :                  */
     2031                 : 
     2032               0 :                 if (infomask & HEAP_XMAX_IS_MULTI)
     2033                 :                 {
     2034                 :                         /* wait for multixact */
     2035               0 :                         MultiXactIdWait((MultiXactId) xwait);
     2036               0 :                         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
     2037                 : 
     2038                 :                         /*
     2039                 :                          * If xwait had just locked the tuple then some other xact could
     2040                 :                          * update this tuple before we get to this point.  Check for xmax
     2041                 :                          * change, and start over if so.
     2042                 :                          */
     2043               0 :                         if (!(tp.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
     2044                 :                                 !TransactionIdEquals(HeapTupleHeaderGetXmax(tp.t_data),
     2045                 :                                                                          xwait))
     2046                 :                                 goto l1;
     2047                 : 
     2048                 :                         /*
     2049                 :                          * You might think the multixact is necessarily done here, but not
     2050                 :                          * so: it could have surviving members, namely our own xact or
     2051                 :                          * other subxacts of this backend.      It is legal for us to delete
     2052                 :                          * the tuple in either case, however (the latter case is
     2053                 :                          * essentially a situation of upgrading our former shared lock to
     2054                 :                          * exclusive).  We don't bother changing the on-disk hint bits
     2055                 :                          * since we are about to overwrite the xmax altogether.
     2056                 :                          */
     2057                 :                 }
     2058                 :                 else
     2059                 :                 {
     2060                 :                         /* wait for regular transaction to end */
     2061               0 :                         XactLockTableWait(xwait);
     2062               0 :                         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
     2063                 : 
     2064                 :                         /*
     2065                 :                          * xwait is done, but if xwait had just locked the tuple then some
     2066                 :                          * other xact could update this tuple before we get to this point.
     2067                 :                          * Check for xmax change, and start over if so.
     2068                 :                          */
     2069               0 :                         if ((tp.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
     2070                 :                                 !TransactionIdEquals(HeapTupleHeaderGetXmax(tp.t_data),
     2071                 :                                                                          xwait))
     2072                 :                                 goto l1;
     2073                 : 
     2074                 :                         /* Otherwise check if it committed or aborted */
     2075               0 :                         UpdateXmaxHintBits(tp.t_data, buffer, xwait);
     2076                 :                 }
     2077                 : 
     2078                 :                 /*
     2079                 :                  * We may overwrite if previous xmax aborted, or if it committed but
     2080                 :                  * only locked the tuple without updating it.
     2081                 :                  */
     2082               0 :                 if (tp.t_data->t_infomask & (HEAP_XMAX_INVALID |
     2083                 :                                                                          HEAP_IS_LOCKED))
     2084               0 :                         result = HeapTupleMayBeUpdated;
     2085                 :                 else
     2086               0 :                         result = HeapTupleUpdated;
     2087                 :         }
     2088                 : 
     2089           21381 :         if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
     2090                 :         {
     2091                 :                 /* Perform additional check for serializable RI updates */
     2092               0 :                 if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
     2093               0 :                         result = HeapTupleUpdated;
     2094                 :         }
     2095                 : 
                          :         /*
                          :          * Failure exit: hand back the tuple's current t_ctid and xmax so
                          :          * the caller can locate and verify the replacement tuple (see the
                          :          * function header comment for the contract).
                          :          */
     2096           21381 :         if (result != HeapTupleMayBeUpdated)
     2097                 :         {
     2098                 :                 Assert(result == HeapTupleSelfUpdated ||
     2099                 :                            result == HeapTupleUpdated ||
     2100                 :                            result == HeapTupleBeingUpdated);
     2101                 :                 Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
     2102               0 :                 *ctid = tp.t_data->t_ctid;
     2103               0 :                 *update_xmax = HeapTupleHeaderGetXmax(tp.t_data);
     2104               0 :                 UnlockReleaseBuffer(buffer);
     2105               0 :                 if (have_tuple_lock)
     2106               0 :                         UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
     2107               0 :                 return result;
     2108                 :         }
     2109                 : 
     2110                 :         /* replace cid with a combo cid if necessary */
     2111           21381 :         HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
     2112                 : 
     2113           21381 :         START_CRIT_SECTION();
     2114                 : 
     2115                 :         /*
     2116                 :          * If this transaction commits, the tuple will become DEAD sooner or
     2117                 :          * later.  Set flag that this page is a candidate for pruning once our xid
     2118                 :          * falls below the OldestXmin horizon.  If the transaction finally aborts,
     2119                 :          * the subsequent page pruning will be a no-op and the hint will be
     2120                 :          * cleared.
     2121                 :          */
     2122           21381 :         PageSetPrunable(dp, xid);
     2123                 : 
     2124                 :         /* store transaction information of xact deleting the tuple */
     2125           21381 :         tp.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
     2126                 :                                                            HEAP_XMAX_INVALID |
     2127                 :                                                            HEAP_XMAX_IS_MULTI |
     2128                 :                                                            HEAP_IS_LOCKED |
     2129                 :                                                            HEAP_MOVED);
     2130           21381 :         HeapTupleHeaderClearHotUpdated(tp.t_data);
     2131           21381 :         HeapTupleHeaderSetXmax(tp.t_data, xid);
     2132           21381 :         HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
     2133                 :         /* Make sure there is no forward chain link in t_ctid */
     2134           21381 :         tp.t_data->t_ctid = tp.t_self;
     2135                 : 
     2136           21381 :         MarkBufferDirty(buffer);
     2137                 : 
     2138                 :         /* XLOG stuff */
                          :         /* (temp relations are never WAL-logged, hence the rd_istemp test) */
     2139           21381 :         if (!relation->rd_istemp)
     2140                 :         {
     2141                 :                 xl_heap_delete xlrec;
     2142                 :                 XLogRecPtr      recptr;
     2143                 :                 XLogRecData rdata[2];
     2144                 : 
     2145           21359 :                 xlrec.target.node = relation->rd_node;
     2146           21359 :                 xlrec.target.tid = tp.t_self;
     2147           21359 :                 rdata[0].data = (char *) &xlrec;
     2148           21359 :                 rdata[0].len = SizeOfHeapDelete;
     2149           21359 :                 rdata[0].buffer = InvalidBuffer;
     2150           21359 :                 rdata[0].next = &(rdata[1]);
     2151                 : 
     2152           21359 :                 rdata[1].data = NULL;
     2153           21359 :                 rdata[1].len = 0;
     2154           21359 :                 rdata[1].buffer = buffer;
     2155           21359 :                 rdata[1].buffer_std = true;
     2156           21359 :                 rdata[1].next = NULL;
     2157                 : 
     2158           21359 :                 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata);
     2159                 : 
     2160           21359 :                 PageSetLSN(dp, recptr);
     2161           21359 :                 PageSetTLI(dp, ThisTimeLineID);
     2162                 :         }
     2163                 : 
     2164           21381 :         END_CRIT_SECTION();
     2165                 : 
     2166           21381 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     2167                 : 
     2168                 :         /*
     2169                 :          * If the tuple has toasted out-of-line attributes, we need to delete
     2170                 :          * those items too.  We have to do this before releasing the buffer
     2171                 :          * because we need to look at the contents of the tuple, but it's OK to
     2172                 :          * release the content lock on the buffer first.
     2173                 :          */
     2174           21381 :         if (relation->rd_rel->relkind != RELKIND_RELATION)
     2175                 :         {
     2176                 :                 /* toast table entries should never be recursively toasted */
     2177                 :                 Assert(!HeapTupleHasExternal(&tp));
     2178                 :         }
     2179           21381 :         else if (HeapTupleHasExternal(&tp))
     2180               0 :                 toast_delete(relation, &tp);
     2181                 : 
     2182                 :         /*
     2183                 :          * Mark tuple for invalidation from system caches at next command
     2184                 :          * boundary. We have to do this before releasing the buffer because we
     2185                 :          * need to look at the contents of the tuple.
     2186                 :          */
     2187           21381 :         CacheInvalidateHeapTuple(relation, &tp);
     2188                 : 
     2189                 :         /* Now we can release the buffer */
     2190           21381 :         ReleaseBuffer(buffer);
     2191                 : 
     2192                 :         /*
     2193                 :          * Release the lmgr tuple lock, if we had it.
     2194                 :          */
     2195           21381 :         if (have_tuple_lock)
     2196               0 :                 UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
     2197                 : 
     2198           21381 :         pgstat_count_heap_delete(relation);
     2199                 : 
     2200           21381 :         return HeapTupleMayBeUpdated;
     2201                 : }
    2202                 : 
    2203                 : /*
    2204                 :  *      simple_heap_delete - delete a tuple
    2205                 :  *
    2206                 :  * This routine may be used to delete a tuple when concurrent updates of
    2207                 :  * the target tuple are not expected (for example, because we have a lock
    2208                 :  * on the relation associated with the tuple).  Any failure is reported
    2209                 :  * via ereport().
    2210                 :  */
    2211                 : void
    2212                 : simple_heap_delete(Relation relation, ItemPointer tid)
    2213           13271 : {
    2214                 :         HTSU_Result result;
    2215                 :         ItemPointerData update_ctid;
    2216                 :         TransactionId update_xmax;
    2217                 : 
    2218           13271 :         result = heap_delete(relation, tid,
    2219                 :                                                  &update_ctid, &update_xmax,
    2220                 :                                                  GetCurrentCommandId(true), InvalidSnapshot,
    2221                 :                                                  true /* wait for commit */ );
    2222           13271 :         switch (result)
    2223                 :         {
    2224                 :                 case HeapTupleSelfUpdated:
    2225                 :                         /* Tuple was already updated in current command? */
    2226               0 :                         elog(ERROR, "tuple already updated by self");
    2227               0 :                         break;
    2228                 : 
    2229                 :                 case HeapTupleMayBeUpdated:
    2230                 :                         /* done successfully */
    2231                 :                         break;
    2232                 : 
    2233                 :                 case HeapTupleUpdated:
    2234               0 :                         elog(ERROR, "tuple concurrently updated");
    2235               0 :                         break;
    2236                 : 
    2237                 :                 default:
    2238               0 :                         elog(ERROR, "unrecognized heap_delete status: %u", result);
    2239                 :                         break;
    2240                 :         }
    2241           13271 : }
    2242                 : 
    2243                 : /*
    2244                 :  *      heap_update - replace a tuple
    2245                 :  *
    2246                 :  * NB: do not call this directly unless you are prepared to deal with
    2247                 :  * concurrent-update conditions.  Use simple_heap_update instead.
    2248                 :  *
    2249                 :  *      relation - table to be modified (caller must hold suitable lock)
    2250                 :  *      otid - TID of old tuple to be replaced
    2251                 :  *      newtup - newly constructed tuple data to store
    2252                 :  *      ctid - output parameter, used only for failure case (see below)
    2253                 :  *      update_xmax - output parameter, used only for failure case (see below)
    2254                 :  *      cid - update command ID (used for visibility test, and stored into
    2255                 :  *              cmax/cmin if successful)
    2256                 :  *      crosscheck - if not InvalidSnapshot, also check old tuple against this
    2257                 :  *      wait - true if should wait for any conflicting update to commit/abort
    2258                 :  *
    2259                 :  * Normal, successful return value is HeapTupleMayBeUpdated, which
    2260                 :  * actually means we *did* update it.  Failure return codes are
    2261                 :  * HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated
    2262                 :  * (the last only possible if wait == false).
    2263                 :  *
    2264                 :  * On success, the header fields of *newtup are updated to match the new
    2265                 :  * stored tuple; in particular, newtup->t_self is set to the TID where the
    2266                 :  * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
    2267                 :  * update was done.  However, any TOAST changes in the new tuple's
    2268                 :  * data are not reflected into *newtup.
    2269                 :  *
    2270                 :  * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
    2271                 :  * If t_ctid is the same as otid, the tuple was deleted; if different, the
    2272                 :  * tuple was updated, and t_ctid is the location of the replacement tuple.
    2273                 :  * (t_xmax is needed to verify that the replacement tuple matches.)
    2274                 :  */
    2275                 : HTSU_Result
    2276                 : heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
    2277                 :                         ItemPointer ctid, TransactionId *update_xmax,
    2278                 :                         CommandId cid, Snapshot crosscheck, bool wait)
    2279            4588 : {
    2280                 :         HTSU_Result result;
    2281            4588 :         TransactionId xid = GetCurrentTransactionId();
    2282                 :         Bitmapset  *hot_attrs;
    2283                 :         ItemId          lp;
    2284                 :         HeapTupleData oldtup;
    2285                 :         HeapTuple       heaptup;
    2286                 :         PageHeader      dp;
    2287                 :         Buffer          buffer,
    2288                 :                                 newbuf;
    2289                 :         bool            need_toast,
    2290                 :                                 already_marked;
    2291                 :         Size            newtupsize,
    2292                 :                                 pagefree;
    2293            4588 :         bool            have_tuple_lock = false;
    2294                 :         bool            iscombo;
    2295            4588 :         bool            use_hot_update = false;
    2296                 : 
    2297                 :         Assert(ItemPointerIsValid(otid));
    2298                 : 
    2299                 :         /*
    2300                 :          * Fetch the list of attributes to be checked for HOT update.  This is
    2301                 :          * wasted effort if we fail to update or have to put the new tuple on a
    2302                 :          * different page.      But we must compute the list before obtaining buffer
    2303                 :          * lock --- in the worst case, if we are doing an update on one of the
    2304                 :          * relevant system catalogs, we could deadlock if we try to fetch the list
    2305                 :          * later.  In any case, the relcache caches the data so this is usually
    2306                 :          * pretty cheap.
    2307                 :          *
    2308                 :          * Note that we get a copy here, so we need not worry about relcache flush
    2309                 :          * happening midway through.
    2310                 :          */
    2311            4588 :         hot_attrs = RelationGetIndexAttrBitmap(relation);
    2312                 : 
    2313            4588 :         buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(otid));
    2314            4588 :         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
    2315                 : 
    2316            4588 :         dp = (PageHeader) BufferGetPage(buffer);
    2317            4588 :         lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(otid));
    2318                 :         Assert(ItemIdIsNormal(lp));
    2319                 : 
    2320            4588 :         oldtup.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
    2321            4588 :         oldtup.t_len = ItemIdGetLength(lp);
    2322            4588 :         oldtup.t_self = *otid;
    2323                 : 
    2324                 :         /*
    2325                 :          * Note: beyond this point, use oldtup not otid to refer to old tuple.
    2326                 :          * otid may very well point at newtup->t_self, which we will overwrite
    2327                 :          * with the new tuple's location, so there's great risk of confusion if we
    2328                 :          * use otid anymore.
    2329                 :          */
    2330                 : 
    2331            4588 : l2:
    2332            4588 :         result = HeapTupleSatisfiesUpdate(oldtup.t_data, cid, buffer);
    2333                 : 
    2334            4588 :         if (result == HeapTupleInvisible)
    2335                 :         {
    2336               0 :                 UnlockReleaseBuffer(buffer);
    2337               0 :                 elog(ERROR, "attempted to update invisible tuple");
    2338                 :         }
    2339            4588 :         else if (result == HeapTupleBeingUpdated && wait)
    2340                 :         {
    2341                 :                 TransactionId xwait;
    2342                 :                 uint16          infomask;
    2343                 : 
    2344                 :                 /* must copy state data before unlocking buffer */
    2345               0 :                 xwait = HeapTupleHeaderGetXmax(oldtup.t_data);
    2346               0 :                 infomask = oldtup.t_data->t_infomask;
    2347                 : 
    2348               0 :                 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    2349                 : 
    2350                 :                 /*
    2351                 :                  * Acquire tuple lock to establish our priority for the tuple (see
    2352                 :                  * heap_lock_tuple).  LockTuple will release us when we are
    2353                 :                  * next-in-line for the tuple.
    2354                 :                  *
    2355                 :                  * If we are forced to "start over" below, we keep the tuple lock;
    2356                 :                  * this arranges that we stay at the head of the line while rechecking
    2357                 :                  * tuple state.
    2358                 :                  */
    2359               0 :                 if (!have_tuple_lock)
    2360                 :                 {
    2361               0 :                         LockTuple(relation, &(oldtup.t_self), ExclusiveLock);
    2362               0 :                         have_tuple_lock = true;
    2363                 :                 }
    2364                 : 
    2365                 :                 /*
    2366                 :                  * Sleep until concurrent transaction ends.  Note that we don't care
    2367                 :                  * if the locker has an exclusive or shared lock, because we need
    2368                 :                  * exclusive.
    2369                 :                  */
    2370                 : 
    2371               0 :                 if (infomask & HEAP_XMAX_IS_MULTI)
    2372                 :                 {
    2373                 :                         /* wait for multixact */
    2374               0 :                         MultiXactIdWait((MultiXactId) xwait);
    2375               0 :                         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
    2376                 : 
    2377                 :                         /*
    2378                 :                          * If xwait had just locked the tuple then some other xact could
    2379                 :                          * update this tuple before we get to this point.  Check for xmax
    2380                 :                          * change, and start over if so.
    2381                 :                          */
    2382               0 :                         if (!(oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
    2383                 :                                 !TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
    2384                 :                                                                          xwait))
    2385                 :                                 goto l2;
    2386                 : 
    2387                 :                         /*
    2388                 :                          * You might think the multixact is necessarily done here, but not
    2389                 :                          * so: it could have surviving members, namely our own xact or
    2390                 :                          * other subxacts of this backend.      It is legal for us to update
    2391                 :                          * the tuple in either case, however (the latter case is
    2392                 :                          * essentially a situation of upgrading our former shared lock to
    2393                 :                          * exclusive).  We don't bother changing the on-disk hint bits
    2394                 :                          * since we are about to overwrite the xmax altogether.
    2395                 :                          */
    2396                 :                 }
    2397                 :                 else
    2398                 :                 {
    2399                 :                         /* wait for regular transaction to end */
    2400               0 :                         XactLockTableWait(xwait);
    2401               0 :                         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
    2402                 : 
    2403                 :                         /*
    2404                 :                          * xwait is done, but if xwait had just locked the tuple then some
    2405                 :                          * other xact could update this tuple before we get to this point.
    2406                 :                          * Check for xmax change, and start over if so.
    2407                 :                          */
    2408               0 :                         if ((oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
    2409                 :                                 !TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
    2410                 :                                                                          xwait))
    2411                 :                                 goto l2;
    2412                 : 
    2413                 :                         /* Otherwise check if it committed or aborted */
    2414               0 :                         UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
    2415                 :                 }
    2416                 : 
    2417                 :                 /*
    2418                 :                  * We may overwrite if previous xmax aborted, or if it committed but
    2419                 :                  * only locked the tuple without updating it.
    2420                 :                  */
    2421               0 :                 if (oldtup.t_data->t_infomask & (HEAP_XMAX_INVALID |
    2422                 :                                                                                  HEAP_IS_LOCKED))
    2423               0 :                         result = HeapTupleMayBeUpdated;
    2424                 :                 else
    2425               0 :                         result = HeapTupleUpdated;
    2426                 :         }
    2427                 : 
    2428            4588 :         if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
    2429                 :         {
    2430                 :                 /* Perform additional check for serializable RI updates */
    2431               0 :                 if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
    2432               0 :                         result = HeapTupleUpdated;
    2433                 :         }
    2434                 : 
    2435            4588 :         if (result != HeapTupleMayBeUpdated)
    2436                 :         {
    2437                 :                 Assert(result == HeapTupleSelfUpdated ||
    2438                 :                            result == HeapTupleUpdated ||
    2439                 :                            result == HeapTupleBeingUpdated);
    2440                 :                 Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
    2441               4 :                 *ctid = oldtup.t_data->t_ctid;
    2442               4 :                 *update_xmax = HeapTupleHeaderGetXmax(oldtup.t_data);
    2443               4 :                 UnlockReleaseBuffer(buffer);
    2444               4 :                 if (have_tuple_lock)
    2445               0 :                         UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
    2446               4 :                 bms_free(hot_attrs);
    2447               4 :                 return result;
    2448                 :         }
    2449                 : 
    2450                 :         /* Fill in OID and transaction status data for newtup */
    2451            4584 :         if (relation->rd_rel->relhasoids)
    2452                 :         {
    2453                 : #ifdef NOT_USED
    2454                 :                 /* this is redundant with an Assert in HeapTupleSetOid */
    2455                 :                 Assert(newtup->t_data->t_infomask & HEAP_HASOID);
    2456                 : #endif
    2457            1718 :                 HeapTupleSetOid(newtup, HeapTupleGetOid(&oldtup));
    2458                 :         }
    2459                 :         else
    2460                 :         {
    2461                 :                 /* check there is not space for an OID */
    2462                 :                 Assert(!(newtup->t_data->t_infomask & HEAP_HASOID));
    2463                 :         }
    2464                 : 
    2465            4584 :         newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
    2466            4584 :         newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
    2467            4584 :         newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
    2468            4584 :         HeapTupleHeaderSetXmin(newtup->t_data, xid);
    2469            4584 :         HeapTupleHeaderSetCmin(newtup->t_data, cid);
    2470            4584 :         HeapTupleHeaderSetXmax(newtup->t_data, 0);   /* for cleanliness */
    2471                 : 
    2472                 :         /*
    2473                 :          * Replace cid with a combo cid if necessary.  Note that we already put
    2474                 :          * the plain cid into the new tuple.
    2475                 :          */
    2476            4584 :         HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
    2477                 : 
    2478                 :         /*
    2479                 :          * If the toaster needs to be activated, OR if the new tuple will not fit
    2480                 :          * on the same page as the old, then we need to release the content lock
    2481                 :          * (but not the pin!) on the old tuple's buffer while we are off doing
    2482                 :          * TOAST and/or table-file-extension work.      We must mark the old tuple to
    2483                 :          * show that it's already being updated, else other processes may try to
    2484                 :          * update it themselves.
    2485                 :          *
    2486                 :          * We need to invoke the toaster if there are already any out-of-line
    2487                 :          * toasted values present, or if the new tuple is over-threshold.
    2488                 :          */
    2489            4584 :         if (relation->rd_rel->relkind != RELKIND_RELATION)
    2490                 :         {
    2491                 :                 /* toast table entries should never be recursively toasted */
    2492                 :                 Assert(!HeapTupleHasExternal(&oldtup));
    2493                 :                 Assert(!HeapTupleHasExternal(newtup));
    2494               0 :                 need_toast = false;
    2495                 :         }
    2496                 :         else
    2497            4584 :                 need_toast = (HeapTupleHasExternal(&oldtup) ||
    2498                 :                                           HeapTupleHasExternal(newtup) ||
    2499                 :                                           newtup->t_len > TOAST_TUPLE_THRESHOLD);
    2500                 : 
    2501            4584 :         pagefree = PageGetHeapFreeSpace((Page) dp);
    2502                 : 
    2503            4584 :         newtupsize = MAXALIGN(newtup->t_len);
    2504                 : 
    2505            4584 :         if (need_toast || newtupsize > pagefree)
    2506                 :         {
    2507                 :                 /* Clear obsolete visibility flags ... */
    2508            2204 :                 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
    2509                 :                                                                            HEAP_XMAX_INVALID |
    2510                 :                                                                            HEAP_XMAX_IS_MULTI |
    2511                 :                                                                            HEAP_IS_LOCKED |
    2512                 :                                                                            HEAP_MOVED);
    2513            2204 :                 HeapTupleClearHotUpdated(&oldtup);
    2514                 :                 /* ... and store info about transaction updating this tuple */
    2515            2204 :                 HeapTupleHeaderSetXmax(oldtup.t_data, xid);
    2516            2204 :                 HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
    2517                 :                 /* temporarily make it look not-updated */
    2518            2204 :                 oldtup.t_data->t_ctid = oldtup.t_self;
    2519            2204 :                 already_marked = true;
    2520            2204 :                 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    2521                 : 
    2522                 :                 /*
    2523                 :                  * Let the toaster do its thing, if needed.
    2524                 :                  *
    2525                 :                  * Note: below this point, heaptup is the data we actually intend to
    2526                 :                  * store into the relation; newtup is the caller's original untoasted
    2527                 :                  * data.
    2528                 :                  */
    2529            2204 :                 if (need_toast)
    2530                 :                 {
    2531                 :                         /* Note we always use WAL and FSM during updates */
    2532               5 :                         heaptup = toast_insert_or_update(relation, newtup, &oldtup,
    2533                 :                                                                                          true, true);
    2534               5 :                         newtupsize = MAXALIGN(heaptup->t_len);
    2535                 :                 }
    2536                 :                 else
    2537            2199 :                         heaptup = newtup;
    2538                 : 
    2539                 :                 /*
    2540                 :                  * Now, do we need a new page for the tuple, or not?  This is a bit
    2541                 :                  * tricky since someone else could have added tuples to the page while
    2542                 :                  * we weren't looking.  We have to recheck the available space after
    2543                 :                  * reacquiring the buffer lock.  But don't bother to do that if the
    2544                 :                  * former amount of free space is still not enough; it's unlikely
    2545                 :                  * there's more free now than before.
    2546                 :                  *
    2547                 :                  * What's more, if we need to get a new page, we will need to acquire
    2548                 :                  * buffer locks on both old and new pages.      To avoid deadlock against
    2549                 :                  * some other backend trying to get the same two locks in the other
    2550                 :                  * order, we must be consistent about the order we get the locks in.
    2551                 :                  * We use the rule "lock the lower-numbered page of the relation
    2552                 :                  * first".  To implement this, we must do RelationGetBufferForTuple
    2553                 :                  * while not holding the lock on the old page, and we must rely on it
    2554                 :                  * to get the locks on both pages in the correct order.
    2555                 :                  */
    2556            2204 :                 if (newtupsize > pagefree)
    2557                 :                 {
    2558                 :                         /* Assume there's no chance to put heaptup on same page. */
    2559            2199 :                         newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
    2560                 :                                                                                            buffer, true);
    2561                 :                 }
    2562                 :                 else
    2563                 :                 {
    2564                 :                         /* Re-acquire the lock on the old tuple's page. */
    2565               5 :                         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
    2566                 :                         /* Re-check using the up-to-date free space */
    2567               5 :                         pagefree = PageGetHeapFreeSpace((Page) dp);
    2568               5 :                         if (newtupsize > pagefree)
    2569                 :                         {
    2570                 :                                 /*
    2571                 :                                  * Rats, it doesn't fit anymore.  We must now unlock and
    2572                 :                                  * relock to avoid deadlock.  Fortunately, this path should
    2573                 :                                  * seldom be taken.
    2574                 :                                  */
    2575               0 :                                 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    2576               0 :                                 newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
    2577                 :                                                                                                    buffer, true);
    2578                 :                         }
    2579                 :                         else
    2580                 :                         {
    2581                 :                                 /* OK, it fits here, so we're done. */
    2582               5 :                                 newbuf = buffer;
    2583                 :                         }
    2584                 :                 }
    2585                 :         }
    2586                 :         else
    2587                 :         {
    2588                 :                 /* No TOAST work needed, and it'll fit on same page */
    2589            2380 :                 already_marked = false;
    2590            2380 :                 newbuf = buffer;
    2591            2380 :                 heaptup = newtup;
    2592                 :         }
    2593                 : 
    2594                 :         /*
    2595                 :          * At this point newbuf and buffer are both pinned and locked, and newbuf
    2596                 :          * has enough space for the new tuple.  If they are the same buffer, only
    2597                 :          * one pin is held.
    2598                 :          */
    2599                 : 
    2600            4584 :         if (newbuf == buffer)
    2601                 :         {
    2602                 :                 /*
    2603                 :                  * Since the new tuple is going into the same page, we might be able
    2604                 :                  * to do a HOT update.  Check if any of the index columns have been
    2605                 :                  * changed.  If not, then HOT update is possible.
    2606                 :                  */
    2607            2385 :                 if (HeapSatisfiesHOTUpdate(relation, hot_attrs, &oldtup, heaptup))
    2608            2196 :                         use_hot_update = true;
    2609                 :         }
    2610                 :         else
    2611                 :         {
    2612                 :                 /* Set a hint that the old page could use prune/defrag */
    2613            2199 :                 PageSetFull(dp);
    2614                 :         }
    2615                 : 
    2616                 :         /* NO EREPORT(ERROR) from here till changes are logged */
    2617            4584 :         START_CRIT_SECTION();
    2618                 : 
    2619                 :         /*
    2620                 :          * If this transaction commits, the old tuple will become DEAD sooner or
    2621                 :          * later.  Set flag that this page is a candidate for pruning once our xid
    2622                 :          * falls below the OldestXmin horizon.  If the transaction finally aborts,
    2623                 :          * the subsequent page pruning will be a no-op and the hint will be
    2624                 :          * cleared.
    2625                 :          *
    2626                 :          * XXX Should we set hint on newbuf as well?  If the transaction aborts,
    2627                 :          * there would be a prunable tuple in the newbuf; but for now we choose
    2628                 :          * not to optimize for aborts.  Note that heap_xlog_update must be kept in
    2629                 :          * sync if this decision changes.
    2630                 :          */
    2631            4584 :         PageSetPrunable(dp, xid);
    2632                 : 
    2633            4584 :         if (use_hot_update)
    2634                 :         {
    2635                 :                 /* Mark the old tuple as HOT-updated */
    2636            2196 :                 HeapTupleSetHotUpdated(&oldtup);
    2637                 :                 /* And mark the new tuple as heap-only */
    2638            2196 :                 HeapTupleSetHeapOnly(heaptup);
    2639                 :                 /* Mark the caller's copy too, in case different from heaptup */
    2640            2196 :                 HeapTupleSetHeapOnly(newtup);
    2641                 :         }
    2642                 :         else
    2643                 :         {
    2644                 :                 /* Make sure tuples are correctly marked as not-HOT */
    2645            2388 :                 HeapTupleClearHotUpdated(&oldtup);
    2646            2388 :                 HeapTupleClearHeapOnly(heaptup);
    2647            2388 :                 HeapTupleClearHeapOnly(newtup);
    2648                 :         }
    2649                 : 
    2650            4584 :         RelationPutHeapTuple(relation, newbuf, heaptup);        /* insert new tuple */
    2651                 : 
    2652            4584 :         if (!already_marked)
    2653                 :         {
    2654                 :                 /* Clear obsolete visibility flags ... */
    2655            2380 :                 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
    2656                 :                                                                            HEAP_XMAX_INVALID |
    2657                 :                                                                            HEAP_XMAX_IS_MULTI |
    2658                 :                                                                            HEAP_IS_LOCKED |
    2659                 :                                                                            HEAP_MOVED);
    2660                 :                 /* ... and store info about transaction updating this tuple */
    2661            2380 :                 HeapTupleHeaderSetXmax(oldtup.t_data, xid);
    2662            2380 :                 HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
    2663                 :         }
    2664                 : 
    2665                 :         /* record address of new tuple in t_ctid of old one */
    2666            4584 :         oldtup.t_data->t_ctid = heaptup->t_self;
    2667                 : 
    2668            4584 :         if (newbuf != buffer)
    2669            2199 :                 MarkBufferDirty(newbuf);
    2670            4584 :         MarkBufferDirty(buffer);
    2671                 : 
    2672                 :         /* XLOG stuff */
    2673            4584 :         if (!relation->rd_istemp)
    2674                 :         {
    2675                 :                 XLogRecPtr      recptr = log_heap_update(relation, buffer, oldtup.t_self,
    2676            4521 :                                                                                          newbuf, heaptup, false);
    2677                 : 
    2678            4521 :                 if (newbuf != buffer)
    2679                 :                 {
    2680            2199 :                         PageSetLSN(BufferGetPage(newbuf), recptr);
    2681            2199 :                         PageSetTLI(BufferGetPage(newbuf), ThisTimeLineID);
    2682                 :                 }
    2683            4521 :                 PageSetLSN(BufferGetPage(buffer), recptr);
    2684            4521 :                 PageSetTLI(BufferGetPage(buffer), ThisTimeLineID);
    2685                 :         }
    2686                 : 
    2687            4584 :         END_CRIT_SECTION();
    2688                 : 
    2689            4584 :         if (newbuf != buffer)
    2690            2199 :                 LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
    2691            4584 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    2692                 : 
    2693                 :         /*
    2694                 :          * Mark old tuple for invalidation from system caches at next command
    2695                 :          * boundary. We have to do this before releasing the buffer because we
    2696                 :          * need to look at the contents of the tuple.
    2697                 :          */
    2698            4584 :         CacheInvalidateHeapTuple(relation, &oldtup);
    2699                 : 
    2700                 :         /* Now we can release the buffer(s) */
    2701            4584 :         if (newbuf != buffer)
    2702            2199 :                 ReleaseBuffer(newbuf);
    2703            4584 :         ReleaseBuffer(buffer);
    2704                 : 
    2705                 :         /*
    2706                 :          * If new tuple is cachable, mark it for invalidation from the caches in
    2707                 :          * case we abort.  Note it is OK to do this after releasing the buffer,
    2708                 :          * because the heaptup data structure is all in local memory, not in the
    2709                 :          * shared buffer.
    2710                 :          */
    2711            4584 :         CacheInvalidateHeapTuple(relation, heaptup);
    2712                 : 
    2713                 :         /*
    2714                 :          * Release the lmgr tuple lock, if we had it.
    2715                 :          */
    2716            4584 :         if (have_tuple_lock)
    2717               0 :                 UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
    2718                 : 
    2719            4584 :         pgstat_count_heap_update(relation, use_hot_update);
    2720                 : 
    2721                 :         /*
    2722                 :          * If heaptup is a private copy, release it.  Don't forget to copy t_self
    2723                 :          * back to the caller's image, too.
    2724                 :          */
    2725            4584 :         if (heaptup != newtup)
    2726                 :         {
    2727               5 :                 newtup->t_self = heaptup->t_self;
    2728               5 :                 heap_freetuple(heaptup);
    2729                 :         }
    2730                 : 
    2731            4584 :         bms_free(hot_attrs);
    2732                 : 
    2733            4584 :         return HeapTupleMayBeUpdated;
    2734                 : }
    2735                 : 
    2736                 : /*
    2737                 :  * Check if the specified attribute's value is same in both given tuples.
    2738                 :  * Subroutine for HeapSatisfiesHOTUpdate.
    2739                 :  */
    2740                 : static bool
    2741                 : heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
    2742                 :                                            HeapTuple tup1, HeapTuple tup2)
    2743            5525 : {
    2744                 :         Datum           value1,
    2745                 :                                 value2;
    2746                 :         bool            isnull1,
    2747                 :                                 isnull2;
    2748                 :         Form_pg_attribute att;
    2749                 : 
    2750                 :         /*
    2751                 :          * If it's a whole-tuple reference, say "not equal".  It's not really
    2752                 :          * worth supporting this case, since it could only succeed after a no-op
    2753                 :          * update, which is hardly a case worth optimizing for.
    2754                 :          */
    2755            5525 :         if (attrnum == 0)
    2756               0 :                 return false;
    2757                 : 
    2758                 :         /*
    2759                 :          * Likewise, automatically say "not equal" for any system attribute other
    2760                 :          * than OID and tableOID; we cannot expect these to be consistent in a HOT
    2761                 :          * chain, or even to be set correctly yet in the new tuple.
    2762                 :          */
    2763            5525 :         if (attrnum < 0)
    2764                 :         {
    2765            1502 :                 if (attrnum != ObjectIdAttributeNumber &&
    2766                 :                         attrnum != TableOidAttributeNumber)
    2767               0 :                         return false;
    2768                 :         }
    2769                 : 
    2770                 :         /*
    2771                 :          * Extract the corresponding values.  XXX this is pretty inefficient if
    2772                 :          * there are many indexed columns.      Should HeapSatisfiesHOTUpdate do a
    2773                 :          * single heap_deform_tuple call on each tuple, instead?  But that doesn't
    2774                 :          * work for system columns ...
    2775                 :          */
    2776            5525 :         value1 = heap_getattr(tup1, attrnum, tupdesc, &isnull1);
    2777            5525 :         value2 = heap_getattr(tup2, attrnum, tupdesc, &isnull2);
    2778                 : 
    2779                 :         /*
    2780                 :          * If one value is NULL and other is not, then they are certainly not
    2781                 :          * equal
    2782                 :          */
    2783            5525 :         if (isnull1 != isnull2)
    2784               0 :                 return false;
    2785                 : 
    2786                 :         /*
    2787                 :          * If both are NULL, they can be considered equal.
    2788                 :          */
    2789            5525 :         if (isnull1)
    2790               0 :                 return true;
    2791                 : 
    2792                 :         /*
    2793                 :          * We do simple binary comparison of the two datums.  This may be overly
    2794                 :          * strict because there can be multiple binary representations for the
    2795                 :          * same logical value.  But we should be OK as long as there are no false
    2796                 :          * positives.  Using a type-specific equality operator is messy because
    2797                 :          * there could be multiple notions of equality in different operator
    2798                 :          * classes; furthermore, we cannot safely invoke user-defined functions
    2799                 :          * while holding exclusive buffer lock.
    2800                 :          */
    2801            5525 :         if (attrnum <= 0)
    2802                 :         {
    2803                 :                 /* The only allowed system columns are OIDs, so do this */
    2804            1502 :                 return (DatumGetObjectId(value1) == DatumGetObjectId(value2));
    2805                 :         }
    2806                 :         else
    2807                 :         {
    2808                 :                 Assert(attrnum <= tupdesc->natts);
    2809            4023 :                 att = tupdesc->attrs[attrnum - 1];
    2810            4023 :                 return datumIsEqual(value1, value2, att->attbyval, att->attlen);
    2811                 :         }
    2812                 : }
    2813                 : 
    2814                 : /*
    2815                 :  * Check if the old and new tuples represent a HOT-safe update. To be able
    2816                 :  * to do a HOT update, we must not have changed any columns used in index
    2817                 :  * definitions.
    2818                 :  *
    2819                 :  * The set of attributes to be checked is passed in (we dare not try to
    2820                 :  * compute it while holding exclusive buffer lock...)  NOTE that hot_attrs
    2821                 :  * is destructively modified!  That is OK since this is invoked at most once
    2822                 :  * by heap_update().
    2823                 :  *
    2824                 :  * Returns true if safe to do HOT update.
    2825                 :  */
    2826                 : static bool
    2827                 : HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs,
    2828                 :                                            HeapTuple oldtup, HeapTuple newtup)
    2829            2385 : {
    2830                 :         int                     attrnum;
    2831                 : 
    2832           10106 :         while ((attrnum = bms_first_member(hot_attrs)) >= 0)
    2833                 :         {
    2834                 :                 /* Adjust for system attributes */
    2835            5525 :                 attrnum += FirstLowInvalidHeapAttributeNumber;
    2836                 : 
    2837                 :                 /* If the attribute value has changed, we can't do HOT update */
    2838            5525 :                 if (!heap_tuple_attr_equals(RelationGetDescr(relation), attrnum,
    2839                 :                                                                         oldtup, newtup))
    2840             189 :                         return false;
    2841                 :         }
    2842                 : 
    2843            2196 :         return true;
    2844                 : }
    2845                 : 
    2846                 : /*
    2847                 :  *      simple_heap_update - replace a tuple
    2848                 :  *
    2849                 :  * This routine may be used to update a tuple when concurrent updates of
    2850                 :  * the target tuple are not expected (for example, because we have a lock
    2851                 :  * on the relation associated with the tuple).  Any failure is reported
    2852                 :  * via ereport().
    2853                 :  */
    2854                 : void
    2855                 : simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
    2856            1848 : {
    2857                 :         HTSU_Result result;
    2858                 :         ItemPointerData update_ctid;
    2859                 :         TransactionId update_xmax;
    2860                 : 
    2861            1848 :         result = heap_update(relation, otid, tup,
    2862                 :                                                  &update_ctid, &update_xmax,
    2863                 :                                                  GetCurrentCommandId(true), InvalidSnapshot,
    2864                 :                                                  true /* wait for commit */ );
    2865            1848 :         switch (result)
    2866                 :         {
    2867                 :                 case HeapTupleSelfUpdated:
    2868                 :                         /* Tuple was already updated in current command? */
    2869               0 :                         elog(ERROR, "tuple already updated by self");
    2870               0 :                         break;
    2871                 : 
    2872                 :                 case HeapTupleMayBeUpdated:
    2873                 :                         /* done successfully */
    2874                 :                         break;
    2875                 : 
    2876                 :                 case HeapTupleUpdated:
    2877               0 :                         elog(ERROR, "tuple concurrently updated");
    2878               0 :                         break;
    2879                 : 
    2880                 :                 default:
    2881               0 :                         elog(ERROR, "unrecognized heap_update status: %u", result);
    2882                 :                         break;
    2883                 :         }
    2884            1848 : }
    2885                 : 
    2886                 : /*
    2887                 :  *      heap_lock_tuple - lock a tuple in shared or exclusive mode
    2888                 :  *
    2889                 :  * Note that this acquires a buffer pin, which the caller must release.
    2890                 :  *
    2891                 :  * Input parameters:
    2892                 :  *      relation: relation containing tuple (caller must hold suitable lock)
    2893                 :  *      tuple->t_self: TID of tuple to lock (rest of struct need not be valid)
    2894                 :  *      cid: current command ID (used for visibility test, and stored into
    2895                 :  *              tuple's cmax if lock is successful)
    2896                 :  *      mode: indicates if shared or exclusive tuple lock is desired
    2897                 :  *      nowait: if true, ereport rather than blocking if lock not available
    2898                 :  *
    2899                 :  * Output parameters:
    2900                 :  *      *tuple: all fields filled in
    2901                 :  *      *buffer: set to buffer holding tuple (pinned but not locked at exit)
    2902                 :  *      *ctid: set to tuple's t_ctid, but only in failure cases
    2903                 :  *      *update_xmax: set to tuple's xmax, but only in failure cases
    2904                 :  *
    2905                 :  * Function result may be:
    2906                 :  *      HeapTupleMayBeUpdated: lock was successfully acquired
    2907                 :  *      HeapTupleSelfUpdated: lock failed because tuple updated by self
    2908                 :  *      HeapTupleUpdated: lock failed because tuple updated by other xact
    2909                 :  *
    2910                 :  * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
    2911                 :  * If t_ctid is the same as t_self, the tuple was deleted; if different, the
    2912                 :  * tuple was updated, and t_ctid is the location of the replacement tuple.
    2913                 :  * (t_xmax is needed to verify that the replacement tuple matches.)
    2914                 :  *
    2915                 :  *
    2916                 :  * NOTES: because the shared-memory lock table is of finite size, but users
    2917                 :  * could reasonably want to lock large numbers of tuples, we do not rely on
    2918                 :  * the standard lock manager to store tuple-level locks over the long term.
    2919                 :  * Instead, a tuple is marked as locked by setting the current transaction's
    2920                 :  * XID as its XMAX, and setting additional infomask bits to distinguish this
    2921                 :  * usage from the more normal case of having deleted the tuple.  When
    2922                 :  * multiple transactions concurrently share-lock a tuple, the first locker's
    2923                 :  * XID is replaced in XMAX with a MultiTransactionId representing the set of
    2924                 :  * XIDs currently holding share-locks.
    2925                 :  *
    2926                 :  * When it is necessary to wait for a tuple-level lock to be released, the
    2927                 :  * basic delay is provided by XactLockTableWait or MultiXactIdWait on the
    2928                 :  * contents of the tuple's XMAX.  However, that mechanism will release all
    2929                 :  * waiters concurrently, so there would be a race condition as to which
    2930                 :  * waiter gets the tuple, potentially leading to indefinite starvation of
    2931                 :  * some waiters.  The possibility of share-locking makes the problem much
    2932                 :  * worse --- a steady stream of share-lockers can easily block an exclusive
    2933                 :  * locker forever.      To provide more reliable semantics about who gets a
    2934                 :  * tuple-level lock first, we use the standard lock manager.  The protocol
    2935                 :  * for waiting for a tuple-level lock is really
    2936                 :  *              LockTuple()
    2937                 :  *              XactLockTableWait()
    2938                 :  *              mark tuple as locked by me
    2939                 :  *              UnlockTuple()
    2940                 :  * When there are multiple waiters, arbitration of who is to get the lock next
    2941                 :  * is provided by LockTuple().  However, at most one tuple-level lock will
    2942                 :  * be held or awaited per backend at any time, so we don't risk overflow
    2943                 :  * of the lock table.  Note that incoming share-lockers are required to
    2944                 :  * do LockTuple as well, if there is any conflict, to ensure that they don't
    2945                 :  * starve out waiting exclusive-lockers.  However, if there is not any active
    2946                 :  * conflict for a tuple, we don't incur any extra overhead.
    2947                 :  */
     2948                 : HTSU_Result
     2949                 : heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer,
     2950                 :                                 ItemPointer ctid, TransactionId *update_xmax,
     2951                 :                                 CommandId cid, LockTupleMode mode, bool nowait)
     2952             263 : {
     2953                 :         HTSU_Result result;
     2954             263 :         ItemPointer tid = &(tuple->t_self);
     2955                 :         ItemId          lp;
     2956                 :         PageHeader      dp;
     2957                 :         TransactionId xid;
     2958                 :         TransactionId xmax;
     2959                 :         uint16          old_infomask;
     2960                 :         uint16          new_infomask;
     2961                 :         LOCKMODE        tuple_lock_type;
     2962             263 :         bool            have_tuple_lock = false;
     2963                 : 
     2964             263 :         tuple_lock_type = (mode == LockTupleShared) ? ShareLock : ExclusiveLock;       /* lmgr mode for the heavyweight tuple lock */
     2965                 : 
     2966             263 :         *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
     2967             263 :         LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);    /* pin and exclusive-lock the tuple's page */
     2968                 : 
     2969             263 :         dp = (PageHeader) BufferGetPage(*buffer);
     2970             263 :         lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid));
     2971                 :         Assert(ItemIdIsNormal(lp));
     2972                 : 
     2973             263 :         tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
     2974             263 :         tuple->t_len = ItemIdGetLength(lp);
     2975             263 :         tuple->t_tableOid = RelationGetRelid(relation);
     2976                 : 
     2977             263 : l3:                                             /* restart point: re-examine tuple state after any wait */
     2978             263 :         result = HeapTupleSatisfiesUpdate(tuple->t_data, cid, *buffer);
     2979                 : 
     2980             263 :         if (result == HeapTupleInvisible)
     2981                 :         {
     2982               0 :                 UnlockReleaseBuffer(*buffer);
     2983               0 :                 elog(ERROR, "attempted to lock invisible tuple");
     2984                 :         }
     2985             263 :         else if (result == HeapTupleBeingUpdated)
     2986                 :         {
     2987                 :                 TransactionId xwait;
     2988                 :                 uint16          infomask;
     2989                 : 
     2990                 :                 /* must copy state data before unlocking buffer */
     2991               0 :                 xwait = HeapTupleHeaderGetXmax(tuple->t_data);
     2992               0 :                 infomask = tuple->t_data->t_infomask;
     2993                 : 
     2994               0 :                 LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);       /* drop the buffer lock (keep the pin) before we might block */
     2995                 : 
     2996                 :                 /*
     2997                 :                  * If we wish to acquire share lock, and the tuple is already
     2998                 :                  * share-locked by a multixact that includes any subtransaction of the
     2999                 :                  * current top transaction, then we effectively hold the desired lock
     3000                 :                  * already.  We *must* succeed without trying to take the tuple lock,
     3001                 :                  * else we will deadlock against anyone waiting to acquire exclusive
     3002                 :                  * lock.  We don't need to make any state changes in this case.
     3003                 :                  */
     3004               0 :                 if (mode == LockTupleShared &&
     3005                 :                         (infomask & HEAP_XMAX_IS_MULTI) &&
     3006                 :                         MultiXactIdIsCurrent((MultiXactId) xwait))
     3007                 :                 {
     3008                 :                         Assert(infomask & HEAP_XMAX_SHARED_LOCK);
     3009                 :                         /* Probably can't hold tuple lock here, but may as well check */
     3010               0 :                         if (have_tuple_lock)
     3011               0 :                                 UnlockTuple(relation, tid, tuple_lock_type);
     3012               0 :                         return HeapTupleMayBeUpdated;
     3013                 :                 }
     3014                 : 
     3015                 :                 /*
     3016                 :                  * Acquire tuple lock to establish our priority for the tuple.
     3017                 :                  * LockTuple will release us when we are next-in-line for the tuple.
     3018                 :                  * We must do this even if we are share-locking.
     3019                 :                  *
     3020                 :                  * If we are forced to "start over" below, we keep the tuple lock;
     3021                 :                  * this arranges that we stay at the head of the line while rechecking
     3022                 :                  * tuple state.
     3023                 :                  */
     3024               0 :                 if (!have_tuple_lock)
     3025                 :                 {
     3026               0 :                         if (nowait)
     3027                 :                         {
     3028               0 :                                 if (!ConditionalLockTuple(relation, tid, tuple_lock_type))
     3029               0 :                                         ereport(ERROR,
     3030                 :                                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     3031                 :                                         errmsg("could not obtain lock on row in relation \"%s\"",
     3032                 :                                                    RelationGetRelationName(relation))));
     3033                 :                         }
     3034                 :                         else
     3035               0 :                                 LockTuple(relation, tid, tuple_lock_type);
     3036               0 :                         have_tuple_lock = true;
     3037                 :                 }
     3038                 : 
     3039               0 :                 if (mode == LockTupleShared && (infomask & HEAP_XMAX_SHARED_LOCK))
     3040                 :                 {
     3041                 :                         /*
     3042                 :                          * Acquiring sharelock when there's at least one sharelocker
     3043                 :                          * already.  We need not wait for him/them to complete.
     3044                 :                          */
     3045               0 :                         LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
     3046                 : 
     3047                 :                         /*
     3048                 :                          * Make sure it's still a shared lock, else start over.  (It's OK
     3049                 :                          * if the ownership of the shared lock has changed, though.)
     3050                 :                          */
     3051               0 :                         if (!(tuple->t_data->t_infomask & HEAP_XMAX_SHARED_LOCK))
     3052               0 :                                 goto l3;
     3053                 :                 }
     3054               0 :                 else if (infomask & HEAP_XMAX_IS_MULTI)
     3055                 :                 {
     3056                 :                         /* wait for multixact to end */
     3057               0 :                         if (nowait)
     3058                 :                         {
     3059               0 :                                 if (!ConditionalMultiXactIdWait((MultiXactId) xwait))
     3060               0 :                                         ereport(ERROR,
     3061                 :                                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     3062                 :                                         errmsg("could not obtain lock on row in relation \"%s\"",
     3063                 :                                                    RelationGetRelationName(relation))));
     3064                 :                         }
     3065                 :                         else
     3066               0 :                                 MultiXactIdWait((MultiXactId) xwait);
     3067                 : 
     3068               0 :                         LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
     3069                 : 
     3070                 :                         /*
     3071                 :                          * If xwait had just locked the tuple then some other xact could
     3072                 :                          * update this tuple before we get to this point. Check for xmax
     3073                 :                          * change, and start over if so.
     3074                 :                          */
     3075               0 :                         if (!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
     3076                 :                                 !TransactionIdEquals(HeapTupleHeaderGetXmax(tuple->t_data),
     3077                 :                                                                          xwait))
     3078                 :                                 goto l3;
     3079                 : 
     3080                 :                         /*
     3081                 :                          * You might think the multixact is necessarily done here, but not
     3082                 :                          * so: it could have surviving members, namely our own xact or
     3083                 :                          * other subxacts of this backend.      It is legal for us to lock the
     3084                 :                          * tuple in either case, however.  We don't bother changing the
     3085                 :                          * on-disk hint bits since we are about to overwrite the xmax
     3086                 :                          * altogether.
     3087                 :                          */
     3088                 :                 }
     3089                 :                 else
     3090                 :                 {
     3091                 :                         /* wait for regular transaction to end */
     3092               0 :                         if (nowait)
     3093                 :                         {
     3094               0 :                                 if (!ConditionalXactLockTableWait(xwait))
     3095               0 :                                         ereport(ERROR,
     3096                 :                                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     3097                 :                                         errmsg("could not obtain lock on row in relation \"%s\"",
     3098                 :                                                    RelationGetRelationName(relation))));
     3099                 :                         }
     3100                 :                         else
     3101               0 :                                 XactLockTableWait(xwait);
     3102                 : 
     3103               0 :                         LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
     3104                 : 
     3105                 :                         /*
     3106                 :                          * xwait is done, but if xwait had just locked the tuple then some
     3107                 :                          * other xact could update this tuple before we get to this point.
     3108                 :                          * Check for xmax change, and start over if so.
     3109                 :                          */
     3110               0 :                         if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
     3111                 :                                 !TransactionIdEquals(HeapTupleHeaderGetXmax(tuple->t_data),
     3112                 :                                                                          xwait))
     3113                 :                                 goto l3;
     3114                 : 
     3115                 :                         /* Otherwise check if it committed or aborted */
     3116               0 :                         UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
     3117                 :                 }
     3118                 : 
     3119                 :                 /*
     3120                 :                  * We may lock if previous xmax aborted, or if it committed but only
     3121                 :                  * locked the tuple without updating it.  The case where we didn't
     3122                 :                  * wait because we are joining an existing shared lock is correctly
     3123                 :                  * handled, too.
     3124                 :                  */
     3125               0 :                 if (tuple->t_data->t_infomask & (HEAP_XMAX_INVALID |
     3126                 :                                                                                  HEAP_IS_LOCKED))
     3127               0 :                         result = HeapTupleMayBeUpdated;
     3128                 :                 else
     3129               0 :                         result = HeapTupleUpdated;
     3130                 :         }
     3131                 : 
     3132             263 :         if (result != HeapTupleMayBeUpdated)
     3133                 :         {
     3134                 :                 Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
     3135                 :                 Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
     3136               0 :                 *ctid = tuple->t_data->t_ctid; /* failure: report ctid/xmax so caller can follow the update chain */
     3137               0 :                 *update_xmax = HeapTupleHeaderGetXmax(tuple->t_data);
     3138               0 :                 LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
     3139               0 :                 if (have_tuple_lock)
     3140               0 :                         UnlockTuple(relation, tid, tuple_lock_type);
     3141               0 :                 return result;
     3142                 :         }
     3143                 : 
     3144                 :         /*
     3145                 :          * We might already hold the desired lock (or stronger), possibly under a
     3146                 :          * different subtransaction of the current top transaction.  If so, there
     3147                 :          * is no need to change state or issue a WAL record.  We already handled
     3148                 :          * the case where this is true for xmax being a MultiXactId, so now check
     3149                 :          * for cases where it is a plain TransactionId.
     3150                 :          *
     3151                 :          * Note in particular that this covers the case where we already hold
     3152                 :          * exclusive lock on the tuple and the caller only wants shared lock. It
     3153                 :          * would certainly not do to give up the exclusive lock.
     3154                 :          */
     3155             263 :         xmax = HeapTupleHeaderGetXmax(tuple->t_data);
     3156             263 :         old_infomask = tuple->t_data->t_infomask;
     3157                 : 
     3158             263 :         if (!(old_infomask & (HEAP_XMAX_INVALID |
     3159                 :                                                   HEAP_XMAX_COMMITTED |
     3160                 :                                                   HEAP_XMAX_IS_MULTI)) &&
     3161                 :                 (mode == LockTupleShared ?
     3162                 :                  (old_infomask & HEAP_IS_LOCKED) :
     3163                 :                  (old_infomask & HEAP_XMAX_EXCL_LOCK)) &&
     3164                 :                 TransactionIdIsCurrentTransactionId(xmax))
     3165                 :         {
     3166               5 :                 LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
     3167                 :                 /* Probably can't hold tuple lock here, but may as well check */
     3168               5 :                 if (have_tuple_lock)
     3169               0 :                         UnlockTuple(relation, tid, tuple_lock_type);
     3170               5 :                 return HeapTupleMayBeUpdated;
     3171                 :         }
     3172                 : 
     3173                 :         /*
     3174                 :          * Compute the new xmax and infomask to store into the tuple.  Note we do
     3175                 :          * not modify the tuple just yet, because that would leave it in the wrong
     3176                 :          * state if multixact.c elogs.
     3177                 :          */
     3178             258 :         xid = GetCurrentTransactionId();
     3179                 : 
     3180             258 :         new_infomask = old_infomask & ~(HEAP_XMAX_COMMITTED |
     3181                 :                                                                         HEAP_XMAX_INVALID |
     3182                 :                                                                         HEAP_XMAX_IS_MULTI |
     3183                 :                                                                         HEAP_IS_LOCKED |
     3184                 :                                                                         HEAP_MOVED);
     3185                 : 
     3186             258 :         if (mode == LockTupleShared)
     3187                 :         {
     3188                 :                 /*
     3189                 :                  * If this is the first acquisition of a shared lock in the current
     3190                 :                  * transaction, set my per-backend OldestMemberMXactId setting. We can
     3191                 :                  * be certain that the transaction will never become a member of any
     3192                 :                  * older MultiXactIds than that.  (We have to do this even if we end
     3193                 :                  * up just using our own TransactionId below, since some other backend
     3194                 :                  * could incorporate our XID into a MultiXact immediately afterwards.)
     3195                 :                  */
     3196             111 :                 MultiXactIdSetOldestMember();
     3197                 : 
     3198             111 :                 new_infomask |= HEAP_XMAX_SHARED_LOCK;
     3199                 : 
     3200                 :                 /*
     3201                 :                  * Check to see if we need a MultiXactId because there are multiple
     3202                 :                  * lockers.
     3203                 :                  *
     3204                 :                  * HeapTupleSatisfiesUpdate will have set the HEAP_XMAX_INVALID bit if
     3205                 :                  * the xmax was a MultiXactId but it was not running anymore. There is
     3206                 :                  * a race condition, which is that the MultiXactId may have finished
     3207                 :                  * since then, but that uncommon case is handled within
     3208                 :                  * MultiXactIdExpand.
     3209                 :                  *
     3210                 :                  * There is a similar race condition possible when the old xmax was a
     3211                 :                  * regular TransactionId.  We test TransactionIdIsInProgress again
     3212                 :                  * just to narrow the window, but it's still possible to end up
     3213                 :                  * creating an unnecessary MultiXactId.  Fortunately this is harmless.
     3214                 :                  */
     3215             111 :                 if (!(old_infomask & (HEAP_XMAX_INVALID | HEAP_XMAX_COMMITTED)))
     3216                 :                 {
     3217               0 :                         if (old_infomask & HEAP_XMAX_IS_MULTI)
     3218                 :                         {
     3219                 :                                 /*
     3220                 :                                  * If the XMAX is already a MultiXactId, then we need to
     3221                 :                                  * expand it to include our own TransactionId.
     3222                 :                                  */
     3223               0 :                                 xid = MultiXactIdExpand((MultiXactId) xmax, xid);
     3224               0 :                                 new_infomask |= HEAP_XMAX_IS_MULTI;
     3225                 :                         }
     3226               0 :                         else if (TransactionIdIsInProgress(xmax))
     3227                 :                         {
     3228                 :                                 /*
     3229                 :                                  * If the XMAX is a valid TransactionId, then we need to
     3230                 :                                  * create a new MultiXactId that includes both the old locker
     3231                 :                                  * and our own TransactionId.
     3232                 :                                  */
     3233               0 :                                 xid = MultiXactIdCreate(xmax, xid);
     3234               0 :                                 new_infomask |= HEAP_XMAX_IS_MULTI;
     3235                 :                         }
     3236                 :                         else
     3237                 :                         {
     3238                 :                                 /*
     3239                 :                                  * Can get here iff HeapTupleSatisfiesUpdate saw the old xmax
     3240                 :                                  * as running, but it finished before
     3241                 :                                  * TransactionIdIsInProgress() got to run.      Treat it like
     3242                 :                                  * there's no locker in the tuple.
     3243                 :                                  */
     3244                 :                         }
     3245                 :                 }
     3246                 :                 else
     3247                 :                 {
     3248                 :                         /*
     3249                 :                          * There was no previous locker, so just insert our own
     3250                 :                          * TransactionId.
     3251                 :                          */
     3252                 :                 }
     3253                 :         }
     3254                 :         else
     3255                 :         {
     3256                 :                 /* We want an exclusive lock on the tuple */
     3257             147 :                 new_infomask |= HEAP_XMAX_EXCL_LOCK;
     3258                 :         }
     3259                 : 
     3260             258 :         START_CRIT_SECTION();   /* buffer change + WAL insert must be atomic wrt. errors */
     3261                 : 
     3262                 :         /*
     3263                 :          * Store transaction information of xact locking the tuple.
     3264                 :          *
     3265                 :          * Note: Cmax is meaningless in this context, so don't set it; this avoids
     3266                 :          * possibly generating a useless combo CID.
     3267                 :          */
     3268             258 :         tuple->t_data->t_infomask = new_infomask;
     3269             258 :         HeapTupleHeaderClearHotUpdated(tuple->t_data);
     3270             258 :         HeapTupleHeaderSetXmax(tuple->t_data, xid); /* xid is either our XID or a MultiXactId computed above */
     3271                 :         /* Make sure there is no forward chain link in t_ctid */
     3272             258 :         tuple->t_data->t_ctid = *tid;
     3273                 : 
     3274             258 :         MarkBufferDirty(*buffer);
     3275                 : 
     3276                 :         /*
     3277                 :          * XLOG stuff.  You might think that we don't need an XLOG record because
     3278                 :          * there is no state change worth restoring after a crash.      You would be
     3279                 :          * wrong however: we have just written either a TransactionId or a
     3280                 :          * MultiXactId that may never have been seen on disk before, and we need
     3281                 :          * to make sure that there are XLOG entries covering those ID numbers.
     3282                 :          * Else the same IDs might be re-used after a crash, which would be
     3283                 :          * disastrous if this page made it to disk before the crash.  Essentially
     3284                 :          * we have to enforce the WAL log-before-data rule even in this case.
     3285                 :          * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
     3286                 :          * entries for everything anyway.)
     3287                 :          */
     3288             258 :         if (!relation->rd_istemp)       /* skip WAL for temp relations */
     3289                 :         {
     3290                 :                 xl_heap_lock xlrec;
     3291                 :                 XLogRecPtr      recptr;
     3292                 :                 XLogRecData rdata[2];
     3293                 : 
     3294             244 :                 xlrec.target.node = relation->rd_node;
     3295             244 :                 xlrec.target.tid = tuple->t_self;
     3296             244 :                 xlrec.locking_xid = xid;
     3297             244 :                 xlrec.xid_is_mxact = ((new_infomask & HEAP_XMAX_IS_MULTI) != 0);
     3298             244 :                 xlrec.shared_lock = (mode == LockTupleShared);
     3299             244 :                 rdata[0].data = (char *) &xlrec;
     3300             244 :                 rdata[0].len = SizeOfHeapLock;
     3301             244 :                 rdata[0].buffer = InvalidBuffer;
     3302             244 :                 rdata[0].next = &(rdata[1]);
     3303                 : 
     3304             244 :                 rdata[1].data = NULL;
     3305             244 :                 rdata[1].len = 0;
     3306             244 :                 rdata[1].buffer = *buffer;
     3307             244 :                 rdata[1].buffer_std = true;
     3308             244 :                 rdata[1].next = NULL;
     3309                 : 
     3310             244 :                 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK, rdata);
     3311                 : 
     3312             244 :                 PageSetLSN(dp, recptr);
     3313             244 :                 PageSetTLI(dp, ThisTimeLineID);
     3314                 :         }
     3315                 : 
     3316             258 :         END_CRIT_SECTION();
     3317                 : 
     3318             258 :         LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);        /* release the buffer lock; the pin is left for the caller */
     3319                 : 
     3320                 :         /*
     3321                 :          * Now that we have successfully marked the tuple as locked, we can
     3322                 :          * release the lmgr tuple lock, if we had it.
     3323                 :          */
     3324             258 :         if (have_tuple_lock)
     3325               0 :                 UnlockTuple(relation, tid, tuple_lock_type);
     3326                 : 
     3327             258 :         return HeapTupleMayBeUpdated;
     3328                 : }
    3329                 : 
    3330                 : 
    3331                 : /*
    3332                 :  * heap_inplace_update - update a tuple "in place" (ie, overwrite it)
    3333                 :  *
    3334                 :  * Overwriting violates both MVCC and transactional safety, so the uses
    3335                 :  * of this function in Postgres are extremely limited.  Nonetheless we
    3336                 :  * find some places to use it.
    3337                 :  *
    3338                 :  * The tuple cannot change size, and therefore it's reasonable to assume
    3339                 :  * that its null bitmap (if any) doesn't change either.  So we just
    3340                 :  * overwrite the data portion of the tuple without touching the null
    3341                 :  * bitmap or any of the header fields.
    3342                 :  *
    3343                 :  * tuple is an in-memory tuple structure containing the data to be written
    3344                 :  * over the target tuple.  Also, tuple->t_self identifies the target tuple.
    3345                 :  */
void
heap_inplace_update(Relation relation, HeapTuple tuple)
{
	Buffer		buffer;
	Page		page;
	OffsetNumber offnum;
	ItemId		lp = NULL;
	HeapTupleHeader htup;
	uint32		oldlen;
	uint32		newlen;

	/* Pin and exclusive-lock the page containing the target tuple. */
	buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
	page = (Page) BufferGetPage(buffer);

	/* Look up the target line pointer; it must exist and be "normal". */
	offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
	if (PageGetMaxOffsetNumber(page) >= offnum)
		lp = PageGetItemId(page, offnum);

	if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
		elog(ERROR, "heap_inplace_update: invalid lp");

	htup = (HeapTupleHeader) PageGetItem(page, lp);

	/*
	 * The data portion (everything past t_hoff) must be exactly the same
	 * length as before, and the header offset must match too, since we
	 * overwrite the data in place without moving anything on the page.
	 */
	oldlen = ItemIdGetLength(lp) - htup->t_hoff;
	newlen = tuple->t_len - tuple->t_data->t_hoff;
	if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
		elog(ERROR, "heap_inplace_update: wrong tuple length");

	/* NO EREPORT(ERROR) from here till changes are logged */
	START_CRIT_SECTION();

	/* Overwrite only the data portion; header and null bitmap untouched. */
	memcpy((char *) htup + htup->t_hoff,
		   (char *) tuple->t_data + tuple->t_data->t_hoff,
		   newlen);

	MarkBufferDirty(buffer);

	/* XLOG stuff */
	if (!relation->rd_istemp)
	{
		xl_heap_inplace xlrec;
		XLogRecPtr	recptr;
		XLogRecData rdata[2];

		xlrec.target.node = relation->rd_node;
		xlrec.target.tid = tuple->t_self;

		/* fixed-size record header, not attached to any buffer */
		rdata[0].data = (char *) &xlrec;
		rdata[0].len = SizeOfHeapInplace;
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = &(rdata[1]);

		/*
		 * New tuple data is attached to the buffer, so XLogInsert may omit
		 * it when a full-page image of the buffer is logged instead.
		 */
		rdata[1].data = (char *) htup + htup->t_hoff;
		rdata[1].len = newlen;
		rdata[1].buffer = buffer;
		rdata[1].buffer_std = true;
		rdata[1].next = NULL;

		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE, rdata);

		/* Enforce WAL-before-data: stamp the page with the record's LSN. */
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buffer);

	/* Send out shared cache inval if necessary */
	if (!IsBootstrapProcessingMode())
		CacheInvalidateHeapTuple(relation, tuple);
}
    3419                 : 
    3420                 : 
    3421                 : /*
    3422                 :  * heap_freeze_tuple
    3423                 :  *
    3424                 :  * Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
    3425                 :  * are older than the specified cutoff XID.  If so, replace them with
    3426                 :  * FrozenTransactionId or InvalidTransactionId as appropriate, and return
    3427                 :  * TRUE.  Return FALSE if nothing was changed.
    3428                 :  *
    3429                 :  * It is assumed that the caller has checked the tuple with
    3430                 :  * HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD
    3431                 :  * (else we should be removing the tuple, not freezing it).
    3432                 :  *
    3433                 :  * NB: cutoff_xid *must* be <= the current global xmin, to ensure that any
    3434                 :  * XID older than it could neither be running nor seen as running by any
    3435                 :  * open transaction.  This ensures that the replacement will not change
    3436                 :  * anyone's idea of the tuple state.  Also, since we assume the tuple is
    3437                 :  * not HEAPTUPLE_DEAD, the fact that an XID is not still running allows us
    3438                 :  * to assume that it is either committed good or aborted, as appropriate;
    3439                 :  * so we need no external state checks to decide what to do.  (This is good
    3440                 :  * because this function is applied during WAL recovery, when we don't have
    3441                 :  * access to any such state, and can't depend on the hint bits to be set.)
    3442                 :  *
    3443                 :  * In lazy VACUUM, we call this while initially holding only a shared lock
    3444                 :  * on the tuple's buffer.  If any change is needed, we trade that in for an
    3445                 :  * exclusive lock before making the change.  Caller should pass the buffer ID
    3446                 :  * if shared lock is held, InvalidBuffer if exclusive lock is already held.
    3447                 :  *
    3448                 :  * Note: it might seem we could make the changes without exclusive lock, since
    3449                 :  * TransactionId read/write is assumed atomic anyway.  However there is a race
    3450                 :  * condition: someone who just fetched an old XID that we overwrite here could
    3451                 :  * conceivably not finish checking the XID against pg_clog before we finish
    3452                 :  * the VACUUM and perhaps truncate off the part of pg_clog he needs.  Getting
    3453                 :  * exclusive lock ensures no other backend is in process of checking the
    3454                 :  * tuple status.  Also, getting exclusive lock makes it safe to adjust the
    3455                 :  * infomask bits.
    3456                 :  */
bool
heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
				  Buffer buf)
{
	bool		changed = false;
	TransactionId xid;

	/* Replace an old-enough normal xmin with FrozenTransactionId. */
	xid = HeapTupleHeaderGetXmin(tuple);
	if (TransactionIdIsNormal(xid) &&
		TransactionIdPrecedes(xid, cutoff_xid))
	{
		if (buf != InvalidBuffer)
		{
			/* trade in share lock for exclusive lock */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
			/* InvalidBuffer signals "exclusive lock now held" below */
			buf = InvalidBuffer;
		}
		HeapTupleHeaderSetXmin(tuple, FrozenTransactionId);

		/*
		 * Might as well fix the hint bits too; usually XMIN_COMMITTED will
		 * already be set here, but there's a small chance not.
		 */
		Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
		tuple->t_infomask |= HEAP_XMIN_COMMITTED;
		changed = true;
	}

	/*
	 * When we release shared lock, it's possible for someone else to change
	 * xmax before we get the lock back, so repeat the check after acquiring
	 * exclusive lock.	(We don't need this pushup for xmin, because only
	 * VACUUM could be interested in changing an existing tuple's xmin, and
	 * there's only one VACUUM allowed on a table at a time.)
	 */
recheck_xmax:
	if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
	{
		xid = HeapTupleHeaderGetXmax(tuple);
		if (TransactionIdIsNormal(xid) &&
			TransactionIdPrecedes(xid, cutoff_xid))
		{
			if (buf != InvalidBuffer)
			{
				/* trade in share lock for exclusive lock */
				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
				LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
				buf = InvalidBuffer;
				goto recheck_xmax;		/* see comment above */
			}
			HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);

			/*
			 * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED
			 * + LOCKED.  Normalize to INVALID just to be sure no one gets
			 * confused.
			 */
			tuple->t_infomask &= ~HEAP_XMAX_COMMITTED;
			tuple->t_infomask |= HEAP_XMAX_INVALID;
			HeapTupleHeaderClearHotUpdated(tuple);
			changed = true;
		}
	}
	else
	{
		/*----------
		 * XXX perhaps someday we should zero out very old MultiXactIds here?
		 *
		 * The only way a stale MultiXactId could pose a problem is if a
		 * tuple, having once been multiply-share-locked, is not touched by
		 * any vacuum or attempted lock or deletion for just over 4G MultiXact
		 * creations, and then in the probably-narrow window where its xmax
		 * is again a live MultiXactId, someone tries to lock or delete it.
		 * Even then, another share-lock attempt would work fine.  An
		 * exclusive-lock or delete attempt would face unexpected delay, or
		 * in the very worst case get a deadlock error.  This seems an
		 * extremely low-probability scenario with minimal downside even if
		 * it does happen, so for now we don't do the extra bookkeeping that
		 * would be needed to clean out MultiXactIds.
		 *----------
		 */
	}

	/*
	 * Although xvac per se could only be set by VACUUM, it shares physical
	 * storage space with cmax, and so could be wiped out by someone setting
	 * xmax.  Hence recheck after changing lock, same as for xmax itself.
	 */
recheck_xvac:
	if (tuple->t_infomask & HEAP_MOVED)
	{
		xid = HeapTupleHeaderGetXvac(tuple);
		if (TransactionIdIsNormal(xid) &&
			TransactionIdPrecedes(xid, cutoff_xid))
		{
			if (buf != InvalidBuffer)
			{
				/* trade in share lock for exclusive lock */
				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
				LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
				buf = InvalidBuffer;
				goto recheck_xvac;		/* see comment above */
			}

			/*
			 * If a MOVED_OFF tuple is not dead, the xvac transaction must
			 * have failed; whereas a non-dead MOVED_IN tuple must mean the
			 * xvac transaction succeeded.
			 */
			if (tuple->t_infomask & HEAP_MOVED_OFF)
				HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
			else
				HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);

			/*
			 * Might as well fix the hint bits too; usually XMIN_COMMITTED
			 * will already be set here, but there's a small chance not.
			 */
			Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
			tuple->t_infomask |= HEAP_XMIN_COMMITTED;
			changed = true;
		}
	}

	/* true if any of xmin/xmax/xvac (or hint bits) were modified */
	return changed;
}
    3584                 : 
    3585                 : 
    3586                 : /* ----------------
    3587                 :  *              heap_markpos    - mark scan position
    3588                 :  * ----------------
    3589                 :  */
    3590                 : void
    3591                 : heap_markpos(HeapScanDesc scan)
    3592               0 : {
    3593                 :         /* Note: no locking manipulations needed */
    3594                 : 
    3595               0 :         if (scan->rs_ctup.t_data != NULL)
    3596                 :         {
    3597               0 :                 scan->rs_mctid = scan->rs_ctup.t_self;
    3598               0 :                 if (scan->rs_pageatatime)
    3599               0 :                         scan->rs_mindex = scan->rs_cindex;
    3600                 :         }
    3601                 :         else
    3602               0 :                 ItemPointerSetInvalid(&scan->rs_mctid);
    3603               0 : }
    3604                 : 
    3605                 : /* ----------------
    3606                 :  *              heap_restrpos   - restore position to marked location
    3607                 :  * ----------------
    3608                 :  */
    3609                 : void
    3610                 : heap_restrpos(HeapScanDesc scan)
    3611               0 : {
    3612                 :         /* XXX no amrestrpos checking that ammarkpos called */
    3613                 : 
    3614               0 :         if (!ItemPointerIsValid(&scan->rs_mctid))
    3615                 :         {
    3616               0 :                 scan->rs_ctup.t_data = NULL;
    3617                 : 
    3618                 :                 /*
    3619                 :                  * unpin scan buffers
    3620                 :                  */
    3621               0 :                 if (BufferIsValid(scan->rs_cbuf))
    3622               0 :                         ReleaseBuffer(scan->rs_cbuf);
    3623               0 :                 scan->rs_cbuf = InvalidBuffer;
    3624               0 :                 scan->rs_cblock = InvalidBlockNumber;
    3625               0 :                 scan->rs_inited = false;
    3626                 :         }
    3627                 :         else
    3628                 :         {
    3629                 :                 /*
    3630                 :                  * If we reached end of scan, rs_inited will now be false.      We must
    3631                 :                  * reset it to true to keep heapgettup from doing the wrong thing.
    3632                 :                  */
    3633               0 :                 scan->rs_inited = true;
    3634               0 :                 scan->rs_ctup.t_self = scan->rs_mctid;
    3635               0 :                 if (scan->rs_pageatatime)
    3636                 :                 {
    3637               0 :                         scan->rs_cindex = scan->rs_mindex;
    3638               0 :                         heapgettup_pagemode(scan,
    3639                 :                                                                 NoMovementScanDirection,
    3640                 :                                                                 0,              /* needn't recheck scan keys */
    3641                 :                                                                 NULL);
    3642                 :                 }
    3643                 :                 else
    3644               0 :                         heapgettup(scan,
    3645                 :                                            NoMovementScanDirection,
    3646                 :                                            0,           /* needn't recheck scan keys */
    3647                 :                                            NULL);
    3648                 :         }
    3649               0 : }
    3650                 : 
    3651                 : /*
    3652                 :  * Perform XLogInsert for a heap-clean operation.  Caller must already
    3653                 :  * have modified the buffer and marked it dirty.
    3654                 :  *
    3655                 :  * Note: prior to Postgres 8.3, the entries in the nowunused[] array were
    3656                 :  * zero-based tuple indexes.  Now they are one-based like other uses
    3657                 :  * of OffsetNumber.
    3658                 :  */
    3659                 : XLogRecPtr
    3660                 : log_heap_clean(Relation reln, Buffer buffer,
    3661                 :                            OffsetNumber *redirected, int nredirected,
    3662                 :                            OffsetNumber *nowdead, int ndead,
    3663                 :                            OffsetNumber *nowunused, int nunused,
    3664                 :                            bool redirect_move)
    3665            1048 : {
    3666                 :         xl_heap_clean xlrec;
    3667                 :         uint8           info;
    3668                 :         XLogRecPtr      recptr;
    3669                 :         XLogRecData rdata[4];
    3670                 : 
    3671                 :         /* Caller should not call me on a temp relation */
    3672                 :         Assert(!reln->rd_istemp);
    3673                 : 
    3674            1048 :         xlrec.node = reln->rd_node;
    3675            1048 :         xlrec.block = BufferGetBlockNumber(buffer);
    3676            1048 :         xlrec.nredirected = nredirected;
    3677            1048 :         xlrec.ndead = ndead;
    3678                 : 
    3679            1048 :         rdata[0].data = (char *) &xlrec;
    3680            1048 :         rdata[0].len = SizeOfHeapClean;
    3681            1048 :         rdata[0].buffer = InvalidBuffer;
    3682            1048 :         rdata[0].next = &(rdata[1]);
    3683                 : 
    3684                 :         /*
    3685                 :          * The OffsetNumber arrays are not actually in the buffer, but we pretend
    3686                 :          * that they are.  When XLogInsert stores the whole buffer, the offset
    3687                 :          * arrays need not be stored too.  Note that even if all three arrays are
    3688                 :          * empty, we want to expose the buffer as a candidate for whole-page
    3689                 :          * storage, since this record type implies a defragmentation operation
    3690                 :          * even if no item pointers changed state.
    3691                 :          */
    3692            1048 :         if (nredirected > 0)
    3693                 :         {
    3694             200 :                 rdata[1].data = (char *) redirected;
    3695             200 :                 rdata[1].len = nredirected * sizeof(OffsetNumber) * 2;
    3696                 :         }
    3697                 :         else
    3698                 :         {
    3699             848 :                 rdata[1].data = NULL;
    3700             848 :                 rdata[1].len = 0;
    3701                 :         }
    3702            1048 :         rdata[1].buffer = buffer;
    3703            1048 :         rdata[1].buffer_std = true;
    3704            1048 :         rdata[1].next = &(rdata[2]);
    3705                 : 
    3706            1048 :         if (ndead > 0)
    3707                 :         {
    3708             753 :                 rdata[2].data = (char *) nowdead;
    3709             753 :                 rdata[2].len = ndead * sizeof(OffsetNumber);
    3710                 :         }
    3711                 :         else
    3712                 :         {
    3713             295 :                 rdata[2].data = NULL;
    3714             295 :                 rdata[2].len = 0;
    3715                 :         }
    3716            1048 :         rdata[2].buffer = buffer;
    3717            1048 :         rdata[2].buffer_std = true;
    3718            1048 :         rdata[2].next = &(rdata[3]);
    3719                 : 
    3720            1048 :         if (nunused > 0)
    3721                 :         {
    3722             383 :                 rdata[3].data = (char *) nowunused;
    3723             383 :                 rdata[3].len = nunused * sizeof(OffsetNumber);
    3724                 :         }
    3725                 :         else
    3726                 :         {
    3727             665 :                 rdata[3].data = NULL;
    3728             665 :                 rdata[3].len = 0;
    3729                 :         }
    3730            1048 :         rdata[3].buffer = buffer;
    3731            1048 :         rdata[3].buffer_std = true;
    3732            1048 :         rdata[3].next = NULL;
    3733                 : 
    3734            1048 :         info = redirect_move ? XLOG_HEAP2_CLEAN_MOVE : XLOG_HEAP2_CLEAN;
    3735            1048 :         recptr = XLogInsert(RM_HEAP2_ID, info, rdata);
    3736                 : 
    3737            1048 :         return recptr;
    3738                 : }
    3739                 : 
    3740                 : /*
    3741                 :  * Perform XLogInsert for a heap-freeze operation.      Caller must already
    3742                 :  * have modified the buffer and marked it dirty.
    3743                 :  */
    3744                 : XLogRecPtr
    3745                 : log_heap_freeze(Relation reln, Buffer buffer,
    3746                 :                                 TransactionId cutoff_xid,
    3747                 :                                 OffsetNumber *offsets, int offcnt)
    3748             142 : {
    3749                 :         xl_heap_freeze xlrec;
    3750                 :         XLogRecPtr      recptr;
    3751                 :         XLogRecData rdata[2];
    3752                 : 
    3753                 :         /* Caller should not call me on a temp relation */
    3754                 :         Assert(!reln->rd_istemp);
    3755                 : 
    3756             142 :         xlrec.node = reln->rd_node;
    3757             142 :         xlrec.block = BufferGetBlockNumber(buffer);
    3758             142 :         xlrec.cutoff_xid = cutoff_xid;
    3759                 : 
    3760             142 :         rdata[0].data = (char *) &xlrec;
    3761             142 :         rdata[0].len = SizeOfHeapFreeze;
    3762             142 :         rdata[0].buffer = InvalidBuffer;
    3763             142 :         rdata[0].next = &(rdata[1]);
    3764                 : 
    3765                 :         /*
    3766                 :          * The tuple-offsets array is not actually in the buffer, but pretend that
    3767                 :          * it is.  When XLogInsert stores the whole buffer, the offsets array need
    3768                 :          * not be stored too.
    3769                 :          */
    3770             142 :         if (offcnt > 0)
    3771                 :         {
    3772             142 :                 rdata[1].data = (char *) offsets;
    3773             142 :                 rdata[1].len = offcnt * sizeof(OffsetNumber);
    3774                 :         }
    3775                 :         else
    3776                 :         {
    3777               0 :                 rdata[1].data = NULL;
    3778               0 :                 rdata[1].len = 0;
    3779                 :         }
    3780             142 :         rdata[1].buffer = buffer;
    3781             142 :         rdata[1].buffer_std = true;
    3782             142 :         rdata[1].next = NULL;
    3783                 : 
    3784             142 :         recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE, rdata);
    3785                 : 
    3786             142 :         return recptr;
    3787                 : }
    3788                 : 
    3789                 : /*
    3790                 :  * Perform XLogInsert for a heap-update operation.      Caller must already
    3791                 :  * have modified the buffer(s) and marked them dirty.
                         :  *
                         :  * reln   - relation being updated (must not be a temp relation)
                         :  * oldbuf - buffer containing the old tuple version
                         :  * from   - TID of the old tuple version
                         :  * newbuf - buffer containing the new tuple (may be the same as oldbuf)
                         :  * newtup - the new tuple; its t_self must already be its final TID
                         :  * move   - true for a VACUUM FULL tuple move (logs XLOG_HEAP_MOVE);
                         :  *          false logs XLOG_HEAP_UPDATE or XLOG_HEAP_HOT_UPDATE
                         :  *
                         :  * Returns the LSN of the inserted WAL record.
    3792                 :  */
    3793                 : static XLogRecPtr
    3794                 : log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
    3795                 :                                 Buffer newbuf, HeapTuple newtup, bool move)
    3796            4632 : {
    3797                 :         /*
    3798                 :          * Note: xlhdr is declared to have adequate size and correct alignment for
    3799                 :          * an xl_heap_header.  However the two tids, if present at all, will be
    3800                 :          * packed in with no wasted space after the xl_heap_header; they aren't
    3801                 :          * necessarily aligned as implied by this struct declaration.
    3802                 :          */
    3803                 :         struct
    3804                 :         {
    3805                 :                 xl_heap_header hdr;
    3806                 :                 TransactionId tid1;
    3807                 :                 TransactionId tid2;
    3808                 :         }                       xlhdr;
    3809            4632 :         int                     hsize = SizeOfHeapHeader;
    3810                 :         xl_heap_update xlrec;
    3811                 :         uint8           info;
    3812                 :         XLogRecPtr      recptr;
    3813                 :         XLogRecData rdata[4];
    3814            4632 :         Page            page = BufferGetPage(newbuf);
    3815                 : 
    3816                 :         /* Caller should not call me on a temp relation */
    3817                 :         Assert(!reln->rd_istemp);
    3818                 : 
                         :         /* Choose record type: MOVE, HOT_UPDATE, or plain UPDATE */
    3819            4632 :         if (move)
    3820                 :         {
    3821                 :                 Assert(!HeapTupleIsHeapOnly(newtup));
    3822             111 :                 info = XLOG_HEAP_MOVE;
    3823                 :         }
    3824            4521 :         else if (HeapTupleIsHeapOnly(newtup))
    3825            2138 :                 info = XLOG_HEAP_HOT_UPDATE;
    3826                 :         else
    3827            2383 :                 info = XLOG_HEAP_UPDATE;
    3828                 : 
    3829            4632 :         xlrec.target.node = reln->rd_node;
    3830            4632 :         xlrec.target.tid = from;
    3831            4632 :         xlrec.newtid = newtup->t_self;
    3832                 : 
                         :         /* rdata[0]: the fixed-size update record, never buffer-backed */
    3833            4632 :         rdata[0].data = (char *) &xlrec;
    3834            4632 :         rdata[0].len = SizeOfHeapUpdate;
    3835            4632 :         rdata[0].buffer = InvalidBuffer;
    3836            4632 :         rdata[0].next = &(rdata[1]);
    3837                 : 
                         :         /*
                         :          * rdata[1] carries no payload of its own; it is registered only so
                         :          * that XLogInsert can attach a backup block for the old page.
                         :          */
    3838            4632 :         rdata[1].data = NULL;
    3839            4632 :         rdata[1].len = 0;
    3840            4632 :         rdata[1].buffer = oldbuf;
    3841            4632 :         rdata[1].buffer_std = true;
    3842            4632 :         rdata[1].next = &(rdata[2]);
    3843                 : 
    3844            4632 :         xlhdr.hdr.t_infomask2 = newtup->t_data->t_infomask2;
    3845            4632 :         xlhdr.hdr.t_infomask = newtup->t_data->t_infomask;
    3846            4632 :         xlhdr.hdr.t_hoff = newtup->t_data->t_hoff;
    3847            4632 :         if (move)                                       /* remember xmax & xmin */
    3848                 :         {
    3849                 :                 TransactionId xid[2];   /* xmax, xmin */
    3850                 : 
    3851             111 :                 if (newtup->t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED))
    3852             111 :                         xid[0] = InvalidTransactionId;
    3853                 :                 else
    3854               0 :                         xid[0] = HeapTupleHeaderGetXmax(newtup->t_data);
    3855             111 :                 xid[1] = HeapTupleHeaderGetXmin(newtup->t_data);
                         :                 /* pack the two xids unaligned, directly after the header */
    3856             111 :                 memcpy((char *) &xlhdr + hsize,
    3857                 :                            (char *) xid,
    3858                 :                            2 * sizeof(TransactionId));
    3859             111 :                 hsize += 2 * sizeof(TransactionId);
    3860                 :         }
    3861                 : 
    3862                 :         /*
    3863                 :          * As with insert records, we need not store the rdata[2] segment if we
    3864                 :          * decide to store the whole buffer instead.
    3865                 :          */
    3866            4632 :         rdata[2].data = (char *) &xlhdr;
    3867            4632 :         rdata[2].len = hsize;
    3868            4632 :         rdata[2].buffer = newbuf;
    3869            4632 :         rdata[2].buffer_std = true;
    3870            4632 :         rdata[2].next = &(rdata[3]);
    3871                 : 
    3872                 :         /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
    3873            4632 :         rdata[3].data = (char *) newtup->t_data + offsetof(HeapTupleHeaderData, t_bits);
    3874            4632 :         rdata[3].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits);
    3875            4632 :         rdata[3].buffer = newbuf;
    3876            4632 :         rdata[3].buffer_std = true;
    3877            4632 :         rdata[3].next = NULL;
    3878                 : 
    3879                 :         /* If new tuple is the single and first tuple on page... */
    3880            4632 :         if (ItemPointerGetOffsetNumber(&(newtup->t_self)) == FirstOffsetNumber &&
    3881                 :                 PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
    3882                 :         {
                         :                 /* redo reinitializes the new page, so skip its backup block */
    3883              55 :                 info |= XLOG_HEAP_INIT_PAGE;
    3884              55 :                 rdata[2].buffer = rdata[3].buffer = InvalidBuffer;
    3885                 :         }
    3886                 : 
    3887            4632 :         recptr = XLogInsert(RM_HEAP_ID, info, rdata);
    3888                 : 
    3889            4632 :         return recptr;
    3890                 : }
    3891                 : 
    3892                 : /*
    3893                 :  * Perform XLogInsert for a heap-move operation.  Caller must already
    3894                 :  * have modified the buffers and marked them dirty.
                         :  *
                         :  * Thin wrapper: delegates to log_heap_update() with move = true, which
                         :  * emits an XLOG_HEAP_MOVE record.  Returns the record's LSN.
    3895                 :  */
    3896                 : XLogRecPtr
    3897                 : log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from,
    3898                 :                           Buffer newbuf, HeapTuple newtup)
    3899             111 : {
    3900             111 :         return log_heap_update(reln, oldbuf, from, newbuf, newtup, true);
    3901                 : }
    3902                 : 
    3903                 : /*
    3904                 :  * Perform XLogInsert of a HEAP_NEWPAGE record to WAL. Caller is responsible
    3905                 :  * for writing the page to disk after calling this routine.
    3906                 :  *
    3907                 :  * Note: all current callers build pages in private memory and write them
    3908                 :  * directly to smgr, rather than using bufmgr.  Therefore there is no need
    3909                 :  * to pass a buffer ID to XLogInsert, nor to perform MarkBufferDirty within
    3910                 :  * the critical section.
    3911                 :  *
    3912                 :  * Note: the NEWPAGE log record is used for both heaps and indexes, so do
    3913                 :  * not do anything that assumes we are touching a heap.
                         :  *
                         :  * Returns the record's LSN; the page's LSN and TLI are stamped here,
                         :  * before the caller writes the page out.
    3914                 :  */
    3915                 : XLogRecPtr
    3916                 : log_newpage(RelFileNode *rnode, BlockNumber blkno, Page page)
    3917               0 : {
    3918                 :         xl_heap_newpage xlrec;
    3919                 :         XLogRecPtr      recptr;
    3920                 :         XLogRecData rdata[2];
    3921                 : 
    3922                 :         /* NO ELOG(ERROR) from here till newpage op is logged */
    3923               0 :         START_CRIT_SECTION();
    3924                 : 
    3925               0 :         xlrec.node = *rnode;
    3926               0 :         xlrec.blkno = blkno;
    3927                 : 
    3928               0 :         rdata[0].data = (char *) &xlrec;
    3929               0 :         rdata[0].len = SizeOfHeapNewpage;
    3930               0 :         rdata[0].buffer = InvalidBuffer;
    3931               0 :         rdata[0].next = &(rdata[1]);
    3932                 : 
                         :         /* the full page image is the record payload; no buffer attached */
    3933               0 :         rdata[1].data = (char *) page;
    3934               0 :         rdata[1].len = BLCKSZ;
    3935               0 :         rdata[1].buffer = InvalidBuffer;
    3936               0 :         rdata[1].next = NULL;
    3937                 : 
    3938               0 :         recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
    3939                 : 
    3940               0 :         PageSetLSN(page, recptr);
    3941               0 :         PageSetTLI(page, ThisTimeLineID);
    3942                 : 
    3943               0 :         END_CRIT_SECTION();
    3944                 : 
    3945               0 :         return recptr;
    3946                 : }
    3947                 : 
    3948                 : /*
    3949                 :  * Handles CLEAN and CLEAN_MOVE record types
                         :  *
                         :  * Redo of a page-pruning operation.  After the fixed xl_heap_clean
                         :  * header, the record carries an array of OffsetNumbers laid out as:
                         :  *   2 * nredirected entries forming (from, to) redirect pairs, then
                         :  *   ndead entries to be marked dead, then
                         :  *   whatever remains (up to xl_len) to be marked unused.
                         :  * clean_move selects CLEAN_MOVE semantics: physically move each "to"
                         :  * item into its "from" slot instead of installing a redirect.
    3950                 :  */
    3951                 : static void
    3952                 : heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
    3953               0 : {
    3954               0 :         xl_heap_clean *xlrec = (xl_heap_clean *) XLogRecGetData(record);
    3955                 :         Relation        reln;
    3956                 :         Buffer          buffer;
    3957                 :         Page            page;
    3958                 :         OffsetNumber *offnum;
    3959                 :         OffsetNumber *end;
    3960                 :         int                     nredirected;
    3961                 :         int                     ndead;
    3962                 :         int                     i;
    3963                 : 
                         :         /* a backup block covers this page; restore already did the work */
    3964               0 :         if (record->xl_info & XLR_BKP_BLOCK_1)
    3965               0 :                 return;
    3966                 : 
    3967               0 :         reln = XLogOpenRelation(xlrec->node);
    3968               0 :         buffer = XLogReadBuffer(reln, xlrec->block, false);
    3969               0 :         if (!BufferIsValid(buffer))
    3970                 :                 return;
    3971               0 :         page = (Page) BufferGetPage(buffer);
    3972                 : 
                         :         /* page LSN >= record LSN means changes are already applied */
    3973               0 :         if (XLByteLE(lsn, PageGetLSN(page)))
    3974                 :         {
    3975               0 :                 UnlockReleaseBuffer(buffer);
    3976               0 :                 return;
    3977                 :         }
    3978                 : 
    3979               0 :         nredirected = xlrec->nredirected;
    3980               0 :         ndead = xlrec->ndead;
    3981               0 :         offnum = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
    3982               0 :         end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
    3983                 : 
    3984                 :         /* Update all redirected or moved line pointers */
    3985               0 :         for (i = 0; i < nredirected; i++)
    3986                 :         {
    3987               0 :                 OffsetNumber fromoff = *offnum++;
    3988               0 :                 OffsetNumber tooff = *offnum++;
    3989               0 :                 ItemId          fromlp = PageGetItemId(page, fromoff);
    3990                 : 
    3991               0 :                 if (clean_move)
    3992                 :                 {
    3993                 :                         /* Physically move the "to" item to the "from" slot */
    3994               0 :                         ItemId          tolp = PageGetItemId(page, tooff);
    3995                 :                         HeapTupleHeader htup;
    3996                 : 
    3997               0 :                         *fromlp = *tolp;
    3998               0 :                         ItemIdSetUnused(tolp);
    3999                 : 
    4000                 :                         /* We also have to clear the tuple's heap-only bit */
    4001                 :                         Assert(ItemIdIsNormal(fromlp));
    4002               0 :                         htup = (HeapTupleHeader) PageGetItem(page, fromlp);
    4003                 :                         Assert(HeapTupleHeaderIsHeapOnly(htup));
    4004               0 :                         HeapTupleHeaderClearHeapOnly(htup);
    4005                 :                 }
    4006                 :                 else
    4007                 :                 {
    4008                 :                         /* Just insert a REDIRECT link at fromoff */
    4009               0 :                         ItemIdSetRedirect(fromlp, tooff);
    4010                 :                 }
    4011                 :         }
    4012                 : 
    4013                 :         /* Update all now-dead line pointers */
    4014               0 :         for (i = 0; i < ndead; i++)
    4015                 :         {
    4016               0 :                 OffsetNumber off = *offnum++;
    4017               0 :                 ItemId          lp = PageGetItemId(page, off);
    4018                 : 
    4019               0 :                 ItemIdSetDead(lp);
    4020                 :         }
    4021                 : 
    4022                 :         /* Update all now-unused line pointers */
    4023               0 :         while (offnum < end)
    4024                 :         {
    4025               0 :                 OffsetNumber off = *offnum++;
    4026               0 :                 ItemId          lp = PageGetItemId(page, off);
    4027                 : 
    4028               0 :                 ItemIdSetUnused(lp);
    4029                 :         }
    4030                 : 
    4031                 :         /*
    4032                 :          * Finally, repair any fragmentation, and update the page's hint bit about
    4033                 :          * whether it has free pointers.
    4034                 :          */
    4035               0 :         PageRepairFragmentation(page);
    4036                 : 
    4037               0 :         PageSetLSN(page, lsn);
    4038               0 :         PageSetTLI(page, ThisTimeLineID);
    4039               0 :         MarkBufferDirty(buffer);
    4040               0 :         UnlockReleaseBuffer(buffer);
    4041                 : }
    4042                 : 
                         : /*
                         :  * Redo of an XLOG_HEAP2_FREEZE record: re-run heap_freeze_tuple with the
                         :  * recorded cutoff_xid on each tuple whose offset is listed in the record.
                         :  */
    4043                 : static void
    4044                 : heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
    4045               0 : {
    4046               0 :         xl_heap_freeze *xlrec = (xl_heap_freeze *) XLogRecGetData(record);
    4047               0 :         TransactionId cutoff_xid = xlrec->cutoff_xid;
    4048                 :         Relation        reln;
    4049                 :         Buffer          buffer;
    4050                 :         Page            page;
    4051                 : 
                         :         /* a backup block covers this page; restore already did the work */
    4052               0 :         if (record->xl_info & XLR_BKP_BLOCK_1)
    4053               0 :                 return;
    4054                 : 
    4055               0 :         reln = XLogOpenRelation(xlrec->node);
    4056               0 :         buffer = XLogReadBuffer(reln, xlrec->block, false);
    4057               0 :         if (!BufferIsValid(buffer))
    4058                 :                 return;
    4059               0 :         page = (Page) BufferGetPage(buffer);
    4060                 : 
                         :         /* page LSN >= record LSN means changes are already applied */
    4061               0 :         if (XLByteLE(lsn, PageGetLSN(page)))
    4062                 :         {
    4063               0 :                 UnlockReleaseBuffer(buffer);
    4064               0 :                 return;
    4065                 :         }
    4066                 : 
                         :         /*
                         :          * The trailing offsets array is present only when xl_len exceeds the
                         :          * fixed header (log_heap_freeze omits it when the whole buffer was
                         :          * stored in the record instead).
                         :          */
    4067               0 :         if (record->xl_len > SizeOfHeapFreeze)
    4068                 :         {
    4069                 :                 OffsetNumber *offsets;
    4070                 :                 OffsetNumber *offsets_end;
    4071                 : 
    4072               0 :                 offsets = (OffsetNumber *) ((char *) xlrec + SizeOfHeapFreeze);
    4073               0 :                 offsets_end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
    4074                 : 
    4075               0 :                 while (offsets < offsets_end)
    4076                 :                 {
    4077                 :                         /* offsets[] entries are one-based */
    4078               0 :                         ItemId          lp = PageGetItemId(page, *offsets);
    4079               0 :                         HeapTupleHeader tuple = (HeapTupleHeader) PageGetItem(page, lp);
    4080                 : 
    4081               0 :                         (void) heap_freeze_tuple(tuple, cutoff_xid, InvalidBuffer);
    4082               0 :                         offsets++;
    4083                 :                 }
    4084                 :         }
    4085                 : 
    4086               0 :         PageSetLSN(page, lsn);
    4087               0 :         PageSetTLI(page, ThisTimeLineID);
    4088               0 :         MarkBufferDirty(buffer);
    4089               0 :         UnlockReleaseBuffer(buffer);
    4090                 : }
    4091                 : 
                         : /*
                         :  * Redo of an XLOG_HEAP_NEWPAGE record: overwrite the target block with
                         :  * the full page image stored in the record payload.
                         :  */
    4092                 : static void
    4093                 : heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
    4094               0 : {
    4095               0 :         xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record);
    4096                 :         Relation        reln;
    4097                 :         Buffer          buffer;
    4098                 :         Page            page;
    4099                 : 
    4100                 :         /*
    4101                 :          * Note: the NEWPAGE log record is used for both heaps and indexes, so do
    4102                 :          * not do anything that assumes we are touching a heap.
    4103                 :          */
    4104               0 :         reln = XLogOpenRelation(xlrec->node);
                         :         /* init = true: the block is recreated wholesale from the record */
    4105               0 :         buffer = XLogReadBuffer(reln, xlrec->blkno, true);
    4106                 :         Assert(BufferIsValid(buffer));
    4107               0 :         page = (Page) BufferGetPage(buffer);
    4108                 : 
    4109                 :         Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ);
    4110               0 :         memcpy(page, (char *) xlrec + SizeOfHeapNewpage, BLCKSZ);
    4111                 : 
    4112               0 :         PageSetLSN(page, lsn);
    4113               0 :         PageSetTLI(page, ThisTimeLineID);
    4114               0 :         MarkBufferDirty(buffer);
    4115               0 :         UnlockReleaseBuffer(buffer);
    4116               0 : }
    4117                 : 
                         : /*
                         :  * Redo of an XLOG_HEAP_DELETE record: re-mark the target tuple deleted by
                         :  * stamping its xmax with the deleting transaction's xid.
                         :  */
    4118                 : static void
    4119                 : heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
    4120               0 : {
    4121               0 :         xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
    4122                 :         Relation        reln;
    4123                 :         Buffer          buffer;
    4124                 :         Page            page;
    4125                 :         OffsetNumber offnum;
    4126               0 :         ItemId          lp = NULL;
    4127                 :         HeapTupleHeader htup;
    4128                 : 
                         :         /* a backup block covers this page; restore already did the work */
    4129               0 :         if (record->xl_info & XLR_BKP_BLOCK_1)
    4130               0 :                 return;
    4131                 : 
    4132               0 :         reln = XLogOpenRelation(xlrec->target.node);
    4133               0 :         buffer = XLogReadBuffer(reln,
    4134                 :                                                         ItemPointerGetBlockNumber(&(xlrec->target.tid)),
    4135                 :                                                         false);
    4136               0 :         if (!BufferIsValid(buffer))
    4137                 :                 return;
    4138               0 :         page = (Page) BufferGetPage(buffer);
    4139                 : 
    4140               0 :         if (XLByteLE(lsn, PageGetLSN(page)))            /* changes are applied */
    4141                 :         {
    4142               0 :                 UnlockReleaseBuffer(buffer);
    4143               0 :                 return;
    4144                 :         }
    4145                 : 
    4146               0 :         offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
    4147               0 :         if (PageGetMaxOffsetNumber(page) >= offnum)
    4148               0 :                 lp = PageGetItemId(page, offnum);
    4149                 : 
    4150               0 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
    4151               0 :                 elog(PANIC, "heap_delete_redo: invalid lp");
    4152                 : 
    4153               0 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    4154                 : 
                         :         /* clear stale xmax-related hint/state bits before setting new xmax */
    4155               0 :         htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
    4156                 :                                                   HEAP_XMAX_INVALID |
    4157                 :                                                   HEAP_XMAX_IS_MULTI |
    4158                 :                                                   HEAP_IS_LOCKED |
    4159                 :                                                   HEAP_MOVED);
    4160               0 :         HeapTupleHeaderClearHotUpdated(htup);
    4161               0 :         HeapTupleHeaderSetXmax(htup, record->xl_xid);
    4162               0 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
    4163                 : 
    4164                 :         /* Mark the page as a candidate for pruning */
    4165               0 :         PageSetPrunable(page, record->xl_xid);
    4166                 : 
    4167                 :         /* Make sure there is no forward chain link in t_ctid */
    4168               0 :         htup->t_ctid = xlrec->target.tid;
    4169               0 :         PageSetLSN(page, lsn);
    4170               0 :         PageSetTLI(page, ThisTimeLineID);
    4171               0 :         MarkBufferDirty(buffer);
    4172               0 :         UnlockReleaseBuffer(buffer);
    4173                 : }
    4174                 : 
                         : /*
                         :  * Redo of an XLOG_HEAP_INSERT record: reconstruct the inserted tuple from
                         :  * the record payload and re-add it at its original offset.  With
                         :  * XLOG_HEAP_INIT_PAGE set, the target page is (re)initialized first.
                         :  */
    4175                 : static void
    4176                 : heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
    4177               0 : {
    4178               0 :         xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
    4179                 :         Relation        reln;
    4180                 :         Buffer          buffer;
    4181                 :         Page            page;
    4182                 :         OffsetNumber offnum;
                         :         /* aligned scratch area to rebuild the tuple in (record data is not
                         :          * necessarily aligned for HeapTupleHeaderData access) */
    4183                 :         struct
    4184                 :         {
    4185                 :                 HeapTupleHeaderData hdr;
    4186                 :                 char            data[MaxHeapTupleSize];
    4187                 :         }                       tbuf;
    4188                 :         HeapTupleHeader htup;
    4189                 :         xl_heap_header xlhdr;
    4190                 :         uint32          newlen;
    4191                 : 
                         :         /* a backup block covers this page; restore already did the work */
    4192               0 :         if (record->xl_info & XLR_BKP_BLOCK_1)
    4193               0 :                 return;
    4194                 : 
    4195               0 :         reln = XLogOpenRelation(xlrec->target.node);
    4196                 : 
    4197               0 :         if (record->xl_info & XLOG_HEAP_INIT_PAGE)
    4198                 :         {
    4199               0 :                 buffer = XLogReadBuffer(reln,
    4200                 :                                                          ItemPointerGetBlockNumber(&(xlrec->target.tid)),
    4201                 :                                                                 true);
    4202                 :                 Assert(BufferIsValid(buffer));
    4203               0 :                 page = (Page) BufferGetPage(buffer);
    4204                 : 
    4205               0 :                 PageInit(page, BufferGetPageSize(buffer), 0);
    4206                 :         }
    4207                 :         else
    4208                 :         {
    4209               0 :                 buffer = XLogReadBuffer(reln,
    4210                 :                                                          ItemPointerGetBlockNumber(&(xlrec->target.tid)),
    4211                 :                                                                 false);
    4212               0 :                 if (!BufferIsValid(buffer))
    4213                 :                         return;
    4214               0 :                 page = (Page) BufferGetPage(buffer);
    4215                 : 
    4216               0 :                 if (XLByteLE(lsn, PageGetLSN(page)))    /* changes are applied */
    4217                 :                 {
    4218               0 :                         UnlockReleaseBuffer(buffer);
    4219               0 :                         return;
    4220                 :                 }
    4221                 :         }
    4222                 : 
    4223               0 :         offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
    4224               0 :         if (PageGetMaxOffsetNumber(page) + 1 < offnum)
    4225               0 :                 elog(PANIC, "heap_insert_redo: invalid max offset number");
    4226                 : 
    4227               0 :         newlen = record->xl_len - SizeOfHeapInsert - SizeOfHeapHeader;
    4228                 :         Assert(newlen <= MaxHeapTupleSize);
    4229               0 :         memcpy((char *) &xlhdr,
    4230                 :                    (char *) xlrec + SizeOfHeapInsert,
    4231                 :                    SizeOfHeapHeader);
    4232               0 :         htup = &tbuf.hdr;
    4233               0 :         MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
    4234                 :         /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
    4235               0 :         memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
    4236                 :                    (char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader,
    4237                 :                    newlen);
    4238               0 :         newlen += offsetof(HeapTupleHeaderData, t_bits);
    4239               0 :         htup->t_infomask2 = xlhdr.t_infomask2;
    4240               0 :         htup->t_infomask = xlhdr.t_infomask;
    4241               0 :         htup->t_hoff = xlhdr.t_hoff;
                         :         /* restore visibility fields from the WAL record's own xid */
    4242               0 :         HeapTupleHeaderSetXmin(htup, record->xl_xid);
    4243               0 :         HeapTupleHeaderSetCmin(htup, FirstCommandId);
    4244               0 :         htup->t_ctid = xlrec->target.tid;
    4245                 : 
    4246               0 :         offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
    4247               0 :         if (offnum == InvalidOffsetNumber)
    4248               0 :                 elog(PANIC, "heap_insert_redo: failed to add tuple");
    4249               0 :         PageSetLSN(page, lsn);
    4250               0 :         PageSetTLI(page, ThisTimeLineID);
    4251               0 :         MarkBufferDirty(buffer);
    4252               0 :         UnlockReleaseBuffer(buffer);
    4253                 : }
    4254                 : 
    4255                 : /*
    4256                 :  * Handles UPDATE, HOT_UPDATE & MOVE
    4257                 :  */
    4258                 : static void
    4259                 : heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update)
    4260               0 : {
    4261               0 :         xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
    4262               0 :         Relation        reln = XLogOpenRelation(xlrec->target.node);
    4263                 :         Buffer          buffer;
    4264                 :         bool            samepage = (ItemPointerGetBlockNumber(&(xlrec->newtid)) ==
    4265               0 :                                                         ItemPointerGetBlockNumber(&(xlrec->target.tid)));
    4266                 :         Page            page;
    4267                 :         OffsetNumber offnum;
    4268               0 :         ItemId          lp = NULL;
    4269                 :         HeapTupleHeader htup;
    4270                 :         struct
    4271                 :         {
    4272                 :                 HeapTupleHeaderData hdr;
    4273                 :                 char            data[MaxHeapTupleSize];
    4274                 :         }                       tbuf;
    4275                 :         xl_heap_header xlhdr;
    4276                 :         int                     hsize;
    4277                 :         uint32          newlen;
    4278                 : 
    4279               0 :         if (record->xl_info & XLR_BKP_BLOCK_1)
    4280                 :         {
    4281               0 :                 if (samepage)
    4282               0 :                         return;                         /* backup block covered both changes */
    4283                 :                 goto newt;
    4284                 :         }
    4285                 : 
    4286                 :         /* Deal with old tuple version */
    4287                 : 
    4288               0 :         buffer = XLogReadBuffer(reln,
    4289                 :                                                         ItemPointerGetBlockNumber(&(xlrec->target.tid)),
    4290                 :                                                         false);
    4291               0 :         if (!BufferIsValid(buffer))
    4292                 :                 goto newt;
    4293               0 :         page = (Page) BufferGetPage(buffer);
    4294                 : 
    4295               0 :         if (XLByteLE(lsn, PageGetLSN(page)))            /* changes are applied */
    4296                 :         {
    4297               0 :                 UnlockReleaseBuffer(buffer);
    4298               0 :                 if (samepage)
    4299               0 :                         return;
    4300                 :                 goto newt;
    4301                 :         }
    4302                 : 
    4303               0 :         offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
    4304               0 :         if (PageGetMaxOffsetNumber(page) >= offnum)
    4305               0 :                 lp = PageGetItemId(page, offnum);
    4306                 : 
    4307               0 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
    4308               0 :                 elog(PANIC, "heap_update_redo: invalid lp");
    4309                 : 
    4310               0 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    4311                 : 
    4312               0 :         if (move)
    4313                 :         {
    4314               0 :                 htup->t_infomask &= ~(HEAP_XMIN_COMMITTED |
    4315                 :                                                           HEAP_XMIN_INVALID |
    4316                 :                                                           HEAP_MOVED_IN);
    4317               0 :                 htup->t_infomask |= HEAP_MOVED_OFF;
    4318               0 :                 HeapTupleHeaderClearHotUpdated(htup);
    4319               0 :                 HeapTupleHeaderSetXvac(htup, record->xl_xid);
    4320                 :                 /* Make sure there is no forward chain link in t_ctid */
    4321               0 :                 htup->t_ctid = xlrec->target.tid;
    4322                 :         }
    4323                 :         else
    4324                 :         {
    4325               0 :                 htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
    4326                 :                                                           HEAP_XMAX_INVALID |
    4327                 :                                                           HEAP_XMAX_IS_MULTI |
    4328                 :                                                           HEAP_IS_LOCKED |
    4329                 :                                                           HEAP_MOVED);
    4330               0 :                 if (hot_update)
    4331               0 :                         HeapTupleHeaderSetHotUpdated(htup);
    4332                 :                 else
    4333               0 :                         HeapTupleHeaderClearHotUpdated(htup);
    4334               0 :                 HeapTupleHeaderSetXmax(htup, record->xl_xid);
    4335               0 :                 HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
    4336                 :                 /* Set forward chain link in t_ctid */
    4337               0 :                 htup->t_ctid = xlrec->newtid;
    4338                 :         }
    4339                 : 
    4340                 :         /* Mark the page as a candidate for pruning */
    4341               0 :         PageSetPrunable(page, record->xl_xid);
    4342                 : 
    4343                 :         /*
    4344                 :          * this test is ugly, but necessary to avoid thinking that insert change
    4345                 :          * is already applied
    4346                 :          */
    4347               0 :         if (samepage)
    4348               0 :                 goto newsame;
    4349               0 :         PageSetLSN(page, lsn);
    4350               0 :         PageSetTLI(page, ThisTimeLineID);
    4351               0 :         MarkBufferDirty(buffer);
    4352               0 :         UnlockReleaseBuffer(buffer);
    4353                 : 
    4354                 :         /* Deal with new tuple */
    4355                 : 
    4356               0 : newt:;
    4357                 : 
    4358               0 :         if (record->xl_info & XLR_BKP_BLOCK_2)
    4359               0 :                 return;
    4360                 : 
    4361               0 :         if (record->xl_info & XLOG_HEAP_INIT_PAGE)
    4362                 :         {
    4363               0 :                 buffer = XLogReadBuffer(reln,
    4364                 :                                                                 ItemPointerGetBlockNumber(&(xlrec->newtid)),
    4365                 :                                                                 true);
    4366                 :                 Assert(BufferIsValid(buffer));
    4367               0 :                 page = (Page) BufferGetPage(buffer);
    4368                 : 
    4369               0 :                 PageInit(page, BufferGetPageSize(buffer), 0);
    4370                 :         }
    4371                 :         else
    4372                 :         {
    4373               0 :                 buffer = XLogReadBuffer(reln,
    4374                 :                                                                 ItemPointerGetBlockNumber(&(xlrec->newtid)),
    4375                 :                                                                 false);
    4376               0 :                 if (!BufferIsValid(buffer))
    4377                 :                         return;
    4378               0 :                 page = (Page) BufferGetPage(buffer);
    4379                 : 
    4380               0 :                 if (XLByteLE(lsn, PageGetLSN(page)))    /* changes are applied */
    4381                 :                 {
    4382               0 :                         UnlockReleaseBuffer(buffer);
    4383               0 :                         return;
    4384                 :                 }
    4385                 :         }
    4386                 : 
    4387               0 : newsame:;
    4388                 : 
    4389               0 :         offnum = ItemPointerGetOffsetNumber(&(xlrec->newtid));
    4390               0 :         if (PageGetMaxOffsetNumber(page) + 1 < offnum)
    4391               0 :                 elog(PANIC, "heap_update_redo: invalid max offset number");
    4392                 : 
    4393               0 :         hsize = SizeOfHeapUpdate + SizeOfHeapHeader;
    4394               0 :         if (move)
    4395               0 :                 hsize += (2 * sizeof(TransactionId));
    4396                 : 
    4397               0 :         newlen = record->xl_len - hsize;
    4398                 :         Assert(newlen <= MaxHeapTupleSize);
    4399               0 :         memcpy((char *) &xlhdr,
    4400                 :                    (char *) xlrec + SizeOfHeapUpdate,
    4401                 :                    SizeOfHeapHeader);
    4402               0 :         htup = &tbuf.hdr;
    4403               0 :         MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
    4404                 :         /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
    4405               0 :         memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
    4406                 :                    (char *) xlrec + hsize,
    4407                 :                    newlen);
    4408               0 :         newlen += offsetof(HeapTupleHeaderData, t_bits);
    4409               0 :         htup->t_infomask2 = xlhdr.t_infomask2;
    4410               0 :         htup->t_infomask = xlhdr.t_infomask;
    4411               0 :         htup->t_hoff = xlhdr.t_hoff;
    4412                 : 
    4413               0 :         if (move)
    4414                 :         {
    4415                 :                 TransactionId xid[2];   /* xmax, xmin */
    4416                 : 
    4417               0 :                 memcpy((char *) xid,
    4418                 :                            (char *) xlrec + SizeOfHeapUpdate + SizeOfHeapHeader,
    4419                 :                            2 * sizeof(TransactionId));
    4420               0 :                 HeapTupleHeaderSetXmin(htup, xid[1]);
    4421               0 :                 HeapTupleHeaderSetXmax(htup, xid[0]);
    4422               0 :                 HeapTupleHeaderSetXvac(htup, record->xl_xid);
    4423                 :         }
    4424                 :         else
    4425                 :         {
    4426               0 :                 HeapTupleHeaderSetXmin(htup, record->xl_xid);
    4427               0 :                 HeapTupleHeaderSetCmin(htup, FirstCommandId);
    4428                 :         }
    4429                 :         /* Make sure there is no forward chain link in t_ctid */
    4430               0 :         htup->t_ctid = xlrec->newtid;
    4431                 : 
    4432               0 :         offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
    4433               0 :         if (offnum == InvalidOffsetNumber)
    4434               0 :                 elog(PANIC, "heap_update_redo: failed to add tuple");
    4435               0 :         PageSetLSN(page, lsn);
    4436               0 :         PageSetTLI(page, ThisTimeLineID);
    4437               0 :         MarkBufferDirty(buffer);
    4438               0 :         UnlockReleaseBuffer(buffer);
    4439                 : }
    4440                 : 
/*
 * Replay an XLOG_HEAP_LOCK record: re-mark a tuple as locked (shared or
 * exclusive) by setting its xmax to the locking (multi)xact id.
 *
 * No-ops if a full-page backup image covers the block, if the page no
 * longer exists, or if the page LSN shows the change is already applied.
 */
static void
heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
{
	xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
	Relation	reln;
	Buffer		buffer;
	Page		page;
	OffsetNumber offnum;
	ItemId		lp = NULL;
	HeapTupleHeader htup;

	/* If the block was backed up in full, restore_backup_block did the work */
	if (record->xl_info & XLR_BKP_BLOCK_1)
		return;

	reln = XLogOpenRelation(xlrec->target.node);
	buffer = XLogReadBuffer(reln,
							ItemPointerGetBlockNumber(&(xlrec->target.tid)),
							false);
	if (!BufferIsValid(buffer))
		return;					/* page vanished; nothing to redo */
	page = (Page) BufferGetPage(buffer);

	if (XLByteLE(lsn, PageGetLSN(page)))		/* changes are applied */
	{
		UnlockReleaseBuffer(buffer);
		return;
	}

	/* Locate the target line pointer; PANIC if it isn't a normal tuple */
	offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
	if (PageGetMaxOffsetNumber(page) >= offnum)
		lp = PageGetItemId(page, offnum);

	if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
		elog(PANIC, "heap_lock_redo: invalid lp");

	htup = (HeapTupleHeader) PageGetItem(page, lp);

	/* Clear all prior xmax-related state, then set the new lock bits */
	htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
						  HEAP_XMAX_INVALID |
						  HEAP_XMAX_IS_MULTI |
						  HEAP_IS_LOCKED |
						  HEAP_MOVED);
	if (xlrec->xid_is_mxact)
		htup->t_infomask |= HEAP_XMAX_IS_MULTI;
	if (xlrec->shared_lock)
		htup->t_infomask |= HEAP_XMAX_SHARED_LOCK;
	else
		htup->t_infomask |= HEAP_XMAX_EXCL_LOCK;
	HeapTupleHeaderClearHotUpdated(htup);
	HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
	HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
	/* Make sure there is no forward chain link in t_ctid */
	htup->t_ctid = xlrec->target.tid;
	/* Stamp LSN/TLI and dirty the buffer before releasing it */
	PageSetLSN(page, lsn);
	PageSetTLI(page, ThisTimeLineID);
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);
}
    4499                 : 
/*
 * Replay an XLOG_HEAP_INPLACE record: overwrite a tuple's data in place
 * with the new image carried in the WAL record.  The tuple header and
 * length must be unchanged; only the data portion is replaced.
 *
 * No-ops if a full-page backup image covers the block, if the page no
 * longer exists, or if the page LSN shows the change is already applied.
 */
static void
heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
{
	xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
	Relation	reln = XLogOpenRelation(xlrec->target.node);
	Buffer		buffer;
	Page		page;
	OffsetNumber offnum;
	ItemId		lp = NULL;
	HeapTupleHeader htup;
	uint32		oldlen;
	uint32		newlen;

	/* If the block was backed up in full, restore_backup_block did the work */
	if (record->xl_info & XLR_BKP_BLOCK_1)
		return;

	buffer = XLogReadBuffer(reln,
							ItemPointerGetBlockNumber(&(xlrec->target.tid)),
							false);
	if (!BufferIsValid(buffer))
		return;					/* page vanished; nothing to redo */
	page = (Page) BufferGetPage(buffer);

	if (XLByteLE(lsn, PageGetLSN(page)))		/* changes are applied */
	{
		UnlockReleaseBuffer(buffer);
		return;
	}

	/* Locate the target line pointer; PANIC if it isn't a normal tuple */
	offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
	if (PageGetMaxOffsetNumber(page) >= offnum)
		lp = PageGetItemId(page, offnum);

	if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
		elog(PANIC, "heap_inplace_redo: invalid lp");

	htup = (HeapTupleHeader) PageGetItem(page, lp);

	/* In-place update may not change the tuple's data length */
	oldlen = ItemIdGetLength(lp) - htup->t_hoff;
	newlen = record->xl_len - SizeOfHeapInplace;
	if (oldlen != newlen)
		elog(PANIC, "heap_inplace_redo: wrong tuple length");

	/* Copy the new data over the old, leaving the header untouched */
	memcpy((char *) htup + htup->t_hoff,
		   (char *) xlrec + SizeOfHeapInplace,
		   newlen);

	/* Stamp LSN/TLI and dirty the buffer before releasing it */
	PageSetLSN(page, lsn);
	PageSetTLI(page, ThisTimeLineID);
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);
}
    4552                 : 
    4553                 : void
    4554                 : heap_redo(XLogRecPtr lsn, XLogRecord *record)
    4555               0 : {
    4556               0 :         uint8           info = record->xl_info & ~XLR_INFO_MASK;
    4557                 : 
    4558               0 :         switch (info & XLOG_HEAP_OPMASK)
    4559                 :         {
    4560                 :                 case XLOG_HEAP_INSERT:
    4561               0 :                         heap_xlog_insert(lsn, record);
    4562               0 :                         break;
    4563                 :                 case XLOG_HEAP_DELETE:
    4564               0 :                         heap_xlog_delete(lsn, record);
    4565               0 :                         break;
    4566                 :                 case XLOG_HEAP_UPDATE:
    4567               0 :                         heap_xlog_update(lsn, record, false, false);
    4568               0 :                         break;
    4569                 :                 case XLOG_HEAP_MOVE:
    4570               0 :                         heap_xlog_update(lsn, record, true, false);
    4571               0 :                         break;
    4572                 :                 case XLOG_HEAP_HOT_UPDATE:
    4573               0 :                         heap_xlog_update(lsn, record, false, true);
    4574               0 :                         break;
    4575                 :                 case XLOG_HEAP_NEWPAGE:
    4576               0 :                         heap_xlog_newpage(lsn, record);
    4577               0 :                         break;
    4578                 :                 case XLOG_HEAP_LOCK:
    4579               0 :                         heap_xlog_lock(lsn, record);
    4580               0 :                         break;
    4581                 :                 case XLOG_HEAP_INPLACE:
    4582               0 :                         heap_xlog_inplace(lsn, record);
    4583               0 :                         break;
    4584                 :                 default:
    4585               0 :                         elog(PANIC, "heap_redo: unknown op code %u", info);
    4586                 :         }
    4587               0 : }
    4588                 : 
    4589                 : void
    4590                 : heap2_redo(XLogRecPtr lsn, XLogRecord *record)
    4591               0 : {
    4592               0 :         uint8           info = record->xl_info & ~XLR_INFO_MASK;
    4593                 : 
    4594               0 :         switch (info & XLOG_HEAP_OPMASK)
    4595                 :         {
    4596                 :                 case XLOG_HEAP2_FREEZE:
    4597               0 :                         heap_xlog_freeze(lsn, record);
    4598               0 :                         break;
    4599                 :                 case XLOG_HEAP2_CLEAN:
    4600               0 :                         heap_xlog_clean(lsn, record, false);
    4601               0 :                         break;
    4602                 :                 case XLOG_HEAP2_CLEAN_MOVE:
    4603               0 :                         heap_xlog_clean(lsn, record, true);
    4604               0 :                         break;
    4605                 :                 default:
    4606               0 :                         elog(PANIC, "heap2_redo: unknown op code %u", info);
    4607                 :         }
    4608               0 : }
    4609                 : 
    4610                 : static void
    4611                 : out_target(StringInfo buf, xl_heaptid *target)
    4612               0 : {
    4613               0 :         appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
    4614                 :                          target->node.spcNode, target->node.dbNode, target->node.relNode,
    4615                 :                                          ItemPointerGetBlockNumber(&(target->tid)),
    4616                 :                                          ItemPointerGetOffsetNumber(&(target->tid)));
    4617               0 : }
    4618                 : 
    4619                 : void
    4620                 : heap_desc(StringInfo buf, uint8 xl_info, char *rec)
    4621               0 : {
    4622               0 :         uint8           info = xl_info & ~XLR_INFO_MASK;
    4623                 : 
    4624               0 :         info &= XLOG_HEAP_OPMASK;
    4625               0 :         if (info == XLOG_HEAP_INSERT)
    4626                 :         {
    4627               0 :                 xl_heap_insert *xlrec = (xl_heap_insert *) rec;
    4628                 : 
    4629               0 :                 if (xl_info & XLOG_HEAP_INIT_PAGE)
    4630               0 :                         appendStringInfo(buf, "insert(init): ");
    4631                 :                 else
    4632               0 :                         appendStringInfo(buf, "insert: ");
    4633               0 :                 out_target(buf, &(xlrec->target));
    4634                 :         }
    4635               0 :         else if (info == XLOG_HEAP_DELETE)
    4636                 :         {
    4637               0 :                 xl_heap_delete *xlrec = (xl_heap_delete *) rec;
    4638                 : 
    4639               0 :                 appendStringInfo(buf, "delete: ");
    4640               0 :                 out_target(buf, &(xlrec->target));
    4641                 :         }
    4642               0 :         else if (info == XLOG_HEAP_UPDATE)
    4643                 :         {
    4644               0 :                 xl_heap_update *xlrec = (xl_heap_update *) rec;
    4645                 : 
    4646               0 :                 if (xl_info & XLOG_HEAP_INIT_PAGE)
    4647               0 :                         appendStringInfo(buf, "update(init): ");
    4648                 :                 else
    4649               0 :                         appendStringInfo(buf, "update: ");
    4650               0 :                 out_target(buf, &(xlrec->target));
    4651               0 :                 appendStringInfo(buf, "; new %u/%u",
    4652                 :                                                  ItemPointerGetBlockNumber(&(xlrec->newtid)),
    4653                 :                                                  ItemPointerGetOffsetNumber(&(xlrec->newtid)));
    4654                 :         }
    4655               0 :         else if (info == XLOG_HEAP_MOVE)
    4656                 :         {
    4657               0 :                 xl_heap_update *xlrec = (xl_heap_update *) rec;
    4658                 : 
    4659               0 :                 if (xl_info & XLOG_HEAP_INIT_PAGE)
    4660               0 :                         appendStringInfo(buf, "move(init): ");
    4661                 :                 else
    4662               0 :                         appendStringInfo(buf, "move: ");
    4663               0 :                 out_target(buf, &(xlrec->target));
    4664               0 :                 appendStringInfo(buf, "; new %u/%u",
    4665                 :                                                  ItemPointerGetBlockNumber(&(xlrec->newtid)),
    4666                 :                                                  ItemPointerGetOffsetNumber(&(xlrec->newtid)));
    4667                 :         }
    4668               0 :         else if (info == XLOG_HEAP_HOT_UPDATE)
    4669                 :         {
    4670               0 :                 xl_heap_update *xlrec = (xl_heap_update *) rec;
    4671                 : 
    4672               0 :                 if (xl_info & XLOG_HEAP_INIT_PAGE)          /* can this case happen? */
    4673               0 :                         appendStringInfo(buf, "hot_update(init): ");
    4674                 :                 else
    4675               0 :                         appendStringInfo(buf, "hot_update: ");
    4676               0 :                 out_target(buf, &(xlrec->target));
    4677               0 :                 appendStringInfo(buf, "; new %u/%u",
    4678                 :                                                  ItemPointerGetBlockNumber(&(xlrec->newtid)),
    4679                 :                                                  ItemPointerGetOffsetNumber(&(xlrec->newtid)));
    4680                 :         }
    4681               0 :         else if (info == XLOG_HEAP_NEWPAGE)
    4682                 :         {
    4683               0 :                 xl_heap_newpage *xlrec = (xl_heap_newpage *) rec;
    4684                 : 
    4685               0 :                 appendStringInfo(buf, "newpage: rel %u/%u/%u; blk %u",
    4686                 :                                                  xlrec->node.spcNode, xlrec->node.dbNode,
    4687                 :                                                  xlrec->node.relNode, xlrec->blkno);
    4688                 :         }
    4689               0 :         else if (info == XLOG_HEAP_LOCK)
    4690                 :         {
    4691               0 :                 xl_heap_lock *xlrec = (xl_heap_lock *) rec;
    4692                 : 
    4693               0 :                 if (xlrec->shared_lock)
    4694               0 :                         appendStringInfo(buf, "shared_lock: ");
    4695                 :                 else
    4696               0 :                         appendStringInfo(buf, "exclusive_lock: ");
    4697               0 :                 if (xlrec->xid_is_mxact)
    4698               0 :                         appendStringInfo(buf, "mxid ");
    4699                 :                 else
    4700               0 :                         appendStringInfo(buf, "xid ");
    4701               0 :                 appendStringInfo(buf, "%u ", xlrec->locking_xid);
    4702               0 :                 out_target(buf, &(xlrec->target));
    4703                 :         }
    4704               0 :         else if (info == XLOG_HEAP_INPLACE)
    4705                 :         {
    4706               0 :                 xl_heap_inplace *xlrec = (xl_heap_inplace *) rec;
    4707                 : 
    4708               0 :                 appendStringInfo(buf, "inplace: ");
    4709               0 :                 out_target(buf, &(xlrec->target));
    4710                 :         }
    4711                 :         else
    4712               0 :                 appendStringInfo(buf, "UNKNOWN");
    4713               0 : }
    4714                 : 
    4715                 : void
    4716                 : heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
    4717               0 : {
    4718               0 :         uint8           info = xl_info & ~XLR_INFO_MASK;
    4719                 : 
    4720               0 :         info &= XLOG_HEAP_OPMASK;
    4721               0 :         if (info == XLOG_HEAP2_FREEZE)
    4722                 :         {
    4723               0 :                 xl_heap_freeze *xlrec = (xl_heap_freeze *) rec;
    4724                 : 
    4725               0 :                 appendStringInfo(buf, "freeze: rel %u/%u/%u; blk %u; cutoff %u",
    4726                 :                                                  xlrec->node.spcNode, xlrec->node.dbNode,
    4727                 :                                                  xlrec->node.relNode, xlrec->block,
    4728                 :                                                  xlrec->cutoff_xid);
    4729                 :         }
    4730               0 :         else if (info == XLOG_HEAP2_CLEAN)
    4731                 :         {
    4732               0 :                 xl_heap_clean *xlrec = (xl_heap_clean *) rec;
    4733                 : 
    4734               0 :                 appendStringInfo(buf, "clean: rel %u/%u/%u; blk %u",
    4735                 :                                                  xlrec->node.spcNode, xlrec->node.dbNode,
    4736                 :                                                  xlrec->node.relNode, xlrec->block);
    4737                 :         }
    4738               0 :         else if (info == XLOG_HEAP2_CLEAN_MOVE)
    4739                 :         {
    4740               0 :                 xl_heap_clean *xlrec = (xl_heap_clean *) rec;
    4741                 : 
    4742               0 :                 appendStringInfo(buf, "clean_move: rel %u/%u/%u; blk %u",
    4743                 :                                                  xlrec->node.spcNode, xlrec->node.dbNode,
    4744                 :                                                  xlrec->node.relNode, xlrec->block);
    4745                 :         }
    4746                 :         else
    4747               0 :                 appendStringInfo(buf, "UNKNOWN");
    4748               0 : }
    4749                 : 
    4750                 : /*
    4751                 :  *      heap_sync               - sync a heap, for use when no WAL has been written
    4752                 :  *
    4753                 :  * This forces the heap contents (including TOAST heap if any) down to disk.
    4754                 :  * If we skipped using WAL, and it's not a temp relation, we must force the
    4755                 :  * relation down to disk before it's safe to commit the transaction.  This
    4756                 :  * requires writing out any dirty buffers and then doing a forced fsync.
    4757                 :  *
    4758                 :  * Indexes are not touched.  (Currently, index operations associated with
    4759                 :  * the commands that use this are WAL-logged and so do not need fsync.
    4760                 :  * That behavior might change someday, but in any case it's likely that
    4761                 :  * any fsync decisions required would be per-index and hence not appropriate
    4762                 :  * to be done here.)
    4763                 :  */
    4764                 : void
    4765                 : heap_sync(Relation rel)
    4766              36 : {
    4767                 :         /* temp tables never need fsync */
    4768              36 :         if (rel->rd_istemp)
    4769              10 :                 return;
    4770                 : 
    4771                 :         /* main heap */
    4772              26 :         FlushRelationBuffers(rel);
    4773                 :         /* FlushRelationBuffers will have opened rd_smgr */
    4774              26 :         smgrimmedsync(rel->rd_smgr);
    4775                 : 
    4776                 :         /* toast heap, if any */
    4777              26 :         if (OidIsValid(rel->rd_rel->reltoastrelid))
    4778                 :         {
    4779                 :                 Relation        toastrel;
    4780                 : 
    4781               2 :                 toastrel = heap_open(rel->rd_rel->reltoastrelid, AccessShareLock);
    4782               2 :                 FlushRelationBuffers(toastrel);
    4783               2 :                 smgrimmedsync(toastrel->rd_smgr);
    4784               2 :                 heap_close(toastrel, AccessShareLock);
    4785                 :         }
    4786                 : }

Generated by: LTP GCOV extension version 1.5