LTP GCOV extension - code coverage report
Current view: directory - access/nbtree - nbtxlog.c
Test: unnamed
Date: 2008-07-03 Instrumented lines: 403
Code covered: 0.0 % Executed lines: 0
Legend: not executed executed

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * nbtxlog.c
       4                 :  *        WAL replay logic for btrees.
       5                 :  *
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
       8                 :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *        $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.49 2007/11/16 19:53:50 tgl Exp $
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "access/nbtree.h"
      18                 : #include "access/transam.h"
      19                 : 
      20                 : /*
      21                 :  * We must keep track of expected insertions due to page splits, and apply
      22                 :  * them manually if they are not seen in the WAL log during replay.  This
      23                 :  * makes it safe for page insertion to be a multiple-WAL-action process.
      24                 :  *
      25                 :  * Similarly, deletion of an only child page and deletion of its parent page
      26                 :  * form multiple WAL log entries, and we have to be prepared to follow through
      27                 :  * with the deletion if the log ends between.
      28                 :  *
      29                 :  * The data structure is a simple linked list --- this should be good enough,
      30                 :  * since we don't expect a page split or multi deletion to remain incomplete
      31                 :  * for long.  In any case we need to respect the order of operations.
      32                 :  */
      33                 : typedef struct bt_incomplete_action
      34                 : {
      35                 :         RelFileNode node;                       /* the index */
      36                 :         bool            is_split;               /* T = pending split, F = pending delete */
      37                 :         /* these fields are for a split: */
      38                 :         bool            is_root;                /* we split the root */
      39                 :         BlockNumber leftblk;            /* left half of split */
      40                 :         BlockNumber rightblk;           /* right half of split */
      41                 :         /* these fields are for a delete: */
      42                 :         BlockNumber delblk;                     /* parent block to be deleted */
      43                 : } bt_incomplete_action;
      44                 : 
      45                 : static List *incomplete_actions;
      46                 : 
      47                 : 
      48                 : static void
      49                 : log_incomplete_split(RelFileNode node, BlockNumber leftblk,
      50                 :                                          BlockNumber rightblk, bool is_root)
      51               0 : {
      52               0 :         bt_incomplete_action *action = palloc(sizeof(bt_incomplete_action));
      53                 : 
      54               0 :         action->node = node;
      55               0 :         action->is_split = true;
      56               0 :         action->is_root = is_root;
      57               0 :         action->leftblk = leftblk;
      58               0 :         action->rightblk = rightblk;
      59               0 :         incomplete_actions = lappend(incomplete_actions, action);
      60               0 : }
      61                 : 
      62                 : static void
      63                 : forget_matching_split(RelFileNode node, BlockNumber downlink, bool is_root)
      64               0 : {
      65                 :         ListCell   *l;
      66                 : 
      67               0 :         foreach(l, incomplete_actions)
      68                 :         {
      69               0 :                 bt_incomplete_action *action = (bt_incomplete_action *) lfirst(l);
      70                 : 
      71               0 :                 if (RelFileNodeEquals(node, action->node) &&
      72                 :                         action->is_split &&
      73                 :                         downlink == action->rightblk)
      74                 :                 {
      75               0 :                         if (is_root != action->is_root)
      76               0 :                                 elog(LOG, "forget_matching_split: fishy is_root data (expected %d, got %d)",
      77                 :                                          action->is_root, is_root);
      78               0 :                         incomplete_actions = list_delete_ptr(incomplete_actions, action);
      79               0 :                         pfree(action);
      80               0 :                         break;                          /* need not look further */
      81                 :                 }
      82                 :         }
      83               0 : }
      84                 : 
      85                 : static void
      86                 : log_incomplete_deletion(RelFileNode node, BlockNumber delblk)
      87               0 : {
      88               0 :         bt_incomplete_action *action = palloc(sizeof(bt_incomplete_action));
      89                 : 
      90               0 :         action->node = node;
      91               0 :         action->is_split = false;
      92               0 :         action->delblk = delblk;
      93               0 :         incomplete_actions = lappend(incomplete_actions, action);
      94               0 : }
      95                 : 
      96                 : static void
      97                 : forget_matching_deletion(RelFileNode node, BlockNumber delblk)
      98               0 : {
      99                 :         ListCell   *l;
     100                 : 
     101               0 :         foreach(l, incomplete_actions)
     102                 :         {
     103               0 :                 bt_incomplete_action *action = (bt_incomplete_action *) lfirst(l);
     104                 : 
     105               0 :                 if (RelFileNodeEquals(node, action->node) &&
     106                 :                         !action->is_split &&
     107                 :                         delblk == action->delblk)
     108                 :                 {
     109               0 :                         incomplete_actions = list_delete_ptr(incomplete_actions, action);
     110               0 :                         pfree(action);
     111               0 :                         break;                          /* need not look further */
     112                 :                 }
     113                 :         }
     114               0 : }
     115                 : 
     116                 : /*
     117                 :  * _bt_restore_page -- re-enter all the index tuples on a page
     118                 :  *
     119                 :  * The page is freshly init'd, and *from (length len) is a copy of what
     120                 :  * had been its upper part (pd_upper to pd_special).  We assume that the
     121                 :  * tuples had been added to the page in item-number order, and therefore
     122                 :  * the one with highest item number appears first (lowest on the page).
     123                 :  *
     124                 :  * NOTE: the way this routine is coded, the rebuilt page will have the items
     125                 :  * in correct itemno sequence, but physically the opposite order from the
     126                 :  * original, because we insert them in the opposite of itemno order.  This
     127                 :  * does not matter in any current btree code, but it's something to keep an
     128                 :  * eye on.      Is it worth changing just on general principles?  See also the
     129                 :  * notes in btree_xlog_split().
     130                 :  */
     131                 : static void
     132                 : _bt_restore_page(Page page, char *from, int len)
     133               0 : {
     134                 :         IndexTupleData itupdata;
     135                 :         Size            itemsz;
     136               0 :         char       *end = from + len;
     137                 : 
     138               0 :         for (; from < end;)
     139                 :         {
     140                 :                 /* Need to copy tuple header due to alignment considerations */
     141               0 :                 memcpy(&itupdata, from, sizeof(IndexTupleData));
     142               0 :                 itemsz = IndexTupleDSize(itupdata);
     143               0 :                 itemsz = MAXALIGN(itemsz);
     144               0 :                 if (PageAddItem(page, (Item) from, itemsz, FirstOffsetNumber,
     145                 :                                                 false, false) == InvalidOffsetNumber)
     146               0 :                         elog(PANIC, "_bt_restore_page: cannot add item to page");
     147               0 :                 from += itemsz;
     148                 :         }
     149               0 : }
     150                 : 
     151                 : static void
     152                 : _bt_restore_meta(Relation reln, XLogRecPtr lsn,
     153                 :                                  BlockNumber root, uint32 level,
     154                 :                                  BlockNumber fastroot, uint32 fastlevel)
     155               0 : {
     156                 :         Buffer          metabuf;
     157                 :         Page            metapg;
     158                 :         BTMetaPageData *md;
     159                 :         BTPageOpaque pageop;
     160                 : 
     161               0 :         metabuf = XLogReadBuffer(reln, BTREE_METAPAGE, true);
     162                 :         Assert(BufferIsValid(metabuf));
     163               0 :         metapg = BufferGetPage(metabuf);
     164                 : 
     165               0 :         _bt_pageinit(metapg, BufferGetPageSize(metabuf));
     166                 : 
     167               0 :         md = BTPageGetMeta(metapg);
     168               0 :         md->btm_magic = BTREE_MAGIC;
     169               0 :         md->btm_version = BTREE_VERSION;
     170               0 :         md->btm_root = root;
     171               0 :         md->btm_level = level;
     172               0 :         md->btm_fastroot = fastroot;
     173               0 :         md->btm_fastlevel = fastlevel;
     174                 : 
     175               0 :         pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
     176               0 :         pageop->btpo_flags = BTP_META;
     177                 : 
     178                 :         /*
     179                 :          * Set pd_lower just past the end of the metadata.      This is not essential
     180                 :          * but it makes the page look compressible to xlog.c.
     181                 :          */
     182               0 :         ((PageHeader) metapg)->pd_lower =
     183                 :                 ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
     184                 : 
     185               0 :         PageSetLSN(metapg, lsn);
     186               0 :         PageSetTLI(metapg, ThisTimeLineID);
     187               0 :         MarkBufferDirty(metabuf);
     188               0 :         UnlockReleaseBuffer(metabuf);
     189               0 : }
     190                 : 
     191                 : static void
     192                 : btree_xlog_insert(bool isleaf, bool ismeta,
     193                 :                                   XLogRecPtr lsn, XLogRecord *record)
     194               0 : {
     195               0 :         xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
     196                 :         Relation        reln;
     197                 :         Buffer          buffer;
     198                 :         Page            page;
     199                 :         char       *datapos;
     200                 :         int                     datalen;
     201                 :         xl_btree_metadata md;
     202               0 :         BlockNumber downlink = 0;
     203                 : 
     204               0 :         datapos = (char *) xlrec + SizeOfBtreeInsert;
     205               0 :         datalen = record->xl_len - SizeOfBtreeInsert;
     206               0 :         if (!isleaf)
     207                 :         {
     208               0 :                 memcpy(&downlink, datapos, sizeof(BlockNumber));
     209               0 :                 datapos += sizeof(BlockNumber);
     210               0 :                 datalen -= sizeof(BlockNumber);
     211                 :         }
     212               0 :         if (ismeta)
     213                 :         {
     214               0 :                 memcpy(&md, datapos, sizeof(xl_btree_metadata));
     215               0 :                 datapos += sizeof(xl_btree_metadata);
     216               0 :                 datalen -= sizeof(xl_btree_metadata);
     217                 :         }
     218                 : 
     219               0 :         if ((record->xl_info & XLR_BKP_BLOCK_1) && !ismeta && isleaf)
     220               0 :                 return;                                 /* nothing to do */
     221                 : 
     222               0 :         reln = XLogOpenRelation(xlrec->target.node);
     223                 : 
     224               0 :         if (!(record->xl_info & XLR_BKP_BLOCK_1))
     225                 :         {
     226               0 :                 buffer = XLogReadBuffer(reln,
     227                 :                                                          ItemPointerGetBlockNumber(&(xlrec->target.tid)),
     228                 :                                                                 false);
     229               0 :                 if (BufferIsValid(buffer))
     230                 :                 {
     231               0 :                         page = (Page) BufferGetPage(buffer);
     232                 : 
     233               0 :                         if (XLByteLE(lsn, PageGetLSN(page)))
     234                 :                         {
     235               0 :                                 UnlockReleaseBuffer(buffer);
     236                 :                         }
     237                 :                         else
     238                 :                         {
     239               0 :                                 if (PageAddItem(page, (Item) datapos, datalen,
     240                 :                                                         ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
     241                 :                                                                 false, false) == InvalidOffsetNumber)
     242               0 :                                         elog(PANIC, "btree_insert_redo: failed to add item");
     243                 : 
     244               0 :                                 PageSetLSN(page, lsn);
     245               0 :                                 PageSetTLI(page, ThisTimeLineID);
     246               0 :                                 MarkBufferDirty(buffer);
     247               0 :                                 UnlockReleaseBuffer(buffer);
     248                 :                         }
     249                 :                 }
     250                 :         }
     251                 : 
     252               0 :         if (ismeta)
     253               0 :                 _bt_restore_meta(reln, lsn,
     254                 :                                                  md.root, md.level,
     255                 :                                                  md.fastroot, md.fastlevel);
     256                 : 
     257                 :         /* Forget any split this insertion completes */
     258               0 :         if (!isleaf)
     259               0 :                 forget_matching_split(xlrec->target.node, downlink, false);
     260                 : }
     261                 : 
     262                 : static void
     263                 : btree_xlog_split(bool onleft, bool isroot,
     264                 :                                  XLogRecPtr lsn, XLogRecord *record)
     265               0 : {
     266               0 :         xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
     267                 :         Relation        reln;
     268                 :         Buffer          rbuf;
     269                 :         Page            rpage;
     270                 :         BTPageOpaque ropaque;
     271                 :         char       *datapos;
     272                 :         int                     datalen;
     273               0 :         OffsetNumber newitemoff = 0;
     274               0 :         Item            newitem = NULL;
     275               0 :         Size            newitemsz = 0;
     276               0 :         Item            left_hikey = NULL;
     277               0 :         Size            left_hikeysz = 0;
     278                 : 
     279               0 :         reln = XLogOpenRelation(xlrec->node);
     280                 : 
     281               0 :         datapos = (char *) xlrec + SizeOfBtreeSplit;
     282               0 :         datalen = record->xl_len - SizeOfBtreeSplit;
     283                 : 
     284                 :         /* Forget any split this insertion completes */
     285               0 :         if (xlrec->level > 0)
     286                 :         {
     287                 :                 /* we assume SizeOfBtreeSplit is at least 16-bit aligned */
     288               0 :                 BlockNumber downlink = BlockIdGetBlockNumber((BlockId) datapos);
     289                 : 
     290               0 :                 datapos += sizeof(BlockIdData);
     291               0 :                 datalen -= sizeof(BlockIdData);
     292                 : 
     293               0 :                 forget_matching_split(xlrec->node, downlink, false);
     294                 : 
     295                 :                 /* Extract left hikey and its size (still assuming 16-bit alignment) */
     296               0 :                 if (!(record->xl_info & XLR_BKP_BLOCK_1))
     297                 :                 {
     298                 :                         /* We assume 16-bit alignment is enough for IndexTupleSize */
     299               0 :                         left_hikey = (Item) datapos;
     300               0 :                         left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
     301                 : 
     302               0 :                         datapos += left_hikeysz;
     303               0 :                         datalen -= left_hikeysz;
     304                 :                 }
     305                 :         }
     306                 : 
     307                 :         /* Extract newitem and newitemoff, if present */
     308               0 :         if (onleft)
     309                 :         {
     310                 :                 /* Extract the offset (still assuming 16-bit alignment) */
     311               0 :                 memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
     312               0 :                 datapos += sizeof(OffsetNumber);
     313               0 :                 datalen -= sizeof(OffsetNumber);
     314                 :         }
     315                 : 
     316               0 :         if (onleft && !(record->xl_info & XLR_BKP_BLOCK_1))
     317                 :         {
     318                 :                 /*
     319                 :                  * We assume that 16-bit alignment is enough to apply IndexTupleSize
     320                 :                  * (since it's fetching from a uint16 field) and also enough for
     321                 :                  * PageAddItem to insert the tuple.
     322                 :                  */
     323               0 :                 newitem = (Item) datapos;
     324               0 :                 newitemsz = MAXALIGN(IndexTupleSize(newitem));
     325               0 :                 datapos += newitemsz;
     326               0 :                 datalen -= newitemsz;
     327                 :         }
     328                 : 
     329                 :         /* Reconstruct right (new) sibling from scratch */
     330               0 :         rbuf = XLogReadBuffer(reln, xlrec->rightsib, true);
     331                 :         Assert(BufferIsValid(rbuf));
     332               0 :         rpage = (Page) BufferGetPage(rbuf);
     333                 : 
     334               0 :         _bt_pageinit(rpage, BufferGetPageSize(rbuf));
     335               0 :         ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
     336                 : 
     337               0 :         ropaque->btpo_prev = xlrec->leftsib;
     338               0 :         ropaque->btpo_next = xlrec->rnext;
     339               0 :         ropaque->btpo.level = xlrec->level;
     340               0 :         ropaque->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0;
     341               0 :         ropaque->btpo_cycleid = 0;
     342                 : 
     343               0 :         _bt_restore_page(rpage, datapos, datalen);
     344                 : 
     345                 :         /*
     346                 :          * On leaf level, the high key of the left page is equal to the
     347                 :          * first key on the right page.
     348                 :          */
     349               0 :         if (xlrec->level == 0)
     350                 :         {
     351               0 :                 ItemId          hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque));
     352                 : 
     353               0 :                 left_hikey = PageGetItem(rpage, hiItemId);
     354               0 :                 left_hikeysz = ItemIdGetLength(hiItemId);
     355                 :         }
     356                 : 
     357               0 :         PageSetLSN(rpage, lsn);
     358               0 :         PageSetTLI(rpage, ThisTimeLineID);
     359               0 :         MarkBufferDirty(rbuf);
     360                 : 
     361                 :         /* don't release the buffer yet; we touch right page's first item below */
     362                 : 
     363                 :         /*
     364                 :          * Reconstruct left (original) sibling if needed.  Note that this code
     365                 :          * ensures that the items remaining on the left page are in the correct
     366                 :          * item number order, but it does not reproduce the physical order they
     367                 :          * would have had.      Is this worth changing?  See also _bt_restore_page().
     368                 :          */
     369               0 :         if (!(record->xl_info & XLR_BKP_BLOCK_1))
     370                 :         {
     371               0 :                 Buffer          lbuf = XLogReadBuffer(reln, xlrec->leftsib, false);
     372                 : 
     373               0 :                 if (BufferIsValid(lbuf))
     374                 :                 {
     375               0 :                         Page            lpage = (Page) BufferGetPage(lbuf);
     376               0 :                         BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
     377                 : 
     378               0 :                         if (!XLByteLE(lsn, PageGetLSN(lpage)))
     379                 :                         {
     380                 :                                 OffsetNumber off;
     381               0 :                                 OffsetNumber maxoff = PageGetMaxOffsetNumber(lpage);
     382                 :                                 OffsetNumber deletable[MaxOffsetNumber];
     383               0 :                                 int                     ndeletable = 0;
     384                 : 
     385                 :                                 /*
     386                 :                                  * Remove the items from the left page that were copied to the
     387                 :                                  * right page.  Also remove the old high key, if any. (We must
     388                 :                                  * remove everything before trying to insert any items, else
     389                 :                                  * we risk not having enough space.)
     390                 :                                  */
     391               0 :                                 if (!P_RIGHTMOST(lopaque))
     392                 :                                 {
     393               0 :                                         deletable[ndeletable++] = P_HIKEY;
     394                 : 
     395                 :                                         /*
     396                 :                                          * newitemoff is given to us relative to the original
     397                 :                                          * page's item numbering, so adjust it for this deletion.
     398                 :                                          */
     399               0 :                                         newitemoff--;
     400                 :                                 }
     401               0 :                                 for (off = xlrec->firstright; off <= maxoff; off++)
     402               0 :                                         deletable[ndeletable++] = off;
     403               0 :                                 if (ndeletable > 0)
     404               0 :                                         PageIndexMultiDelete(lpage, deletable, ndeletable);
     405                 : 
     406                 :                                 /*
     407                 :                                  * Add the new item if it was inserted on left page.
     408                 :                                  */
     409               0 :                                 if (onleft)
     410                 :                                 {
     411               0 :                                         if (PageAddItem(lpage, newitem, newitemsz, newitemoff,
     412                 :                                                                         false, false) == InvalidOffsetNumber)
     413               0 :                                                 elog(PANIC, "failed to add new item to left page after split");
     414                 :                                 }
     415                 : 
     416                 :                                 /* Set high key */
     417               0 :                                 if (PageAddItem(lpage, left_hikey, left_hikeysz,
     418                 :                                                                 P_HIKEY, false, false) == InvalidOffsetNumber)
     419               0 :                                         elog(PANIC, "failed to add high key to left page after split");
     420                 : 
     421                 :                                 /* Fix opaque fields */
     422               0 :                                 lopaque->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0;
     423               0 :                                 lopaque->btpo_next = xlrec->rightsib;
     424               0 :                                 lopaque->btpo_cycleid = 0;
     425                 : 
     426               0 :                                 PageSetLSN(lpage, lsn);
     427               0 :                                 PageSetTLI(lpage, ThisTimeLineID);
     428               0 :                                 MarkBufferDirty(lbuf);
     429                 :                         }
     430                 : 
     431               0 :                         UnlockReleaseBuffer(lbuf);
     432                 :                 }
     433                 :         }
     434                 : 
     435                 :         /* We no longer need the right buffer */
     436               0 :         UnlockReleaseBuffer(rbuf);
     437                 : 
     438                 :         /* Fix left-link of the page to the right of the new right sibling */
     439               0 :         if (xlrec->rnext != P_NONE && !(record->xl_info & XLR_BKP_BLOCK_2))
     440                 :         {
     441               0 :                 Buffer          buffer = XLogReadBuffer(reln, xlrec->rnext, false);
     442                 : 
     443               0 :                 if (BufferIsValid(buffer))
     444                 :                 {
     445               0 :                         Page            page = (Page) BufferGetPage(buffer);
     446                 : 
     447               0 :                         if (!XLByteLE(lsn, PageGetLSN(page)))
     448                 :                         {
     449               0 :                                 BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
     450                 : 
     451               0 :                                 pageop->btpo_prev = xlrec->rightsib;
     452                 : 
     453               0 :                                 PageSetLSN(page, lsn);
     454               0 :                                 PageSetTLI(page, ThisTimeLineID);
     455               0 :                                 MarkBufferDirty(buffer);
     456                 :                         }
     457               0 :                         UnlockReleaseBuffer(buffer);
     458                 :                 }
     459                 :         }
     460                 : 
     461                 :         /* The job ain't done till the parent link is inserted... */
     462               0 :         log_incomplete_split(xlrec->node,
     463                 :                                                  xlrec->leftsib, xlrec->rightsib, isroot);
     464               0 : }
     465                 : 
     466                 : static void
     467                 : btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
     468               0 : {
     469                 :         xl_btree_delete *xlrec;
     470                 :         Relation        reln;
     471                 :         Buffer          buffer;
     472                 :         Page            page;
     473                 :         BTPageOpaque opaque;
     474                 : 
     475               0 :         if (record->xl_info & XLR_BKP_BLOCK_1)
     476               0 :                 return;
     477                 : 
     478               0 :         xlrec = (xl_btree_delete *) XLogRecGetData(record);
     479               0 :         reln = XLogOpenRelation(xlrec->node);
     480               0 :         buffer = XLogReadBuffer(reln, xlrec->block, false);
     481               0 :         if (!BufferIsValid(buffer))
     482                 :                 return;
     483               0 :         page = (Page) BufferGetPage(buffer);
     484                 : 
     485               0 :         if (XLByteLE(lsn, PageGetLSN(page)))
     486                 :         {
     487               0 :                 UnlockReleaseBuffer(buffer);
     488               0 :                 return;
     489                 :         }
     490                 : 
     491               0 :         if (record->xl_len > SizeOfBtreeDelete)
     492                 :         {
     493                 :                 OffsetNumber *unused;
     494                 :                 OffsetNumber *unend;
     495                 : 
     496               0 :                 unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
     497               0 :                 unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
     498                 : 
     499               0 :                 PageIndexMultiDelete(page, unused, unend - unused);
     500                 :         }
     501                 : 
     502                 :         /*
     503                 :          * Mark the page as not containing any LP_DEAD items --- see comments in
     504                 :          * _bt_delitems().
     505                 :          */
     506               0 :         opaque = (BTPageOpaque) PageGetSpecialPointer(page);
     507               0 :         opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
     508                 : 
     509               0 :         PageSetLSN(page, lsn);
     510               0 :         PageSetTLI(page, ThisTimeLineID);
     511               0 :         MarkBufferDirty(buffer);
     512               0 :         UnlockReleaseBuffer(buffer);
     513                 : }
     514                 : 
     515                 : static void
     516                 : btree_xlog_delete_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
     517               0 : {
     518               0 :         xl_btree_delete_page *xlrec = (xl_btree_delete_page *) XLogRecGetData(record);
     519                 :         Relation        reln;
     520                 :         BlockNumber parent;
     521                 :         BlockNumber target;
     522                 :         BlockNumber leftsib;
     523                 :         BlockNumber rightsib;
     524                 :         Buffer          buffer;
     525                 :         Page            page;
     526                 :         BTPageOpaque pageop;
     527                 : 
     528               0 :         reln = XLogOpenRelation(xlrec->target.node);
     529               0 :         parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
     530               0 :         target = xlrec->deadblk;
     531               0 :         leftsib = xlrec->leftblk;
     532               0 :         rightsib = xlrec->rightblk;
     533                 : 
     534                 :         /* parent page */
     535               0 :         if (!(record->xl_info & XLR_BKP_BLOCK_1))
     536                 :         {
     537               0 :                 buffer = XLogReadBuffer(reln, parent, false);
     538               0 :                 if (BufferIsValid(buffer))
     539                 :                 {
     540               0 :                         page = (Page) BufferGetPage(buffer);
     541               0 :                         pageop = (BTPageOpaque) PageGetSpecialPointer(page);
     542               0 :                         if (XLByteLE(lsn, PageGetLSN(page)))
     543                 :                         {
     544               0 :                                 UnlockReleaseBuffer(buffer);
     545                 :                         }
     546                 :                         else
     547                 :                         {
     548                 :                                 OffsetNumber poffset;
     549                 : 
     550               0 :                                 poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
     551               0 :                                 if (poffset >= PageGetMaxOffsetNumber(page))
     552                 :                                 {
     553                 :                                         Assert(info == XLOG_BTREE_DELETE_PAGE_HALF);
     554                 :                                         Assert(poffset == P_FIRSTDATAKEY(pageop));
     555               0 :                                         PageIndexTupleDelete(page, poffset);
     556               0 :                                         pageop->btpo_flags |= BTP_HALF_DEAD;
     557                 :                                 }
     558                 :                                 else
     559                 :                                 {
     560                 :                                         ItemId          itemid;
     561                 :                                         IndexTuple      itup;
     562                 :                                         OffsetNumber nextoffset;
     563                 : 
     564                 :                                         Assert(info != XLOG_BTREE_DELETE_PAGE_HALF);
     565               0 :                                         itemid = PageGetItemId(page, poffset);
     566               0 :                                         itup = (IndexTuple) PageGetItem(page, itemid);
     567               0 :                                         ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY);
     568               0 :                                         nextoffset = OffsetNumberNext(poffset);
     569               0 :                                         PageIndexTupleDelete(page, nextoffset);
     570                 :                                 }
     571                 : 
     572               0 :                                 PageSetLSN(page, lsn);
     573               0 :                                 PageSetTLI(page, ThisTimeLineID);
     574               0 :                                 MarkBufferDirty(buffer);
     575               0 :                                 UnlockReleaseBuffer(buffer);
     576                 :                         }
     577                 :                 }
     578                 :         }
     579                 : 
     580                 :         /* Fix left-link of right sibling */
     581               0 :         if (!(record->xl_info & XLR_BKP_BLOCK_2))
     582                 :         {
     583               0 :                 buffer = XLogReadBuffer(reln, rightsib, false);
     584               0 :                 if (BufferIsValid(buffer))
     585                 :                 {
     586               0 :                         page = (Page) BufferGetPage(buffer);
     587               0 :                         if (XLByteLE(lsn, PageGetLSN(page)))
     588                 :                         {
     589               0 :                                 UnlockReleaseBuffer(buffer);
     590                 :                         }
     591                 :                         else
     592                 :                         {
     593               0 :                                 pageop = (BTPageOpaque) PageGetSpecialPointer(page);
     594               0 :                                 pageop->btpo_prev = leftsib;
     595                 : 
     596               0 :                                 PageSetLSN(page, lsn);
     597               0 :                                 PageSetTLI(page, ThisTimeLineID);
     598               0 :                                 MarkBufferDirty(buffer);
     599               0 :                                 UnlockReleaseBuffer(buffer);
     600                 :                         }
     601                 :                 }
     602                 :         }
     603                 : 
     604                 :         /* Fix right-link of left sibling, if any */
     605               0 :         if (!(record->xl_info & XLR_BKP_BLOCK_3))
     606                 :         {
     607               0 :                 if (leftsib != P_NONE)
     608                 :                 {
     609               0 :                         buffer = XLogReadBuffer(reln, leftsib, false);
     610               0 :                         if (BufferIsValid(buffer))
     611                 :                         {
     612               0 :                                 page = (Page) BufferGetPage(buffer);
     613               0 :                                 if (XLByteLE(lsn, PageGetLSN(page)))
     614                 :                                 {
     615               0 :                                         UnlockReleaseBuffer(buffer);
     616                 :                                 }
     617                 :                                 else
     618                 :                                 {
     619               0 :                                         pageop = (BTPageOpaque) PageGetSpecialPointer(page);
     620               0 :                                         pageop->btpo_next = rightsib;
     621                 : 
     622               0 :                                         PageSetLSN(page, lsn);
     623               0 :                                         PageSetTLI(page, ThisTimeLineID);
     624               0 :                                         MarkBufferDirty(buffer);
     625               0 :                                         UnlockReleaseBuffer(buffer);
     626                 :                                 }
     627                 :                         }
     628                 :                 }
     629                 :         }
     630                 : 
     631                 :         /* Rewrite target page as empty deleted page */
     632               0 :         buffer = XLogReadBuffer(reln, target, true);
     633                 :         Assert(BufferIsValid(buffer));
     634               0 :         page = (Page) BufferGetPage(buffer);
     635                 : 
     636               0 :         _bt_pageinit(page, BufferGetPageSize(buffer));
     637               0 :         pageop = (BTPageOpaque) PageGetSpecialPointer(page);
     638                 : 
     639               0 :         pageop->btpo_prev = leftsib;
     640               0 :         pageop->btpo_next = rightsib;
     641               0 :         pageop->btpo.xact = FrozenTransactionId;
     642               0 :         pageop->btpo_flags = BTP_DELETED;
     643               0 :         pageop->btpo_cycleid = 0;
     644                 : 
     645               0 :         PageSetLSN(page, lsn);
     646               0 :         PageSetTLI(page, ThisTimeLineID);
     647               0 :         MarkBufferDirty(buffer);
     648               0 :         UnlockReleaseBuffer(buffer);
     649                 : 
     650                 :         /* Update metapage if needed */
     651               0 :         if (info == XLOG_BTREE_DELETE_PAGE_META)
     652                 :         {
     653                 :                 xl_btree_metadata md;
     654                 : 
     655               0 :                 memcpy(&md, (char *) xlrec + SizeOfBtreeDeletePage,
     656                 :                            sizeof(xl_btree_metadata));
     657               0 :                 _bt_restore_meta(reln, lsn,
     658                 :                                                  md.root, md.level,
     659                 :                                                  md.fastroot, md.fastlevel);
     660                 :         }
     661                 : 
     662                 :         /* Forget any completed deletion */
     663               0 :         forget_matching_deletion(xlrec->target.node, target);
     664                 : 
     665                 :         /* If parent became half-dead, remember it for deletion */
     666               0 :         if (info == XLOG_BTREE_DELETE_PAGE_HALF)
     667               0 :                 log_incomplete_deletion(xlrec->target.node, parent);
     668               0 : }
     669                 : 
     670                 : static void
     671                 : btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
     672               0 : {
     673               0 :         xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
     674                 :         Relation        reln;
     675                 :         Buffer          buffer;
     676                 :         Page            page;
     677                 :         BTPageOpaque pageop;
     678               0 :         BlockNumber downlink = 0;
     679                 : 
     680               0 :         reln = XLogOpenRelation(xlrec->node);
     681               0 :         buffer = XLogReadBuffer(reln, xlrec->rootblk, true);
     682                 :         Assert(BufferIsValid(buffer));
     683               0 :         page = (Page) BufferGetPage(buffer);
     684                 : 
     685               0 :         _bt_pageinit(page, BufferGetPageSize(buffer));
     686               0 :         pageop = (BTPageOpaque) PageGetSpecialPointer(page);
     687                 : 
     688               0 :         pageop->btpo_flags = BTP_ROOT;
     689               0 :         pageop->btpo_prev = pageop->btpo_next = P_NONE;
     690               0 :         pageop->btpo.level = xlrec->level;
     691               0 :         if (xlrec->level == 0)
     692               0 :                 pageop->btpo_flags |= BTP_LEAF;
     693               0 :         pageop->btpo_cycleid = 0;
     694                 : 
     695               0 :         if (record->xl_len > SizeOfBtreeNewroot)
     696                 :         {
     697                 :                 IndexTuple      itup;
     698                 : 
     699               0 :                 _bt_restore_page(page,
     700                 :                                                  (char *) xlrec + SizeOfBtreeNewroot,
     701                 :                                                  record->xl_len - SizeOfBtreeNewroot);
     702                 :                 /* extract downlink to the right-hand split page */
     703               0 :                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_FIRSTKEY));
     704               0 :                 downlink = ItemPointerGetBlockNumber(&(itup->t_tid));
     705                 :                 Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
     706                 :         }
     707                 : 
     708               0 :         PageSetLSN(page, lsn);
     709               0 :         PageSetTLI(page, ThisTimeLineID);
     710               0 :         MarkBufferDirty(buffer);
     711               0 :         UnlockReleaseBuffer(buffer);
     712                 : 
     713               0 :         _bt_restore_meta(reln, lsn,
     714                 :                                          xlrec->rootblk, xlrec->level,
     715                 :                                          xlrec->rootblk, xlrec->level);
     716                 : 
     717                 :         /* Check to see if this satisfies any incomplete insertions */
     718               0 :         if (record->xl_len > SizeOfBtreeNewroot)
     719               0 :                 forget_matching_split(xlrec->node, downlink, true);
     720               0 : }
     721                 : 
     722                 : 
     723                 : void
     724                 : btree_redo(XLogRecPtr lsn, XLogRecord *record)
     725               0 : {
     726               0 :         uint8           info = record->xl_info & ~XLR_INFO_MASK;
     727                 : 
     728               0 :         switch (info)
     729                 :         {
     730                 :                 case XLOG_BTREE_INSERT_LEAF:
     731               0 :                         btree_xlog_insert(true, false, lsn, record);
     732               0 :                         break;
     733                 :                 case XLOG_BTREE_INSERT_UPPER:
     734               0 :                         btree_xlog_insert(false, false, lsn, record);
     735               0 :                         break;
     736                 :                 case XLOG_BTREE_INSERT_META:
     737               0 :                         btree_xlog_insert(false, true, lsn, record);
     738               0 :                         break;
     739                 :                 case XLOG_BTREE_SPLIT_L:
     740               0 :                         btree_xlog_split(true, false, lsn, record);
     741               0 :                         break;
     742                 :                 case XLOG_BTREE_SPLIT_R:
     743               0 :                         btree_xlog_split(false, false, lsn, record);
     744               0 :                         break;
     745                 :                 case XLOG_BTREE_SPLIT_L_ROOT:
     746               0 :                         btree_xlog_split(true, true, lsn, record);
     747               0 :                         break;
     748                 :                 case XLOG_BTREE_SPLIT_R_ROOT:
     749               0 :                         btree_xlog_split(false, true, lsn, record);
     750               0 :                         break;
     751                 :                 case XLOG_BTREE_DELETE:
     752               0 :                         btree_xlog_delete(lsn, record);
     753               0 :                         break;
     754                 :                 case XLOG_BTREE_DELETE_PAGE:
     755                 :                 case XLOG_BTREE_DELETE_PAGE_META:
     756                 :                 case XLOG_BTREE_DELETE_PAGE_HALF:
     757               0 :                         btree_xlog_delete_page(info, lsn, record);
     758               0 :                         break;
     759                 :                 case XLOG_BTREE_NEWROOT:
     760               0 :                         btree_xlog_newroot(lsn, record);
     761               0 :                         break;
     762                 :                 default:
     763               0 :                         elog(PANIC, "btree_redo: unknown op code %u", info);
     764                 :         }
     765               0 : }
     766                 : 
     767                 : static void
     768                 : out_target(StringInfo buf, xl_btreetid *target)
     769               0 : {
     770               0 :         appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
     771                 :                          target->node.spcNode, target->node.dbNode, target->node.relNode,
     772                 :                                          ItemPointerGetBlockNumber(&(target->tid)),
     773                 :                                          ItemPointerGetOffsetNumber(&(target->tid)));
     774               0 : }
     775                 : 
     776                 : void
     777                 : btree_desc(StringInfo buf, uint8 xl_info, char *rec)
     778               0 : {
     779               0 :         uint8           info = xl_info & ~XLR_INFO_MASK;
     780                 : 
     781               0 :         switch (info)
     782                 :         {
     783                 :                 case XLOG_BTREE_INSERT_LEAF:
     784                 :                         {
     785               0 :                                 xl_btree_insert *xlrec = (xl_btree_insert *) rec;
     786                 : 
     787               0 :                                 appendStringInfo(buf, "insert: ");
     788               0 :                                 out_target(buf, &(xlrec->target));
     789               0 :                                 break;
     790                 :                         }
     791                 :                 case XLOG_BTREE_INSERT_UPPER:
     792                 :                         {
     793               0 :                                 xl_btree_insert *xlrec = (xl_btree_insert *) rec;
     794                 : 
     795               0 :                                 appendStringInfo(buf, "insert_upper: ");
     796               0 :                                 out_target(buf, &(xlrec->target));
     797               0 :                                 break;
     798                 :                         }
     799                 :                 case XLOG_BTREE_INSERT_META:
     800                 :                         {
     801               0 :                                 xl_btree_insert *xlrec = (xl_btree_insert *) rec;
     802                 : 
     803               0 :                                 appendStringInfo(buf, "insert_meta: ");
     804               0 :                                 out_target(buf, &(xlrec->target));
     805               0 :                                 break;
     806                 :                         }
     807                 :                 case XLOG_BTREE_SPLIT_L:
     808                 :                         {
     809               0 :                                 xl_btree_split *xlrec = (xl_btree_split *) rec;
     810                 : 
     811               0 :                                 appendStringInfo(buf, "split_l: rel %u/%u/%u ",
     812                 :                                                                  xlrec->node.spcNode, xlrec->node.dbNode,
     813                 :                                                                  xlrec->node.relNode);
     814               0 :                                 appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
     815                 :                                                            xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
     816                 :                                                                  xlrec->level, xlrec->firstright);
     817               0 :                                 break;
     818                 :                         }
     819                 :                 case XLOG_BTREE_SPLIT_R:
     820                 :                         {
     821               0 :                                 xl_btree_split *xlrec = (xl_btree_split *) rec;
     822                 : 
     823               0 :                                 appendStringInfo(buf, "split_r: rel %u/%u/%u ",
     824                 :                                                                  xlrec->node.spcNode, xlrec->node.dbNode,
     825                 :                                                                  xlrec->node.relNode);
     826               0 :                                 appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
     827                 :                                                            xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
     828                 :                                                                  xlrec->level, xlrec->firstright);
     829               0 :                                 break;
     830                 :                         }
     831                 :                 case XLOG_BTREE_SPLIT_L_ROOT:
     832                 :                         {
     833               0 :                                 xl_btree_split *xlrec = (xl_btree_split *) rec;
     834                 : 
     835               0 :                                 appendStringInfo(buf, "split_l_root: rel %u/%u/%u ",
     836                 :                                                                  xlrec->node.spcNode, xlrec->node.dbNode,
     837                 :                                                                  xlrec->node.relNode);
     838               0 :                                 appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
     839                 :                                                            xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
     840                 :                                                                  xlrec->level, xlrec->firstright);
     841               0 :                                 break;
     842                 :                         }
     843                 :                 case XLOG_BTREE_SPLIT_R_ROOT:
     844                 :                         {
     845               0 :                                 xl_btree_split *xlrec = (xl_btree_split *) rec;
     846                 : 
     847               0 :                                 appendStringInfo(buf, "split_r_root: rel %u/%u/%u ",
     848                 :                                                                  xlrec->node.spcNode, xlrec->node.dbNode,
     849                 :                                                                  xlrec->node.relNode);
     850               0 :                                 appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
     851                 :                                                            xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
     852                 :                                                                  xlrec->level, xlrec->firstright);
     853               0 :                                 break;
     854                 :                         }
     855                 :                 case XLOG_BTREE_DELETE:
     856                 :                         {
     857               0 :                                 xl_btree_delete *xlrec = (xl_btree_delete *) rec;
     858                 : 
     859               0 :                                 appendStringInfo(buf, "delete: rel %u/%u/%u; blk %u",
     860                 :                                                                  xlrec->node.spcNode, xlrec->node.dbNode,
     861                 :                                                                  xlrec->node.relNode, xlrec->block);
     862               0 :                                 break;
     863                 :                         }
     864                 :                 case XLOG_BTREE_DELETE_PAGE:
     865                 :                 case XLOG_BTREE_DELETE_PAGE_META:
     866                 :                 case XLOG_BTREE_DELETE_PAGE_HALF:
     867                 :                         {
     868               0 :                                 xl_btree_delete_page *xlrec = (xl_btree_delete_page *) rec;
     869                 : 
     870               0 :                                 appendStringInfo(buf, "delete_page: ");
     871               0 :                                 out_target(buf, &(xlrec->target));
     872               0 :                                 appendStringInfo(buf, "; dead %u; left %u; right %u",
     873                 :                                                         xlrec->deadblk, xlrec->leftblk, xlrec->rightblk);
     874               0 :                                 break;
     875                 :                         }
     876                 :                 case XLOG_BTREE_NEWROOT:
     877                 :                         {
     878               0 :                                 xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
     879                 : 
     880               0 :                                 appendStringInfo(buf, "newroot: rel %u/%u/%u; root %u lev %u",
     881                 :                                                                  xlrec->node.spcNode, xlrec->node.dbNode,
     882                 :                                                                  xlrec->node.relNode,
     883                 :                                                                  xlrec->rootblk, xlrec->level);
     884               0 :                                 break;
     885                 :                         }
     886                 :                 default:
     887               0 :                         appendStringInfo(buf, "UNKNOWN");
     888                 :                         break;
     889                 :         }
     890               0 : }
     891                 : 
     892                 : void
     893                 : btree_xlog_startup(void)
     894               0 : {
     895               0 :         incomplete_actions = NIL;
     896               0 : }
     897                 : 
     898                 : void
     899                 : btree_xlog_cleanup(void)
     900               0 : {
     901                 :         ListCell   *l;
     902                 : 
     903               0 :         foreach(l, incomplete_actions)
     904                 :         {
     905               0 :                 bt_incomplete_action *action = (bt_incomplete_action *) lfirst(l);
     906                 :                 Relation        reln;
     907                 : 
     908               0 :                 reln = XLogOpenRelation(action->node);
     909               0 :                 if (action->is_split)
     910                 :                 {
     911                 :                         /* finish an incomplete split */
     912                 :                         Buffer          lbuf,
     913                 :                                                 rbuf;
     914                 :                         Page            lpage,
     915                 :                                                 rpage;
     916                 :                         BTPageOpaque lpageop,
     917                 :                                                 rpageop;
     918                 :                         bool            is_only;
     919                 : 
     920               0 :                         lbuf = XLogReadBuffer(reln, action->leftblk, false);
     921                 :                         /* failure is impossible because we wrote this page earlier */
     922               0 :                         if (!BufferIsValid(lbuf))
     923               0 :                                 elog(PANIC, "btree_xlog_cleanup: left block unfound");
     924               0 :                         lpage = (Page) BufferGetPage(lbuf);
     925               0 :                         lpageop = (BTPageOpaque) PageGetSpecialPointer(lpage);
     926               0 :                         rbuf = XLogReadBuffer(reln, action->rightblk, false);
     927                 :                         /* failure is impossible because we wrote this page earlier */
     928               0 :                         if (!BufferIsValid(rbuf))
     929               0 :                                 elog(PANIC, "btree_xlog_cleanup: right block unfound");
     930               0 :                         rpage = (Page) BufferGetPage(rbuf);
     931               0 :                         rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage);
     932                 : 
     933                 :                         /* if the pages are all of their level, it's a only-page split */
     934               0 :                         is_only = P_LEFTMOST(lpageop) && P_RIGHTMOST(rpageop);
     935                 : 
     936               0 :                         _bt_insert_parent(reln, lbuf, rbuf, NULL,
     937                 :                                                           action->is_root, is_only);
     938                 :                 }
     939                 :                 else
     940                 :                 {
     941                 :                         /* finish an incomplete deletion (of a half-dead page) */
     942                 :                         Buffer          buf;
     943                 : 
     944               0 :                         buf = XLogReadBuffer(reln, action->delblk, false);
     945               0 :                         if (BufferIsValid(buf))
     946               0 :                                 if (_bt_pagedel(reln, buf, NULL, true) == 0)
     947               0 :                                         elog(PANIC, "btree_xlog_cleanup: _bt_pagdel failed");
     948                 :                 }
     949                 :         }
     950               0 :         incomplete_actions = NIL;
     951               0 : }
     952                 : 
     953                 : bool
     954                 : btree_safe_restartpoint(void)
     955               0 : {
     956               0 :         if (incomplete_actions)
     957               0 :                 return false;
     958               0 :         return true;
     959                 : }

Generated by: LTP GCOV extension version 1.5