LTP GCOV extension - code coverage report
Current view: directory - access/transam - multixact.c
Test: unnamed
Date: 2008-07-03 Instrumented lines: 460
Code covered: 27.6 % Executed lines: 127
Legend: not executed executed

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * multixact.c
       4                 :  *              PostgreSQL multi-transaction-log manager
       5                 :  *
       6                 :  * The pg_multixact manager is a pg_clog-like manager that stores an array
       7                 :  * of TransactionIds for each MultiXactId.      It is a fundamental part of the
       8                 :  * shared-row-lock implementation.      A share-locked tuple stores a
       9                 :  * MultiXactId in its Xmax, and a transaction that needs to wait for the
      10                 :  * tuple to be unlocked can sleep on the potentially-several TransactionIds
      11                 :  * that compose the MultiXactId.
      12                 :  *
      13                 :  * We use two SLRU areas, one for storing the offsets at which the data
      14                 :  * starts for each MultiXactId in the other one.  This trick allows us to
      15                 :  * store variable length arrays of TransactionIds.      (We could alternatively
      16                 :  * use one area containing counts and TransactionIds, with valid MultiXactId
      17                 :  * values pointing at slots containing counts; but that way seems less robust
      18                 :  * since it would get completely confused if someone inquired about a bogus
      19                 :  * MultiXactId that pointed to an intermediate slot containing an XID.)
      20                 :  *
      21                 :  * XLOG interactions: this module generates an XLOG record whenever a new
      22                 :  * OFFSETs or MEMBERs page is initialized to zeroes, as well as an XLOG record
      23                 :  * whenever a new MultiXactId is defined.  This allows us to completely
      24                 :  * rebuild the data entered since the last checkpoint during XLOG replay.
      25                 :  * Because this is possible, we need not follow the normal rule of
      26                 :  * "write WAL before data"; the only correctness guarantee needed is that
      27                 :  * we flush and sync all dirty OFFSETs and MEMBERs pages to disk before a
      28                 :  * checkpoint is considered complete.  If a page does make it to disk ahead
      29                 :  * of corresponding WAL records, it will be forcibly zeroed before use anyway.
      30                 :  * Therefore, we don't need to mark our pages with LSN information; we have
      31                 :  * enough synchronization already.
      32                 :  *
      33                 :  * Like clog.c, and unlike subtrans.c, we have to preserve state across
      34                 :  * crashes and ensure that MXID and offset numbering increases monotonically
      35                 :  * across a crash.      We do this in the same way as it's done for transaction
      36                 :  * IDs: the WAL record is guaranteed to contain evidence of every MXID we
      37                 :  * could need to worry about, and we just make sure that at the end of
      38                 :  * replay, the next-MXID and next-offset counters are at least as large as
      39                 :  * anything we saw during replay.
      40                 :  *
      41                 :  *
      42                 :  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
      43                 :  * Portions Copyright (c) 1994, Regents of the University of California
      44                 :  *
      45                 :  * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.26 2007/11/15 21:14:32 momjian Exp $
      46                 :  *
      47                 :  *-------------------------------------------------------------------------
      48                 :  */
      49                 : #include "postgres.h"
      50                 : 
      51                 : #include "access/multixact.h"
      52                 : #include "access/slru.h"
      53                 : #include "access/transam.h"
      54                 : #include "access/xact.h"
      55                 : #include "miscadmin.h"
      56                 : #include "storage/backendid.h"
      57                 : #include "storage/lmgr.h"
      58                 : #include "utils/memutils.h"
      59                 : #include "storage/procarray.h"
      60                 : 
      61                 : 
      62                 : /*
      63                 :  * Defines for MultiXactOffset page sizes.      A page is the same BLCKSZ as is
      64                 :  * used everywhere else in Postgres.
      65                 :  *
      66                 :  * Note: because both MultiXactOffsets and TransactionIds are 32 bits and
      67                 :  * wrap around at 0xFFFFFFFF, MultiXact page numbering also wraps around at
      68                 :  * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE, and segment numbering at
      69                 :  * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  We need take no
      70                 :  * explicit notice of that fact in this module, except when comparing segment
      71                 :  * and page numbers in TruncateMultiXact
      72                 :  * (see MultiXact{Offset,Member}PagePrecedes).
      73                 :  */
      74                 : 
      75                 : /* We need four bytes per offset and also four bytes per member */
      76                 : #define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
      77                 : #define MULTIXACT_MEMBERS_PER_PAGE (BLCKSZ / sizeof(TransactionId))
      78                 : /* Split a MultiXactId into a page number and an entry index, in units of MULTIXACT_OFFSETS_PER_PAGE */
      79                 : #define MultiXactIdToOffsetPage(xid) \
      80                 :         ((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
      81                 : #define MultiXactIdToOffsetEntry(xid) \
      82                 :         ((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
      83                 : /* Same split in units of MULTIXACT_MEMBERS_PER_PAGE; judging by the macro names the argument is a member offset, although it is spelled xid */
      84                 : #define MXOffsetToMemberPage(xid) \
      85                 :         ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
      86                 : #define MXOffsetToMemberEntry(xid) \
      87                 :         ((xid) % (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
      88                 : 
      89                 : 
      90                 : /*
      91                 :  * Links to shared-memory data structures for MultiXact control
      92                 :  */
      93                 : static SlruCtlData MultiXactOffsetCtlData;     /* control data for the offsets area */
      94                 : static SlruCtlData MultiXactMemberCtlData;     /* control data for the members area */
      95                 : /* Pointer-style shorthands for the two control structs above */
      96                 : #define MultiXactOffsetCtl      (&MultiXactOffsetCtlData)
      97                 : #define MultiXactMemberCtl      (&MultiXactMemberCtlData)
      98                 : 
      99                 : /*
     100                 :  * MultiXact state shared across all backends.  All this state is protected
     101                 :  * by MultiXactGenLock.  (We also use MultiXactOffsetControlLock and
     102                 :  * MultiXactMemberControlLock to guard accesses to the two sets of SLRU
     103                 :  * buffers.  For concurrency's sake, we avoid holding more than one of these
     104                 :  * locks at a time.)
     105                 :  */
     106                 : typedef struct MultiXactStateData
     107                 : {
     108                 :         /* next-to-be-assigned MultiXactId */
     109                 :         MultiXactId nextMXact;
     110                 : 
     111                 :         /* next-to-be-assigned offset */
     112                 :         MultiXactOffset nextOffset;
     113                 : 
     114                 :         /* the Offset SLRU area was last truncated at this MultiXactId */
     115                 :         MultiXactId lastTruncationPoint;
     116                 : 
     117                 :         /*
     118                 :          * Per-backend data starts here.  We have two arrays stored in the area
     119                 :          * immediately following the MultiXactStateData struct. Each is indexed by
     120                 :          * BackendId.  (Note: valid BackendIds run from 1 to MaxBackends; element
     121                 :          * zero of each array is never used.)
     122                 :          *
     123                 :          * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
     124                 :          * transaction(s) could possibly be a member of, or InvalidMultiXactId
     125                 :          * when the backend has no live transaction that could possibly be a
     126                 :          * member of a MultiXact.  Each backend sets its entry to the current
     127                 :          * nextMXact counter just before first acquiring a shared lock in a given
     128                 :          * transaction, and clears it at transaction end. (This works because only
     129                 :          * during or after acquiring a shared lock could an XID possibly become a
     130                 :          * member of a MultiXact, and that MultiXact would have to be created
     131                 :          * during or after the lock acquisition.)
     132                 :          *
     133                 :          * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
     134                 :          * current transaction(s) think is potentially live, or InvalidMultiXactId
     135                 :          * when not in a transaction or not in a transaction that's paid any
     136                 :          * attention to MultiXacts yet.  This is computed when first needed in a
     137                 :          * given transaction, and cleared at transaction end.  We can compute it
     138                 :          * as the minimum of the valid OldestMemberMXactId[] entries at the time
     139                 :          * we compute it (using nextMXact if none are valid).  Each backend is
     140                 :          * required not to attempt to access any SLRU data for MultiXactIds older
     141                 :          * than its own OldestVisibleMXactId[] setting; this is necessary because
     142                 :          * the checkpointer could truncate away such data at any instant.
     143                 :          *
     144                 :          * The checkpointer can compute the safe truncation point as the oldest
     145                 :          * valid value among all the OldestMemberMXactId[] and
     146                 :          * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
     147                 :          * Clearly, it is not possible for any later-computed OldestVisibleMXactId
     148                 :          * value to be older than this, and so there is no risk of truncating data
     149                 :          * that is still needed.
     150                 :          */
     151                 :         MultiXactId perBackendXactIds[1];       /* VARIABLE LENGTH ARRAY; the per-backend data described above starts here */
     152                 : } MultiXactStateData;
     153                 : 
     154                 : /* Pointers to the state data in shared memory */
     155                 : static MultiXactStateData *MultiXactState;
     156                 : static MultiXactId *OldestMemberMXactId;       /* per-backend array, indexed by BackendId */
     157                 : static MultiXactId *OldestVisibleMXactId;      /* per-backend array, indexed by BackendId */
     158                 : 
     159                 : 
     160                 : /*
     161                 :  * Definitions for the backend-local MultiXactId cache.
     162                 :  *
     163                 :  * We use this cache to store known MultiXacts, so we don't need to go to
     164                 :  * SLRU areas every time.
     165                 :  *
     166                 :  * The cache lasts for the duration of a single transaction, the rationale
     167                 :  * for this being that most entries will contain our own TransactionId and
     168                 :  * so they will be uninteresting by the time our next transaction starts.
     169                 :  * (XXX not clear that this is correct --- other members of the MultiXact
     170                 :  * could hang around longer than we did.  However, it's not clear what a
     171                 :  * better policy for flushing old cache entries would be.)
     172                 :  *
     173                 :  * We allocate the cache entries in a memory context that is deleted at
     174                 :  * transaction end, so we don't need to do retail freeing of entries.
     175                 :  */
     176                 : typedef struct mXactCacheEnt
     177                 : {
     178                 :         struct mXactCacheEnt *next;     /* link to another cache entry */
     179                 :         MultiXactId multi;              /* presumably the MultiXactId this entry describes -- confirm in mXactCachePut */
     180                 :         int                     nxids;  /* presumably the number of valid entries in xids[] -- confirm in mXactCachePut */
     181                 :         TransactionId xids[1];          /* VARIABLE LENGTH ARRAY */
     182                 : } mXactCacheEnt;
     183                 : /* Backend-local cache state; both pointers start out NULL */
     184                 : static mXactCacheEnt *MXactCache = NULL;
     185                 : static MemoryContext MXactContext = NULL;
     186                 : 
     187                 : 
     188                 : #ifdef MULTIXACT_DEBUG          /* debug_elogN hands its N arguments straight to elog */
     189                 : #define debug_elog2(a,b) elog(a,b)
     190                 : #define debug_elog3(a,b,c) elog(a,b,c)
     191                 : #define debug_elog4(a,b,c,d) elog(a,b,c,d)
     192                 : #define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
     193                 : #else                           /* without MULTIXACT_DEBUG the calls expand to nothing */
     194                 : #define debug_elog2(a,b)
     195                 : #define debug_elog3(a,b,c)
     196                 : #define debug_elog4(a,b,c,d)
     197                 : #define debug_elog5(a,b,c,d,e)
     198                 : #endif   /* MULTIXACT_DEBUG */
     199                 : 
     200                 : /* internal MultiXactId management */
     201                 : static void MultiXactIdSetOldestVisible(void);
     202                 : static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids);
     203                 : static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
     204                 :                                    int nxids, TransactionId *xids);
     205                 : static MultiXactId GetNewMultiXactId(int nxids, MultiXactOffset *offset);
     206                 : 
     207                 : /* MultiXact cache management */
     208                 : static MultiXactId mXactCacheGetBySet(int nxids, TransactionId *xids);
     209                 : static int      mXactCacheGetById(MultiXactId multi, TransactionId **xids);
     210                 : static void mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids);
     211                 : static int      xidComparator(const void *arg1, const void *arg2);
     212                 : 
     213                 : #ifdef MULTIXACT_DEBUG          /* declared only when debug output is compiled in */
     214                 : static char *mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids);
     215                 : #endif   /* MULTIXACT_DEBUG */
     216                 : 
     217                 : /* management of SLRU infrastructure */
     218                 : static int      ZeroMultiXactOffsetPage(int pageno, bool writeXlog);
     219                 : static int      ZeroMultiXactMemberPage(int pageno, bool writeXlog);
     220                 : static bool MultiXactOffsetPagePrecedes(int page1, int page2);
     221                 : static bool MultiXactMemberPagePrecedes(int page1, int page2);
     222                 : static bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
     223                 : static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
     224                 :                                                 MultiXactOffset offset2);
     225                 : static void ExtendMultiXactOffset(MultiXactId multi);
     226                 : static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
     227                 : static void TruncateMultiXact(void);
     228                 : static void WriteMZeroPageXlogRec(int pageno, uint8 info);
     229                 : 
     230                 : 
     231                 : /*
     232                 :  * MultiXactIdCreate
     233                 :  *              Construct a MultiXactId representing two TransactionIds, and return it.
     234                 :  *
     235                 :  * The two XIDs must be valid and different (both conditions are asserted).
     236                 :  *
     237                 :  * NB - we don't worry about our local MultiXactId cache here, because that
     238                 :  * is handled by the lower-level routines.
     239                 :  */
     240                 : MultiXactId
     241                 : MultiXactIdCreate(TransactionId xid1, TransactionId xid2)
     242               0 : {
     243                 :         MultiXactId newMulti;
     244                 :         TransactionId xids[2];
     245                 : 
     246                 :         AssertArg(TransactionIdIsValid(xid1));
     247                 :         AssertArg(TransactionIdIsValid(xid2));
     248                 : 
     249                 :         Assert(!TransactionIdEquals(xid1, xid2));
     250                 : 
     251                 :         /*
     252                 :          * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
     253                 :          * are still running.  In typical usage, xid2 will be our own XID and the
     254                 :          * caller just did a check on xid1, so it'd be wasted effort.
     255                 :          */
     256                 : 
     257               0 :         xids[0] = xid1;
     258               0 :         xids[1] = xid2;
     259                 :         /* pass the pair down to CreateMultiXactId as a two-element array */
     260               0 :         newMulti = CreateMultiXactId(2, xids);
     261                 : 
     262                 :         debug_elog5(DEBUG2, "Create: returning %u for %u, %u",
     263                 :                                 newMulti, xid1, xid2);
     264                 : 
     265               0 :         return newMulti;
     266                 : }
     267                 : 
     268                 : /*
     269                 :  * MultiXactIdExpand
     270                 :  *              Add a TransactionId to a pre-existing MultiXactId, and return the result.
     271                 :  *
     272                 :  * If the TransactionId is already a member of the passed MultiXactId,
     273                 :  * just return it as-is.
     274                 :  *
     275                 :  * Note that we do NOT actually modify the membership of a pre-existing
     276                 :  * MultiXactId; instead we create a new one.  This is necessary to avoid
     277                 :  * a race condition against MultiXactIdWait (see notes there).
     278                 :  *
     279                 :  * NB - we don't worry about our local MultiXactId cache here, because that
     280                 :  * is handled by the lower-level routines.
     281                 :  */
     282                 : MultiXactId
     283                 : MultiXactIdExpand(MultiXactId multi, TransactionId xid)
     284               0 : {
     285                 :         MultiXactId newMulti;
     286                 :         TransactionId *members;
     287                 :         TransactionId *newMembers;
     288                 :         int                     nmembers;
     289                 :         int                     i;
     290                 :         int                     j;
     291                 : 
     292                 :         AssertArg(MultiXactIdIsValid(multi));
     293                 :         AssertArg(TransactionIdIsValid(xid));
     294                 : 
     295                 :         debug_elog4(DEBUG2, "Expand: received multi %u, xid %u",
     296                 :                                 multi, xid);
     297                 : 
     298               0 :         nmembers = GetMultiXactIdMembers(multi, &members);
     299                 : 
     300               0 :         if (nmembers < 0)
     301                 :         {
     302                 :                 /*
     303                 :                  * The MultiXactId is obsolete.  This can only happen if all the
     304                 :                  * MultiXactId members stop running between the caller checking and
     305                 :                  * passing it to us.  It would be better to return that fact to the
     306                 :                  * caller, but it would complicate the API and it's unlikely to happen
     307                 :                  * too often, so just deal with it by creating a singleton MultiXact.
     308                 :                  */
     309               0 :                 newMulti = CreateMultiXactId(1, &xid);
     310                 : 
     311                 :                 debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
     312                 :                                         multi, newMulti);
     313               0 :                 return newMulti;
     314                 :         }
     315                 : 
     316                 :         /*
     317                 :          * If the TransactionId is already a member of the MultiXactId, just
     318                 :          * return the existing MultiXactId.
     319                 :          */
     320               0 :         for (i = 0; i < nmembers; i++)
     321                 :         {
     322               0 :                 if (TransactionIdEquals(members[i], xid))
     323                 :                 {
     324                 :                         debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
     325                 :                                                 xid, multi);
     326               0 :                         pfree(members);
     327               0 :                         return multi;
     328                 :                 }
     329                 :         }
     330                 : 
     331                 :         /*
     332                 :          * Determine which of the members of the MultiXactId are still running,
     333                 :          * and use them to create a new one.  (Removing dead members is just an
     334                 :          * optimization, but a useful one.      Note we have the same race condition
     335                 :          * here as above: j could be 0 at the end of the loop.)
     336                 :          */
     337               0 :         newMembers = (TransactionId *)
     338                 :                 palloc(sizeof(TransactionId) * (nmembers + 1));        /* +1 leaves room for xid itself */
     339                 : 
     340               0 :         for (i = 0, j = 0; i < nmembers; i++)
     341                 :         {
     342               0 :                 if (TransactionIdIsInProgress(members[i]))
     343               0 :                         newMembers[j++] = members[i];
     344                 :         }
     345                 :         /* xid goes in after the surviving members; j ends up as the new member count */
     346               0 :         newMembers[j++] = xid;
     347               0 :         newMulti = CreateMultiXactId(j, newMembers);
     348                 : 
     349               0 :         pfree(members);
     350               0 :         pfree(newMembers);
     351                 : 
     352                 :         debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
     353                 : 
     354               0 :         return newMulti;
     355                 : }
     356                 : 
     357                 : /*
     358                 :  * MultiXactIdIsRunning
     359                 :  *              Returns whether a MultiXactId is "running".
     360                 :  *
     361                 :  * We return true if at least one member of the given MultiXactId is still
     362                 :  * running.  Note that a "false" result is certain not to change,
     363                 :  * because it is not legal to add members to an existing MultiXactId.
     364                 :  */
     365                 : bool
     366                 : MultiXactIdIsRunning(MultiXactId multi)
     367               0 : {
     368                 :         TransactionId *members;
     369                 :         int                     nmembers;
     370                 :         int                     i;
     371                 : 
     372                 :         debug_elog3(DEBUG2, "IsRunning %u?", multi);
     373                 : 
     374               0 :         nmembers = GetMultiXactIdMembers(multi, &members);
     375                 : 
     376               0 :         if (nmembers < 0)
     377                 :         {
     378                 :                 debug_elog2(DEBUG2, "IsRunning: no members");
     379               0 :                 return false;   /* members is not freed on this path */
     380                 :         }
     381                 : 
     382                 :         /*
     383                 :          * Checking for myself is cheap compared to looking in shared memory, so
     384                 :          * first do the equivalent of MultiXactIdIsCurrent().  This is not needed
     385                 :          * for correctness, it's just a fast path.
     386                 :          */
     387               0 :         for (i = 0; i < nmembers; i++)
     388                 :         {
     389               0 :                 if (TransactionIdIsCurrentTransactionId(members[i]))
     390                 :                 {
     391                 :                         debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
     392               0 :                         pfree(members);
     393               0 :                         return true;
     394                 :                 }
     395                 :         }
     396                 : 
     397                 :         /*
     398                 :          * This could be made faster by having another entry point in procarray.c,
     399                 :          * walking the PGPROC array only once for all the members.      But in most
     400                 :          * cases nmembers should be small enough that it doesn't much matter.
     401                 :          */
     402               0 :         for (i = 0; i < nmembers; i++)
     403                 :         {
     404               0 :                 if (TransactionIdIsInProgress(members[i]))
     405                 :                 {
     406                 :                         debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
     407                 :                                                 i, members[i]);
     408               0 :                         pfree(members);
     409               0 :                         return true;
     410                 :                 }
     411                 :         }
     412                 :         /* neither loop found a member that is current or in progress */
     413               0 :         pfree(members);
     414                 : 
     415                 :         debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
     416                 : 
     417               0 :         return false;
     418                 : }
     419                 : 
     420                 : /*
     421                 :  * MultiXactIdIsCurrent
     422                 :  *              Returns true if the current transaction is a member of the MultiXactId.
     423                 :  *
     424                 :  * We return true if any live subtransaction of the current top-level
     425                 :  * transaction is a member.  This is appropriate for the same reason that a
     426                 :  * lock held by any such subtransaction is globally equivalent to a lock
     427                 :  * held by the current subtransaction: no such lock could be released without
     428                 :  * aborting this subtransaction, and hence releasing its locks.  So it's not
     429                 :  * necessary to add the current subxact to the MultiXact separately.
     430                 :  */
     431                 : bool
     432                 : MultiXactIdIsCurrent(MultiXactId multi)
     433               0 : {
     434               0 :         bool            result = false;
     435                 :         TransactionId *members;
     436                 :         int                     nmembers;
     437                 :         int                     i;
     438                 : 
     439               0 :         nmembers = GetMultiXactIdMembers(multi, &members);
     440                 : 
     441               0 :         if (nmembers < 0)
     442               0 :                 return false;   /* negative member count: report "not current"; members is not freed here */
     443                 : 
     444               0 :         for (i = 0; i < nmembers; i++)
     445                 :         {
     446               0 :                 if (TransactionIdIsCurrentTransactionId(members[i]))
     447                 :                 {
     448               0 :                         result = true;
     449               0 :                         break;  /* one matching member is enough */
     450                 :                 }
     451                 :         }
     452                 :         /* single exit below, so members is freed whether or not a match was found */
     453               0 :         pfree(members);
     454                 : 
     455               0 :         return result;
     456                 : }
     457                 : 
     458                 : /*
     459                 :  * MultiXactIdSetOldestMember
     460                 :  *              Save the oldest MultiXactId this transaction could be a member of.
     461                 :  *
     462                 :  * We set the OldestMemberMXactId for a given transaction the first time
     463                 :  * it's going to acquire a shared lock.  We need to do this even if we end
     464                 :  * up using a TransactionId instead of a MultiXactId, because there is a
     465                 :  * chance that another transaction would add our XID to a MultiXactId.
     466                 :  *
     467                 :  * The value to set is the next-to-be-assigned MultiXactId, so this is meant
     468                 :  * to be called just before acquiring a shared lock.
     469                 :  */
     470                 : void
     471                 : MultiXactIdSetOldestMember(void)
     472             111 : {
     473             111 :         if (!MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]))     /* nothing to do once our entry holds a valid value */
     474                 :         {
     475                 :                 MultiXactId nextMXact;
     476                 : 
     477                 :                 /*
     478                 :                  * You might think we don't need to acquire a lock here, since
     479                 :                  * fetching and storing of TransactionIds is probably atomic, but in
     480                 :                  * fact we do: suppose we pick up nextMXact and then lose the CPU for
     481                 :                  * a long time.  Someone else could advance nextMXact, and then
     482                 :                  * another someone else could compute an OldestVisibleMXactId that
     483                 :                  * would be after the value we are going to store when we get control
     484                 :                  * back.  Which would be wrong.
     485                 :                  */
     486             105 :                 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
     487                 : 
     488                 :                 /*
     489                 :                  * We have to beware of the possibility that nextMXact is in the
     490                 :                  * wrapped-around state.  We don't fix the counter itself here, but we
     491                 :                  * must be sure to store a valid value in our array entry.
     492                 :                  */
     493             105 :                 nextMXact = MultiXactState->nextMXact;
     494             105 :                 if (nextMXact < FirstMultiXactId)
     495               0 :                         nextMXact = FirstMultiXactId;
     496                 :                 /* store the (possibly adjusted) value while the lock is still held */
     497             105 :                 OldestMemberMXactId[MyBackendId] = nextMXact;
     498                 : 
     499             105 :                 LWLockRelease(MultiXactGenLock);
     500                 : 
     501                 :                 debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
     502                 :                                         MyBackendId, nextMXact);
     503                 :         }
     504             111 : }
     505                 : 
     506                 : /*
     507                 :  * MultiXactIdSetOldestVisible
     508                 :  *              Save the oldest MultiXactId this transaction considers possibly live.
     509                 :  *
     510                 :  * We set the OldestVisibleMXactId for a given transaction the first time
     511                 :  * it's going to inspect any MultiXactId.  Once we have set this, we are
     512                 :  * guaranteed that the checkpointer won't truncate off SLRU data for
     513                 :  * MultiXactIds at or after our OldestVisibleMXactId.
     514                 :  *
     515                 :  * The value to set is the oldest of nextMXact and all the valid per-backend
     516                 :  * OldestMemberMXactId[] entries.  Because of the locking we do, we can be
     517                 :  * certain that no subsequent call to MultiXactIdSetOldestMember can set
     518                 :  * an OldestMemberMXactId[] entry older than what we compute here.      Therefore
     519                 :  * there is no live transaction, now or later, that can be a member of any
     520                 :  * MultiXactId older than the OldestVisibleMXactId we compute here.
                         :  *
                         :  * Takes no arguments and returns nothing.  This is a no-op when
                         :  * OldestVisibleMXactId[MyBackendId] is already valid.  Otherwise, while
                         :  * holding MultiXactGenLock exclusively, we start from nextMXact (replaced
                         :  * by FirstMultiXactId if it is below that), scan OldestMemberMXactId[]
                         :  * entries 1 through MaxBackends, lower the candidate to any valid entry
                         :  * that MultiXactIdPrecedes() it, and store the result in our own entry.
     521                 :  */
     522                 : static void
     523                 : MultiXactIdSetOldestVisible(void)
     524               0 : {
     525               0 :         if (!MultiXactIdIsValid(OldestVisibleMXactId[MyBackendId]))
     526                 :         {
     527                 :                 MultiXactId oldestMXact;
     528                 :                 int                     i;
     529                 : 
     530               0 :                 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
     531                 : 
     532                 :                 /*
     533                 :                  * We have to beware of the possibility that nextMXact is in the
     534                 :                  * wrapped-around state.  We don't fix the counter itself here, but we
     535                 :                  * must be sure to store a valid value in our array entry.
     536                 :                  */
     537               0 :                 oldestMXact = MultiXactState->nextMXact;
     538               0 :                 if (oldestMXact < FirstMultiXactId)
     539               0 :                         oldestMXact = FirstMultiXactId;
     540                 : 
     541               0 :                 for (i = 1; i <= MaxBackends; i++)
     542                 :                 {
     543               0 :                         MultiXactId thisoldest = OldestMemberMXactId[i];
     544                 : 
     545               0 :                         if (MultiXactIdIsValid(thisoldest) &&
     546                 :                                 MultiXactIdPrecedes(thisoldest, oldestMXact))
     547               0 :                                 oldestMXact = thisoldest;
     548                 :                 }
     549                 : 
     550               0 :                 OldestVisibleMXactId[MyBackendId] = oldestMXact;
     551                 : 
     552               0 :                 LWLockRelease(MultiXactGenLock);
     553                 : 
     554                 :                 debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = %u",
     555                 :                                         MyBackendId, oldestMXact);
     556                 :         }
     557               0 : }
     558                 : 
     559                 : /*
     560                 :  * MultiXactIdWait
     561                 :  *              Sleep on a MultiXactId.
     562                 :  *
     563                 :  * We do this by sleeping on each member using XactLockTableWait.  Any
     564                 :  * members that belong to the current backend are *not* waited for, however;
     565                 :  * this would not merely be useless but would lead to Assert failure inside
     566                 :  * XactLockTableWait.  By the time this returns, it is certain that all
     567                 :  * transactions *of other backends* that were members of the MultiXactId
     568                 :  * are dead (and no new ones can have been added, since it is not legal
     569                 :  * to add members to an existing MultiXactId).
     570                 :  *
     571                 :  * But by the time we finish sleeping, someone else may have changed the Xmax
     572                 :  * of the containing tuple, so the caller needs to iterate on us somehow.
                         :  *
                         :  * multi is the MultiXactId to wait on; nothing is returned.  If
                         :  * GetMultiXactIdMembers() reports a negative member count we return
                         :  * without waiting on anything.  Otherwise members are waited on one at a
                         :  * time, in array order, and the members array handed back by
                         :  * GetMultiXactIdMembers() is pfree'd here once the loop is done.
     573                 :  */
     574                 : void
     575                 : MultiXactIdWait(MultiXactId multi)
     576               0 : {
     577                 :         TransactionId *members;
     578                 :         int                     nmembers;
     579                 : 
     580               0 :         nmembers = GetMultiXactIdMembers(multi, &members);
     581                 : 
     582               0 :         if (nmembers >= 0)
     583                 :         {
     584                 :                 int                     i;
     585                 : 
     586               0 :                 for (i = 0; i < nmembers; i++)
     587                 :                 {
     588               0 :                         TransactionId member = members[i];
     589                 : 
     590                 :                         debug_elog4(DEBUG2, "MultiXactIdWait: waiting for %d (%u)",
     591                 :                                                 i, member);
     592               0 :                         if (!TransactionIdIsCurrentTransactionId(member))
     593               0 :                                 XactLockTableWait(member);
     594                 :                 }
     595                 : 
     596               0 :                 pfree(members);
     597                 :         }
     598               0 : }
     599                 : 
     600                 : /*
     601                 :  * ConditionalMultiXactIdWait
     602                 :  *              As above, but only lock if we can get the lock without blocking.
                         :  *
                         :  * multi is the MultiXactId to check.  Members for which
                         :  * TransactionIdIsCurrentTransactionId() is true are skipped, just as in
                         :  * MultiXactIdWait.
                         :  *
                         :  * Returns false as soon as ConditionalXactLockTableWait() returns false for
                         :  * some member; the remaining members are then not tried.  Returns true
                         :  * otherwise, which includes the case where GetMultiXactIdMembers() reports
                         :  * a negative member count.  The members array, when one was obtained, is
                         :  * pfree'd before returning on either outcome.
     603                 :  */
     604                 : bool
     605                 : ConditionalMultiXactIdWait(MultiXactId multi)
     606               0 : {
     607               0 :         bool            result = true;
     608                 :         TransactionId *members;
     609                 :         int                     nmembers;
     610                 : 
     611               0 :         nmembers = GetMultiXactIdMembers(multi, &members);
     612                 : 
     613               0 :         if (nmembers >= 0)
     614                 :         {
     615                 :                 int                     i;
     616                 : 
     617               0 :                 for (i = 0; i < nmembers; i++)
     618                 :                 {
     619               0 :                         TransactionId member = members[i];
     620                 : 
     621                 :                         debug_elog4(DEBUG2, "ConditionalMultiXactIdWait: trying %d (%u)",
     622                 :                                                 i, member);
     623               0 :                         if (!TransactionIdIsCurrentTransactionId(member))
     624                 :                         {
     625               0 :                                 result = ConditionalXactLockTableWait(member);
     626               0 :                                 if (!result)
     627               0 :                                         break;
     628                 :                         }
     629                 :                 }
     630                 : 
     631               0 :                 pfree(members);
     632                 :         }
     633                 : 
     634               0 :         return result;
     635                 : }
     636                 : 
     637                 : /*
     638                 :  * CreateMultiXactId
     639                 :  *              Make a new MultiXactId
     640                 :  *
     641                 :  * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
     642                 :  * given TransactionIds as members.  Returns the newly created MultiXactId.
     643                 :  *
     644                 :  * NB: the passed xids[] array will be sorted in-place.
                         :  *
                         :  * nxids is the number of entries in xids[]; it is logged as xlrec.nxids and
                         :  * also sizes the second XLOG data chunk (nxids * sizeof(TransactionId)).
                         :  *
                         :  * If mXactCacheGetBySet() finds a valid MultiXactId for this set of XIDs,
                         :  * that ID is returned instead and we leave before any XLOG or SLRU work is
                         :  * done.  Otherwise the critical section that GetNewMultiXactId() opens is
                         :  * kept open across XLogInsert() and RecordNewMultiXact(), and is closed
                         :  * here before the local cache is updated.
     645                 :  */
     646                 : static MultiXactId
     647                 : CreateMultiXactId(int nxids, TransactionId *xids)
     648               0 : {
     649                 :         MultiXactId multi;
     650                 :         MultiXactOffset offset;
     651                 :         XLogRecData rdata[2];
     652                 :         xl_multixact_create xlrec;
     653                 : 
     654                 :         debug_elog3(DEBUG2, "Create: %s",
     655                 :                                 mxid_to_string(InvalidMultiXactId, nxids, xids));
     656                 : 
     657                 :         /*
     658                 :          * See if the same set of XIDs already exists in our cache; if so, just
     659                 :          * re-use that MultiXactId.  (Note: it might seem that looking in our
     660                 :          * cache is insufficient, and we ought to search disk to see if a
     661                 :          * duplicate definition already exists.  But since we only ever create
     662                 :          * MultiXacts containing our own XID, in most cases any such MultiXacts
     663                 :          * were in fact created by us, and so will be in our cache.  There are
     664                 :          * corner cases where someone else added us to a MultiXact without our
     665                 :          * knowledge, but it's not worth checking for.)
     666                 :          */
     667               0 :         multi = mXactCacheGetBySet(nxids, xids);
     668               0 :         if (MultiXactIdIsValid(multi))
     669                 :         {
     670                 :                 debug_elog2(DEBUG2, "Create: in cache!");
     671               0 :                 return multi;
     672                 :         }
     673                 : 
     674                 :         /*
     675                 :          * Assign the MXID and offsets range to use, and make sure there is space
     676                 :          * in the OFFSETs and MEMBERs files.  NB: this routine does
     677                 :          * START_CRIT_SECTION().
     678                 :          */
     679               0 :         multi = GetNewMultiXactId(nxids, &offset);
     680                 : 
     681                 :         /*
     682                 :          * Make an XLOG entry describing the new MXID.
     683                 :          *
     684                 :          * Note: we need not flush this XLOG entry to disk before proceeding. The
     685                 :          * only way for the MXID to be referenced from any data page is for
     686                 :          * heap_lock_tuple() to have put it there, and heap_lock_tuple() generates
     687                 :          * an XLOG record that must follow ours.  The normal LSN interlock between
     688                 :          * the data page and that XLOG record will ensure that our XLOG record
     689                 :          * reaches disk first.  If the SLRU members/offsets data reaches disk
     690                 :          * sooner than the XLOG record, we do not care because we'll overwrite it
     691                 :          * with zeroes unless the XLOG record is there too; see notes at top of
     692                 :          * this file.
                         :          *
                         :          * The record is built from two chained chunks: rdata[0] carries the
                         :          * first MinSizeOfMultiXactCreate bytes of xlrec (mid, moff, nxids are
                         :          * the fields set below), and rdata[1] carries the caller's xids[]
                         :          * array itself.
     693                 :          */
     694               0 :         xlrec.mid = multi;
     695               0 :         xlrec.moff = offset;
     696               0 :         xlrec.nxids = nxids;
     697                 : 
     698               0 :         rdata[0].data = (char *) (&xlrec);
     699               0 :         rdata[0].len = MinSizeOfMultiXactCreate;
     700               0 :         rdata[0].buffer = InvalidBuffer;
     701               0 :         rdata[0].next = &(rdata[1]);
     702               0 :         rdata[1].data = (char *) xids;
     703               0 :         rdata[1].len = nxids * sizeof(TransactionId);
     704               0 :         rdata[1].buffer = InvalidBuffer;
     705               0 :         rdata[1].next = NULL;
     706                 : 
     707               0 :         (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
     708                 : 
     709                 :         /* Now enter the information into the OFFSETs and MEMBERs logs */
     710               0 :         RecordNewMultiXact(multi, offset, nxids, xids);
     711                 : 
     712                 :         /* Done with critical section (the one started in GetNewMultiXactId) */
     713               0 :         END_CRIT_SECTION();
     714                 : 
     715                 :         /* Store the new MultiXactId in the local cache, too */
     716               0 :         mXactCachePut(multi, nxids, xids);
     717                 : 
     718                 :         debug_elog2(DEBUG2, "Create: all done");
     719                 : 
     720               0 :         return multi;
     721                 : }
     722                 : 
     723                 : /*
     724                 :  * RecordNewMultiXact
     725                 :  *              Write info about a new multixact into the offsets and members files
     726                 :  *
     727                 :  * This is broken out of CreateMultiXactId so that xlog replay can use it.
                         :  *
                         :  * multi is the MultiXactId being recorded, offset is the member offset at
                         :  * which its XIDs start, and xids[0 .. nxids-1] are the members to store.
                         :  *
                         :  * The offsets entry for multi is set to offset while holding
                         :  * MultiXactOffsetControlLock.  The XIDs are then stored at consecutive
                         :  * member offsets, beginning at offset, while holding
                         :  * MultiXactMemberControlLock.  Every page written to is marked dirty in
                         :  * its SLRU buffer.  Nothing is returned.
     728                 :  */
     729                 : static void
     730                 : RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
     731                 :                                    int nxids, TransactionId *xids)
     732               0 : {
     733                 :         int                     pageno;
     734                 :         int                     prev_pageno;
     735                 :         int                     entryno;
     736                 :         int                     slotno;
     737                 :         MultiXactOffset *offptr;
     738                 :         int                     i;
     739                 : 
     740               0 :         LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
     741                 : 
     742               0 :         pageno = MultiXactIdToOffsetPage(multi);
     743               0 :         entryno = MultiXactIdToOffsetEntry(multi);
     744                 : 
     745                 :         /*
     746                 :          * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction"
     747                 :          * to complain about if there's any I/O error.  This is kinda bogus, but
     748                 :          * since the errors will always give the full pathname, it should be clear
     749                 :          * enough that a MultiXactId is really involved.  Perhaps someday we'll
     750                 :          * take the trouble to generalize the slru.c error reporting code.
     751                 :          */
     752               0 :         slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
     753               0 :         offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
     754               0 :         offptr += entryno;
     755                 : 
     756               0 :         *offptr = offset;
     757                 : 
     758               0 :         MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
     759                 : 
     760                 :         /*
                         :          * Exchange our lock: the offsets lock is released before the members
                         :          * lock is acquired, so the two are never held at the same time here.
                         :          */
     761               0 :         LWLockRelease(MultiXactOffsetControlLock);
     762                 : 
     763               0 :         LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
     764                 : 
     765               0 :         prev_pageno = -1;       /* no members page read yet; presumably -1 is
                         :                                  * never a real page number (TODO confirm), so
                         :                                  * the first iteration always sets slotno */
     766                 : 
     767               0 :         for (i = 0; i < nxids; i++, offset++)
     768                 :         {
     769                 :                 TransactionId *memberptr;
     770                 : 
     771               0 :                 pageno = MXOffsetToMemberPage(offset);
     772               0 :                 entryno = MXOffsetToMemberEntry(offset);
     773                 : 
     774               0 :                 if (pageno != prev_pageno)
     775                 :                 {
     776               0 :                         slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
     777               0 :                         prev_pageno = pageno;
     778                 :                 }
     779                 : 
     780               0 :                 memberptr = (TransactionId *)
     781                 :                         MultiXactMemberCtl->shared->page_buffer[slotno];
     782               0 :                 memberptr += entryno;
     783                 : 
     784               0 :                 *memberptr = xids[i];
     785                 : 
     786               0 :                 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
     787                 :         }
     788                 : 
     789               0 :         LWLockRelease(MultiXactMemberControlLock);
     790               0 : }
     791                 : 
     792                 : /*
     793                 :  * GetNewMultiXactId
     794                 :  *              Get the next MultiXactId.
     795                 :  *
     796                 :  * Also, reserve the needed amount of space in the "members" area.    The
     797                 :  * starting offset of the reserved space is returned in *offset.
     798                 :  *
     799                 :  * This may generate XLOG records for expansion of the offsets and/or members
     800                 :  * files.  Unfortunately, we have to do that while holding MultiXactGenLock
     801                 :  * to avoid race conditions --- the XLOG record for zeroing a page must appear
     802                 :  * before any backend can possibly try to store data in that page!
     803                 :  *
     804                 :  * We start a critical section before advancing the shared counters.  The
     805                 :  * caller must end the critical section after writing SLRU data.
                         :  *
                         :  * nxids is the number of member slots to reserve.  The return value is the
                         :  * value of the nextMXact counter before it is advanced (and after the
                         :  * wraparound adjustment made below).
                         :  *
                         :  * On return, MultiXactGenLock has been released again but the critical
                         :  * section is still open.
     806                 :  */
     807                 : static MultiXactId
     808                 : GetNewMultiXactId(int nxids, MultiXactOffset *offset)
     809               0 : {
     810                 :         MultiXactId result;
     811                 :         MultiXactOffset nextOffset;
     812                 : 
     813                 :         debug_elog3(DEBUG2, "GetNew: for %d xids", nxids);
     814                 : 
     815                 :         /* MultiXactIdSetOldestMember() must have been called already */
     816                 :         Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]));
     817                 : 
     818               0 :         LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
     819                 : 
     820                 :         /* Handle wraparound of the nextMXact counter */
     821               0 :         if (MultiXactState->nextMXact < FirstMultiXactId)
     822               0 :                 MultiXactState->nextMXact = FirstMultiXactId;
     823                 : 
     824                 :         /*
     825                 :          * Assign the MXID, and make sure there is room for it in the file.
     826                 :          */
     827               0 :         result = MultiXactState->nextMXact;
     828                 : 
     829               0 :         ExtendMultiXactOffset(result);
     830                 : 
     831                 :         /*
     832                 :          * Reserve the members space, similarly to above.  Also, be careful not to
     833                 :          * return zero as the starting offset for any multixact. See
     834                 :          * GetMultiXactIdMembers() for motivation.
                         :          *
                         :          * When nextOffset is zero we hand out offset 1 and bump nxids by one,
                         :          * so that both the ExtendMultiXactMember() call and the nextOffset
                         :          * advance further down account for the skipped slot 0 as well as the
                         :          * caller's members.
     835                 :          */
     836               0 :         nextOffset = MultiXactState->nextOffset;
     837               0 :         if (nextOffset == 0)
     838                 :         {
     839               0 :                 *offset = 1;
     840               0 :                 nxids++;                                /* allocate member slot 0 too */
     841                 :         }
     842                 :         else
     843               0 :                 *offset = nextOffset;
     844                 : 
     845               0 :         ExtendMultiXactMember(nextOffset, nxids);
     846                 : 
     847                 :         /*
     848                 :          * Critical section from here until caller has written the data into the
     849                 :          * just-reserved SLRU space; we don't want to error out with a partly
     850                 :          * written MultiXact structure.  (In particular, failing to write our
     851                 :          * start offset after advancing nextMXact would effectively corrupt the
     852                 :          * previous MultiXact.)
     853                 :          */
     854               0 :         START_CRIT_SECTION();
     855                 : 
     856                 :         /*
     857                 :          * Advance counters.  As in GetNewTransactionId(), this must not happen
     858                 :          * until after file extension has succeeded!
     859                 :          *
     860                 :          * We don't care about MultiXactId wraparound here; it will be handled by
     861                 :          * the next iteration.  But note that nextMXact may be InvalidMultiXactId
     862                 :          * after this routine exits, so anyone else looking at the variable must
     863                 :          * be prepared to deal with that.  Similarly, nextOffset may be zero, but
     864                 :          * we won't use that as the actual start offset of the next multixact.
     865                 :          */
     866               0 :         (MultiXactState->nextMXact)++;
     867                 : 
     868               0 :         MultiXactState->nextOffset += nxids;
     869                 : 
     870               0 :         LWLockRelease(MultiXactGenLock);
     871                 : 
     872                 :         debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset);
     873               0 :         return result;
     874                 : }
     875                 : 
     876                 : /*
     877                 :  * GetMultiXactIdMembers
     878                 :  *              Returns the set of TransactionIds that make up a MultiXactId
     879                 :  *
     880                 :  * We return -1 if the MultiXactId is too old to possibly have any members
     881                 :  * still running; in that case we have not actually looked them up, and
     882                 :  * *xids is not set.
     883                 :  */
     884                 : int
     885                 : GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
     886               0 : {
     887                 :         int                     pageno;
     888                 :         int                     prev_pageno;
     889                 :         int                     entryno;
     890                 :         int                     slotno;
     891                 :         MultiXactOffset *offptr;
     892                 :         MultiXactOffset offset;
     893                 :         int                     length;
     894                 :         int                     truelength;
     895                 :         int                     i;
     896                 :         MultiXactId nextMXact;
     897                 :         MultiXactId tmpMXact;
     898                 :         MultiXactOffset nextOffset;
     899                 :         TransactionId *ptr;
     900                 : 
     901                 :         debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
     902                 : 
     903                 :         Assert(MultiXactIdIsValid(multi));
     904                 : 
     905                 :         /* See if the MultiXactId is in the local cache */
     906               0 :         length = mXactCacheGetById(multi, xids);
     907               0 :         if (length >= 0)
     908                 :         {
     909                 :                 debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
     910                 :                                         mxid_to_string(multi, length, *xids));
     911               0 :                 return length;
     912                 :         }
     913                 : 
     914                 :         /* Set our OldestVisibleMXactId[] entry if we didn't already */
     915               0 :         MultiXactIdSetOldestVisible();
     916                 : 
     917                 :         /*
     918                 :          * We check known limits on MultiXact before resorting to the SLRU area.
     919                 :          *
     920                 :          * An ID older than our OldestVisibleMXactId[] entry can't possibly still
     921                 :          * be running, and we'd run the risk of trying to read already-truncated
     922                 :          * SLRU data if we did try to examine it.
     923                 :          *
     924                 :          * Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is
     925                 :          * seen, it implies undetected ID wraparound has occurred.      We just
     926                 :          * silently assume that such an ID is no longer running.
     927                 :          *
     928                 :          * Shared lock is enough here since we aren't modifying any global state.
     929                 :          * Also, we can examine our own OldestVisibleMXactId without the lock,
     930                 :          * since no one else is allowed to change it.
     931                 :          */
     932               0 :         if (MultiXactIdPrecedes(multi, OldestVisibleMXactId[MyBackendId]))
     933                 :         {
     934                 :                 debug_elog2(DEBUG2, "GetMembers: it's too old");
     935               0 :                 *xids = NULL;
     936               0 :                 return -1;
     937                 :         }
     938                 : 
     939                 :         /*
     940                 :          * Acquire the shared lock just long enough to grab the current counter
     941                 :          * values.      We may need both nextMXact and nextOffset; see below.
     942                 :          */
     943               0 :         LWLockAcquire(MultiXactGenLock, LW_SHARED);
     944                 : 
     945               0 :         nextMXact = MultiXactState->nextMXact;
     946               0 :         nextOffset = MultiXactState->nextOffset;
     947                 : 
     948               0 :         LWLockRelease(MultiXactGenLock);
     949                 : 
     950               0 :         if (!MultiXactIdPrecedes(multi, nextMXact))
     951                 :         {
     952                 :                 debug_elog2(DEBUG2, "GetMembers: it's too new!");
     953               0 :                 *xids = NULL;
     954               0 :                 return -1;
     955                 :         }
     956                 : 
     957                 :         /*
     958                 :          * Find out the offset at which we need to start reading MultiXactMembers
     959                 :          * and the number of members in the multixact.  We determine the latter as
     960                 :          * the difference between this multixact's starting offset and the next
     961                 :          * one's.  However, there are some corner cases to worry about:
     962                 :          *
     963                 :          * 1. This multixact may be the latest one created, in which case there is
     964                 :          * no next one to look at.      In this case the nextOffset value we just
     965                 :          * saved is the correct endpoint.
     966                 :          *
     967                 :          * 2. The next multixact may still be in process of being filled in: that
     968                 :          * is, another process may have done GetNewMultiXactId but not yet written
     969                 :          * the offset entry for that ID.  In that scenario, it is guaranteed that
     970                 :          * the offset entry for that multixact exists (because GetNewMultiXactId
     971                 :          * won't release MultiXactGenLock until it does) but contains zero
     972                 :          * (because we are careful to pre-zero offset pages). Because
     973                 :          * GetNewMultiXactId will never return zero as the starting offset for a
     974                 :          * multixact, when we read zero as the next multixact's offset, we know we
     975                 :          * have this case.      We sleep for a bit and try again.
     976                 :          *
     977                 :          * 3. Because GetNewMultiXactId increments offset zero to offset one to
     978                 :          * handle case #2, there is an ambiguity near the point of offset
     979                 :          * wraparound.  If we see next multixact's offset is one, is that our
     980                 :          * multixact's actual endpoint, or did it end at zero with a subsequent
     981                 :          * increment?  We handle this using the knowledge that if the zero'th
     982                 :          * member slot wasn't filled, it'll contain zero, and zero isn't a valid
     983                 :          * transaction ID so it can't be a multixact member.  Therefore, if we
     984                 :          * read a zero from the members array, just ignore it.
     985                 :          *
     986                 :          * This is all pretty messy, but the mess occurs only in infrequent corner
     987                 :          * cases, so it seems better than holding the MultiXactGenLock for a long
     988                 :          * time on every multixact creation.
     989                 :          */
     990               0 : retry:
     991               0 :         LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
     992                 : 
     993               0 :         pageno = MultiXactIdToOffsetPage(multi);
     994               0 :         entryno = MultiXactIdToOffsetEntry(multi);
     995                 : 
     996               0 :         slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
     997               0 :         offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
     998               0 :         offptr += entryno;
     999               0 :         offset = *offptr;
    1000                 : 
    1001                 :         Assert(offset != 0);
    1002                 : 
    1003                 :         /*
    1004                 :          * Use the same increment rule as GetNewMultiXactId(), that is, don't
    1005                 :          * handle wraparound explicitly until needed.
    1006                 :          */
    1007               0 :         tmpMXact = multi + 1;
    1008                 : 
    1009               0 :         if (nextMXact == tmpMXact)
    1010                 :         {
    1011                 :                 /* Corner case 1: there is no next multixact */
    1012               0 :                 length = nextOffset - offset;
    1013                 :         }
    1014                 :         else
    1015                 :         {
    1016                 :                 MultiXactOffset nextMXOffset;
    1017                 : 
    1018                 :                 /* handle wraparound if needed */
    1019               0 :                 if (tmpMXact < FirstMultiXactId)
    1020               0 :                         tmpMXact = FirstMultiXactId;
    1021                 : 
    1022               0 :                 prev_pageno = pageno;
    1023                 : 
    1024               0 :                 pageno = MultiXactIdToOffsetPage(tmpMXact);
    1025               0 :                 entryno = MultiXactIdToOffsetEntry(tmpMXact);
    1026                 : 
    1027               0 :                 if (pageno != prev_pageno)
    1028               0 :                         slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, tmpMXact);
    1029                 : 
    1030               0 :                 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
    1031               0 :                 offptr += entryno;
    1032               0 :                 nextMXOffset = *offptr;
    1033                 : 
    1034               0 :                 if (nextMXOffset == 0)
    1035                 :                 {
    1036                 :                         /* Corner case 2: next multixact is still being filled in */
    1037               0 :                         LWLockRelease(MultiXactOffsetControlLock);
    1038               0 :                         pg_usleep(1000L);
    1039               0 :                         goto retry;
    1040                 :                 }
    1041                 : 
    1042               0 :                 length = nextMXOffset - offset;
    1043                 :         }
    1044                 : 
    1045               0 :         LWLockRelease(MultiXactOffsetControlLock);
    1046                 : 
    1047               0 :         ptr = (TransactionId *) palloc(length * sizeof(TransactionId));
    1048               0 :         *xids = ptr;
    1049                 : 
    1050                 :         /* Now get the members themselves. */
    1051               0 :         LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
    1052                 : 
    1053               0 :         truelength = 0;
    1054               0 :         prev_pageno = -1;
    1055               0 :         for (i = 0; i < length; i++, offset++)
    1056                 :         {
    1057                 :                 TransactionId *xactptr;
    1058                 : 
    1059               0 :                 pageno = MXOffsetToMemberPage(offset);
    1060               0 :                 entryno = MXOffsetToMemberEntry(offset);
    1061                 : 
    1062               0 :                 if (pageno != prev_pageno)
    1063                 :                 {
    1064               0 :                         slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
    1065               0 :                         prev_pageno = pageno;
    1066                 :                 }
    1067                 : 
    1068               0 :                 xactptr = (TransactionId *)
    1069                 :                         MultiXactMemberCtl->shared->page_buffer[slotno];
    1070               0 :                 xactptr += entryno;
    1071                 : 
    1072               0 :                 if (!TransactionIdIsValid(*xactptr))
    1073                 :                 {
    1074                 :                         /* Corner case 3: we must be looking at unused slot zero */
    1075                 :                         Assert(offset == 0);
    1076               0 :                         continue;
    1077                 :                 }
    1078                 : 
    1079               0 :                 ptr[truelength++] = *xactptr;
    1080                 :         }
    1081                 : 
    1082               0 :         LWLockRelease(MultiXactMemberControlLock);
    1083                 : 
    1084                 :         /*
    1085                 :          * Copy the result into the local cache.
    1086                 :          */
    1087               0 :         mXactCachePut(multi, truelength, ptr);
    1088                 : 
    1089                 :         debug_elog3(DEBUG2, "GetMembers: no cache for %s",
    1090                 :                                 mxid_to_string(multi, truelength, ptr));
    1091               0 :         return truelength;
    1092                 : }
    1093                 : 
    1094                 : /*
    1095                 :  * mXactCacheGetBySet
    1096                 :  *              returns a MultiXactId from the cache based on the set of
    1097                 :  *              TransactionIds that compose it, or InvalidMultiXactId if
    1098                 :  *              none matches.
    1099                 :  *
    1100                 :  * This is helpful, for example, if two transactions want to lock a huge
    1101                 :  * table.  By using the cache, the second will use the same MultiXactId
    1102                 :  * for the majority of tuples, thus keeping MultiXactId usage low (saving
    1103                 :  * both I/O and wraparound issues).
    1104                 :  *
    1105                 :  * NB: the passed xids[] array will be sorted in-place.
    1106                 :  */
    1107                 : static MultiXactId
    1108                 : mXactCacheGetBySet(int nxids, TransactionId *xids)
    1109               0 : {
    1110                 :         mXactCacheEnt *entry;
    1111                 : 
    1112                 :         debug_elog3(DEBUG2, "CacheGet: looking for %s",
    1113                 :                                 mxid_to_string(InvalidMultiXactId, nxids, xids));
    1114                 : 
    1115                 :         /* Sort the caller's array in place with xidComparator so that comparing two sets reduces to a flat memcmp below */
    1116               0 :         qsort(xids, nxids, sizeof(TransactionId), xidComparator);
    1117                 : 
    1118               0 :         for (entry = MXactCache; entry != NULL; entry = entry->next)  /* linear walk of the cache list from its head */
    1119                 :         {
    1120               0 :                 if (entry->nxids != nxids)  /* different member count: cannot be the same set */
    1121               0 :                         continue;
    1122                 : 
    1123                 :                 /* Cached arrays were sorted by mXactCachePut with the same comparator, so equal sets are byte-identical */
    1124               0 :                 if (memcmp(xids, entry->xids, nxids * sizeof(TransactionId)) == 0)
    1125                 :                 {
    1126                 :                         debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
    1127               0 :                         return entry->multi;  /* first match in list order wins */
    1128                 :                 }
    1129                 :         }
    1130                 : 
    1131                 :         debug_elog2(DEBUG2, "CacheGet: not found :-(");
    1132               0 :         return InvalidMultiXactId;  /* no entry holds exactly this set; xids[] stays sorted either way */
    1133                 : }
    1134                 : 
    1135                 : /*
    1136                 :  * mXactCacheGetById
    1137                 :  *              returns the composing TransactionId set from the cache for a
    1138                 :  *              given MultiXactId, if present.
    1139                 :  *
    1140                 :  * If successful, *xids is set to the address of a palloc'd copy of the
    1141                 :  * TransactionId set.  Return value is number of members, or -1 on failure.
    1142                 :  */
    1143                 : static int
    1144                 : mXactCacheGetById(MultiXactId multi, TransactionId **xids)
    1145               0 : {
    1146                 :         mXactCacheEnt *entry;
    1147                 : 
    1148                 :         debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
    1149                 : 
    1150               0 :         for (entry = MXactCache; entry != NULL; entry = entry->next)  /* linear walk of the cache list from its head */
    1151                 :         {
    1152               0 :                 if (entry->multi == multi)
    1153                 :                 {
    1154                 :                         TransactionId *ptr;
    1155                 :                         Size            size;
    1156                 :                         /* Hand back a separate palloc'd copy rather than a pointer into the cache entry. */
    1157               0 :                         size = sizeof(TransactionId) * entry->nxids;
    1158               0 :                         ptr = (TransactionId *) palloc(size);
    1159               0 :                         *xids = ptr;
    1160                 :                         /* The copy carries the cache's order, i.e. the order produced by the qsort in mXactCachePut. */
    1161               0 :                         memcpy(ptr, entry->xids, size);
    1162                 : 
    1163                 :                         debug_elog3(DEBUG2, "CacheGet: found %s",
    1164                 :                                                 mxid_to_string(multi, entry->nxids, entry->xids));
    1165               0 :                         return entry->nxids;
    1166                 :                 }
    1167                 :         }
    1168                 : 
    1169                 :         debug_elog2(DEBUG2, "CacheGet: not found");
    1170               0 :         return -1;  /* miss: *xids has not been written */
    1171                 : }
    1172                 : 
    1173                 : /*
    1174                 :  * mXactCachePut
    1175                 :  *              Add a new MultiXactId and its composing set into the local cache.
    1176                 :  */
    1177                 : static void
    1178                 : mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids)
    1179               0 : {
    1180                 :         mXactCacheEnt *entry;
    1181                 : 
    1182                 :         debug_elog3(DEBUG2, "CachePut: storing %s",
    1183                 :                                 mxid_to_string(multi, nxids, xids));
    1184                 :         /* No lookup is done first: an already-cached entry for the same multi is not detected here. */
    1185               0 :         if (MXactContext == NULL)  /* create the cache's memory context lazily, on first insertion */
    1186                 :         {
    1187                 :                 /* The cache only lives as long as the current transaction */
    1188                 :                 debug_elog2(DEBUG2, "CachePut: initializing memory context");
    1189               0 :                 MXactContext = AllocSetContextCreate(TopTransactionContext,
    1190                 :                                                                                          "MultiXact Cache Context",
    1191                 :                                                                                          ALLOCSET_SMALL_MINSIZE,
    1192                 :                                                                                          ALLOCSET_SMALL_INITSIZE,
    1193                 :                                                                                          ALLOCSET_SMALL_MAXSIZE);
    1194                 :         }
    1195                 :         /* One allocation holds the fixed part of the entry plus nxids trailing TransactionIds. */
    1196               0 :         entry = (mXactCacheEnt *)
    1197                 :                 MemoryContextAlloc(MXactContext,
    1198                 :                                                    offsetof(mXactCacheEnt, xids) +
    1199                 :                                                    nxids * sizeof(TransactionId));
    1200                 : 
    1201               0 :         entry->multi = multi;
    1202               0 :         entry->nxids = nxids;
    1203               0 :         memcpy(entry->xids, xids, nxids * sizeof(TransactionId));  /* private copy: only entry->xids is reordered below */
    1204                 : 
    1205                 :         /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
    1206               0 :         qsort(entry->xids, nxids, sizeof(TransactionId), xidComparator);
    1207                 :         /* Link the new entry in at the head of the list. */
    1208               0 :         entry->next = MXactCache;
    1209               0 :         MXactCache = entry;
    1210               0 : }
    1211                 : 
    1212                 : /*
    1213                 :  * xidComparator
    1214                 :  *              qsort comparison function for XIDs
    1215                 :  *
    1216                 :  * We don't need to use wraparound comparison for XIDs, and indeed must
    1217                 :  * not do so since that does not respect the triangle inequality!  Any
    1218                 :  * old sort order will do.
    1219                 :  */
    1220                 : static int
    1221                 : xidComparator(const void *arg1, const void *arg2)
    1222               0 : {
    1223               0 :         TransactionId xid1 = *(const TransactionId *) arg1;
    1224               0 :         TransactionId xid2 = *(const TransactionId *) arg2;
    1225                 :         /* Compare the raw values with the built-in operators; yields 1, -1 or 0 in qsort-callback convention. */
    1226               0 :         if (xid1 > xid2)
    1227               0 :                 return 1;
    1228               0 :         if (xid1 < xid2)
    1229               0 :                 return -1;
    1230               0 :         return 0;
    1231                 : }
    1232                 : 
    1233                 : #ifdef MULTIXACT_DEBUG
    1234                 : static char *   /* debug-only helper: returns a palloc'd "multi nxids[xid, xid, ...]" string for the debug_elog calls */
    1235                 : mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids)
    1236                 : {
    1237                 :         char       *str = palloc(15 * (nxids + 1) + 4);  /* 15 bytes per slot covers the longest ", %u" piece (12 chars) */
    1238                 :         int                     i;
    1239                 :         /* Print only the header here, so xids[] is never dereferenced when nxids == 0 (result is then "multi 0[]"). */
    1240                 :         snprintf(str, 47, "%u %d[", multi, nxids);
    1241                 :         /* Every member comes from the loop; the separator precedes all but the first, so output for nxids >= 1 is unchanged. */
    1242                 :         for (i = 0; i < nxids; i++)
    1243                 :                 snprintf(str + strlen(str), 17, "%s%u", (i > 0) ? ", " : "", xids[i]);
    1244                 : 
    1245                 :         strcat(str, "]");
    1246                 :         return str;
    1247                 : }
    1248                 : #endif
    1249                 : 
    1250                 : /*
    1251                 :  * AtEOXact_MultiXact
    1252                 :  *              Handle transaction end for MultiXact
    1253                 :  *
    1254                 :  * This is called at top transaction commit or abort (we don't care which).
    1255                 :  */
    1256                 : void
    1257                 : AtEOXact_MultiXact(void)
    1258           13673 : {
    1259                 :         /*
    1260                 :          * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
    1261                 :          * which should only be valid while within a transaction.
    1262                 :          *
    1263                 :          * We assume that storing a MultiXactId is atomic and so we need not take
    1264                 :          * MultiXactGenLock to do this.
    1265                 :          */
    1266           13673 :         OldestMemberMXactId[MyBackendId] = InvalidMultiXactId;  /* only this backend's own slot is touched */
    1267           13673 :         OldestVisibleMXactId[MyBackendId] = InvalidMultiXactId;
    1268                 : 
    1269                 :         /*
    1270                 :          * Discard the local MultiXactId cache.  Since MXactContext was created as
    1271                 :          * a child of TopTransactionContext, we needn't delete it explicitly.
    1272                 :          */
    1273           13673 :         MXactContext = NULL;  /* makes mXactCachePut build a fresh context on its next call */
    1274           13673 :         MXactCache = NULL;  /* forget the list head; the entries themselves were allocated in MXactContext */
    1275           13673 : }
    1276                 : 
    1277                 : /*
    1278                 :  * Initialization of shared memory for MultiXact.  We use two SLRU areas,
    1279                 :  * thus double memory.  Also, reserve space for the shared MultiXactState
    1280                 :  * struct and the per-backend MultiXactId arrays (two of those, too).
    1281                 :  */
    1282                 : Size
    1283                 : MultiXactShmemSize(void)
    1284              18 : {
    1285                 :         Size            size;
    1286                 :         /* The macro below is defined inside this function but is also used by MultiXactShmemInit later in the file. */
    1287                 : #define SHARED_MULTIXACT_STATE_SIZE \
    1288                 :         add_size(sizeof(MultiXactStateData), \
    1289                 :                          mul_size(sizeof(MultiXactId) * 2, MaxBackends))
    1290                 :         /* Total = state struct with two MultiXactIds per backend, plus one SLRU area per buffer-count constant. */
    1291              18 :         size = SHARED_MULTIXACT_STATE_SIZE;
    1292              18 :         size = add_size(size, SimpleLruShmemSize(NUM_MXACTOFFSET_BUFFERS, 0));  /* offsets SLRU */
    1293              18 :         size = add_size(size, SimpleLruShmemSize(NUM_MXACTMEMBER_BUFFERS, 0));  /* members SLRU */
    1294                 : 
    1295              18 :         return size;
    1296                 : }
    1297                 : 
    1298                 : void
    1299                 : MultiXactShmemInit(void)
    1300              16 : {
    1301                 :         bool            found;
    1302                 :         /* Installs the page-ordering callbacks, initializes both SLRU areas, obtains the shared MultiXactState struct, and derives the two per-backend array pointers. */
    1303                 :         debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
    1304                 : 
    1305              16 :         MultiXactOffsetCtl->PagePrecedes = MultiXactOffsetPagePrecedes;
    1306              16 :         MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
    1307                 :         /* Buffer counts and the trailing 0 match what MultiXactShmemSize passed to SimpleLruShmemSize. */
    1308              16 :         SimpleLruInit(MultiXactOffsetCtl,
    1309                 :                                   "MultiXactOffset Ctl", NUM_MXACTOFFSET_BUFFERS, 0,
    1310                 :                                   MultiXactOffsetControlLock, "pg_multixact/offsets");
    1311              16 :         SimpleLruInit(MultiXactMemberCtl,
    1312                 :                                   "MultiXactMember Ctl", NUM_MXACTMEMBER_BUFFERS, 0,
    1313                 :                                   MultiXactMemberControlLock, "pg_multixact/members");
    1314                 : 
    1315                 :         /* Initialize our shared state struct */
    1316              16 :         MultiXactState = ShmemInitStruct("Shared MultiXact State",
    1317                 :                                                                          SHARED_MULTIXACT_STATE_SIZE,
    1318                 :                                                                          &found);
    1319              16 :         if (!IsUnderPostmaster)  /* this path expects the struct not to exist yet (see the Assert) and zeroes it */
    1320                 :         {
    1321                 :                 Assert(!found);
    1322                 : 
    1323                 :                 /* Make sure we zero out the per-backend state */
    1324              16 :                 MemSet(MultiXactState, 0, SHARED_MULTIXACT_STATE_SIZE);  /* clears the whole area, struct header included */
    1325                 :         }
    1326                 :         else
    1327                 :                 Assert(found);  /* otherwise the struct is expected to exist already; nothing is re-zeroed */
    1328                 : 
    1329                 :         /*
    1330                 :          * Set up array pointers.  Note that perBackendXactIds[0] is wasted space
    1331                 :          * since we only use indexes 1..MaxBackends in each array.
    1332                 :          */
    1333              16 :         OldestMemberMXactId = MultiXactState->perBackendXactIds;
    1334              16 :         OldestVisibleMXactId = OldestMemberMXactId + MaxBackends;  /* second array begins MaxBackends entries after the first */
    1335              16 : }
    1336                 : 
    1337                 : /*
    1338                 :  * This func must be called ONCE on system install.  It creates the initial
    1339                 :  * MultiXact segments.  (The MultiXacts directories are assumed to have been
    1340                 :  * created by initdb, and MultiXactShmemInit must have been called already.)
    1341                 :  */
    1342                 : void
    1343                 : BootStrapMultiXact(void)
    1344               1 : {
    1345                 :         int                     slotno;
    1346                 :         /* The same three steps run twice, once per SLRU area, each under that area's own control lock. */
    1347               1 :         LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
    1348                 : 
    1349                 :         /* Create and zero the first page of the offsets log */
    1350               1 :         slotno = ZeroMultiXactOffsetPage(0, false);  /* page 0; writeXlog = false, so no XLOG record is emitted */
    1351                 : 
    1352                 :         /* Make sure it's written out */
    1353               1 :         SimpleLruWritePage(MultiXactOffsetCtl, slotno, NULL);
    1354                 :         Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);  /* the write is expected to have cleared the dirty flag */
    1355                 : 
    1356               1 :         LWLockRelease(MultiXactOffsetControlLock);
    1357                 : 
    1358               1 :         LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
    1359                 : 
    1360                 :         /* Create and zero the first page of the members log */
    1361               1 :         slotno = ZeroMultiXactMemberPage(0, false);  /* page 0; writeXlog = false, so no XLOG record is emitted */
    1362                 : 
    1363                 :         /* Make sure it's written out */
    1364               1 :         SimpleLruWritePage(MultiXactMemberCtl, slotno, NULL);
    1365                 :         Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);  /* the write is expected to have cleared the dirty flag */
    1366                 : 
    1367               1 :         LWLockRelease(MultiXactMemberControlLock);
    1368               1 : }
    1369                 : 
    1370                 : /*
    1371                 :  * Initialize (or reinitialize) a page of MultiXactOffset to zeroes.
    1372                 :  * If writeXlog is TRUE, also emit an XLOG record saying we did this.
    1373                 :  *
    1374                 :  * The page is not actually written, just set up in shared memory.
    1375                 :  * The slot number of the new page is returned.
    1376                 :  *
    1377                 :  * Control lock must be held at entry, and will be held at exit.
    1378                 :  */
    1379                 : static int
    1380                 : ZeroMultiXactOffsetPage(int pageno, bool writeXlog)
    1381               1 : {
    1382                 :         int                     slotno;
    1383                 :         /* Zero the page in the offsets SLRU first; the optional XLOG record is written afterwards. */
    1384               1 :         slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
    1385                 : 
    1386               1 :         if (writeXlog)
    1387               0 :                 WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);  /* record type identifies the offsets area */
    1388                 : 
    1389               1 :         return slotno;  /* slot number reported by SimpleLruZeroPage */
    1390                 : }
    1391                 : 
    1392                 : /*
    1393                 :  * Ditto, for MultiXactMember
    1394                 :  */
    1395                 : static int
    1396                 : ZeroMultiXactMemberPage(int pageno, bool writeXlog)
    1397               1 : {
    1398                 :         int                     slotno;
    1399                 :         /* Zero the page in the members SLRU first; the optional XLOG record is written afterwards. */
    1400               1 :         slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
    1401                 : 
    1402               1 :         if (writeXlog)
    1403               0 :                 WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);  /* record type identifies the members area */
    1404                 : 
    1405               1 :         return slotno;  /* slot number reported by SimpleLruZeroPage */
    1406                 : }
    1407                 : 
    1408                 : /*
    1409                 :  * This must be called ONCE during postmaster or standalone-backend startup.
    1410                 :  *
    1411                 :  * StartupXLOG has already established nextMXact/nextOffset by calling
    1412                 :  * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact.      Note that we
    1413                 :  * may already have replayed WAL data into the SLRU files.
    1414                 :  *
    1415                 :  * We don't need any locks here, really; the SLRU locks are taken
    1416                 :  * only because slru.c expects to be called with locks held.
    1417                 :  */
    1418                 : void
    1419                 : StartupMultiXact(void)
    1420              14 : {
    1421              14 :         MultiXactId multi = MultiXactState->nextMXact;  /* read without MultiXactGenLock; header comment says no locks are really needed here */
    1422              14 :         MultiXactOffset offset = MultiXactState->nextOffset;
    1423                 :         int                     pageno;
    1424                 :         int                     entryno;
    1425                 : 
    1426                 :         /* Clean up offsets state */
    1427              14 :         LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
    1428                 : 
    1429                 :         /*
    1430                 :          * Initialize our idea of the latest page number.
    1431                 :          */
    1432              14 :         pageno = MultiXactIdToOffsetPage(multi);
    1433              14 :         MultiXactOffsetCtl->shared->latest_page_number = pageno;
    1434                 : 
    1435                 :         /*
    1436                 :          * Zero out the remainder of the current offsets page.  See notes in
    1437                 :          * StartupCLOG() for motivation.
    1438                 :          */
    1439              14 :         entryno = MultiXactIdToOffsetEntry(multi);
    1440              14 :         if (entryno != 0)  /* skipped when the next MultiXactId is the first entry of its page */
    1441                 :         {
    1442                 :                 int                     slotno;
    1443                 :                 MultiXactOffset *offptr;
    1444                 : 
    1445              14 :                 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
    1446              14 :                 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
    1447              14 :                 offptr += entryno;
    1448                 :                 /* Clear from the entry for nextMXact through the end of the BLCKSZ-byte page buffer. */
    1449              14 :                 MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset)));
    1450                 : 
    1451              14 :                 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;  /* flag the buffer as modified */
    1452                 :         }
    1453                 : 
    1454              14 :         LWLockRelease(MultiXactOffsetControlLock);
    1455                 : 
    1456                 :         /* And the same for members */
    1457              14 :         LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
    1458                 : 
    1459                 :         /*
    1460                 :          * Initialize our idea of the latest page number.
    1461                 :          */
    1462              14 :         pageno = MXOffsetToMemberPage(offset);
    1463              14 :         MultiXactMemberCtl->shared->latest_page_number = pageno;
    1464                 : 
    1465                 :         /*
    1466                 :          * Zero out the remainder of the current members page.  See notes in
    1467                 :          * StartupCLOG() for motivation.
    1468                 :          */
    1469              14 :         entryno = MXOffsetToMemberEntry(offset);
    1470              14 :         if (entryno != 0)  /* skipped when nextOffset is the first entry of its page */
    1471                 :         {
    1472                 :                 int                     slotno;
    1473                 :                 TransactionId *xidptr;
    1474                 : 
    1475               0 :                 slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
    1476               0 :                 xidptr = (TransactionId *) MultiXactMemberCtl->shared->page_buffer[slotno];
    1477               0 :                 xidptr += entryno;
    1478                 :                 /* Clear from the slot for nextOffset through the end of the BLCKSZ-byte page buffer. */
    1479               0 :                 MemSet(xidptr, 0, BLCKSZ - (entryno * sizeof(TransactionId)));
    1480                 : 
    1481               0 :                 MultiXactMemberCtl->shared->page_dirty[slotno] = true;  /* flag the buffer as modified */
    1482                 :         }
    1483                 : 
    1484              14 :         LWLockRelease(MultiXactMemberControlLock);
    1485                 : 
    1486                 :         /*
    1487                 :          * Initialize lastTruncationPoint to invalid, ensuring that the first
    1488                 :          * checkpoint will try to do truncation.
    1489                 :          */
    1490              14 :         MultiXactState->lastTruncationPoint = InvalidMultiXactId;
    1491              14 : }
    1492                 : 
    1493                 : /*
    1494                 :  * This must be called ONCE during postmaster or standalone-backend shutdown
                         :  *
                         :  * Takes no arguments and returns nothing.  The MultiXactOffset SLRU area
                         :  * is flushed first and the MultiXactMember area second, both through
                         :  * SimpleLruFlush() with false as the second argument.
                         :  * NOTE(review): that second argument is presumably the "called from a
                         :  * checkpoint" flag (CheckPointMultiXact passes true) -- confirm in slru.c.
    1495                 :  */
    1496                 : void
    1497                 : ShutdownMultiXact(void)
    1498              13 : {
    1499                 :         /* Flush dirty MultiXact pages to disk */
    1500              13 :         SimpleLruFlush(MultiXactOffsetCtl, false);
    1501              13 :         SimpleLruFlush(MultiXactMemberCtl, false);
    1502              13 : }
    1503                 : 
    1504                 : /*
    1505                 :  * Get the next MultiXactId and offset to save in a checkpoint record
                         :  *
                         :  * is_shutdown: not referenced anywhere in the function body.
                         :  * nextMulti: output; receives MultiXactState->nextMXact.
                         :  * nextMultiOffset: output; receives MultiXactState->nextOffset.
                         :  *
                         :  * Both counters are copied while MultiXactGenLock is held in shared mode,
                         :  * so the pair is read under a single lock acquisition.  The debug message
                         :  * is emitted after the lock has been released.
    1506                 :  */
    1507                 : void
    1508                 : MultiXactGetCheckptMulti(bool is_shutdown,
    1509                 :                                                  MultiXactId *nextMulti,
    1510                 :                                                  MultiXactOffset *nextMultiOffset)
    1511              19 : {
    1512              19 :         LWLockAcquire(MultiXactGenLock, LW_SHARED);
    1513                 : 
    1514              19 :         *nextMulti = MultiXactState->nextMXact;
    1515              19 :         *nextMultiOffset = MultiXactState->nextOffset;
    1516                 : 
    1517              19 :         LWLockRelease(MultiXactGenLock);
    1518                 : 
    1519                 :         debug_elog4(DEBUG2, "MultiXact: checkpoint is nextMulti %u, nextOffset %u",
    1520                 :                                 *nextMulti, *nextMultiOffset);
    1521              19 : }
    1522                 : 
    1523                 : /*
    1524                 :  * Perform a checkpoint --- either during shutdown, or on-the-fly
                         :  *
                         :  * Takes no arguments and returns nothing.  Both SLRU areas are flushed
                         :  * with SimpleLruFlush(..., true); afterwards TruncateMultiXact() is called
                         :  * unless InRecovery is set (the comment inside the function explains why).
    1525                 :  */
    1526                 : void
    1527                 : CheckPointMultiXact(void)
    1528              19 : {
    1529                 :         /* Flush dirty MultiXact pages to disk */
    1530              19 :         SimpleLruFlush(MultiXactOffsetCtl, true);
    1531              19 :         SimpleLruFlush(MultiXactMemberCtl, true);
    1532                 : 
    1533                 :         /*
    1534                 :          * Truncate the SLRU files.  This could be done at any time, but
    1535                 :          * checkpoint seems a reasonable place for it.  There is one exception: if
    1536                 :          * we are called during xlog recovery, then shared->latest_page_number
    1537                 :          * isn't valid (because StartupMultiXact hasn't been called yet) and so
    1538                 :          * SimpleLruTruncate would get confused.  It seems best not to risk
    1539                 :          * removing any data during recovery anyway, so don't truncate.
    1540                 :          */
    1541              19 :         if (!InRecovery)
    1542              19 :                 TruncateMultiXact();
    1543              19 : }
    1544                 : 
    1545                 : /*
    1546                 :  * Set the next-to-be-assigned MultiXactId and offset
    1547                 :  *
    1548                 :  * This is used when we can determine the correct next ID/offset exactly
    1549                 :  * from a checkpoint record.  We need no locking since it is only called
    1550                 :  * during bootstrap and XLog replay.
                         :  *
                         :  * nextMulti: value stored into MultiXactState->nextMXact.
                         :  * nextMultiOffset: value stored into MultiXactState->nextOffset.
                         :  *
                         :  * Both shared counters are overwritten unconditionally; no comparison with
                         :  * their previous contents is made (contrast MultiXactAdvanceNextMXact).
    1551                 :  */
    1552                 : void
    1553                 : MultiXactSetNextMXact(MultiXactId nextMulti,
    1554                 :                                           MultiXactOffset nextMultiOffset)
    1555              15 : {
    1556                 :         debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
    1557                 :                                 nextMulti, nextMultiOffset);
    1558              15 :         MultiXactState->nextMXact = nextMulti;
    1559              15 :         MultiXactState->nextOffset = nextMultiOffset;
    1560              15 : }
    1561                 : 
    1562                 : /*
    1563                 :  * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
    1564                 :  * and similarly nextOffset is at least minMultiOffset
    1565                 :  *
    1566                 :  * This is used when we can determine minimum safe values from an XLog
    1567                 :  * record (either an on-line checkpoint or an mxact creation log entry).
    1568                 :  * We need no locking since it is only called during XLog replay.
                         :  *
                         :  * minMulti: lower bound for MultiXactState->nextMXact.
                         :  * minMultiOffset: lower bound for MultiXactState->nextOffset.
                         :  *
                         :  * The two counters are tested independently.  Each one is replaced only
                         :  * when MultiXactIdPrecedes() / MultiXactOffsetPrecedes() reports that the
                         :  * current value precedes the supplied minimum; otherwise it is left as is,
                         :  * so a counter is never moved backwards by this routine.
    1569                 :  */
    1570                 : void
    1571                 : MultiXactAdvanceNextMXact(MultiXactId minMulti,
    1572                 :                                                   MultiXactOffset minMultiOffset)
    1573               0 : {
    1574               0 :         if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti))
    1575                 :         {
    1576                 :                 debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
    1577               0 :                 MultiXactState->nextMXact = minMulti;
    1578                 :         }
    1579               0 :         if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
    1580                 :         {
    1581                 :                 debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
    1582                 :                                         minMultiOffset);
    1583               0 :                 MultiXactState->nextOffset = minMultiOffset;
    1584                 :         }
    1585               0 : }
    1586                 : 
    1587                 : /*
    1588                 :  * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
    1589                 :  *
    1590                 :  * NB: this is called while holding MultiXactGenLock.  We want it to be very
    1591                 :  * fast most of the time; even when it's not so fast, no actual I/O need
    1592                 :  * happen unless we're forced to write out a dirty log or xlog page to make
    1593                 :  * room in shared memory.
                         :  *
                         :  * multi: the MultiXactId being allocated.  Nothing is returned.
                         :  *
                         :  * When multi is the first entry of its offsets page, or equals
                         :  * FirstMultiXactId, that page is zeroed with
                         :  * ZeroMultiXactOffsetPage(pageno, true) while MultiXactOffsetControlLock
                         :  * is held exclusively.  For every other multi the function returns
                         :  * immediately without taking the lock.
    1594                 :  */
    1595                 : static void
    1596                 : ExtendMultiXactOffset(MultiXactId multi)
    1597               0 : {
    1598                 :         int                     pageno;
    1599                 : 
    1600                 :         /*
    1601                 :          * No work except at first MultiXactId of a page.  But beware: just after
    1602                 :          * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
    1603                 :          */
    1604               0 :         if (MultiXactIdToOffsetEntry(multi) != 0 &&
    1605                 :                 multi != FirstMultiXactId)
    1606               0 :                 return;
    1607                 : 
    1608               0 :         pageno = MultiXactIdToOffsetPage(multi);
    1609                 : 
    1610               0 :         LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
    1611                 : 
    1612                 :         /* Zero the page and make an XLOG entry about it */
    1613               0 :         ZeroMultiXactOffsetPage(pageno, true);
    1614                 : 
    1615               0 :         LWLockRelease(MultiXactOffsetControlLock);
    1616                 : }
    1617                 : 
    1618                 : /*
    1619                 :  * Make sure that MultiXactMember has room for the members of a newly-
    1620                 :  * allocated MultiXactId.
    1621                 :  *
    1622                 :  * Like the above routine, this is called while holding MultiXactGenLock;
    1623                 :  * same comments apply.
                         :  *
                         :  * offset: member-array position of the first new member.
                         :  * nmembers: number of member slots being added.  Nothing is returned.
                         :  *
                         :  * Each loop iteration deals with one members page: if offset sits on
                         :  * entry 0 of a page, that page is zeroed with
                         :  * ZeroMultiXactMemberPage(pageno, true) under an exclusive
                         :  * MultiXactMemberControlLock.  offset is then moved to the first entry of
                         :  * the following page and nmembers is reduced by the number of entries
                         :  * stepped over.  A page on which offset starts at a nonzero entry is not
                         :  * zeroed here.
    1624                 :  */
    1625                 : static void
    1626                 : ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
    1627               0 : {
    1628                 :         /*
    1629                 :          * It's possible that the members span more than one page of the members
    1630                 :          * file, so we loop to ensure we consider each page.  The coding is not
    1631                 :          * optimal if the members span several pages, but that seems unusual
    1632                 :          * enough to not worry much about.
    1633                 :          */
    1634               0 :         while (nmembers > 0)
    1635                 :         {
    1636                 :                 int                     entryno;
    1637                 : 
    1638                 :                 /*
    1639                 :                  * Only zero when at first entry of a page.
    1640                 :                  */
    1641               0 :                 entryno = MXOffsetToMemberEntry(offset);
    1642               0 :                 if (entryno == 0)
    1643                 :                 {
    1644                 :                         int                     pageno;
    1645                 : 
    1646               0 :                         pageno = MXOffsetToMemberPage(offset);
    1647                 : 
    1648               0 :                         LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
    1649                 : 
    1650                 :                         /* Zero the page and make an XLOG entry about it */
    1651               0 :                         ZeroMultiXactMemberPage(pageno, true);
    1652                 : 
    1653               0 :                         LWLockRelease(MultiXactMemberControlLock);
    1654                 :                 }
    1655                 : 
    1656                 :                 /* Advance to next page (OK if nmembers goes negative) */
    1657               0 :                 offset += (MULTIXACT_MEMBERS_PER_PAGE - entryno);
    1658               0 :                 nmembers -= (MULTIXACT_MEMBERS_PER_PAGE - entryno);
    1659                 :         }
    1660               0 : }
    1661                 : 
    1662                 : /*
    1663                 :  * Remove all MultiXactOffset and MultiXactMember segments before the oldest
    1664                 :  * ones still of interest.
    1665                 :  *
    1666                 :  * This is called only during checkpoints.  We assume no more than one
    1667                 :  * backend does this at a time.
    1668                 :  *
    1669                 :  * XXX do we have any issues with needing to checkpoint here?
                         :  *
                         :  * Takes no arguments and returns nothing.  Steps, as coded below:
                         :  *   1. Under MultiXactGenLock (shared): start from nextMXact (raised to
                         :  *      FirstMultiXactId if it is below that), lower it to the oldest
                         :  *      valid entry found in OldestMemberMXactId[] and
                         :  *      OldestVisibleMXactId[], and snapshot nextOffset.
                         :  *   2. Return early if the result equals
                         :  *      MultiXactState->lastTruncationPoint.
                         :  *   3. Choose the members cutoff: the saved nextOffset when no older
                         :  *      multi was found, otherwise the offset stored for oldestMXact in
                         :  *      the offsets SLRU.
                         :  *   4. Call SimpleLruTruncate() on the offsets area and then the members
                         :  *      area, and record oldestMXact as the new lastTruncationPoint.
    1670                 :  */
    1671                 : static void
    1672                 : TruncateMultiXact(void)
    1673              19 : {
    1674                 :         MultiXactId nextMXact;
    1675                 :         MultiXactOffset nextOffset;
    1676                 :         MultiXactId oldestMXact;
    1677                 :         MultiXactOffset oldestOffset;
    1678                 :         int                     cutoffPage;
    1679                 :         int                     i;
    1680                 : 
    1681                 :         /*
    1682                 :          * First, compute where we can safely truncate.  Per notes above, this is
    1683                 :          * the oldest valid value among all the OldestMemberMXactId[] and
    1684                 :          * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
    1685                 :          */
    1686              19 :         LWLockAcquire(MultiXactGenLock, LW_SHARED);
    1687                 : 
    1688                 :         /*
    1689                 :          * We have to beware of the possibility that nextMXact is in the
    1690                 :          * wrapped-around state.  We don't fix the counter itself here, but we
    1691                 :          * must be sure to use a valid value in our calculation.
    1692                 :          */
    1693              19 :         nextMXact = MultiXactState->nextMXact;
    1694              19 :         if (nextMXact < FirstMultiXactId)
    1695               0 :                 nextMXact = FirstMultiXactId;
    1696                 : 
    1697              19 :         oldestMXact = nextMXact;
    1698            1976 :         for (i = 1; i <= MaxBackends; i++)      /* entries 1..MaxBackends are scanned */
    1699                 :         {
    1700                 :                 MultiXactId thisoldest;
    1701                 : 
    1702            1957 :                 thisoldest = OldestMemberMXactId[i];
    1703            1957 :                 if (MultiXactIdIsValid(thisoldest) &&
    1704                 :                         MultiXactIdPrecedes(thisoldest, oldestMXact))
    1705               0 :                         oldestMXact = thisoldest;
    1706            1957 :                 thisoldest = OldestVisibleMXactId[i];
    1707            1957 :                 if (MultiXactIdIsValid(thisoldest) &&
    1708                 :                         MultiXactIdPrecedes(thisoldest, oldestMXact))
    1709               0 :                         oldestMXact = thisoldest;
    1710                 :         }
    1711                 : 
    1712                 :         /* Save the current nextOffset too */
    1713              19 :         nextOffset = MultiXactState->nextOffset;
    1714                 : 
    1715              19 :         LWLockRelease(MultiXactGenLock);
    1716                 : 
    1717                 :         debug_elog3(DEBUG2, "MultiXact: truncation point = %u", oldestMXact);
    1718                 : 
    1719                 :         /*
    1720                 :          * If we already truncated at this point, do nothing.  This saves time
    1721                 :          * when no MultiXacts are getting used, which is probably not uncommon.
    1722                 :          */
    1723              19 :         if (MultiXactState->lastTruncationPoint == oldestMXact)
    1724               5 :                 return;
    1725                 : 
    1726                 :         /*
    1727                 :          * We need to determine where to truncate MultiXactMember.  If we found a
    1728                 :          * valid oldest MultiXactId, read its starting offset; otherwise we use
    1729                 :          * the nextOffset value we saved above.
    1730                 :          */
    1731              14 :         if (oldestMXact == nextMXact)
    1732              14 :                 oldestOffset = nextOffset;
    1733                 :         else
    1734                 :         {
    1735                 :                 int                     pageno;
    1736                 :                 int                     slotno;
    1737                 :                 int                     entryno;
    1738                 :                 MultiXactOffset *offptr;
    1739                 : 
    1740                 :                 /* lock is acquired by SimpleLruReadPage_ReadOnly */
    1741                 : 
    1742               0 :                 pageno = MultiXactIdToOffsetPage(oldestMXact);
    1743               0 :                 entryno = MultiXactIdToOffsetEntry(oldestMXact);
    1744                 : 
    1745               0 :                 slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, oldestMXact);
    1746               0 :                 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
    1747               0 :                 offptr += entryno;
    1748               0 :                 oldestOffset = *offptr;
    1749                 : 
    1750               0 :                 LWLockRelease(MultiXactOffsetControlLock);
    1751                 :         }
    1752                 : 
    1753                 :         /*
    1754                 :          * The cutoff point is the start of the segment containing oldestMXact. We
    1755                 :          * pass the *page* containing oldestMXact to SimpleLruTruncate.
    1756                 :          */
    1757              14 :         cutoffPage = MultiXactIdToOffsetPage(oldestMXact);
    1758                 : 
    1759              14 :         SimpleLruTruncate(MultiXactOffsetCtl, cutoffPage);
    1760                 : 
    1761                 :         /*
    1762                 :          * Also truncate MultiXactMember at the previously determined offset.
    1763                 :          */
    1764              14 :         cutoffPage = MXOffsetToMemberPage(oldestOffset);
    1765                 : 
    1766              14 :         SimpleLruTruncate(MultiXactMemberCtl, cutoffPage);
    1767                 : 
    1768                 :         /*
    1769                 :          * Set the last known truncation point.  We don't need a lock for this
    1770                 :          * since only one backend does checkpoints at a time.
    1771                 :          */
    1772              14 :         MultiXactState->lastTruncationPoint = oldestMXact;
    1773                 : }
    1774                 : 
    1775                 : /*
    1776                 :  * Decide which of two MultiXactOffset page numbers is "older" for truncation
    1777                 :  * purposes.
    1778                 :  *
    1779                 :  * We need to use comparison of MultiXactId here in order to do the right
    1780                 :  * thing with wraparound.  However, if we are asked about page number zero, we
    1781                 :  * don't want to hand InvalidMultiXactId to MultiXactIdPrecedes: it'll get
    1782                 :  * weird.  So, offset both multis by FirstMultiXactId to avoid that.
    1783                 :  * (Actually, the current implementation doesn't do anything weird with
    1784                 :  * InvalidMultiXactId, but there's no harm in leaving this code like this.)
                         :  *
                         :  * page1, page2: offsets-area page numbers.  Each is converted to a
                         :  * MultiXactId as page * MULTIXACT_OFFSETS_PER_PAGE + FirstMultiXactId.
                         :  * Returns the result of MultiXactIdPrecedes() on those two values, i.e.
                         :  * true when page1 is the older page.
    1785                 :  */
    1786                 : static bool
    1787                 : MultiXactOffsetPagePrecedes(int page1, int page2)
    1788              42 : {
    1789                 :         MultiXactId multi1;
    1790                 :         MultiXactId multi2;
    1791                 : 
    1792              42 :         multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE;
    1793              42 :         multi1 += FirstMultiXactId;
    1794              42 :         multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE;
    1795              42 :         multi2 += FirstMultiXactId;
    1796                 : 
    1797              42 :         return MultiXactIdPrecedes(multi1, multi2);
    1798                 : }
    1799                 : 
    1800                 : /*
    1801                 :  * Decide which of two MultiXactMember page numbers is "older" for truncation
    1802                 :  * purposes.  There is no "invalid offset number" so use the numbers verbatim.
                         :  *
                         :  * page1, page2: members-area page numbers.  Each is converted to a
                         :  * MultiXactOffset as page * MULTIXACT_MEMBERS_PER_PAGE, with no further
                         :  * adjustment.  Returns the result of MultiXactOffsetPrecedes() on those
                         :  * two values, i.e. true when page1 is the older page.
    1803                 :  */
    1804                 : static bool
    1805                 : MultiXactMemberPagePrecedes(int page1, int page2)
    1806              29 : {
    1807                 :         MultiXactOffset offset1;
    1808                 :         MultiXactOffset offset2;
    1809                 : 
    1810              29 :         offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
    1811              29 :         offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
    1812                 : 
    1813              29 :         return MultiXactOffsetPrecedes(offset1, offset2);
    1814                 : }
    1815                 : 
    1816                 : /*
    1817                 :  * Decide which of two MultiXactIds is earlier.
    1818                 :  *
    1819                 :  * XXX do we need to do something special for InvalidMultiXactId?
    1820                 :  * (Doesn't look like it.)
                         :  *
                         :  * Returns true when (multi1 - multi2), reinterpreted as int32, is negative.
                         :  * Comparing the signed difference rather than the raw values is what lets
                         :  * the ordering survive counter wraparound.  No value is treated specially.
                         :  * NOTE(review): this assumes MultiXactId is a 32-bit unsigned type, so the
                         :  * subtraction wraps modulo 2^32 -- confirm against its typedef.
    1821                 :  */
    1822                 : static bool
    1823                 : MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
    1824              42 : {
    1825              42 :         int32           diff = (int32) (multi1 - multi2);
    1826                 : 
    1827              42 :         return (diff < 0);
    1828                 : }
    1829                 : 
    1830                 : /*
    1831                 :  * Decide which of two offsets is earlier.
                         :  *
                         :  * Returns true when (offset1 - offset2), reinterpreted as int32, is
                         :  * negative -- the same signed-difference comparison used by
                         :  * MultiXactIdPrecedes(), so ordering is kept across wraparound.
                         :  * NOTE(review): this assumes MultiXactOffset is a 32-bit unsigned type --
                         :  * confirm against its typedef.
    1832                 :  */
    1833                 : static bool
    1834                 : MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
    1835              29 : {
    1836              29 :         int32           diff = (int32) (offset1 - offset2);
    1837                 : 
    1838              29 :         return (diff < 0);
    1839                 : }
    1840                 : 
    1841                 : 
    1842                 : /*
    1843                 :  * Write an xlog record reflecting the zeroing of either a MEMBERs or
    1844                 :  * OFFSETs page (info shows which)
                         :  *
                         :  * pageno: page number that was zeroed; it is the record's entire payload
                         :  * (sizeof(int) bytes taken from the local copy of the argument).
                         :  * info: record type passed through to XLogInsert() for RM_MULTIXACT_ID.
                         :  *
                         :  * The record consists of a single XLogRecData element with no buffer
                         :  * attached (InvalidBuffer).  The value returned by XLogInsert() is
                         :  * deliberately discarded.
    1845                 :  */
    1846                 : static void
    1847                 : WriteMZeroPageXlogRec(int pageno, uint8 info)
    1848               0 : {
    1849                 :         XLogRecData rdata;
    1850                 : 
    1851               0 :         rdata.data = (char *) (&pageno);
    1852               0 :         rdata.len = sizeof(int);
    1853               0 :         rdata.buffer = InvalidBuffer;
    1854               0 :         rdata.next = NULL;
    1855               0 :         (void) XLogInsert(RM_MULTIXACT_ID, info, &rdata);
    1856               0 : }
    1857                 : 
    1858                 : /*
    1859                 :  * MULTIXACT resource manager's routines
                         :  *
                         :  * multixact_redo replays one MULTIXACT WAL record.  The record type is
                         :  * record->xl_info with the XLR_INFO_MASK bits cleared:
                         :  *
                         :  *   XLOG_MULTIXACT_ZERO_OFF_PAGE / XLOG_MULTIXACT_ZERO_MEM_PAGE
                         :  *      The payload is an int page number.  Under the matching control
                         :  *      lock (exclusive) the page is zeroed, written out at once with
                         :  *      SimpleLruWritePage(), and asserted to be clean afterwards.
                         :  *      NOTE(review): the false passed to the Zero*Page routine presumably
                         :  *      suppresses writing a new XLOG record -- confirm at its definition.
                         :  *
                         :  *   XLOG_MULTIXACT_CREATE_ID
                         :  *      The payload is an xl_multixact_create.  Its members are stored
                         :  *      again with RecordNewMultiXact(), nextMXact/nextOffset are advanced
                         :  *      past the record, and ShmemVariableCache->nextXid is pushed beyond
                         :  *      the largest XID among record->xl_xid and the member XIDs.
                         :  *
                         :  *   anything else
                         :  *      elog(PANIC).
                         :  *
                         :  * The lsn argument is not referenced in the body.
    1860                 :  */
    1861                 : void
    1862                 : multixact_redo(XLogRecPtr lsn, XLogRecord *record)
    1863               0 : {
    1864               0 :         uint8           info = record->xl_info & ~XLR_INFO_MASK;
    1865                 : 
    1866               0 :         if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
    1867                 :         {
    1868                 :                 int                     pageno;
    1869                 :                 int                     slotno;
    1870                 : 
    1871               0 :                 memcpy(&pageno, XLogRecGetData(record), sizeof(int));
    1872                 : 
    1873               0 :                 LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
    1874                 : 
    1875               0 :                 slotno = ZeroMultiXactOffsetPage(pageno, false);
    1876               0 :                 SimpleLruWritePage(MultiXactOffsetCtl, slotno, NULL);
    1877                 :                 Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
    1878                 : 
    1879               0 :                 LWLockRelease(MultiXactOffsetControlLock);
    1880                 :         }
    1881               0 :         else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
    1882                 :         {
    1883                 :                 int                     pageno;
    1884                 :                 int                     slotno;
    1885                 : 
    1886               0 :                 memcpy(&pageno, XLogRecGetData(record), sizeof(int));
    1887                 : 
    1888               0 :                 LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
    1889                 : 
    1890               0 :                 slotno = ZeroMultiXactMemberPage(pageno, false);
    1891               0 :                 SimpleLruWritePage(MultiXactMemberCtl, slotno, NULL);
    1892                 :                 Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
    1893                 : 
    1894               0 :                 LWLockRelease(MultiXactMemberControlLock);
    1895                 :         }
    1896               0 :         else if (info == XLOG_MULTIXACT_CREATE_ID)
    1897                 :         {
    1898               0 :                 xl_multixact_create *xlrec = (xl_multixact_create *) XLogRecGetData(record);
    1899               0 :                 TransactionId *xids = xlrec->xids;
    1900                 :                 TransactionId max_xid;
    1901                 :                 int                     i;
    1902                 : 
    1903                 :                 /* Store the data back into the SLRU files */
    1904               0 :                 RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nxids, xids);
    1905                 : 
    1906                 :                 /* Make sure nextMXact/nextOffset are beyond what this record has */
    1907               0 :                 MultiXactAdvanceNextMXact(xlrec->mid + 1, xlrec->moff + xlrec->nxids);
    1908                 : 
    1909                 :                 /*
    1910                 :                  * Make sure nextXid is beyond any XID mentioned in the record. This
    1911                 :                  * should be unnecessary, since any XID found here ought to have other
    1912                 :                  * evidence in the XLOG, but let's be safe.
    1913                 :                  */
    1914               0 :                 max_xid = record->xl_xid;
    1915               0 :                 for (i = 0; i < xlrec->nxids; i++)
    1916                 :                 {
    1917               0 :                         if (TransactionIdPrecedes(max_xid, xids[i]))
    1918               0 :                                 max_xid = xids[i];
    1919                 :                 }
    1920               0 :                 if (TransactionIdFollowsOrEquals(max_xid,
    1921                 :                                                                                  ShmemVariableCache->nextXid))
    1922                 :                 {
    1923               0 :                         ShmemVariableCache->nextXid = max_xid;
    1924               0 :                         TransactionIdAdvance(ShmemVariableCache->nextXid);
    1925                 :                 }
    1926                 :         }
    1927                 :         else
    1928               0 :                 elog(PANIC, "multixact_redo: unknown op code %u", info);
    1929               0 : }
    1930                 : /*
                         :  * Append a human-readable description of one MULTIXACT WAL record to buf.
                         :  *
                         :  * buf: StringInfo that receives the text (appended, never reset here).
                         :  * xl_info: the record's info byte; the XLR_INFO_MASK bits are cleared
                         :  *      before the record type is compared.
                         :  * rec: the record payload -- an int page number for the two
                         :  *      ZERO_*_PAGE types, or an xl_multixact_create for CREATE_ID.
                         :  *
                         :  * For CREATE_ID the multixact id and offset are printed, followed by
                         :  * each of the nxids member XIDs.  Any other record type appends the
                         :  * literal text "UNKNOWN".  Nothing is returned.
                         :  */
    1931                 : void
    1932                 : multixact_desc(StringInfo buf, uint8 xl_info, char *rec)
    1933               0 : {
    1934               0 :         uint8           info = xl_info & ~XLR_INFO_MASK;
    1935                 : 
    1936               0 :         if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
    1937                 :         {
    1938                 :                 int                     pageno;
    1939                 : 
    1940               0 :                 memcpy(&pageno, rec, sizeof(int));
    1941               0 :                 appendStringInfo(buf, "zero offsets page: %d", pageno);
    1942                 :         }
    1943               0 :         else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
    1944                 :         {
    1945                 :                 int                     pageno;
    1946                 : 
    1947               0 :                 memcpy(&pageno, rec, sizeof(int));
    1948               0 :                 appendStringInfo(buf, "zero members page: %d", pageno);
    1949                 :         }
    1950               0 :         else if (info == XLOG_MULTIXACT_CREATE_ID)
    1951                 :         {
    1952               0 :                 xl_multixact_create *xlrec = (xl_multixact_create *) rec;
    1953                 :                 int                     i;
    1954                 : 
    1955               0 :                 appendStringInfo(buf, "create multixact %u offset %u:",
    1956                 :                                                  xlrec->mid, xlrec->moff);
    1957               0 :                 for (i = 0; i < xlrec->nxids; i++)
    1958               0 :                         appendStringInfo(buf, " %u", xlrec->xids[i]);
    1959                 :         }
    1960                 :         else
    1961               0 :                 appendStringInfo(buf, "UNKNOWN");
    1962               0 : }

Generated by: LTP GCOV extension version 1.5