LTP GCOV extension - code coverage report
Current view: directory - access/transam - twophase.c
Test: unnamed
Date: 2008-07-03 Instrumented lines: 539
Code covered: 69.9 % Executed lines: 377
Legend: not executed executed

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * twophase.c
       4                 :  *              Two-phase commit support functions.
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  * IDENTIFICATION
      10                 :  *              $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.38 2007/11/15 21:14:32 momjian Exp $
      11                 :  *
      12                 :  * NOTES
      13                 :  *              Each global transaction is associated with a global transaction
      14                 :  *              identifier (GID). The client assigns a GID to a postgres
      15                 :  *              transaction with the PREPARE TRANSACTION command.
      16                 :  *
      17                 :  *              We keep all active global transactions in a shared memory array.
      18                 :  *              When the PREPARE TRANSACTION command is issued, the GID is
      19                 :  *              reserved for the transaction in the array. This is done before
      20                 :  *              a WAL entry is made, because the reservation checks for duplicate
      21                 :  *              GIDs and aborts the transaction if there already is a global
      22                 :  *              transaction in prepared state with the same GID.
      23                 :  *
      24                 :  *              A global transaction (gxact) also has a dummy PGPROC that is entered
      25                 :  *              into the ProcArray array; this is what keeps the XID considered
      26                 :  *              running by TransactionIdIsInProgress.  It is also convenient as a
      27                 :  *              PGPROC to hook the gxact's locks to.
      28                 :  *
      29                 :  *              In order to survive crashes and shutdowns, all prepared
      30                 :  *              transactions must be stored in permanent storage. This includes
      31                 :  *              locking information, pending notifications etc. All that state
      32                 :  *              information is written to the per-transaction state file in
      33                 :  *              the pg_twophase directory.
      34                 :  *
      35                 :  *-------------------------------------------------------------------------
      36                 :  */
      37                 : #include "postgres.h"
      38                 : 
      39                 : #include <fcntl.h>
      40                 : #include <sys/stat.h>
      41                 : #include <sys/types.h>
      42                 : #include <time.h>
      43                 : #include <unistd.h>
      44                 : 
      45                 : #include "access/heapam.h"
      46                 : #include "access/subtrans.h"
      47                 : #include "access/transam.h"
      48                 : #include "access/twophase.h"
      49                 : #include "access/twophase_rmgr.h"
      50                 : #include "access/xact.h"
      51                 : #include "catalog/pg_type.h"
      52                 : #include "funcapi.h"
      53                 : #include "miscadmin.h"
      54                 : #include "pgstat.h"
      55                 : #include "storage/fd.h"
      56                 : #include "storage/procarray.h"
      57                 : #include "storage/smgr.h"
      58                 : #include "utils/builtins.h"
      59                 : 
      60                 : 
      61                 : /*
      62                 :  * Directory where Two-phase commit files reside within PGDATA
      63                 :  */
      64                 : #define TWOPHASE_DIR "pg_twophase"
      65                 : 
      66                 : /* GUC variable, can't be changed after startup */
      67                 : int                     max_prepared_xacts = 5;
      68                 : 
      69                 : /*
      70                 :  * This struct describes one global transaction that is in prepared state
      71                 :  * or attempting to become prepared.
      72                 :  *
      73                 :  * The first component of the struct is a dummy PGPROC that is inserted
      74                 :  * into the global ProcArray so that the transaction appears to still be
      75                 :  * running and holding locks.  It must be first because we cast pointers
      76                 :  * to PGPROC and pointers to GlobalTransactionData back and forth.
      77                 :  *
      78                 :  * The lifecycle of a global transaction is:
      79                 :  *
      80                 :  * 1. After checking that the requested GID is not in use, set up an
      81                 :  * entry in the TwoPhaseState->prepXacts array with the correct XID and GID,
      82                 :  * with locking_xid = my own XID and valid = false.
      83                 :  *
      84                 :  * 2. After successfully completing prepare, set valid = true and enter the
      85                 :  * contained PGPROC into the global ProcArray.
      86                 :  *
      87                 :  * 3. To begin COMMIT PREPARED or ROLLBACK PREPARED, check that the entry
      88                 :  * is valid and its locking_xid is no longer active, then store my current
      89                 :  * XID into locking_xid.  This prevents concurrent attempts to commit or
      90                 :  * rollback the same prepared xact.
      91                 :  *
      92                 :  * 4. On completion of COMMIT PREPARED or ROLLBACK PREPARED, remove the entry
      93                 :  * from the ProcArray and the TwoPhaseState->prepXacts array and return it to
      94                 :  * the freelist.
      95                 :  *
      96                 :  * Note that if the preparing transaction fails between steps 1 and 2, the
      97                 :  * entry will remain in prepXacts until recycled.  We can detect recyclable
      98                 :  * entries by checking for valid = false and locking_xid no longer active.
      99                 :  *
     100                 :  * typedef struct GlobalTransactionData *GlobalTransaction appears in
     101                 :  * twophase.h
     102                 :  */
     103                 : #define GIDSIZE 200
     104                 : 
     105                 : typedef struct GlobalTransactionData
     106                 : {
     107                 :         PGPROC          proc;                   /* dummy proc */
     108                 :         TimestampTz prepared_at;        /* time of preparation */
     109                 :         XLogRecPtr      prepare_lsn;    /* XLOG offset of prepare record */
     110                 :         Oid                     owner;                  /* ID of user that executed the xact */
     111                 :         TransactionId locking_xid;      /* top-level XID of backend working on xact */
     112                 :         bool            valid;                  /* TRUE if fully prepared */
     113                 :         char            gid[GIDSIZE];   /* The GID assigned to the prepared xact */
     114                 : } GlobalTransactionData;
     115                 : 
     116                 : /*
     117                 :  * Two Phase Commit shared state.  Access to this struct is protected
     118                 :  * by TwoPhaseStateLock.
     119                 :  */
     120                 : typedef struct TwoPhaseStateData
     121                 : {
     122                 :         /* Head of linked list of free GlobalTransactionData structs */
     123                 :         SHMEM_OFFSET freeGXacts;
     124                 : 
     125                 :         /* Number of valid prepXacts entries. */
     126                 :         int                     numPrepXacts;
     127                 : 
     128                 :         /*
     129                 :          * There are max_prepared_xacts items in this array, but C wants a
     130                 :          * fixed-size array.
     131                 :          */
     132                 :         GlobalTransaction prepXacts[1];         /* VARIABLE LENGTH ARRAY */
     133                 : } TwoPhaseStateData;                    /* VARIABLE LENGTH STRUCT */
     134                 : 
     135                 : static TwoPhaseStateData *TwoPhaseState;
     136                 : 
     137                 : 
     138                 : static void RecordTransactionCommitPrepared(TransactionId xid,
     139                 :                                                                 int nchildren,
     140                 :                                                                 TransactionId *children,
     141                 :                                                                 int nrels,
     142                 :                                                                 RelFileNode *rels);
     143                 : static void RecordTransactionAbortPrepared(TransactionId xid,
     144                 :                                                            int nchildren,
     145                 :                                                            TransactionId *children,
     146                 :                                                            int nrels,
     147                 :                                                            RelFileNode *rels);
     148                 : static void ProcessRecords(char *bufptr, TransactionId xid,
     149                 :                            const TwoPhaseCallback callbacks[]);
     150                 : 
     151                 : 
     152                 : /*
     153                 :  * Initialization of shared memory
     154                 :  */
     155                 : Size
     156                 : TwoPhaseShmemSize(void)
     157              34 : {
     158                 :         Size            size;
     159                 : 
     160                 :         /* Need the fixed struct, the array of pointers, and the GlobalTransactionData structs */
     161              34 :         size = offsetof(TwoPhaseStateData, prepXacts);
     162              34 :         size = add_size(size, mul_size(max_prepared_xacts,
     163                 :                                                                    sizeof(GlobalTransaction)));
     164              34 :         size = MAXALIGN(size);
     165              34 :         size = add_size(size, mul_size(max_prepared_xacts,
     166                 :                                                                    sizeof(GlobalTransactionData)));
     167                 : 
     168              34 :         return size;
     169                 : }
     170                 : 
     171                 : void
     172                 : TwoPhaseShmemInit(void)
     173              16 : {
     174                 :         bool            found;
     175                 : 
     176              16 :         TwoPhaseState = ShmemInitStruct("Prepared Transaction Table",
     177                 :                                                                         TwoPhaseShmemSize(),
     178                 :                                                                         &found);
     179              16 :         if (!IsUnderPostmaster)
     180                 :         {
     181                 :                 GlobalTransaction gxacts;
     182                 :                 int                     i;
     183                 : 
     184                 :                 Assert(!found);
     185              16 :                 TwoPhaseState->freeGXacts = INVALID_OFFSET;
     186              16 :                 TwoPhaseState->numPrepXacts = 0;
     187                 : 
     188                 :                 /*
     189                 :                  * Initialize the linked list of free GlobalTransactionData structs
     190                 :                  */
     191              16 :                 gxacts = (GlobalTransaction)
     192                 :                         ((char *) TwoPhaseState +
     193                 :                          MAXALIGN(offsetof(TwoPhaseStateData, prepXacts) +
     194                 :                                           sizeof(GlobalTransaction) * max_prepared_xacts));
     195              96 :                 for (i = 0; i < max_prepared_xacts; i++)
     196                 :                 {
     197              80 :                         gxacts[i].proc.links.next = TwoPhaseState->freeGXacts;
     198              80 :                         TwoPhaseState->freeGXacts = MAKE_OFFSET(&gxacts[i]);
     199                 :                 }
     200                 :         }
     201                 :         else
     202                 :                 Assert(found);
     203              16 : }
     204                 : 
     205                 : 
     206                 : /*
     207                 :  * MarkAsPreparing
     208                 :  *              Reserve the GID for the given transaction.
     209                 :  *
     210                 :  * Internally, this creates a gxact struct and puts it into the active array.
     211                 :  * NOTE: this is also used when reloading a gxact after a crash; so avoid
     212                 :  * assuming that we can use very much backend context.
     213                 :  */
     214                 : GlobalTransaction
     215                 : MarkAsPreparing(TransactionId xid, const char *gid,
     216                 :                                 TimestampTz prepared_at, Oid owner, Oid databaseid)
     217               6 : {
     218                 :         GlobalTransaction gxact;
     219                 :         int                     i;
     220                 : 
     221               6 :         if (strlen(gid) >= GIDSIZE)
     222               0 :                 ereport(ERROR,
     223                 :                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     224                 :                                  errmsg("transaction identifier \"%s\" is too long",
     225                 :                                                 gid)));
     226                 : 
     227               6 :         LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
     228                 : 
     229                 :         /*
     230                 :          * First, find and recycle any gxacts that failed during prepare. We do
     231                 :          * this partly to ensure we don't mistakenly say their GIDs are still
     232                 :          * reserved, and partly so we don't fail on out-of-slots unnecessarily.
     233                 :          */
     234               8 :         for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
     235                 :         {
     236               2 :                 gxact = TwoPhaseState->prepXacts[i];
     237               2 :                 if (!gxact->valid && !TransactionIdIsActive(gxact->locking_xid))
     238                 :                 {
     239                 :                         /* It's dead Jim ... remove from the active array */
     240               0 :                         TwoPhaseState->numPrepXacts--;
     241               0 :                         TwoPhaseState->prepXacts[i] = TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts];
     242                 :                         /* and put it back in the freelist */
     243               0 :                         gxact->proc.links.next = TwoPhaseState->freeGXacts;
     244               0 :                         TwoPhaseState->freeGXacts = MAKE_OFFSET(gxact);
     245                 :                         /* Back up index count too, so we don't miss scanning one */
     246               0 :                         i--;
     247                 :                 }
     248                 :         }
     249                 : 
     250                 :         /* Check for conflicting GID */
     251               7 :         for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
     252                 :         {
     253               2 :                 gxact = TwoPhaseState->prepXacts[i];
     254               2 :                 if (strcmp(gxact->gid, gid) == 0)
     255                 :                 {
     256               1 :                         ereport(ERROR,
     257                 :                                         (errcode(ERRCODE_DUPLICATE_OBJECT),
     258                 :                                          errmsg("transaction identifier \"%s\" is already in use",
     259                 :                                                         gid)));
     260                 :                 }
     261                 :         }
     262                 : 
     263                 :         /* Get a free gxact from the freelist */
     264               5 :         if (TwoPhaseState->freeGXacts == INVALID_OFFSET)
     265               0 :                 ereport(ERROR,
     266                 :                                 (errcode(ERRCODE_OUT_OF_MEMORY),
     267                 :                                  errmsg("maximum number of prepared transactions reached"),
     268                 :                                  errhint("Increase max_prepared_transactions (currently %d).",
     269                 :                                                  max_prepared_xacts)));
     270               5 :         gxact = (GlobalTransaction) MAKE_PTR(TwoPhaseState->freeGXacts);
     271               5 :         TwoPhaseState->freeGXacts = gxact->proc.links.next;
     272                 : 
     273                 :         /* Initialize it */
     274               5 :         MemSet(&gxact->proc, 0, sizeof(PGPROC));
     275               5 :         SHMQueueElemInit(&(gxact->proc.links));
     276               5 :         gxact->proc.waitStatus = STATUS_OK;
     277                 :         /* We set up the gxact's VXID as InvalidBackendId/XID */
     278               5 :         gxact->proc.lxid = (LocalTransactionId) xid;
     279               5 :         gxact->proc.xid = xid;
     280               5 :         gxact->proc.xmin = InvalidTransactionId;
     281               5 :         gxact->proc.pid = 0;
     282               5 :         gxact->proc.backendId = InvalidBackendId;
     283               5 :         gxact->proc.databaseId = databaseid;
     284               5 :         gxact->proc.roleId = owner;
     285               5 :         gxact->proc.inCommit = false;
     286               5 :         gxact->proc.vacuumFlags = 0;
     287               5 :         gxact->proc.lwWaiting = false;
     288               5 :         gxact->proc.lwExclusive = false;
     289               5 :         gxact->proc.lwWaitLink = NULL;
     290               5 :         gxact->proc.waitLock = NULL;
     291               5 :         gxact->proc.waitProcLock = NULL;
     292              85 :         for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
     293              80 :                 SHMQueueInit(&(gxact->proc.myProcLocks[i]));
     294                 :         /* subxid data must be filled later by GXactLoadSubxactData */
     295               5 :         gxact->proc.subxids.overflowed = false;
     296               5 :         gxact->proc.subxids.nxids = 0;
     297                 : 
     298               5 :         gxact->prepared_at = prepared_at;
     299                 :         /* initialize LSN to 0 (start of WAL) */
     300               5 :         gxact->prepare_lsn.xlogid = 0;
     301               5 :         gxact->prepare_lsn.xrecoff = 0;
     302               5 :         gxact->owner = owner;
     303               5 :         gxact->locking_xid = xid;
     304               5 :         gxact->valid = false;
     305               5 :         strcpy(gxact->gid, gid);
     306                 : 
     307                 :         /* And insert it into the active array */
     308                 :         Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts);
     309               5 :         TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact;
     310                 : 
     311               5 :         LWLockRelease(TwoPhaseStateLock);
     312                 : 
     313               5 :         return gxact;
     314                 : }
     315                 : 
     316                 : /*
     317                 :  * GXactLoadSubxactData
     318                 :  *
     319                 :  * If the transaction being persisted had any subtransactions, this must
     320                 :  * be called before MarkAsPrepared() to load information into the dummy
     321                 :  * PGPROC.
     322                 :  */
     323                 : static void
     324                 : GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts,
     325                 :                                          TransactionId *children)
     326               1 : {
     327                 :         /* We need no extra lock since the GXACT isn't valid yet */
     328               1 :         if (nsubxacts > PGPROC_MAX_CACHED_SUBXIDS)
     329                 :         {
     330               0 :                 gxact->proc.subxids.overflowed = true;
     331               0 :                 nsubxacts = PGPROC_MAX_CACHED_SUBXIDS;
     332                 :         }
     333               1 :         if (nsubxacts > 0)
     334                 :         {
     335               1 :                 memcpy(gxact->proc.subxids.xids, children,
     336                 :                            nsubxacts * sizeof(TransactionId));
     337               1 :                 gxact->proc.subxids.nxids = nsubxacts;
     338                 :         }
     339               1 : }
     340                 : 
     341                 : /*
     342                 :  * MarkAsPrepared
     343                 :  *              Mark the GXACT as fully valid, and enter it into the global ProcArray.
     344                 :  */
     345                 : static void
     346                 : MarkAsPrepared(GlobalTransaction gxact)
     347               5 : {
     348                 :         /* Lock here may be overkill, but I'm not convinced of that ... */
     349               5 :         LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
     350                 :         Assert(!gxact->valid);
     351               5 :         gxact->valid = true;
     352               5 :         LWLockRelease(TwoPhaseStateLock);
     353                 : 
     354                 :         /*
     355                 :          * Put it into the global ProcArray so TransactionIdIsInProgress considers
     356                 :          * the XID as still running.
     357                 :          */
     358               5 :         ProcArrayAdd(&gxact->proc);
     359               5 : }
     360                 : 
     361                 : /*
     362                 :  * LockGXact
     363                 :  *              Locate the prepared transaction and mark it busy for COMMIT PREPARED or ROLLBACK PREPARED.
     364                 :  */
     365                 : static GlobalTransaction
     366                 : LockGXact(const char *gid, Oid user)
     367               5 : {
     368                 :         int                     i;
     369                 : 
     370               5 :         LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
     371                 : 
     372               5 :         for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
     373                 :         {
     374               5 :                 GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
     375                 : 
     376                 :                 /* Ignore not-yet-valid GIDs */
     377               5 :                 if (!gxact->valid)
     378               0 :                         continue;
     379               5 :                 if (strcmp(gxact->gid, gid) != 0)
     380               0 :                         continue;
     381                 : 
     382                 :                 /* Found it, but has someone else got it locked? */
     383               5 :                 if (TransactionIdIsValid(gxact->locking_xid))
     384                 :                 {
     385               5 :                         if (TransactionIdIsActive(gxact->locking_xid))
     386               0 :                                 ereport(ERROR,
     387                 :                                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     388                 :                                 errmsg("prepared transaction with identifier \"%s\" is busy",
     389                 :                                            gid)));
     390               5 :                         gxact->locking_xid = InvalidTransactionId;
     391                 :                 }
     392                 : 
     393               5 :                 if (user != gxact->owner && !superuser_arg(user))
     394               0 :                         ereport(ERROR,
     395                 :                                         (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     396                 :                                   errmsg("permission denied to finish prepared transaction"),
     397                 :                                          errhint("Must be superuser or the user that prepared the transaction.")));
     398                 : 
     399                 :                 /*
     400                 :                  * Note: it probably would be possible to allow committing from
     401                 :                  * another database; but at the moment NOTIFY is known not to work and
     402                 :                  * there may be some other issues as well.  Hence disallow until
     403                 :                  * someone gets motivated to make it work.
     404                 :                  */
     405               5 :                 if (MyDatabaseId != gxact->proc.databaseId)
     406               0 :                         ereport(ERROR,
     407                 :                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     408                 :                                   errmsg("prepared transaction belongs to another database"),
     409                 :                                          errhint("Connect to the database where the transaction was prepared to finish it.")));
     410                 : 
     411                 :                 /* OK for me to lock it */
     412               5 :                 gxact->locking_xid = GetTopTransactionId();
     413                 : 
     414               5 :                 LWLockRelease(TwoPhaseStateLock);
     415                 : 
     416               5 :                 return gxact;
     417                 :         }
     418                 : 
     419               0 :         LWLockRelease(TwoPhaseStateLock);
     420                 : 
     421               0 :         ereport(ERROR,
     422                 :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     423                 :                  errmsg("prepared transaction with identifier \"%s\" does not exist",
     424                 :                                 gid)));
     425                 : 
     426                 :         /* NOTREACHED */
     427               0 :         return NULL;
     428                 : }
     429                 : 
     430                 : /*
     431                 :  * RemoveGXact
     432                 :  *              Remove the prepared transaction from the shared memory array.
     433                 :  *
     434                 :  * NB: caller should have already removed it from ProcArray
     435                 :  */
     436                 : static void
     437                 : RemoveGXact(GlobalTransaction gxact)
     438               5 : {
     439                 :         int                     i;
     440                 : 
     441               5 :         LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
     442                 : 
     443               5 :         for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
     444                 :         {
     445               5 :                 if (gxact == TwoPhaseState->prepXacts[i])
     446                 :                 {
     447                 :                         /* remove from the active array */
     448               5 :                         TwoPhaseState->numPrepXacts--;
     449               5 :                         TwoPhaseState->prepXacts[i] = TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts];
     450                 : 
     451                 :                         /* and put it back in the freelist */
     452               5 :                         gxact->proc.links.next = TwoPhaseState->freeGXacts;
     453               5 :                         TwoPhaseState->freeGXacts = MAKE_OFFSET(gxact);
     454                 : 
     455               5 :                         LWLockRelease(TwoPhaseStateLock);
     456                 : 
     457               5 :                         return;
     458                 :                 }
     459                 :         }
     460                 : 
     461               0 :         LWLockRelease(TwoPhaseStateLock);
     462                 : 
     463               0 :         elog(ERROR, "failed to find %p in GlobalTransaction array", gxact);
     464                 : }
     465                 : 
     466                 : /*
     467                 :  * TransactionIdIsPrepared
     468                 :  *              True iff transaction associated with the identifier is prepared
     469                 :  *              for two-phase commit
     470                 :  *
     471                 :  * Note: only gxacts marked "valid" are considered; but notice we do not
     472                 :  * check the locking status.
     473                 :  *
     474                 :  * This is not currently exported, because it is only needed internally.
                         :  *
                         :  * xid: transaction identifier to look for.
                         :  *
                         :  * Scans the first numPrepXacts entries of TwoPhaseState->prepXacts while
                         :  * holding TwoPhaseStateLock in shared mode and stops at the first valid
                         :  * entry whose proc.xid equals xid.  The lock is released before returning.
                         :  *
                         :  * Returns true if such an entry was found, false otherwise.
     475                 :  */
     476                 : static bool
     477                 : TransactionIdIsPrepared(TransactionId xid)
     478               0 : {
     479               0 :         bool            result = false;
     480                 :         int                     i;
     481                 : 
     482               0 :         LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
     483                 : 
     484               0 :         for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
     485                 :         {
     486               0 :                 GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
     487                 : 
     488               0 :                 if (gxact->valid && gxact->proc.xid == xid)
     489                 :                 {
     490               0 :                         result = true;
     491               0 :                         break;
     492                 :                 }
     493                 :         }
     494                 : 
     495               0 :         LWLockRelease(TwoPhaseStateLock);
     496                 : 
     497               0 :         return result;
     498                 : }
     499                 : 
     500                 : /*
     501                 :  * Returns an array of all prepared transactions for the user-level
     502                 :  * function pg_prepared_xact.
     503                 :  *
     504                 :  * The returned array and all its elements are copies of internal data
     505                 :  * structures, to minimize the time we need to hold the TwoPhaseStateLock.
     506                 :  *
     507                 :  * WARNING -- we return even those transactions that are not fully prepared
     508                 :  * yet.  The caller should filter them out if he doesn't want them.
     509                 :  *
     510                 :  * The returned array is palloc'd.
                         :  *
                         :  * gxacts: output parameter.  Set to NULL when there are no entries,
                         :  * otherwise to a newly palloc'd array of GlobalTransactionData structs
                         :  * copied one by one from TwoPhaseState->prepXacts.
                         :  *
                         :  * Returns the number of entries in *gxacts (0 when it is NULL).
                         :  *
                         :  * The palloc and the memcpy loop both run while TwoPhaseStateLock is held
                         :  * in shared mode; the lock is released on both return paths.
     511                 :  */
     512                 : static int
     513                 : GetPreparedTransactionList(GlobalTransaction *gxacts)
     514               7 : {
     515                 :         GlobalTransaction array;
     516                 :         int                     num;
     517                 :         int                     i;
     518                 : 
     519               7 :         LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
     520                 : 
     521               7 :         if (TwoPhaseState->numPrepXacts == 0)
     522                 :         {
     523               2 :                 LWLockRelease(TwoPhaseStateLock);
     524                 : 
     525               2 :                 *gxacts = NULL;
     526               2 :                 return 0;
     527                 :         }
     528                 : 
     529               5 :         num = TwoPhaseState->numPrepXacts;
     530               5 :         array = (GlobalTransaction) palloc(sizeof(GlobalTransactionData) * num);
     531               5 :         *gxacts = array;
     532              12 :         for (i = 0; i < num; i++)
     533               7 :                 memcpy(array + i, TwoPhaseState->prepXacts[i],
     534                 :                            sizeof(GlobalTransactionData));
     535                 : 
     536               5 :         LWLockRelease(TwoPhaseStateLock);
     537                 : 
     538               5 :         return num;
     539                 : }
     540                 : 
     541                 : 
     542                 : /* Working status for pg_prepared_xact */
     543                 : typedef struct
     544                 : {
     545                 :         GlobalTransaction array;        /* copy filled in by GetPreparedTransactionList, or NULL */
     546                 :         int                     ngxacts;        /* number of entries in array */
     547                 :         int                     currIdx;        /* index of the next array entry to examine */
     548                 : } Working_State;
     549                 : 
     550                 : /*
     551                 :  * pg_prepared_xact
     552                 :  *              Produce a view with one row per prepared transaction.
     553                 :  *
     554                 :  * This function is here so we don't have to export the
     555                 :  * GlobalTransactionData struct definition.
                         :  *
                         :  * On the first call a five-column tuple descriptor (transaction, gid,
                         :  * prepared, ownerid, dbid) is built and a copy of the prepared-transaction
                         :  * array is taken with GetPreparedTransactionList; both are set up after
                         :  * switching to funcctx->multi_call_memory_ctx.  Every call then advances
                         :  * status->currIdx, skipping entries whose valid flag is false, and forms
                         :  * one tuple from the next valid entry.  When the array is NULL or
                         :  * exhausted the function finishes with SRF_RETURN_DONE.
                         :  *
                         :  * NOTE(review): the SRF_* macros are defined elsewhere; presumably
                         :  * SRF_RETURN_NEXT returns from the function, so each call yields at most
                         :  * one row -- confirm against funcapi.h.
     556                 :  */
     557                 : Datum
     558                 : pg_prepared_xact(PG_FUNCTION_ARGS)
     559              14 : {
     560                 :         FuncCallContext *funcctx;
     561                 :         Working_State *status;
     562                 : 
     563              14 :         if (SRF_IS_FIRSTCALL())
     564                 :         {
     565                 :                 TupleDesc       tupdesc;
     566                 :                 MemoryContext oldcontext;
     567                 : 
     568                 :                 /* create a function context for cross-call persistence */
     569               7 :                 funcctx = SRF_FIRSTCALL_INIT();
     570                 : 
     571                 :                 /*
     572                 :                  * Switch to memory context appropriate for multiple function calls
     573                 :                  */
     574               7 :                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     575                 : 
     576                 :                 /* build tupdesc for result tuples */
     577                 :                 /* this had better match pg_prepared_xacts view in system_views.sql */
     578               7 :                 tupdesc = CreateTemplateTupleDesc(5, false);
     579               7 :                 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "transaction",
     580                 :                                                    XIDOID, -1, 0);
     581               7 :                 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "gid",
     582                 :                                                    TEXTOID, -1, 0);
     583               7 :                 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepared",
     584                 :                                                    TIMESTAMPTZOID, -1, 0);
     585               7 :                 TupleDescInitEntry(tupdesc, (AttrNumber) 4, "ownerid",
     586                 :                                                    OIDOID, -1, 0);
     587               7 :                 TupleDescInitEntry(tupdesc, (AttrNumber) 5, "dbid",
     588                 :                                                    OIDOID, -1, 0);
     589                 : 
     590               7 :                 funcctx->tuple_desc = BlessTupleDesc(tupdesc);
     591                 : 
     592                 :                 /*
     593                 :                  * Collect all the 2PC status information that we will format and send
     594                 :                  * out as a result set.
     595                 :                  */
     596               7 :                 status = (Working_State *) palloc(sizeof(Working_State));
     597               7 :                 funcctx->user_fctx = (void *) status;
     598                 : 
     599               7 :                 status->ngxacts = GetPreparedTransactionList(&status->array);
     600               7 :                 status->currIdx = 0;
     601                 : 
     602               7 :                 MemoryContextSwitchTo(oldcontext);
     603                 :         }
     604                 : 
     605              14 :         funcctx = SRF_PERCALL_SETUP();
     606              14 :         status = (Working_State *) funcctx->user_fctx;
     607                 : 
     608              28 :         while (status->array != NULL && status->currIdx < status->ngxacts)
     609                 :         {
     610               7 :                 GlobalTransaction gxact = &status->array[status->currIdx++];
     611                 :                 Datum           values[5];
     612                 :                 bool            nulls[5];
     613                 :                 HeapTuple       tuple;
     614                 :                 Datum           result;
     615                 : 
     616               7 :                 if (!gxact->valid)      /* not fully prepared: skip this entry */
     617               0 :                         continue;
     618                 : 
     619                 :                 /*
     620                 :                  * Form tuple with appropriate data.
     621                 :                  */
     622               7 :                 MemSet(values, 0, sizeof(values));
     623               7 :                 MemSet(nulls, 0, sizeof(nulls));
     624                 : 
     625               7 :                 values[0] = TransactionIdGetDatum(gxact->proc.xid);
     626               7 :                 values[1] = DirectFunctionCall1(textin, CStringGetDatum(gxact->gid));
     627               7 :                 values[2] = TimestampTzGetDatum(gxact->prepared_at);
     628               7 :                 values[3] = ObjectIdGetDatum(gxact->owner);
     629               7 :                 values[4] = ObjectIdGetDatum(gxact->proc.databaseId);
     630                 : 
     631               7 :                 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     632               7 :                 result = HeapTupleGetDatum(tuple);
     633               7 :                 SRF_RETURN_NEXT(funcctx, result);
     634                 :         }
     635                 : 
     636               7 :         SRF_RETURN_DONE(funcctx);
     637                 : }
     638                 : 
     639                 : /*
     640                 :  * TwoPhaseGetDummyProc
     641                 :  *              Get the PGPROC that represents a prepared transaction specified by XID
                         :  *
                         :  * xid: transaction identifier of the prepared transaction.
                         :  *
                         :  * Returns a pointer to the proc field of the matching entry in
                         :  * TwoPhaseState->prepXacts.  If no entry matches, elog(ERROR) is raised.
                         :  *
                         :  * Unlike TransactionIdIsPrepared, the search does not test gxact->valid.
                         :  * The last (xid, result) pair is remembered in function-local statics; a
                         :  * repeated call with the same xid returns the remembered pointer without
                         :  * taking TwoPhaseStateLock or rescanning the array.
                         :  *
                         :  * NOTE(review): with the initial statics, a call with
                         :  * xid == InvalidTransactionId would return NULL through the cache path;
                         :  * presumably callers never pass that value -- confirm.
     642                 :  */
     643                 : PGPROC *
     644                 : TwoPhaseGetDummyProc(TransactionId xid)
     645              22 : {
     646              22 :         PGPROC     *result = NULL;
     647                 :         int                     i;
     648                 : 
     649                 :         static TransactionId cached_xid = InvalidTransactionId;
     650                 :         static PGPROC *cached_proc = NULL;
     651                 : 
     652                 :         /*
     653                 :          * During a recovery, COMMIT PREPARED, or ABORT PREPARED, we'll be called
     654                 :          * repeatedly for the same XID.  We can save work with a simple cache.
     655                 :          */
     656              22 :         if (xid == cached_xid)
     657              15 :                 return cached_proc;
     658                 : 
     659               7 :         LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
     660                 : 
     661               8 :         for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
     662                 :         {
     663               8 :                 GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
     664                 : 
     665               8 :                 if (gxact->proc.xid == xid)
     666                 :                 {
     667               7 :                         result = &gxact->proc;
     668               7 :                         break;
     669                 :                 }
     670                 :         }
     671                 : 
     672               7 :         LWLockRelease(TwoPhaseStateLock);
     673                 : 
     674               7 :         if (result == NULL)                     /* should not happen */
     675               0 :                 elog(ERROR, "failed to find dummy PGPROC for xid %u", xid);
     676                 : 
     677               7 :         cached_xid = xid;       /* cache is updated only after a successful lookup */
     678               7 :         cached_proc = result;
     679                 : 
     680               7 :         return result;
     681                 : }
     682                 : 
     683                 : /************************************************************************/
     684                 : /* State file support                                                                                                   */
     685                 : /************************************************************************/
     686                 : 
     687                 : #define TwoPhaseFilePath(path, xid) \
     688                 :         snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X", xid)    /* writes TWOPHASE_DIR, a slash and xid as 8 zero-padded uppercase hex digits into path */
     689                 : 
     690                 : /*
     691                 :  * 2PC state file format:
     692                 :  *
     693                 :  *      1. TwoPhaseFileHeader
     694                 :  *      2. TransactionId[] (subtransactions)
     695                 :  *      3. RelFileNode[] (files to be deleted at commit)
     696                 :  *      4. RelFileNode[] (files to be deleted at abort)
     697                 :  *      5. TwoPhaseRecordOnDisk
     698                 :  *      6. ...
     699                 :  *      7. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
     700                 :  *      8. CRC32
     701                 :  *
     702                 :  * Each segment except the final CRC32 is MAXALIGN'd.
     703                 :  */
     704                 : 
     705                 : /*
     706                 :  * Header for a 2PC state file
                         :  *
                         :  * StartPrepare fills in every field and stores the struct as the first
                         :  * piece of state data.  total_len is written as 0 there and patched by
                         :  * EndPrepare to records.total_len + sizeof(pg_crc32), i.e. the padded
                         :  * data plus the trailing CRC.  The three counts give the lengths of the
                         :  * arrays that follow the header in the file.
     707                 :  */
     708                 : #define TWOPHASE_MAGIC  0x57F94531              /* format identifier */
     709                 : 
     710                 : typedef struct TwoPhaseFileHeader
     711                 : {
     712                 :         uint32          magic;                  /* format identifier */
     713                 :         uint32          total_len;              /* actual file length */
     714                 :         TransactionId xid;                      /* original transaction XID */
     715                 :         Oid                     database;               /* OID of database it was in */
     716                 :         TimestampTz prepared_at;        /* time of preparation */
     717                 :         Oid                     owner;                  /* user running the transaction */
     718                 :         int32           nsubxacts;              /* number of following subxact XIDs */
     719                 :         int32           ncommitrels;    /* number of delete-on-commit rels */
     720                 :         int32           nabortrels;             /* number of delete-on-abort rels */
     721                 :         char            gid[GIDSIZE];   /* GID for transaction */
     722                 : } TwoPhaseFileHeader;
     723                 : 
     724                 : /*
     725                 :  * Header for each record in a state file
     726                 :  *
     727                 :  * NOTE: len counts only the rmgr data, not the TwoPhaseRecordOnDisk header.
     728                 :  * The rmgr data will be stored starting on a MAXALIGN boundary.
                         :  *
                         :  * RegisterTwoPhaseRecord builds one of these per record and saves it ahead
                         :  * of the rmgr data.  The end sentinel is a header with
                         :  * rmid == TWOPHASE_RM_END_ID and len == 0, with no data following.
     729                 :  */
     730                 : typedef struct TwoPhaseRecordOnDisk
     731                 : {
     732                 :         uint32          len;                    /* length of rmgr data */
     733                 :         TwoPhaseRmgrId rmid;            /* resource manager for this record */
     734                 :         uint16          info;                   /* flag bits for use by rmgr */
     735                 : } TwoPhaseRecordOnDisk;
     736                 : 
     737                 : /*
     738                 :  * During prepare, the state file is assembled in memory before writing it
     739                 :  * to WAL and the actual state file.  We use a chain of XLogRecData blocks
     740                 :  * so that we will be able to pass the state file contents directly to
     741                 :  * XLogInsert.
                         :  *
                         :  * StartPrepare allocates the first block, save_state_data appends data and
                         :  * further blocks, and EndPrepare sets head and tail back to NULL when done.
                         :  * There is a single file-level instance, "records".
     742                 :  */
     743                 : static struct xllist
     744                 : {
     745                 :         XLogRecData *head;                      /* first data block in the chain */
     746                 :         XLogRecData *tail;                      /* last block in chain */
     747                 :         uint32          bytes_free;             /* free bytes left in tail block */
     748                 :         uint32          total_len;              /* total data bytes in chain */
     749                 : }       records;
     750                 : 
     751                 : 
     752                 : /*
     753                 :  * Append a block of data to records data structure.
     754                 :  *
     755                 :  * NB: each block is padded to a MAXALIGN multiple.  This must be
     756                 :  * accounted for when the file is later read!
     757                 :  *
     758                 :  * The data is copied, so the caller is free to modify it afterwards.
                         :  *
                         :  * data: bytes to append; len: number of bytes to copy from data.
                         :  *
                         :  * If the padded length does not fit in the tail block's remaining space, a
                         :  * new block of Max(padlen, 512) bytes is linked onto the chain; whatever
                         :  * space was left in the previous tail block is simply not used.  Only len
                         :  * bytes are copied, but the block length and the running totals advance by
                         :  * the padded length.
                         :  *
                         :  * Requires records.tail to be set up already (StartPrepare does this).
                         :  * NOTE(review): the pad bytes are never written here and the data buffer
                         :  * comes from palloc rather than palloc0, so they are presumably
                         :  * uninitialized when later written out and CRC'd -- confirm.
     759                 :  */
     760                 : static void
     761                 : save_state_data(const void *data, uint32 len)
     762             206 : {
     763             206 :         uint32          padlen = MAXALIGN(len);
     764                 : 
     765             206 :         if (padlen > records.bytes_free)
     766                 :         {
     767               5 :                 records.tail->next = palloc0(sizeof(XLogRecData));
     768               5 :                 records.tail = records.tail->next;
     769               5 :                 records.tail->buffer = InvalidBuffer;
     770               5 :                 records.tail->len = 0;
     771               5 :                 records.tail->next = NULL;
     772                 : 
     773               5 :                 records.bytes_free = Max(padlen, 512);
     774               5 :                 records.tail->data = palloc(records.bytes_free);
     775                 :         }
     776                 : 
     777             206 :         memcpy(((char *) records.tail->data) + records.tail->len, data, len);
     778             206 :         records.tail->len += padlen;
     779             206 :         records.bytes_free -= padlen;
     780             206 :         records.total_len += padlen;
     781             206 : }
     782                 : 
     783                 : /*
     784                 :  * Start preparing a state file.
     785                 :  *
     786                 :  * Initializes data structure and inserts the 2PC file header record.
                         :  *
                         :  * gxact: the global transaction being prepared; its xid, database, owner,
                         :  * prepared_at and gid are copied into the header.
                         :  *
                         :  * After the header, the committed-children XIDs and the delete-on-commit
                         :  * and delete-on-abort RelFileNode arrays are appended, each only when its
                         :  * count is greater than zero, and each array is pfree'd after being saved.
                         :  * The child XIDs are also handed to GXactLoadSubxactData.
     787                 :  */
     788                 : void
     789                 : StartPrepare(GlobalTransaction gxact)
     790               5 : {
     791               5 :         TransactionId xid = gxact->proc.xid;
     792                 :         TwoPhaseFileHeader hdr;
     793                 :         TransactionId *children;
     794                 :         RelFileNode *commitrels;
     795                 :         RelFileNode *abortrels;
     796                 : 
     797                 :         /* Initialize linked list */
     798               5 :         records.head = palloc0(sizeof(XLogRecData));
     799               5 :         records.head->buffer = InvalidBuffer;
     800               5 :         records.head->len = 0;
     801               5 :         records.head->next = NULL;
     802                 : 
     803               5 :         records.bytes_free = Max(sizeof(TwoPhaseFileHeader), 512);
     804               5 :         records.head->data = palloc(records.bytes_free);
     805                 : 
     806               5 :         records.tail = records.head;
     807                 : 
     808               5 :         records.total_len = 0;
     809                 : 
     810                 :         /* Create header */
     811               5 :         hdr.magic = TWOPHASE_MAGIC;
     812               5 :         hdr.total_len = 0;                      /* EndPrepare will fill this in */
     813               5 :         hdr.xid = xid;
     814               5 :         hdr.database = gxact->proc.databaseId;
     815               5 :         hdr.prepared_at = gxact->prepared_at;
     816               5 :         hdr.owner = gxact->owner;
     817               5 :         hdr.nsubxacts = xactGetCommittedChildren(&children);
     818               5 :         hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels, NULL);
     819               5 :         hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels, NULL);
     820               5 :         StrNCpy(hdr.gid, gxact->gid, GIDSIZE);
     821                 : 
     822               5 :         save_state_data(&hdr, sizeof(TwoPhaseFileHeader));
     823                 : 
     824                 :         /* Add the additional info about subxacts and deletable files */
     825               5 :         if (hdr.nsubxacts > 0)
     826                 :         {
     827               1 :                 save_state_data(children, hdr.nsubxacts * sizeof(TransactionId));
     828                 :                 /* While we have the child-xact data, stuff it in the gxact too */
     829               1 :                 GXactLoadSubxactData(gxact, hdr.nsubxacts, children);
     830               1 :                 pfree(children);
     831                 :         }
     832               5 :         if (hdr.ncommitrels > 0)
     833                 :         {
     834               1 :                 save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileNode));
     835               1 :                 pfree(commitrels);
     836                 :         }
     837               5 :         if (hdr.nabortrels > 0)
     838                 :         {
     839               2 :                 save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileNode));
     840               2 :                 pfree(abortrels);
     841                 :         }
     842               5 : }
     843                 : 
     844                 : /*
     845                 :  * Finish preparing state file.
     846                 :  *
     847                 :  * Calculates CRC and writes state file to WAL and in pg_twophase directory.
                         :  *
                         :  * gxact: the global transaction being prepared; its proc.xid names the
                         :  * state file and its prepare_lsn is set here from XLogInsert's result.
                         :  *
                         :  * Sequence, as coded below:
                         :  *   1. append the end-sentinel record and patch total_len into the header
                         :  *      that StartPrepare stored at the start of records.head->data;
                         :  *   2. create the state file (O_CREAT | O_EXCL) and write every block of
                         :  *      the chain, accumulating the CRC over the same bytes;
                         :  *   3. write the complemented CRC as a placeholder, then seek back over it;
                         :  *   4. inside the critical section, with MyProc->inCommit set: insert and
                         :  *      flush the WAL record, overwrite the placeholder with the real CRC,
                         :  *      close the file and call MarkAsPrepared;
                         :  *   5. clear inCommit, leave the critical section and reset records.head
                         :  *      and records.tail to NULL.
                         :  *
                         :  * NOTE(review): on the write/seek failure paths close(fd) is called before
                         :  * ereport(... %m ...); if close() changes errno the reported reason may not
                         :  * be the original failure -- confirm whether errno should be saved.
                         :  * NOTE(review): the XLogRecData blocks and their data buffers allocated by
                         :  * StartPrepare and save_state_data are not pfree'd here; presumably they
                         :  * are reclaimed with their memory context -- confirm.
     848                 :  */
     849                 : void
     850                 : EndPrepare(GlobalTransaction gxact)
     851               5 : {
     852               5 :         TransactionId xid = gxact->proc.xid;
     853                 :         TwoPhaseFileHeader *hdr;
     854                 :         char            path[MAXPGPATH];
     855                 :         XLogRecData *record;
     856                 :         pg_crc32        statefile_crc;
     857                 :         pg_crc32        bogus_crc;
     858                 :         int                     fd;
     859                 : 
     860                 :         /* Add the end sentinel to the list of 2PC records */
     861               5 :         RegisterTwoPhaseRecord(TWOPHASE_RM_END_ID, 0,
     862                 :                                                    NULL, 0);
     863                 : 
     864                 :         /* Go back and fill in total_len in the file header record */
     865               5 :         hdr = (TwoPhaseFileHeader *) records.head->data;
     866                 :         Assert(hdr->magic == TWOPHASE_MAGIC);
     867               5 :         hdr->total_len = records.total_len + sizeof(pg_crc32);
     868                 : 
     869                 :         /*
     870                 :          * Create the 2PC state file.
     871                 :          *
     872                 :          * Note: because we use BasicOpenFile(), we are responsible for ensuring
     873                 :          * the FD gets closed in any error exit path.  Once we get into the
     874                 :          * critical section, though, it doesn't matter since any failure causes
     875                 :          * PANIC anyway.
     876                 :          */
     877               5 :         TwoPhaseFilePath(path, xid);
     878                 : 
     879               5 :         fd = BasicOpenFile(path,
     880                 :                                            O_CREAT | O_EXCL | O_WRONLY | PG_BINARY,
     881                 :                                            S_IRUSR | S_IWUSR);
     882               5 :         if (fd < 0)
     883               0 :                 ereport(ERROR,
     884                 :                                 (errcode_for_file_access(),
     885                 :                                  errmsg("could not create two-phase state file \"%s\": %m",
     886                 :                                                 path)));
     887                 : 
     888                 :         /* Write data to file, and calculate CRC as we pass over it */
     889               5 :         INIT_CRC32(statefile_crc);
     890                 : 
     891              15 :         for (record = records.head; record != NULL; record = record->next)
     892                 :         {
     893              10 :                 COMP_CRC32(statefile_crc, record->data, record->len);
     894              10 :                 if ((write(fd, record->data, record->len)) != record->len)
     895                 :                 {
     896               0 :                         close(fd);
     897               0 :                         ereport(ERROR,
     898                 :                                         (errcode_for_file_access(),
     899                 :                                          errmsg("could not write two-phase state file: %m")));
     900                 :                 }
     901                 :         }
     902                 : 
     903               5 :         FIN_CRC32(statefile_crc);
     904                 : 
     905                 :         /*
     906                 :          * Write a deliberately bogus CRC to the state file; this is just paranoia
     907                 :          * to catch the case where four more bytes will run us out of disk space.
     908                 :          */
     909               5 :         bogus_crc = ~statefile_crc;
     910                 : 
     911               5 :         if ((write(fd, &bogus_crc, sizeof(pg_crc32))) != sizeof(pg_crc32))
     912                 :         {
     913               0 :                 close(fd);
     914               0 :                 ereport(ERROR,
     915                 :                                 (errcode_for_file_access(),
     916                 :                                  errmsg("could not write two-phase state file: %m")));
     917                 :         }
     918                 : 
     919                 :         /* Back up to prepare for rewriting the CRC */
     920               5 :         if (lseek(fd, -((off_t) sizeof(pg_crc32)), SEEK_CUR) < 0)
     921                 :         {
     922               0 :                 close(fd);
     923               0 :                 ereport(ERROR,
     924                 :                                 (errcode_for_file_access(),
     925                 :                                  errmsg("could not seek in two-phase state file: %m")));
     926                 :         }
     927                 : 
     928                 :         /*
     929                 :          * The state file isn't valid yet, because we haven't written the correct
     930                 :          * CRC yet.  Before we do that, insert entry in WAL and flush it to disk.
     931                 :          *
     932                 :          * Between the time we have written the WAL entry and the time we write
     933                 :          * out the correct state file CRC, we have an inconsistency: the xact is
     934                 :          * prepared according to WAL but not according to our on-disk state. We
     935                 :          * use a critical section to force a PANIC if we are unable to complete
     936                 :          * the write --- then, WAL replay should repair the inconsistency.      The
     937                 :          * odds of a PANIC actually occurring should be very tiny given that we
     938                 :          * were able to write the bogus CRC above.
     939                 :          *
     940                 :          * We have to set inCommit here, too; otherwise a checkpoint starting
     941                 :          * immediately after the WAL record is inserted could complete without
     942                 :          * fsync'ing our state file.  (This is essentially the same kind of race
     943                 :          * condition as the COMMIT-to-clog-write case that RecordTransactionCommit
     944                 :          * uses inCommit for; see notes there.)
     945                 :          *
     946                 :          * We save the PREPARE record's location in the gxact for later use by
     947                 :          * CheckPointTwoPhase.
     948                 :          */
     949               5 :         START_CRIT_SECTION();
     950                 : 
     951               5 :         MyProc->inCommit = true;
     952                 : 
     953               5 :         gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE,
     954                 :                                                                         records.head);
     955               5 :         XLogFlush(gxact->prepare_lsn);
     956                 : 
     957                 :         /* If we crash now, we have prepared: WAL replay will fix things */
     958                 : 
     959                 :         /* write correct CRC and close file */
     960               5 :         if ((write(fd, &statefile_crc, sizeof(pg_crc32))) != sizeof(pg_crc32))
     961                 :         {
     962               0 :                 close(fd);
     963               0 :                 ereport(ERROR,
     964                 :                                 (errcode_for_file_access(),
     965                 :                                  errmsg("could not write two-phase state file: %m")));
     966                 :         }
     967                 : 
     968               5 :         if (close(fd) != 0)
     969               0 :                 ereport(ERROR,
     970                 :                                 (errcode_for_file_access(),
     971                 :                                  errmsg("could not close two-phase state file: %m")));
     972                 : 
     973                 :         /*
     974                 :          * Mark the prepared transaction as valid.      As soon as xact.c marks MyProc
     975                 :          * as not running our XID (which it will do immediately after this
     976                 :          * function returns), others can commit/rollback the xact.
     977                 :          *
     978                 :          * NB: a side effect of this is to make a dummy ProcArray entry for the
     979                 :          * prepared XID.  This must happen before we clear the XID from MyProc,
     980                 :          * else there is a window where the XID is not running according to
     981                 :          * TransactionIdIsInProgress, and onlookers would be entitled to assume
     982                 :          * the xact crashed.  Instead we have a window where the same XID appears
     983                 :          * twice in ProcArray, which is OK.
     984                 :          */
     985               5 :         MarkAsPrepared(gxact);
     986                 : 
     987                 :         /*
     988                 :          * Now we can mark ourselves as out of the commit critical section: a
     989                 :          * checkpoint starting after this will certainly see the gxact as a
     990                 :          * candidate for fsyncing.
     991                 :          */
     992               5 :         MyProc->inCommit = false;
     993                 : 
     994               5 :         END_CRIT_SECTION();
     995                 : 
     996               5 :         records.tail = records.head = NULL;
     997               5 : }
     998                 : 
     999                 : /*
    1000                 :  * Register a 2PC record to be written to state file: a TwoPhaseRecordOnDisk header carrying rmid, info and len is passed to save_state_data(), followed by the len bytes at data when len > 0.
    1001                 :  */
    1002                 : void
    1003                 : RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
    1004                 :                                            const void *data, uint32 len)
    1005             101 : {
    1006                 :         TwoPhaseRecordOnDisk record; /* NOTE(review): only rmid, info and len are assigned below, yet sizeof(TwoPhaseRecordOnDisk) bytes are saved; any padding or further members would be saved uninitialized -- confirm against the struct definition */
    1007                 : 
    1008             101 :         record.rmid = rmid;
    1009             101 :         record.info = info;
    1010             101 :         record.len = len;
    1011             101 :         save_state_data(&record, sizeof(TwoPhaseRecordOnDisk)); /* record header first */
    1012             101 :         if (len > 0)
    1013              96 :                 save_state_data(data, len); /* payload only when there is one; data is not touched for len == 0 */
    1014             101 : }
    1015                 : 
    1016                 : 
    1017                 : /*
    1018                 :  * Read and validate the state file for xid.
    1019                 :  * The file is opened read-only, size-checked via fstat(), read with a single read() call and closed again on every path.  Open, stat and read failures emit a WARNING; length, alignment, magic, total_len and CRC mismatches return NULL without a message.
    1020                 :  * If it looks OK (has a valid magic number and CRC), return the palloc'd
    1021                 :  * contents of the file.  Otherwise return NULL.  The returned buffer holds the whole file including the trailing CRC, and the caller frees it with pfree().
    1022                 :  */
    1023                 : static char *
    1024                 : ReadTwoPhaseFile(TransactionId xid)
    1025               5 : {
    1026                 :         char            path[MAXPGPATH];
    1027                 :         char       *buf;
    1028                 :         TwoPhaseFileHeader *hdr;
    1029                 :         int                     fd;
    1030                 :         struct stat stat;
    1031                 :         uint32          crc_offset;
    1032                 :         pg_crc32        calc_crc,
    1033                 :                                 file_crc;
    1034                 : 
    1035               5 :         TwoPhaseFilePath(path, xid);
    1036                 : 
    1037               5 :         fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
    1038               5 :         if (fd < 0)
    1039                 :         {
    1040               0 :                 ereport(WARNING,
    1041                 :                                 (errcode_for_file_access(),
    1042                 :                                  errmsg("could not open two-phase state file \"%s\": %m",
    1043                 :                                                 path)));
    1044               0 :                 return NULL;
    1045                 :         }
    1046                 : 
    1047                 :         /*
    1048                 :          * Check file length.  We can determine a lower bound pretty easily. We
    1049                 :          * set an upper bound mainly to avoid palloc() failure on a corrupt file.
    1050                 :          */
    1051               5 :         if (fstat(fd, &stat))
    1052                 :         {
    1053               0 :                 close(fd);
    1054               0 :                 ereport(WARNING,
    1055                 :                                 (errcode_for_file_access(),
    1056                 :                                  errmsg("could not stat two-phase state file \"%s\": %m",
    1057                 :                                                 path)));
    1058               0 :                 return NULL;
    1059                 :         }
    1060                 : 
    1061               5 :         if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
    1062                 :                                                 MAXALIGN(sizeof(TwoPhaseRecordOnDisk)) +
    1063                 :                                                 sizeof(pg_crc32)) ||
    1064                 :                 stat.st_size > 10000000) /* lower bound: header + one record + CRC; upper bound: 10000000 bytes */
    1065                 :         {
    1066               0 :                 close(fd);
    1067               0 :                 return NULL;
    1068                 :         }
    1069                 : 
    1070               5 :         crc_offset = stat.st_size - sizeof(pg_crc32); /* the stored CRC is read from this offset below, i.e. the last sizeof(pg_crc32) bytes */
    1071               5 :         if (crc_offset != MAXALIGN(crc_offset)) /* everything before the CRC must be MAXALIGN-sized */
    1072                 :         {
    1073               0 :                 close(fd);
    1074               0 :                 return NULL;
    1075                 :         }
    1076                 : 
    1077                 :         /*
    1078                 :          * OK, slurp in the file.
    1079                 :          */
    1080               5 :         buf = (char *) palloc(stat.st_size);
    1081                 : 
    1082               5 :         if (read(fd, buf, stat.st_size) != stat.st_size)
    1083                 :         {
    1084               0 :                 close(fd); /* NOTE(review): this close() runs before ereport() formats %m, so it could overwrite errno from read(); a short read also leaves errno unset -- confirm whether the message can be misleading */
    1085               0 :                 ereport(WARNING,
    1086                 :                                 (errcode_for_file_access(),
    1087                 :                                  errmsg("could not read two-phase state file \"%s\": %m",
    1088                 :                                                 path)));
    1089               0 :                 pfree(buf);
    1090               0 :                 return NULL;
    1091                 :         }
    1092                 : 
    1093               5 :         close(fd);
    1094                 : 
    1095               5 :         hdr = (TwoPhaseFileHeader *) buf;
    1096               5 :         if (hdr->magic != TWOPHASE_MAGIC || hdr->total_len != stat.st_size) /* header must carry the expected magic and a total_len equal to the on-disk size */
    1097                 :         {
    1098               0 :                 pfree(buf);
    1099               0 :                 return NULL;
    1100                 :         }
    1101                 : 
    1102               5 :         INIT_CRC32(calc_crc);
    1103               5 :         COMP_CRC32(calc_crc, buf, crc_offset); /* CRC covers every byte that precedes the stored CRC */
    1104               5 :         FIN_CRC32(calc_crc);
    1105                 : 
    1106               5 :         file_crc = *((pg_crc32 *) (buf + crc_offset));
    1107                 : 
    1108               5 :         if (!EQ_CRC32(calc_crc, file_crc))
    1109                 :         {
    1110               0 :                 pfree(buf);
    1111               0 :                 return NULL;
    1112                 :         }
    1113                 : 
    1114               5 :         return buf;
    1115                 : }
    1116                 : 
    1117                 : 
    1118                 : /*
    1119                 :  * FinishPreparedTransaction: execute COMMIT PREPARED or ROLLBACK PREPARED for the prepared transaction identified by gid; isCommit selects commit (true) or rollback (false).  Raises ERROR when the state file cannot be read and validated.
    1120                 :  */
    1121                 : void
    1122                 : FinishPreparedTransaction(const char *gid, bool isCommit)
    1123               5 : {
    1124                 :         GlobalTransaction gxact;
    1125                 :         TransactionId xid;
    1126                 :         char       *buf;
    1127                 :         char       *bufptr;
    1128                 :         TwoPhaseFileHeader *hdr;
    1129                 :         TransactionId latestXid;
    1130                 :         TransactionId *children;
    1131                 :         RelFileNode *commitrels;
    1132                 :         RelFileNode *abortrels;
    1133                 :         int                     i;
    1134                 : 
    1135                 :         /*
    1136                 :          * Validate the GID, and lock the GXACT to ensure that two backends do not
    1137                 :          * try to commit the same GID at once.
    1138                 :          */
    1139               5 :         gxact = LockGXact(gid, GetUserId());
    1140               5 :         xid = gxact->proc.xid;
    1141                 : 
    1142                 :         /*
    1143                 :          * Read and validate the state file
    1144                 :          */
    1145               5 :         buf = ReadTwoPhaseFile(xid);
    1146               5 :         if (buf == NULL)
    1147               0 :                 ereport(ERROR,
    1148                 :                                 (errcode(ERRCODE_DATA_CORRUPTED),
    1149                 :                                  errmsg("two-phase state file for transaction %u is corrupt",
    1150                 :                                                 xid)));
    1151                 : 
    1152                 :         /*
    1153                 :          * Disassemble the header area: after the MAXALIGNed header come the subxact XIDs, the commit rels and the abort rels, each padded to a MAXALIGN boundary; bufptr ends up pointing at the first 2PC record.
    1154                 :          */
    1155               5 :         hdr = (TwoPhaseFileHeader *) buf;
    1156                 :         Assert(TransactionIdEquals(hdr->xid, xid));
    1157               5 :         bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
    1158               5 :         children = (TransactionId *) bufptr;
    1159               5 :         bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
    1160               5 :         commitrels = (RelFileNode *) bufptr;
    1161               5 :         bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
    1162               5 :         abortrels = (RelFileNode *) bufptr;
    1163               5 :         bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
    1164                 : 
    1165                 :         /* compute latestXid among all children */
    1166               5 :         latestXid = TransactionIdLatest(xid, hdr->nsubxacts, children);
    1167                 : 
    1168                 :         /*
    1169                 :          * The order of operations here is critical: make the XLOG entry for
    1170                 :          * commit or abort, then mark the transaction committed or aborted in
    1171                 :          * pg_clog, then remove its PGPROC from the global ProcArray (which means
    1172                 :          * TransactionIdIsInProgress will stop saying the prepared xact is in
    1173                 :          * progress), then run the post-commit or post-abort callbacks. The
    1174                 :          * callbacks will release the locks the transaction held.
    1175                 :          */
    1176               5 :         if (isCommit)
    1177               3 :                 RecordTransactionCommitPrepared(xid,
    1178                 :                                                                                 hdr->nsubxacts, children,
    1179                 :                                                                                 hdr->ncommitrels, commitrels);
    1180                 :         else
    1181               2 :                 RecordTransactionAbortPrepared(xid,
    1182                 :                                                                            hdr->nsubxacts, children,
    1183                 :                                                                            hdr->nabortrels, abortrels);
    1184                 : 
    1185               5 :         ProcArrayRemove(&gxact->proc, latestXid);
    1186                 : 
    1187                 :         /*
    1188                 :          * In case we fail while running the callbacks, mark the gxact invalid so
    1189                 :          * no one else will try to commit/rollback, and so it can be recycled
    1190                 :          * properly later.  It is still locked by our XID so it won't go away yet.
    1191                 :          *
    1192                 :          * (We assume it's safe to do this without taking TwoPhaseStateLock.)
    1193                 :          */
    1194               5 :         gxact->valid = false;
    1195                 : 
    1196                 :         /*
    1197                 :          * We have to remove any files that were supposed to be dropped. For
    1198                 :          * consistency with the regular xact.c code paths, must do this before
    1199                 :          * releasing locks, so do it before running the callbacks.
    1200                 :          *
    1201                 :          * NB: this code knows that we couldn't be dropping any temp rels ...
    1202                 :          */
    1203               5 :         if (isCommit)
    1204                 :         {
    1205               4 :                 for (i = 0; i < hdr->ncommitrels; i++)
    1206               1 :                         smgrdounlink(smgropen(commitrels[i]), false, false);
    1207                 :         }
    1208                 :         else
    1209                 :         {
    1210               2 :                 for (i = 0; i < hdr->nabortrels; i++)
    1211               0 :                         smgrdounlink(smgropen(abortrels[i]), false, false);
    1212                 :         }
    1213                 : 
    1214                 :         /* And now do the callbacks, starting at the first record that follows the header area */
    1215               5 :         if (isCommit)
    1216               3 :                 ProcessRecords(bufptr, xid, twophase_postcommit_callbacks);
    1217                 :         else
    1218               2 :                 ProcessRecords(bufptr, xid, twophase_postabort_callbacks);
    1219                 : 
    1220                 :         /* Count the prepared xact as committed or aborted */
    1221               5 :         AtEOXact_PgStat(isCommit);
    1222                 : 
    1223                 :         /*
    1224                 :          * And now we can clean up our mess: the state file, the gxact entry, and the buffer returned by ReadTwoPhaseFile.
    1225                 :          */
    1226               5 :         RemoveTwoPhaseFile(xid, true);
    1227                 : 
    1228               5 :         RemoveGXact(gxact);
    1229                 : 
    1230               5 :         pfree(buf);
    1231               5 : }
    1232                 : 
    1233                 : /*
    1234                 :  * Scan a 2PC state file (already read into memory by ReadTwoPhaseFile)
    1235                 :  * and call the indicated callbacks for each 2PC record.  bufptr must point at the first record; scanning stops at the record whose rmid is TWOPHASE_RM_END_ID.  Records whose callbacks[] slot is NULL are stepped over without a call.
    1236                 :  */
    1237                 : static void
    1238                 : ProcessRecords(char *bufptr, TransactionId xid,
    1239                 :                            const TwoPhaseCallback callbacks[])
    1240             101 : {
    1241                 :         for (;;)
    1242                 :         {
    1243             101 :                 TwoPhaseRecordOnDisk *record = (TwoPhaseRecordOnDisk *) bufptr;
    1244                 : 
    1245                 :                 Assert(record->rmid <= TWOPHASE_RM_MAX_ID);
    1246             101 :                 if (record->rmid == TWOPHASE_RM_END_ID)
    1247               5 :                         break;
    1248                 : 
    1249              96 :                 bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk)); /* step over the MAXALIGNed record header; bufptr now addresses the payload */
    1250                 : 
    1251              96 :                 if (callbacks[record->rmid] != NULL)
    1252              96 :                         callbacks[record->rmid] (xid, record->info,
    1253                 :                                                                          (void *) bufptr, record->len);
    1254                 : 
    1255              96 :                 bufptr += MAXALIGN(record->len); /* step over the MAXALIGNed payload to the next record header */
    1256              96 :         }
    1257               5 : }
    1258                 : 
    1259                 : /*
    1260                 :  * Remove the 2PC file for the specified XID.
    1261                 :  *
    1262                 :  * If giveWarning is false, do not complain about file-not-present;
    1263                 :  * this is an expected case during WAL replay.  Any other unlink() failure is always reported, and every report is made at WARNING level.
    1264                 :  */
    1265                 : void
    1266                 : RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
    1267               5 : {
    1268                 :         char            path[MAXPGPATH];
    1269                 : 
    1270               5 :         TwoPhaseFilePath(path, xid);
    1271               5 :         if (unlink(path))
    1272               0 :                 if (errno != ENOENT || giveWarning) /* stay silent only for ENOENT with giveWarning false */
    1273               0 :                         ereport(WARNING,
    1274                 :                                         (errcode_for_file_access(),
    1275                 :                                    errmsg("could not remove two-phase state file \"%s\": %m",
    1276                 :                                                   path)));
    1277               5 : }
    1278                 : 
    1279                 : /*
    1280                 :  * Recreates a state file. This is used in WAL replay.
    1281                 :  *
    1282                 :  * Note: content and len don't include CRC; the CRC is recomputed over content and written after it.  The file is created or truncated, written, fsynced and closed, and a failure at any step raises ERROR.
    1283                 :  */
    1284                 : void
    1285                 : RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
    1286               0 : {
    1287                 :         char            path[MAXPGPATH];
    1288                 :         pg_crc32        statefile_crc;
    1289                 :         int                     fd;
    1290                 : 
    1291                 :         /* Recompute CRC */
    1292               0 :         INIT_CRC32(statefile_crc);
    1293               0 :         COMP_CRC32(statefile_crc, content, len);
    1294               0 :         FIN_CRC32(statefile_crc);
    1295                 : 
    1296               0 :         TwoPhaseFilePath(path, xid);
    1297                 : 
    1298               0 :         fd = BasicOpenFile(path,
    1299                 :                                            O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY,
    1300                 :                                            S_IRUSR | S_IWUSR);
    1301               0 :         if (fd < 0)
    1302               0 :                 ereport(ERROR,
    1303                 :                                 (errcode_for_file_access(),
    1304                 :                                  errmsg("could not recreate two-phase state file \"%s\": %m",
    1305                 :                                                 path)));
    1306                 : 
    1307                 :         /* Write content and CRC */
    1308               0 :         if (write(fd, content, len) != len)
    1309                 :         {
    1310               0 :                 close(fd); /* NOTE(review): applies to both write() checks here -- a short write leaves errno unset, and this close() runs before ereport() formats %m and could overwrite errno; confirm whether the message can be misleading */
    1311               0 :                 ereport(ERROR,
    1312                 :                                 (errcode_for_file_access(),
    1313                 :                                  errmsg("could not write two-phase state file: %m")));
    1314                 :         }
    1315               0 :         if (write(fd, &statefile_crc, sizeof(pg_crc32)) != sizeof(pg_crc32))
    1316                 :         {
    1317               0 :                 close(fd);
    1318               0 :                 ereport(ERROR,
    1319                 :                                 (errcode_for_file_access(),
    1320                 :                                  errmsg("could not write two-phase state file: %m")));
    1321                 :         }
    1322                 : 
    1323                 :         /*
    1324                 :          * We must fsync the file because the end-of-replay checkpoint will not do
    1325                 :          * so, there being no GXACT in shared memory yet to tell it to.
    1326                 :          */
    1327               0 :         if (pg_fsync(fd) != 0)
    1328                 :         {
    1329               0 :                 close(fd);
    1330               0 :                 ereport(ERROR,
    1331                 :                                 (errcode_for_file_access(),
    1332                 :                                  errmsg("could not fsync two-phase state file: %m")));
    1333                 :         }
    1334                 : 
    1335               0 :         if (close(fd) != 0)
    1336               0 :                 ereport(ERROR,
    1337                 :                                 (errcode_for_file_access(),
    1338                 :                                  errmsg("could not close two-phase state file: %m")));
    1339               0 : }
    1340                 : 
    1341                 : /*
    1342                 :  * CheckPointTwoPhase -- handle 2PC component of checkpointing.
    1343                 :  *
    1344                 :  * We must fsync the state file of any GXACT that is valid and has a PREPARE
    1345                 :  * LSN <= the checkpoint's redo horizon.  (If the gxact isn't valid yet or
    1346                 :  * has a later LSN, this checkpoint is not responsible for fsyncing it.)
    1347                 :  *
    1348                 :  * This is deliberately run as late as possible in the checkpoint sequence,
    1349                 :  * because GXACTs ordinarily have short lifespans, and so it is quite
    1350                 :  * possible that GXACTs that were valid at checkpoint start will no longer
    1351                 :  * exist if we wait a little bit.
    1352                 :  *
    1353                 :  * If a GXACT remains valid across multiple checkpoints, it'll be fsynced
    1354                 :  * each time.  This is considered unusual enough that we don't bother to
    1355                 :  * expend any extra code to avoid the redundant fsyncs.  (They should be
    1356                 :  * reasonably cheap anyway, since they won't cause I/O.)
    1357                 :  */
    1358                 : void
    1359                 : CheckPointTwoPhase(XLogRecPtr redo_horizon)
    1360              19 : {
    1361                 :         TransactionId *xids;
    1362                 :         int                     nxids;
    1363                 :         char            path[MAXPGPATH];
    1364                 :         int                     i;
    1365                 : 
    1366                 :         /*
    1367                 :          * We don't want to hold the TwoPhaseStateLock while doing I/O, so we grab
    1368                 :          * it just long enough to make a list of the XIDs that require fsyncing,
    1369                 :          * and then do the I/O afterwards.
    1370                 :          *
    1371                 :          * This approach creates a race condition: someone else could delete a
    1372                 :          * GXACT between the time we release TwoPhaseStateLock and the time we try
    1373                 :          * to open its state file.  We handle this by special-casing ENOENT
    1374                 :          * failures: if we see that, we verify that the GXACT is no longer valid,
    1375                 :          * and if so ignore the failure.
    1376                 :          */
    1377              19 :         if (max_prepared_xacts <= 0)
    1378               0 :                 return;                                 /* nothing to do */
    1379              19 :         xids = (TransactionId *) palloc(max_prepared_xacts * sizeof(TransactionId)); /* presumably numPrepXacts never exceeds max_prepared_xacts, so nxids stays within this array -- confirm */
    1380              19 :         nxids = 0;
    1381                 : 
    1382              19 :         LWLockAcquire(TwoPhaseStateLock, LW_SHARED); /* the loop below only reads the shared array */
    1383                 : 
    1384              19 :         for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
    1385                 :         {
    1386               0 :                 GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
    1387                 : 
    1388               0 :                 if (gxact->valid &&
    1389                 :                         XLByteLE(gxact->prepare_lsn, redo_horizon))
    1390               0 :                         xids[nxids++] = gxact->proc.xid;
    1391                 :         }
    1392                 : 
    1393              19 :         LWLockRelease(TwoPhaseStateLock);
    1394                 : 
    1395              19 :         for (i = 0; i < nxids; i++) /* I/O phase, performed without the lock */
    1396                 :         {
    1397               0 :                 TransactionId xid = xids[i];
    1398                 :                 int                     fd;
    1399                 : 
    1400               0 :                 TwoPhaseFilePath(path, xid);
    1401                 : 
    1402               0 :                 fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0);
    1403               0 :                 if (fd < 0)
    1404                 :                 {
    1405               0 :                         if (errno == ENOENT)
    1406                 :                         {
    1407                 :                                 /* OK if gxact is no longer valid */
    1408               0 :                                 if (!TransactionIdIsPrepared(xid))
    1409               0 :                                         continue;
    1410                 :                                 /* Restore errno in case it was changed */
    1411               0 :                                 errno = ENOENT;
    1412                 :                         }
    1413               0 :                         ereport(ERROR,
    1414                 :                                         (errcode_for_file_access(),
    1415                 :                                          errmsg("could not open two-phase state file \"%s\": %m",
    1416                 :                                                         path)));
    1417                 :                 }
    1418                 : 
    1419               0 :                 if (pg_fsync(fd) != 0)
    1420                 :                 {
    1421               0 :                         close(fd); /* NOTE(review): close() runs before ereport() formats %m and could overwrite errno from pg_fsync() -- confirm */
    1422               0 :                         ereport(ERROR,
    1423                 :                                         (errcode_for_file_access(),
    1424                 :                                          errmsg("could not fsync two-phase state file \"%s\": %m",
    1425                 :                                                         path)));
    1426                 :                 }
    1427                 : 
    1428               0 :                 if (close(fd) != 0)
    1429               0 :                         ereport(ERROR,
    1430                 :                                         (errcode_for_file_access(),
    1431                 :                                          errmsg("could not close two-phase state file \"%s\": %m",
    1432                 :                                                         path)));
    1433                 :         }
    1434                 : 
    1435              19 :         pfree(xids);
    1436                 : }
    1437                 : 
    1438                 : /*
    1439                 :  * PrescanPreparedTransactions
    1440                 :  *
    1441                 :  * Scan the pg_twophase directory and determine the range of valid XIDs
    1442                 :  * present.  This is run during database startup, after we have completed
    1443                 :  * reading WAL.  ShmemVariableCache->nextXid has been set to one more than
    1444                 :  * the highest XID for which evidence exists in WAL.
    1445                 :  *
    1446                 :  * We throw away any prepared xacts with main XID beyond nextXid --- if any
    1447                 :  * are present, it suggests that the DBA has done a PITR recovery to an
    1448                 :  * earlier point in time without cleaning out pg_twophase.      We dare not
    1449                 :  * try to recover such prepared xacts since they likely depend on database
    1450                 :  * state that doesn't exist now.
    1451                 :  *
    1452                 :  * However, we will advance nextXid beyond any subxact XIDs belonging to
    1453                 :  * valid prepared xacts.  We need to do this since subxact commit doesn't
    1454                 :  * write a WAL entry, and so there might be no evidence in WAL of those
    1455                 :  * subxact XIDs.
    1456                 :  *
    1457                 :  * Our other responsibility is to determine and return the oldest valid XID
    1458                 :  * among the prepared xacts (if none, return ShmemVariableCache->nextXid).
    1459                 :  * This is needed to synchronize pg_subtrans startup properly.
    1460                 :  */
    1461                 : TransactionId
    1462                 : PrescanPreparedTransactions(void)
    1463              14 : {
    1464              14 :         TransactionId origNextXid = ShmemVariableCache->nextXid;
    1465              14 :         TransactionId result = origNextXid;
    1466                 :         DIR                *cldir;
    1467                 :         struct dirent *clde;
    1468                 : 
    1469              14 :         cldir = AllocateDir(TWOPHASE_DIR);
    1470              56 :         while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
    1471                 :         {
    1472              28 :                 if (strlen(clde->d_name) == 8 &&
    1473               0 :                         strspn(clde->d_name, "0123456789ABCDEF") == 8)
    1474                 :                 {
    1475                 :                         TransactionId xid;
    1476                 :                         char       *buf;
    1477                 :                         TwoPhaseFileHeader *hdr;
    1478                 :                         TransactionId *subxids;
    1479                 :                         int                     i;
    1480                 : 
    1481               0 :                         xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
    1482                 : 
    1483                 :                         /* Reject XID if too new */
    1484               0 :                         if (TransactionIdFollowsOrEquals(xid, origNextXid))
    1485                 :                         {
    1486               0 :                                 ereport(WARNING,
    1487                 :                                                 (errmsg("removing future two-phase state file \"%s\"",
    1488                 :                                                                 clde->d_name)));
    1489               0 :                                 RemoveTwoPhaseFile(xid, true);
    1490               0 :                                 continue;
    1491                 :                         }
    1492                 : 
    1493                 :                         /*
    1494                 :                          * Note: we can't check if already processed because clog
    1495                 :                          * subsystem isn't up yet.
    1496                 :                          */
    1497                 : 
    1498                 :                         /* Read and validate file */
    1499               0 :                         buf = ReadTwoPhaseFile(xid);
    1500               0 :                         if (buf == NULL)
    1501                 :                         {
    1502               0 :                                 ereport(WARNING,
    1503                 :                                           (errmsg("removing corrupt two-phase state file \"%s\"",
    1504                 :                                                           clde->d_name)));
    1505               0 :                                 RemoveTwoPhaseFile(xid, true);
    1506               0 :                                 continue;
    1507                 :                         }
    1508                 : 
    1509                 :                         /* Deconstruct header */
    1510               0 :                         hdr = (TwoPhaseFileHeader *) buf;
    1511               0 :                         if (!TransactionIdEquals(hdr->xid, xid))
    1512                 :                         {
    1513               0 :                                 ereport(WARNING,
    1514                 :                                           (errmsg("removing corrupt two-phase state file \"%s\"",
    1515                 :                                                           clde->d_name)));
    1516               0 :                                 RemoveTwoPhaseFile(xid, true);
    1517               0 :                                 pfree(buf);
    1518               0 :                                 continue;
    1519                 :                         }
    1520                 : 
    1521                 :                         /*
    1522                 :                          * OK, we think this file is valid.  Incorporate xid into the
    1523                 :                          * running-minimum result.
    1524                 :                          */
    1525               0 :                         if (TransactionIdPrecedes(xid, result))
    1526               0 :                                 result = xid;
    1527                 : 
    1528                 :                         /*
    1529                 :                          * Examine subtransaction XIDs ... they should all follow main
    1530                 :                          * XID, and they may force us to advance nextXid.
    1531                 :                          */
    1532               0 :                         subxids = (TransactionId *)
    1533                 :                                 (buf + MAXALIGN(sizeof(TwoPhaseFileHeader)));
    1534               0 :                         for (i = 0; i < hdr->nsubxacts; i++)
    1535                 :                         {
    1536               0 :                                 TransactionId subxid = subxids[i];
    1537                 : 
    1538                 :                                 Assert(TransactionIdFollows(subxid, xid));
    1539               0 :                                 if (TransactionIdFollowsOrEquals(subxid,
    1540                 :                                                                                                  ShmemVariableCache->nextXid))
    1541                 :                                 {
    1542               0 :                                         ShmemVariableCache->nextXid = subxid;
    1543               0 :                                         TransactionIdAdvance(ShmemVariableCache->nextXid);
    1544                 :                                 }
    1545                 :                         }
    1546                 : 
    1547               0 :                         pfree(buf);
    1548                 :                 }
    1549                 :         }
    1550              14 :         FreeDir(cldir);
    1551                 : 
    1552              14 :         return result;
    1553                 : }
    1554                 : 
    1555                 : /*
    1556                 :  * RecoverPreparedTransactions
    1557                 :  *
    1558                 :  * Scan the pg_twophase directory and reload shared-memory state for each
    1559                 :  * prepared transaction (reacquire locks, etc).  State files found to be stale
    1560                 :  * or corrupt are removed with a WARNING.  This is run during database startup.
    1561                 :  */
    1562                 : void
    1563                 : RecoverPreparedTransactions(void)
    1564              14 : {
    1565                 :         char            dir[MAXPGPATH];
    1566                 :         DIR                *cldir;
    1567                 :         struct dirent *clde;
    1568                 : 
    1569              14 :         snprintf(dir, MAXPGPATH, "%s", TWOPHASE_DIR);
    1570                 :         /* Only entries named with exactly 8 uppercase hex digits are examined */
    1571              14 :         cldir = AllocateDir(dir);
    1572              56 :         while ((clde = ReadDir(cldir, dir)) != NULL)
    1573                 :         {
    1574              28 :                 if (strlen(clde->d_name) == 8 &&
    1575               0 :                         strspn(clde->d_name, "0123456789ABCDEF") == 8)
    1576                 :                 {
    1577                 :                         TransactionId xid;
    1578                 :                         char       *buf;
    1579                 :                         char       *bufptr;
    1580                 :                         TwoPhaseFileHeader *hdr;
    1581                 :                         TransactionId *subxids;
    1582                 :                         GlobalTransaction gxact;
    1583                 :                         int                     i;
    1584                 :                         /* The 8-hex-digit file name is the transaction's XID */
    1585               0 :                         xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
    1586                 : 
    1587                 :                         /* Already processed?  A committed or aborted xact's file is stale */
    1588               0 :                         if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
    1589                 :                         {
    1590               0 :                                 ereport(WARNING,
    1591                 :                                                 (errmsg("removing stale two-phase state file \"%s\"",
    1592                 :                                                                 clde->d_name)));
    1593               0 :                                 RemoveTwoPhaseFile(xid, true);
    1594               0 :                                 continue;
    1595                 :                         }
    1596                 : 
    1597                 :                         /* Read and validate file; a NULL result is treated as corruption */
    1598               0 :                         buf = ReadTwoPhaseFile(xid);
    1599               0 :                         if (buf == NULL)
    1600                 :                         {
    1601               0 :                                 ereport(WARNING,
    1602                 :                                           (errmsg("removing corrupt two-phase state file \"%s\"",
    1603                 :                                                           clde->d_name)));
    1604               0 :                                 RemoveTwoPhaseFile(xid, true);
    1605               0 :                                 continue;
    1606                 :                         }
    1607                 : 
    1608               0 :                         ereport(LOG,
    1609                 :                                         (errmsg("recovering prepared transaction %u", xid)));
    1610                 : 
    1611                 :                         /* Deconstruct header, stepping bufptr past the subxid and rel arrays */
    1612               0 :                         hdr = (TwoPhaseFileHeader *) buf;
    1613                 :                         Assert(TransactionIdEquals(hdr->xid, xid));
    1614               0 :                         bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
    1615               0 :                         subxids = (TransactionId *) bufptr;
    1616               0 :                         bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
    1617               0 :                         bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
    1618               0 :                         bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
    1619                 : 
    1620                 :                         /*
    1621                 :                          * Reconstruct subtrans state for the transaction --- needed
    1622                 :                          * because pg_subtrans is not preserved over a restart.  Note that
    1623                 :                          * we are linking all the subtransactions directly to the
    1624                 :                          * top-level XID; there may originally have been a more complex
    1625                 :                          * hierarchy, but there's no need to restore that exactly.
    1626                 :                          */
    1627               0 :                         for (i = 0; i < hdr->nsubxacts; i++)
    1628               0 :                                 SubTransSetParent(subxids[i], xid);
    1629                 : 
    1630                 :                         /*
    1631                 :                          * Recreate its GXACT and dummy PGPROC
    1632                 :                          *
    1633                 :                          * Note: since we don't have the PREPARE record's WAL location at
    1634                 :                          * hand, we leave prepare_lsn zeroes.  This means the GXACT will
    1635                 :                          * be fsync'd on every future checkpoint.  We assume this
    1636                 :                          * situation is infrequent enough that the performance cost is
    1637                 :                          * negligible (especially since we know the state file has already
    1638                 :                          * been fsynced).
    1639                 :                          */
    1640               0 :                         gxact = MarkAsPreparing(xid, hdr->gid,
    1641                 :                                                                         hdr->prepared_at,
    1642                 :                                                                         hdr->owner, hdr->database);
    1643               0 :                         GXactLoadSubxactData(gxact, hdr->nsubxacts, subxids);
    1644               0 :                         MarkAsPrepared(gxact);
    1645                 : 
    1646                 :                         /*
    1647                 :                          * Recover other state (notably locks) using resource managers;
    1648                 :                          * bufptr now points just past the header and the three arrays.
    1649                 :                          */
    1649               0 :                         ProcessRecords(bufptr, xid, twophase_recover_callbacks);
    1650                 : 
    1651               0 :                         pfree(buf);
    1652                 :                 }
    1653                 :         }
    1654              14 :         FreeDir(cldir);
    1655              14 : }
    1656                 : 
    1657                 : /*
    1658                 :  *      RecordTransactionCommitPrepared
    1659                 :  *
    1660                 :  * This is basically the same as RecordTransactionCommit: in particular,
    1661                 :  * we must set the inCommit flag to avoid a race condition.  The record holds
    1662                 :  * xid, then the nrels entries of rels, then the nchildren child XIDs.
    1663                 :  * We know the transaction made at least one XLOG entry (its PREPARE),
    1664                 :  * so it is never possible to optimize out the commit record.
    1665                 :  */
    1666                 : static void
    1667                 : RecordTransactionCommitPrepared(TransactionId xid,
    1668                 :                                                                 int nchildren,
    1669                 :                                                                 TransactionId *children,
    1670                 :                                                                 int nrels,
    1671                 :                                                                 RelFileNode *rels)
    1672               3 : {
    1673                 :         XLogRecData rdata[3];
    1674               3 :         int                     lastrdata = 0;
    1675                 :         xl_xact_commit_prepared xlrec;
    1676                 :         XLogRecPtr      recptr;
    1677                 : 
    1678               3 :         START_CRIT_SECTION();
    1679                 : 
    1680                 :         /* See notes in RecordTransactionCommit */
    1681               3 :         MyProc->inCommit = true;
    1682                 : 
    1683                 :         /* Emit the XLOG commit record: fixed part first, then optional arrays */
    1684               3 :         xlrec.xid = xid;
    1685               3 :         xlrec.crec.xact_time = GetCurrentTimestamp();
    1686               3 :         xlrec.crec.nrels = nrels;
    1687               3 :         xlrec.crec.nsubxacts = nchildren;
    1688               3 :         rdata[0].data = (char *) (&xlrec);
    1689               3 :         rdata[0].len = MinSizeOfXactCommitPrepared;
    1690               3 :         rdata[0].buffer = InvalidBuffer;
    1691                 :         /* dump rels to delete */
    1692               3 :         if (nrels > 0)
    1693                 :         {
    1694               1 :                 rdata[0].next = &(rdata[1]);
    1695               1 :                 rdata[1].data = (char *) rels;
    1696               1 :                 rdata[1].len = nrels * sizeof(RelFileNode);
    1697               1 :                 rdata[1].buffer = InvalidBuffer;
    1698               1 :                 lastrdata = 1;
    1699                 :         }
    1700                 :         /* dump committed child Xids, chained after whichever entry is last so far */
    1701               3 :         if (nchildren > 0)
    1702                 :         {
    1703               1 :                 rdata[lastrdata].next = &(rdata[2]);
    1704               1 :                 rdata[2].data = (char *) children;
    1705               1 :                 rdata[2].len = nchildren * sizeof(TransactionId);
    1706               1 :                 rdata[2].buffer = InvalidBuffer;
    1707               1 :                 lastrdata = 2;
    1708                 :         }
    1709               3 :         rdata[lastrdata].next = NULL;
    1710                 : 
    1711               3 :         recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED, rdata);
    1712                 : 
    1713                 :         /*
    1714                 :          * We don't currently try to sleep before flush here ... nor is there any
    1715                 :          * support for async commit of a prepared xact (the very idea is probably
    1716                 :          * a contradiction)
    1717                 :          */
    1718                 : 
    1719                 :         /* Flush XLOG to disk */
    1720               3 :         XLogFlush(recptr);
    1721                 : 
    1722                 :         /* Mark the transaction committed in pg_clog */
    1723               3 :         TransactionIdCommit(xid);
    1724                 :         /* to avoid race conditions, the parent must commit first */
    1725               3 :         TransactionIdCommitTree(nchildren, children);
    1726                 : 
    1727                 :         /* Checkpoint can proceed now */
    1728               3 :         MyProc->inCommit = false;
    1729                 : 
    1730               3 :         END_CRIT_SECTION();
    1731               3 : }
    1732                 : 
    1733                 : /*
    1734                 :  *      RecordTransactionAbortPrepared
    1735                 :  *
    1736                 :  * This is basically the same as RecordTransactionAbort.  The record holds
    1737                 :  * xid, then the nrels entries of rels, then the nchildren child XIDs.
    1738                 :  * We know the transaction made at least one XLOG entry (its PREPARE),
    1739                 :  * so it is never possible to optimize out the abort record.
    1740                 :  */
    1741                 : static void
    1742                 : RecordTransactionAbortPrepared(TransactionId xid,
    1743                 :                                                            int nchildren,
    1744                 :                                                            TransactionId *children,
    1745                 :                                                            int nrels,
    1746                 :                                                            RelFileNode *rels)
    1747               2 : {
    1748                 :         XLogRecData rdata[3];
    1749               2 :         int                     lastrdata = 0;
    1750                 :         xl_xact_abort_prepared xlrec;
    1751                 :         XLogRecPtr      recptr;
    1752                 : 
    1753                 :         /*
    1754                 :          * Catch the scenario where we aborted partway through
    1755                 :          * RecordTransactionCommitPrepared ...
    1756                 :          */
    1757               2 :         if (TransactionIdDidCommit(xid))
    1758               0 :                 elog(PANIC, "cannot abort transaction %u, it was already committed",
    1759                 :                          xid);
    1760                 : 
    1761               2 :         START_CRIT_SECTION();
    1762                 : 
    1763                 :         /* Emit the XLOG abort record: fixed part first, then optional arrays */
    1764               2 :         xlrec.xid = xid;
    1765               2 :         xlrec.arec.xact_time = GetCurrentTimestamp();
    1766               2 :         xlrec.arec.nrels = nrels;
    1767               2 :         xlrec.arec.nsubxacts = nchildren;
    1768               2 :         rdata[0].data = (char *) (&xlrec);
    1769               2 :         rdata[0].len = MinSizeOfXactAbortPrepared;
    1770               2 :         rdata[0].buffer = InvalidBuffer;
    1771                 :         /* dump rels to delete */
    1772               2 :         if (nrels > 0)
    1773                 :         {
    1774               0 :                 rdata[0].next = &(rdata[1]);
    1775               0 :                 rdata[1].data = (char *) rels;
    1776               0 :                 rdata[1].len = nrels * sizeof(RelFileNode);
    1777               0 :                 rdata[1].buffer = InvalidBuffer;
    1778               0 :                 lastrdata = 1;
    1779                 :         }
    1780                 :         /* dump child Xids (marked aborted below), chained after the last entry so far */
    1781               2 :         if (nchildren > 0)
    1782                 :         {
    1783               0 :                 rdata[lastrdata].next = &(rdata[2]);
    1784               0 :                 rdata[2].data = (char *) children;
    1785               0 :                 rdata[2].len = nchildren * sizeof(TransactionId);
    1786               0 :                 rdata[2].buffer = InvalidBuffer;
    1787               0 :                 lastrdata = 2;
    1788                 :         }
    1789               2 :         rdata[lastrdata].next = NULL;
    1790                 : 
    1791               2 :         recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED, rdata);
    1792                 : 
    1793                 :         /* Always flush, since we're about to remove the 2PC state file */
    1794               2 :         XLogFlush(recptr);
    1795                 : 
    1796                 :         /*
    1797                 :          * Mark the transaction aborted in clog.  This is not absolutely necessary
    1798                 :          * but we may as well do it while we are here.
    1799                 :          */
    1800               2 :         TransactionIdAbort(xid);
    1801               2 :         TransactionIdAbortTree(nchildren, children);
    1802                 : 
    1803               2 :         END_CRIT_SECTION();
    1804               2 : }

Generated by: LTP GCOV extension version 1.5