1 : /*-------------------------------------------------------------------------
2 : *
3 : * multixact.c
4 : * PostgreSQL multi-transaction-log manager
5 : *
6 : * The pg_multixact manager is a pg_clog-like manager that stores an array
7 : * of TransactionIds for each MultiXactId. It is a fundamental part of the
8 : * shared-row-lock implementation. A share-locked tuple stores a
9 : * MultiXactId in its Xmax, and a transaction that needs to wait for the
10 : * tuple to be unlocked can sleep on the potentially-several TransactionIds
11 : * that compose the MultiXactId.
12 : *
13 : * We use two SLRU areas, one for storing the offsets at which the data
14 : * starts for each MultiXactId in the other one. This trick allows us to
15 : * store variable length arrays of TransactionIds. (We could alternatively
16 : * use one area containing counts and TransactionIds, with valid MultiXactId
17 : * values pointing at slots containing counts; but that way seems less robust
18 : * since it would get completely confused if someone inquired about a bogus
19 : * MultiXactId that pointed to an intermediate slot containing an XID.)
20 : *
21 : * XLOG interactions: this module generates an XLOG record whenever a new
22 : * OFFSETs or MEMBERs page is initialized to zeroes, as well as an XLOG record
23 : * whenever a new MultiXactId is defined. This allows us to completely
24 : * rebuild the data entered since the last checkpoint during XLOG replay.
25 : * Because this is possible, we need not follow the normal rule of
26 : * "write WAL before data"; the only correctness guarantee needed is that
27 : * we flush and sync all dirty OFFSETs and MEMBERs pages to disk before a
28 : * checkpoint is considered complete. If a page does make it to disk ahead
29 : * of corresponding WAL records, it will be forcibly zeroed before use anyway.
30 : * Therefore, we don't need to mark our pages with LSN information; we have
31 : * enough synchronization already.
32 : *
33 : * Like clog.c, and unlike subtrans.c, we have to preserve state across
34 : * crashes and ensure that MXID and offset numbering increases monotonically
35 : * across a crash. We do this in the same way as it's done for transaction
36 : * IDs: the WAL record is guaranteed to contain evidence of every MXID we
37 : * could need to worry about, and we just make sure that at the end of
38 : * replay, the next-MXID and next-offset counters are at least as large as
39 : * anything we saw during replay.
40 : *
41 : *
42 : * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
43 : * Portions Copyright (c) 1994, Regents of the University of California
44 : *
45 : * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.26 2007/11/15 21:14:32 momjian Exp $
46 : *
47 : *-------------------------------------------------------------------------
48 : */
49 : #include "postgres.h"
50 :
51 : #include "access/multixact.h"
52 : #include "access/slru.h"
53 : #include "access/transam.h"
54 : #include "access/xact.h"
55 : #include "miscadmin.h"
56 : #include "storage/backendid.h"
57 : #include "storage/lmgr.h"
58 : #include "utils/memutils.h"
59 : #include "storage/procarray.h"
60 :
61 :
/*
 * Page geometry for the two MultiXact SLRU areas.  A page is BLCKSZ bytes,
 * the same block size used everywhere else in Postgres.
 *
 * Note: MultiXactOffsets and TransactionIds are both 32 bits wide and wrap
 * around at 0xFFFFFFFF.  MultiXact page numbering therefore also wraps
 * around, at 0xFFFFFFFF/MULTIXACT_*_PER_PAGE, and segment numbering wraps
 * at that value divided by the number of pages in an SLRU segment.  This
 * module needs to take explicit notice of that fact only when comparing
 * segment and page numbers in TruncateMultiXact
 * (see MultiXact{Offset,Member}PagePrecedes).
 */

/* Entries per page: four bytes per offset, and four bytes per member */
#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
#define MULTIXACT_MEMBERS_PER_PAGE (BLCKSZ / sizeof(TransactionId))

/* Offsets page holding a MultiXactId's entry, and the entry's index in it */
#define MultiXactIdToOffsetPage(multi) \
	((multi) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
#define MultiXactIdToOffsetEntry(multi) \
	((multi) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)

/* Members page holding a given member offset, and the entry's index in it */
#define MXOffsetToMemberPage(offset) \
	((offset) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
#define MXOffsetToMemberEntry(offset) \
	((offset) % (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
88 :
89 :
90 : /*
91 : * Links to shared-memory data structures for MultiXact control
92 : */
93 : static SlruCtlData MultiXactOffsetCtlData;
94 : static SlruCtlData MultiXactMemberCtlData;
95 :
96 : #define MultiXactOffsetCtl (&MultiXactOffsetCtlData)
97 : #define MultiXactMemberCtl (&MultiXactMemberCtlData)
98 :
99 : /*
100 : * MultiXact state shared across all backends. All this state is protected
101 : * by MultiXactGenLock. (We also use MultiXactOffsetControlLock and
102 : * MultiXactMemberControlLock to guard accesses to the two sets of SLRU
103 : * buffers. For concurrency's sake, we avoid holding more than one of these
104 : * locks at a time.)
105 : */
106 : typedef struct MultiXactStateData
107 : {
108 : /* next-to-be-assigned MultiXactId */
109 : MultiXactId nextMXact;
110 :
111 : /* next-to-be-assigned offset */
112 : MultiXactOffset nextOffset;
113 :
114 : /* the Offset SLRU area was last truncated at this MultiXactId */
115 : MultiXactId lastTruncationPoint;
116 :
117 : /*
118 : * Per-backend data starts here. We have two arrays stored in the area
119 : * immediately following the MultiXactStateData struct. Each is indexed by
120 : * BackendId. (Note: valid BackendIds run from 1 to MaxBackends; element
121 : * zero of each array is never used.)
122 : *
123 : * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
124 : * transaction(s) could possibly be a member of, or InvalidMultiXactId
125 : * when the backend has no live transaction that could possibly be a
126 : * member of a MultiXact. Each backend sets its entry to the current
127 : * nextMXact counter just before first acquiring a shared lock in a given
128 : * transaction, and clears it at transaction end. (This works because only
129 : * during or after acquiring a shared lock could an XID possibly become a
130 : * member of a MultiXact, and that MultiXact would have to be created
131 : * during or after the lock acquisition.)
132 : *
133 : * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
134 : * current transaction(s) think is potentially live, or InvalidMultiXactId
135 : * when not in a transaction or not in a transaction that's paid any
136 : * attention to MultiXacts yet. This is computed when first needed in a
137 : * given transaction, and cleared at transaction end. We can compute it
138 : * as the minimum of the valid OldestMemberMXactId[] entries at the time
139 : * we compute it (using nextMXact if none are valid). Each backend is
140 : * required not to attempt to access any SLRU data for MultiXactIds older
141 : * than its own OldestVisibleMXactId[] setting; this is necessary because
142 : * the checkpointer could truncate away such data at any instant.
143 : *
144 : * The checkpointer can compute the safe truncation point as the oldest
145 : * valid value among all the OldestMemberMXactId[] and
146 : * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
147 : * Clearly, it is not possible for any later-computed OldestVisibleMXactId
148 : * value to be older than this, and so there is no risk of truncating data
149 : * that is still needed.
150 : */
151 : MultiXactId perBackendXactIds[1]; /* VARIABLE LENGTH ARRAY */
152 : } MultiXactStateData;
153 :
154 : /* Pointers to the state data in shared memory */
155 : static MultiXactStateData *MultiXactState;
156 : static MultiXactId *OldestMemberMXactId;
157 : static MultiXactId *OldestVisibleMXactId;
158 :
159 :
160 : /*
161 : * Definitions for the backend-local MultiXactId cache.
162 : *
163 : * We use this cache to store known MultiXacts, so we don't need to go to
164 : * SLRU areas everytime.
165 : *
166 : * The cache lasts for the duration of a single transaction, the rationale
167 : * for this being that most entries will contain our own TransactionId and
168 : * so they will be uninteresting by the time our next transaction starts.
169 : * (XXX not clear that this is correct --- other members of the MultiXact
170 : * could hang around longer than we did. However, it's not clear what a
171 : * better policy for flushing old cache entries would be.)
172 : *
173 : * We allocate the cache entries in a memory context that is deleted at
174 : * transaction end, so we don't need to do retail freeing of entries.
175 : */
176 : typedef struct mXactCacheEnt
177 : {
178 : struct mXactCacheEnt *next;
179 : MultiXactId multi;
180 : int nxids;
181 : TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */
182 : } mXactCacheEnt;
183 :
184 : static mXactCacheEnt *MXactCache = NULL;
185 : static MemoryContext MXactContext = NULL;
186 :
187 :
/*
 * Debug logging wrappers.  Each one expands to nothing unless
 * MULTIXACT_DEBUG is defined, in which case it passes its arguments
 * straight to elog().  The digit in the name is the argument count.
 */
#ifndef MULTIXACT_DEBUG
#define debug_elog2(a,b)
#define debug_elog3(a,b,c)
#define debug_elog4(a,b,c,d)
#define debug_elog5(a,b,c,d,e)
#else
#define debug_elog2(a,b) elog(a,b)
#define debug_elog3(a,b,c) elog(a,b,c)
#define debug_elog4(a,b,c,d) elog(a,b,c,d)
#define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
#endif
199 :
200 : /* internal MultiXactId management */
201 : static void MultiXactIdSetOldestVisible(void);
202 : static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids);
203 : static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
204 : int nxids, TransactionId *xids);
205 : static MultiXactId GetNewMultiXactId(int nxids, MultiXactOffset *offset);
206 :
207 : /* MultiXact cache management */
208 : static MultiXactId mXactCacheGetBySet(int nxids, TransactionId *xids);
209 : static int mXactCacheGetById(MultiXactId multi, TransactionId **xids);
210 : static void mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids);
211 : static int xidComparator(const void *arg1, const void *arg2);
212 :
213 : #ifdef MULTIXACT_DEBUG
214 : static char *mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids);
215 : #endif
216 :
217 : /* management of SLRU infrastructure */
218 : static int ZeroMultiXactOffsetPage(int pageno, bool writeXlog);
219 : static int ZeroMultiXactMemberPage(int pageno, bool writeXlog);
220 : static bool MultiXactOffsetPagePrecedes(int page1, int page2);
221 : static bool MultiXactMemberPagePrecedes(int page1, int page2);
222 : static bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
223 : static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
224 : MultiXactOffset offset2);
225 : static void ExtendMultiXactOffset(MultiXactId multi);
226 : static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
227 : static void TruncateMultiXact(void);
228 : static void WriteMZeroPageXlogRec(int pageno, uint8 info);
229 :
230 :
231 : /*
232 : * MultiXactIdCreate
233 : * Construct a MultiXactId representing two TransactionIds.
234 : *
235 : * The two XIDs must be different.
236 : *
237 : * NB - we don't worry about our local MultiXactId cache here, because that
238 : * is handled by the lower-level routines.
239 : */
240 : MultiXactId
241 : MultiXactIdCreate(TransactionId xid1, TransactionId xid2)
242 0 : {
243 : MultiXactId newMulti;
244 : TransactionId xids[2];
245 :
246 : AssertArg(TransactionIdIsValid(xid1));
247 : AssertArg(TransactionIdIsValid(xid2));
248 :
249 : Assert(!TransactionIdEquals(xid1, xid2));
250 :
251 : /*
252 : * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
253 : * are still running. In typical usage, xid2 will be our own XID and the
254 : * caller just did a check on xid1, so it'd be wasted effort.
255 : */
256 :
257 0 : xids[0] = xid1;
258 0 : xids[1] = xid2;
259 :
260 0 : newMulti = CreateMultiXactId(2, xids);
261 :
262 : debug_elog5(DEBUG2, "Create: returning %u for %u, %u",
263 : newMulti, xid1, xid2);
264 :
265 0 : return newMulti;
266 : }
267 :
268 : /*
269 : * MultiXactIdExpand
270 : * Add a TransactionId to a pre-existing MultiXactId.
271 : *
272 : * If the TransactionId is already a member of the passed MultiXactId,
273 : * just return it as-is.
274 : *
275 : * Note that we do NOT actually modify the membership of a pre-existing
276 : * MultiXactId; instead we create a new one. This is necessary to avoid
277 : * a race condition against MultiXactIdWait (see notes there).
278 : *
279 : * NB - we don't worry about our local MultiXactId cache here, because that
280 : * is handled by the lower-level routines.
281 : */
282 : MultiXactId
283 : MultiXactIdExpand(MultiXactId multi, TransactionId xid)
284 0 : {
285 : MultiXactId newMulti;
286 : TransactionId *members;
287 : TransactionId *newMembers;
288 : int nmembers;
289 : int i;
290 : int j;
291 :
292 : AssertArg(MultiXactIdIsValid(multi));
293 : AssertArg(TransactionIdIsValid(xid));
294 :
295 : debug_elog4(DEBUG2, "Expand: received multi %u, xid %u",
296 : multi, xid);
297 :
298 0 : nmembers = GetMultiXactIdMembers(multi, &members);
299 :
300 0 : if (nmembers < 0)
301 : {
302 : /*
303 : * The MultiXactId is obsolete. This can only happen if all the
304 : * MultiXactId members stop running between the caller checking and
305 : * passing it to us. It would be better to return that fact to the
306 : * caller, but it would complicate the API and it's unlikely to happen
307 : * too often, so just deal with it by creating a singleton MultiXact.
308 : */
309 0 : newMulti = CreateMultiXactId(1, &xid);
310 :
311 : debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
312 : multi, newMulti);
313 0 : return newMulti;
314 : }
315 :
316 : /*
317 : * If the TransactionId is already a member of the MultiXactId, just
318 : * return the existing MultiXactId.
319 : */
320 0 : for (i = 0; i < nmembers; i++)
321 : {
322 0 : if (TransactionIdEquals(members[i], xid))
323 : {
324 : debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
325 : xid, multi);
326 0 : pfree(members);
327 0 : return multi;
328 : }
329 : }
330 :
331 : /*
332 : * Determine which of the members of the MultiXactId are still running,
333 : * and use them to create a new one. (Removing dead members is just an
334 : * optimization, but a useful one. Note we have the same race condition
335 : * here as above: j could be 0 at the end of the loop.)
336 : */
337 0 : newMembers = (TransactionId *)
338 : palloc(sizeof(TransactionId) * (nmembers + 1));
339 :
340 0 : for (i = 0, j = 0; i < nmembers; i++)
341 : {
342 0 : if (TransactionIdIsInProgress(members[i]))
343 0 : newMembers[j++] = members[i];
344 : }
345 :
346 0 : newMembers[j++] = xid;
347 0 : newMulti = CreateMultiXactId(j, newMembers);
348 :
349 0 : pfree(members);
350 0 : pfree(newMembers);
351 :
352 : debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
353 :
354 0 : return newMulti;
355 : }
356 :
357 : /*
358 : * MultiXactIdIsRunning
359 : * Returns whether a MultiXactId is "running".
360 : *
361 : * We return true if at least one member of the given MultiXactId is still
362 : * running. Note that a "false" result is certain not to change,
363 : * because it is not legal to add members to an existing MultiXactId.
364 : */
365 : bool
366 : MultiXactIdIsRunning(MultiXactId multi)
367 0 : {
368 : TransactionId *members;
369 : int nmembers;
370 : int i;
371 :
372 : debug_elog3(DEBUG2, "IsRunning %u?", multi);
373 :
374 0 : nmembers = GetMultiXactIdMembers(multi, &members);
375 :
376 0 : if (nmembers < 0)
377 : {
378 : debug_elog2(DEBUG2, "IsRunning: no members");
379 0 : return false;
380 : }
381 :
382 : /*
383 : * Checking for myself is cheap compared to looking in shared memory, so
384 : * first do the equivalent of MultiXactIdIsCurrent(). This is not needed
385 : * for correctness, it's just a fast path.
386 : */
387 0 : for (i = 0; i < nmembers; i++)
388 : {
389 0 : if (TransactionIdIsCurrentTransactionId(members[i]))
390 : {
391 : debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
392 0 : pfree(members);
393 0 : return true;
394 : }
395 : }
396 :
397 : /*
398 : * This could be made faster by having another entry point in procarray.c,
399 : * walking the PGPROC array only once for all the members. But in most
400 : * cases nmembers should be small enough that it doesn't much matter.
401 : */
402 0 : for (i = 0; i < nmembers; i++)
403 : {
404 0 : if (TransactionIdIsInProgress(members[i]))
405 : {
406 : debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
407 : i, members[i]);
408 0 : pfree(members);
409 0 : return true;
410 : }
411 : }
412 :
413 0 : pfree(members);
414 :
415 : debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
416 :
417 0 : return false;
418 : }
419 :
420 : /*
421 : * MultiXactIdIsCurrent
422 : * Returns true if the current transaction is a member of the MultiXactId.
423 : *
424 : * We return true if any live subtransaction of the current top-level
425 : * transaction is a member. This is appropriate for the same reason that a
426 : * lock held by any such subtransaction is globally equivalent to a lock
427 : * held by the current subtransaction: no such lock could be released without
428 : * aborting this subtransaction, and hence releasing its locks. So it's not
429 : * necessary to add the current subxact to the MultiXact separately.
430 : */
431 : bool
432 : MultiXactIdIsCurrent(MultiXactId multi)
433 0 : {
434 0 : bool result = false;
435 : TransactionId *members;
436 : int nmembers;
437 : int i;
438 :
439 0 : nmembers = GetMultiXactIdMembers(multi, &members);
440 :
441 0 : if (nmembers < 0)
442 0 : return false;
443 :
444 0 : for (i = 0; i < nmembers; i++)
445 : {
446 0 : if (TransactionIdIsCurrentTransactionId(members[i]))
447 : {
448 0 : result = true;
449 0 : break;
450 : }
451 : }
452 :
453 0 : pfree(members);
454 :
455 0 : return result;
456 : }
457 :
458 : /*
459 : * MultiXactIdSetOldestMember
460 : * Save the oldest MultiXactId this transaction could be a member of.
461 : *
462 : * We set the OldestMemberMXactId for a given transaction the first time
463 : * it's going to acquire a shared lock. We need to do this even if we end
464 : * up using a TransactionId instead of a MultiXactId, because there is a
465 : * chance that another transaction would add our XID to a MultiXactId.
466 : *
467 : * The value to set is the next-to-be-assigned MultiXactId, so this is meant
468 : * to be called just before acquiring a shared lock.
469 : */
470 : void
471 : MultiXactIdSetOldestMember(void)
472 111 : {
473 111 : if (!MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]))
474 : {
475 : MultiXactId nextMXact;
476 :
477 : /*
478 : * You might think we don't need to acquire a lock here, since
479 : * fetching and storing of TransactionIds is probably atomic, but in
480 : * fact we do: suppose we pick up nextMXact and then lose the CPU for
481 : * a long time. Someone else could advance nextMXact, and then
482 : * another someone else could compute an OldestVisibleMXactId that
483 : * would be after the value we are going to store when we get control
484 : * back. Which would be wrong.
485 : */
486 105 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
487 :
488 : /*
489 : * We have to beware of the possibility that nextMXact is in the
490 : * wrapped-around state. We don't fix the counter itself here, but we
491 : * must be sure to store a valid value in our array entry.
492 : */
493 105 : nextMXact = MultiXactState->nextMXact;
494 105 : if (nextMXact < FirstMultiXactId)
495 0 : nextMXact = FirstMultiXactId;
496 :
497 105 : OldestMemberMXactId[MyBackendId] = nextMXact;
498 :
499 105 : LWLockRelease(MultiXactGenLock);
500 :
501 : debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
502 : MyBackendId, nextMXact);
503 : }
504 111 : }
505 :
506 : /*
507 : * MultiXactIdSetOldestVisible
508 : * Save the oldest MultiXactId this transaction considers possibly live.
509 : *
510 : * We set the OldestVisibleMXactId for a given transaction the first time
511 : * it's going to inspect any MultiXactId. Once we have set this, we are
512 : * guaranteed that the checkpointer won't truncate off SLRU data for
513 : * MultiXactIds at or after our OldestVisibleMXactId.
514 : *
515 : * The value to set is the oldest of nextMXact and all the valid per-backend
516 : * OldestMemberMXactId[] entries. Because of the locking we do, we can be
517 : * certain that no subsequent call to MultiXactIdSetOldestMember can set
518 : * an OldestMemberMXactId[] entry older than what we compute here. Therefore
519 : * there is no live transaction, now or later, that can be a member of any
520 : * MultiXactId older than the OldestVisibleMXactId we compute here.
521 : */
522 : static void
523 : MultiXactIdSetOldestVisible(void)
524 0 : {
525 0 : if (!MultiXactIdIsValid(OldestVisibleMXactId[MyBackendId]))
526 : {
527 : MultiXactId oldestMXact;
528 : int i;
529 :
530 0 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
531 :
532 : /*
533 : * We have to beware of the possibility that nextMXact is in the
534 : * wrapped-around state. We don't fix the counter itself here, but we
535 : * must be sure to store a valid value in our array entry.
536 : */
537 0 : oldestMXact = MultiXactState->nextMXact;
538 0 : if (oldestMXact < FirstMultiXactId)
539 0 : oldestMXact = FirstMultiXactId;
540 :
541 0 : for (i = 1; i <= MaxBackends; i++)
542 : {
543 0 : MultiXactId thisoldest = OldestMemberMXactId[i];
544 :
545 0 : if (MultiXactIdIsValid(thisoldest) &&
546 : MultiXactIdPrecedes(thisoldest, oldestMXact))
547 0 : oldestMXact = thisoldest;
548 : }
549 :
550 0 : OldestVisibleMXactId[MyBackendId] = oldestMXact;
551 :
552 0 : LWLockRelease(MultiXactGenLock);
553 :
554 : debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = %u",
555 : MyBackendId, oldestMXact);
556 : }
557 0 : }
558 :
559 : /*
560 : * MultiXactIdWait
561 : * Sleep on a MultiXactId.
562 : *
563 : * We do this by sleeping on each member using XactLockTableWait. Any
564 : * members that belong to the current backend are *not* waited for, however;
565 : * this would not merely be useless but would lead to Assert failure inside
566 : * XactLockTableWait. By the time this returns, it is certain that all
567 : * transactions *of other backends* that were members of the MultiXactId
568 : * are dead (and no new ones can have been added, since it is not legal
569 : * to add members to an existing MultiXactId).
570 : *
571 : * But by the time we finish sleeping, someone else may have changed the Xmax
572 : * of the containing tuple, so the caller needs to iterate on us somehow.
573 : */
574 : void
575 : MultiXactIdWait(MultiXactId multi)
576 0 : {
577 : TransactionId *members;
578 : int nmembers;
579 :
580 0 : nmembers = GetMultiXactIdMembers(multi, &members);
581 :
582 0 : if (nmembers >= 0)
583 : {
584 : int i;
585 :
586 0 : for (i = 0; i < nmembers; i++)
587 : {
588 0 : TransactionId member = members[i];
589 :
590 : debug_elog4(DEBUG2, "MultiXactIdWait: waiting for %d (%u)",
591 : i, member);
592 0 : if (!TransactionIdIsCurrentTransactionId(member))
593 0 : XactLockTableWait(member);
594 : }
595 :
596 0 : pfree(members);
597 : }
598 0 : }
599 :
600 : /*
601 : * ConditionalMultiXactIdWait
602 : * As above, but only lock if we can get the lock without blocking.
603 : */
604 : bool
605 : ConditionalMultiXactIdWait(MultiXactId multi)
606 0 : {
607 0 : bool result = true;
608 : TransactionId *members;
609 : int nmembers;
610 :
611 0 : nmembers = GetMultiXactIdMembers(multi, &members);
612 :
613 0 : if (nmembers >= 0)
614 : {
615 : int i;
616 :
617 0 : for (i = 0; i < nmembers; i++)
618 : {
619 0 : TransactionId member = members[i];
620 :
621 : debug_elog4(DEBUG2, "ConditionalMultiXactIdWait: trying %d (%u)",
622 : i, member);
623 0 : if (!TransactionIdIsCurrentTransactionId(member))
624 : {
625 0 : result = ConditionalXactLockTableWait(member);
626 0 : if (!result)
627 0 : break;
628 : }
629 : }
630 :
631 0 : pfree(members);
632 : }
633 :
634 0 : return result;
635 : }
636 :
637 : /*
638 : * CreateMultiXactId
639 : * Make a new MultiXactId
640 : *
641 : * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
642 : * given TransactionIds as members. Returns the newly created MultiXactId.
643 : *
644 : * NB: the passed xids[] array will be sorted in-place.
645 : */
646 : static MultiXactId
647 : CreateMultiXactId(int nxids, TransactionId *xids)
648 0 : {
649 : MultiXactId multi;
650 : MultiXactOffset offset;
651 : XLogRecData rdata[2];
652 : xl_multixact_create xlrec;
653 :
654 : debug_elog3(DEBUG2, "Create: %s",
655 : mxid_to_string(InvalidMultiXactId, nxids, xids));
656 :
657 : /*
658 : * See if the same set of XIDs already exists in our cache; if so, just
659 : * re-use that MultiXactId. (Note: it might seem that looking in our
660 : * cache is insufficient, and we ought to search disk to see if a
661 : * duplicate definition already exists. But since we only ever create
662 : * MultiXacts containing our own XID, in most cases any such MultiXacts
663 : * were in fact created by us, and so will be in our cache. There are
664 : * corner cases where someone else added us to a MultiXact without our
665 : * knowledge, but it's not worth checking for.)
666 : */
667 0 : multi = mXactCacheGetBySet(nxids, xids);
668 0 : if (MultiXactIdIsValid(multi))
669 : {
670 : debug_elog2(DEBUG2, "Create: in cache!");
671 0 : return multi;
672 : }
673 :
674 : /*
675 : * Assign the MXID and offsets range to use, and make sure there is space
676 : * in the OFFSETs and MEMBERs files. NB: this routine does
677 : * START_CRIT_SECTION().
678 : */
679 0 : multi = GetNewMultiXactId(nxids, &offset);
680 :
681 : /*
682 : * Make an XLOG entry describing the new MXID.
683 : *
684 : * Note: we need not flush this XLOG entry to disk before proceeding. The
685 : * only way for the MXID to be referenced from any data page is for
686 : * heap_lock_tuple() to have put it there, and heap_lock_tuple() generates
687 : * an XLOG record that must follow ours. The normal LSN interlock between
688 : * the data page and that XLOG record will ensure that our XLOG record
689 : * reaches disk first. If the SLRU members/offsets data reaches disk
690 : * sooner than the XLOG record, we do not care because we'll overwrite it
691 : * with zeroes unless the XLOG record is there too; see notes at top of
692 : * this file.
693 : */
694 0 : xlrec.mid = multi;
695 0 : xlrec.moff = offset;
696 0 : xlrec.nxids = nxids;
697 :
698 0 : rdata[0].data = (char *) (&xlrec);
699 0 : rdata[0].len = MinSizeOfMultiXactCreate;
700 0 : rdata[0].buffer = InvalidBuffer;
701 0 : rdata[0].next = &(rdata[1]);
702 0 : rdata[1].data = (char *) xids;
703 0 : rdata[1].len = nxids * sizeof(TransactionId);
704 0 : rdata[1].buffer = InvalidBuffer;
705 0 : rdata[1].next = NULL;
706 :
707 0 : (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
708 :
709 : /* Now enter the information into the OFFSETs and MEMBERs logs */
710 0 : RecordNewMultiXact(multi, offset, nxids, xids);
711 :
712 : /* Done with critical section */
713 0 : END_CRIT_SECTION();
714 :
715 : /* Store the new MultiXactId in the local cache, too */
716 0 : mXactCachePut(multi, nxids, xids);
717 :
718 : debug_elog2(DEBUG2, "Create: all done");
719 :
720 0 : return multi;
721 : }
722 :
723 : /*
724 : * RecordNewMultiXact
725 : * Write info about a new multixact into the offsets and members files
726 : *
727 : * This is broken out of CreateMultiXactId so that xlog replay can use it.
728 : */
729 : static void
730 : RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
731 : int nxids, TransactionId *xids)
732 0 : {
733 : int pageno;
734 : int prev_pageno;
735 : int entryno;
736 : int slotno;
737 : MultiXactOffset *offptr;
738 : int i;
739 :
740 0 : LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
741 :
742 0 : pageno = MultiXactIdToOffsetPage(multi);
743 0 : entryno = MultiXactIdToOffsetEntry(multi);
744 :
745 : /*
746 : * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction"
747 : * to complain about if there's any I/O error. This is kinda bogus, but
748 : * since the errors will always give the full pathname, it should be clear
749 : * enough that a MultiXactId is really involved. Perhaps someday we'll
750 : * take the trouble to generalize the slru.c error reporting code.
751 : */
752 0 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
753 0 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
754 0 : offptr += entryno;
755 :
756 0 : *offptr = offset;
757 :
758 0 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
759 :
760 : /* Exchange our lock */
761 0 : LWLockRelease(MultiXactOffsetControlLock);
762 :
763 0 : LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
764 :
765 0 : prev_pageno = -1;
766 :
767 0 : for (i = 0; i < nxids; i++, offset++)
768 : {
769 : TransactionId *memberptr;
770 :
771 0 : pageno = MXOffsetToMemberPage(offset);
772 0 : entryno = MXOffsetToMemberEntry(offset);
773 :
774 0 : if (pageno != prev_pageno)
775 : {
776 0 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
777 0 : prev_pageno = pageno;
778 : }
779 :
780 0 : memberptr = (TransactionId *)
781 : MultiXactMemberCtl->shared->page_buffer[slotno];
782 0 : memberptr += entryno;
783 :
784 0 : *memberptr = xids[i];
785 :
786 0 : MultiXactMemberCtl->shared->page_dirty[slotno] = true;
787 : }
788 :
789 0 : LWLockRelease(MultiXactMemberControlLock);
790 0 : }
791 :
792 : /*
793 : * GetNewMultiXactId
794 : * Get the next MultiXactId.
795 : *
796 : * Also, reserve the needed amount of space in the "members" area. The
797 : * starting offset of the reserved space is returned in *offset.
798 : *
799 : * This may generate XLOG records for expansion of the offsets and/or members
800 : * files. Unfortunately, we have to do that while holding MultiXactGenLock
801 : * to avoid race conditions --- the XLOG record for zeroing a page must appear
802 : * before any backend can possibly try to store data in that page!
803 : *
804 : * We start a critical section before advancing the shared counters. The
805 : * caller must end the critical section after writing SLRU data.
806 : */
807 : static MultiXactId
808 : GetNewMultiXactId(int nxids, MultiXactOffset *offset)
809 0 : {
810 : MultiXactId result;
811 : MultiXactOffset nextOffset;
812 :
813 : debug_elog3(DEBUG2, "GetNew: for %d xids", nxids);
814 :
815 : /* MultiXactIdSetOldestMember() must have been called already */
816 : Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]));
817 :
818 0 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
819 :
820 : /* Handle wraparound of the nextMXact counter */
821 0 : if (MultiXactState->nextMXact < FirstMultiXactId)
822 0 : MultiXactState->nextMXact = FirstMultiXactId;
823 :
824 : /*
825 : * Assign the MXID, and make sure there is room for it in the file.
826 : */
827 0 : result = MultiXactState->nextMXact;
828 :
829 0 : ExtendMultiXactOffset(result);
830 :
831 : /*
832 : * Reserve the members space, similarly to above. Also, be careful not to
833 : * return zero as the starting offset for any multixact. See
834 : * GetMultiXactIdMembers() for motivation.
835 : */
836 0 : nextOffset = MultiXactState->nextOffset;
837 0 : if (nextOffset == 0)
838 : {
839 0 : *offset = 1;
840 0 : nxids++; /* allocate member slot 0 too */
841 : }
842 : else
843 0 : *offset = nextOffset;
844 :
845 0 : ExtendMultiXactMember(nextOffset, nxids);
846 :
847 : /*
848 : * Critical section from here until caller has written the data into the
849 : * just-reserved SLRU space; we don't want to error out with a partly
850 : * written MultiXact structure. (In particular, failing to write our
851 : * start offset after advancing nextMXact would effectively corrupt the
852 : * previous MultiXact.)
853 : */
854 0 : START_CRIT_SECTION();
855 :
856 : /*
857 : * Advance counters. As in GetNewTransactionId(), this must not happen
858 : * until after file extension has succeeded!
859 : *
860 : * We don't care about MultiXactId wraparound here; it will be handled by
861 : * the next iteration. But note that nextMXact may be InvalidMultiXactId
862 : * after this routine exits, so anyone else looking at the variable must
863 : * be prepared to deal with that. Similarly, nextOffset may be zero, but
864 : * we won't use that as the actual start offset of the next multixact.
865 : */
866 0 : (MultiXactState->nextMXact)++;
867 :
868 0 : MultiXactState->nextOffset += nxids;
869 :
870 0 : LWLockRelease(MultiXactGenLock);
871 :
872 : debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset);
873 0 : return result;
874 : }
875 :
876 : /*
877 : * GetMultiXactIdMembers
878 : * Returns the set of TransactionIds that make up a MultiXactId
879 : *
880 : * We return -1 if the MultiXactId is too old to possibly have any members
881 : * still running; in that case we have not actually looked them up, and
882 : * *xids is not set.
883 : */
884 : int
885 : GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
886 0 : {
887 : int pageno;
888 : int prev_pageno;
889 : int entryno;
890 : int slotno;
891 : MultiXactOffset *offptr;
892 : MultiXactOffset offset;
893 : int length;
894 : int truelength;
895 : int i;
896 : MultiXactId nextMXact;
897 : MultiXactId tmpMXact;
898 : MultiXactOffset nextOffset;
899 : TransactionId *ptr;
900 :
901 : debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
902 :
903 : Assert(MultiXactIdIsValid(multi));
904 :
905 : /* See if the MultiXactId is in the local cache */
906 0 : length = mXactCacheGetById(multi, xids);
907 0 : if (length >= 0)
908 : {
909 : debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
910 : mxid_to_string(multi, length, *xids));
911 0 : return length;
912 : }
913 :
914 : /* Set our OldestVisibleMXactId[] entry if we didn't already */
915 0 : MultiXactIdSetOldestVisible();
916 :
917 : /*
918 : * We check known limits on MultiXact before resorting to the SLRU area.
919 : *
920 : * An ID older than our OldestVisibleMXactId[] entry can't possibly still
921 : * be running, and we'd run the risk of trying to read already-truncated
922 : * SLRU data if we did try to examine it.
923 : *
924 : * Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is
925 : * seen, it implies undetected ID wraparound has occurred. We just
926 : * silently assume that such an ID is no longer running.
927 : *
928 : * Shared lock is enough here since we aren't modifying any global state.
929 : * Also, we can examine our own OldestVisibleMXactId without the lock,
930 : * since no one else is allowed to change it.
931 : */
932 0 : if (MultiXactIdPrecedes(multi, OldestVisibleMXactId[MyBackendId]))
933 : {
934 : debug_elog2(DEBUG2, "GetMembers: it's too old");
935 0 : *xids = NULL;
936 0 : return -1;
937 : }
938 :
939 : /*
940 : * Acquire the shared lock just long enough to grab the current counter
941 : * values. We may need both nextMXact and nextOffset; see below.
942 : */
943 0 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
944 :
945 0 : nextMXact = MultiXactState->nextMXact;
946 0 : nextOffset = MultiXactState->nextOffset;
947 :
948 0 : LWLockRelease(MultiXactGenLock);
949 :
950 0 : if (!MultiXactIdPrecedes(multi, nextMXact))
951 : {
952 : debug_elog2(DEBUG2, "GetMembers: it's too new!");
953 0 : *xids = NULL;
954 0 : return -1;
955 : }
956 :
957 : /*
958 : * Find out the offset at which we need to start reading MultiXactMembers
959 : * and the number of members in the multixact. We determine the latter as
960 : * the difference between this multixact's starting offset and the next
961 : * one's. However, there are some corner cases to worry about:
962 : *
963 : * 1. This multixact may be the latest one created, in which case there is
964 : * no next one to look at. In this case the nextOffset value we just
965 : * saved is the correct endpoint.
966 : *
967 : * 2. The next multixact may still be in process of being filled in: that
968 : * is, another process may have done GetNewMultiXactId but not yet written
969 : * the offset entry for that ID. In that scenario, it is guaranteed that
970 : * the offset entry for that multixact exists (because GetNewMultiXactId
971 : * won't release MultiXactGenLock until it does) but contains zero
972 : * (because we are careful to pre-zero offset pages). Because
973 : * GetNewMultiXactId will never return zero as the starting offset for a
974 : * multixact, when we read zero as the next multixact's offset, we know we
975 : * have this case. We sleep for a bit and try again.
976 : *
977 : * 3. Because GetNewMultiXactId increments offset zero to offset one to
978 : * handle case #2, there is an ambiguity near the point of offset
979 : * wraparound. If we see next multixact's offset is one, is that our
980 : * multixact's actual endpoint, or did it end at zero with a subsequent
981 : * increment? We handle this using the knowledge that if the zero'th
982 : * member slot wasn't filled, it'll contain zero, and zero isn't a valid
983 : * transaction ID so it can't be a multixact member. Therefore, if we
984 : * read a zero from the members array, just ignore it.
985 : *
986 : * This is all pretty messy, but the mess occurs only in infrequent corner
987 : * cases, so it seems better than holding the MultiXactGenLock for a long
988 : * time on every multixact creation.
989 : */
990 0 : retry:
991 0 : LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
992 :
993 0 : pageno = MultiXactIdToOffsetPage(multi);
994 0 : entryno = MultiXactIdToOffsetEntry(multi);
995 :
996 0 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
997 0 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
998 0 : offptr += entryno;
999 0 : offset = *offptr;
1000 :
1001 : Assert(offset != 0);
1002 :
1003 : /*
1004 : * Use the same increment rule as GetNewMultiXactId(), that is, don't
1005 : * handle wraparound explicitly until needed.
1006 : */
1007 0 : tmpMXact = multi + 1;
1008 :
1009 0 : if (nextMXact == tmpMXact)
1010 : {
1011 : /* Corner case 1: there is no next multixact */
1012 0 : length = nextOffset - offset;
1013 : }
1014 : else
1015 : {
1016 : MultiXactOffset nextMXOffset;
1017 :
1018 : /* handle wraparound if needed */
1019 0 : if (tmpMXact < FirstMultiXactId)
1020 0 : tmpMXact = FirstMultiXactId;
1021 :
1022 0 : prev_pageno = pageno;
1023 :
1024 0 : pageno = MultiXactIdToOffsetPage(tmpMXact);
1025 0 : entryno = MultiXactIdToOffsetEntry(tmpMXact);
1026 :
1027 0 : if (pageno != prev_pageno)
1028 0 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, tmpMXact);
1029 :
1030 0 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1031 0 : offptr += entryno;
1032 0 : nextMXOffset = *offptr;
1033 :
1034 0 : if (nextMXOffset == 0)
1035 : {
1036 : /* Corner case 2: next multixact is still being filled in */
1037 0 : LWLockRelease(MultiXactOffsetControlLock);
1038 0 : pg_usleep(1000L);
1039 0 : goto retry;
1040 : }
1041 :
1042 0 : length = nextMXOffset - offset;
1043 : }
1044 :
1045 0 : LWLockRelease(MultiXactOffsetControlLock);
1046 :
1047 0 : ptr = (TransactionId *) palloc(length * sizeof(TransactionId));
1048 0 : *xids = ptr;
1049 :
1050 : /* Now get the members themselves. */
1051 0 : LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
1052 :
1053 0 : truelength = 0;
1054 0 : prev_pageno = -1;
1055 0 : for (i = 0; i < length; i++, offset++)
1056 : {
1057 : TransactionId *xactptr;
1058 :
1059 0 : pageno = MXOffsetToMemberPage(offset);
1060 0 : entryno = MXOffsetToMemberEntry(offset);
1061 :
1062 0 : if (pageno != prev_pageno)
1063 : {
1064 0 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
1065 0 : prev_pageno = pageno;
1066 : }
1067 :
1068 0 : xactptr = (TransactionId *)
1069 : MultiXactMemberCtl->shared->page_buffer[slotno];
1070 0 : xactptr += entryno;
1071 :
1072 0 : if (!TransactionIdIsValid(*xactptr))
1073 : {
1074 : /* Corner case 3: we must be looking at unused slot zero */
1075 : Assert(offset == 0);
1076 0 : continue;
1077 : }
1078 :
1079 0 : ptr[truelength++] = *xactptr;
1080 : }
1081 :
1082 0 : LWLockRelease(MultiXactMemberControlLock);
1083 :
1084 : /*
1085 : * Copy the result into the local cache.
1086 : */
1087 0 : mXactCachePut(multi, truelength, ptr);
1088 :
1089 : debug_elog3(DEBUG2, "GetMembers: no cache for %s",
1090 : mxid_to_string(multi, truelength, ptr));
1091 0 : return truelength;
1092 : }
1093 :
1094 : /*
1095 : * mXactCacheGetBySet
1096 : * returns a MultiXactId from the cache based on the set of
1097 : * TransactionIds that compose it, or InvalidMultiXactId if
1098 : * none matches.
1099 : *
1100 : * This is helpful, for example, if two transactions want to lock a huge
1101 : * table. By using the cache, the second will use the same MultiXactId
1102 : * for the majority of tuples, thus keeping MultiXactId usage low (saving
1103 : * both I/O and wraparound issues).
1104 : *
1105 : * NB: the passed xids[] array will be sorted in-place.
1106 : */
1107 : static MultiXactId
1108 : mXactCacheGetBySet(int nxids, TransactionId *xids)
1109 0 : {
1110 : mXactCacheEnt *entry;
1111 :
1112 : debug_elog3(DEBUG2, "CacheGet: looking for %s",
1113 : mxid_to_string(InvalidMultiXactId, nxids, xids));
1114 :
1115 : /* sort the array so comparison is easy */
1116 0 : qsort(xids, nxids, sizeof(TransactionId), xidComparator);
1117 :
1118 0 : for (entry = MXactCache; entry != NULL; entry = entry->next)
1119 : {
1120 0 : if (entry->nxids != nxids)
1121 0 : continue;
1122 :
1123 : /* We assume the cache entries are sorted */
1124 0 : if (memcmp(xids, entry->xids, nxids * sizeof(TransactionId)) == 0)
1125 : {
1126 : debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
1127 0 : return entry->multi;
1128 : }
1129 : }
1130 :
1131 : debug_elog2(DEBUG2, "CacheGet: not found :-(");
1132 0 : return InvalidMultiXactId;
1133 : }
1134 :
1135 : /*
1136 : * mXactCacheGetById
1137 : * returns the composing TransactionId set from the cache for a
1138 : * given MultiXactId, if present.
1139 : *
1140 : * If successful, *xids is set to the address of a palloc'd copy of the
1141 : * TransactionId set. Return value is number of members, or -1 on failure.
1142 : */
1143 : static int
1144 : mXactCacheGetById(MultiXactId multi, TransactionId **xids)
1145 0 : {
1146 : mXactCacheEnt *entry;
1147 :
1148 : debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
1149 :
1150 0 : for (entry = MXactCache; entry != NULL; entry = entry->next)
1151 : {
1152 0 : if (entry->multi == multi)
1153 : {
1154 : TransactionId *ptr;
1155 : Size size;
1156 :
1157 0 : size = sizeof(TransactionId) * entry->nxids;
1158 0 : ptr = (TransactionId *) palloc(size);
1159 0 : *xids = ptr;
1160 :
1161 0 : memcpy(ptr, entry->xids, size);
1162 :
1163 : debug_elog3(DEBUG2, "CacheGet: found %s",
1164 : mxid_to_string(multi, entry->nxids, entry->xids));
1165 0 : return entry->nxids;
1166 : }
1167 : }
1168 :
1169 : debug_elog2(DEBUG2, "CacheGet: not found");
1170 0 : return -1;
1171 : }
1172 :
1173 : /*
1174 : * mXactCachePut
1175 : * Add a new MultiXactId and its composing set into the local cache.
1176 : */
1177 : static void
1178 : mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids)
1179 0 : {
1180 : mXactCacheEnt *entry;
1181 :
1182 : debug_elog3(DEBUG2, "CachePut: storing %s",
1183 : mxid_to_string(multi, nxids, xids));
1184 :
1185 0 : if (MXactContext == NULL)
1186 : {
1187 : /* The cache only lives as long as the current transaction */
1188 : debug_elog2(DEBUG2, "CachePut: initializing memory context");
1189 0 : MXactContext = AllocSetContextCreate(TopTransactionContext,
1190 : "MultiXact Cache Context",
1191 : ALLOCSET_SMALL_MINSIZE,
1192 : ALLOCSET_SMALL_INITSIZE,
1193 : ALLOCSET_SMALL_MAXSIZE);
1194 : }
1195 :
1196 0 : entry = (mXactCacheEnt *)
1197 : MemoryContextAlloc(MXactContext,
1198 : offsetof(mXactCacheEnt, xids) +
1199 : nxids * sizeof(TransactionId));
1200 :
1201 0 : entry->multi = multi;
1202 0 : entry->nxids = nxids;
1203 0 : memcpy(entry->xids, xids, nxids * sizeof(TransactionId));
1204 :
1205 : /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
1206 0 : qsort(entry->xids, nxids, sizeof(TransactionId), xidComparator);
1207 :
1208 0 : entry->next = MXactCache;
1209 0 : MXactCache = entry;
1210 0 : }
1211 :
1212 : /*
1213 : * xidComparator
1214 : * qsort comparison function for XIDs
1215 : *
1216 : * We don't need to use wraparound comparison for XIDs, and indeed must
1217 : * not do so since that does not respect the triangle inequality! Any
1218 : * old sort order will do.
1219 : */
1220 : static int
1221 : xidComparator(const void *arg1, const void *arg2)
1222 0 : {
1223 0 : TransactionId xid1 = *(const TransactionId *) arg1;
1224 0 : TransactionId xid2 = *(const TransactionId *) arg2;
1225 :
1226 0 : if (xid1 > xid2)
1227 0 : return 1;
1228 0 : if (xid1 < xid2)
1229 0 : return -1;
1230 0 : return 0;
1231 : }
1232 :
#ifdef MULTIXACT_DEBUG
/*
 * mxid_to_string
 *		Debug helper: render a MultiXactId and its members as text
 *
 * The result is a palloc'd string of the form "multi nxids[xid, xid, ...]".
 * An empty member set is rendered as "multi 0[]"; xids is not dereferenced
 * in that case, so it may be NULL.
 */
static char *
mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids)
{
	/*
	 * Worst-case length: the header "%u %d[" needs at most 10 + 1 + 11 + 1
	 * characters, every member at most 12 ("4294967295" plus a ", "
	 * separator), and "]" plus the terminating NUL need 2 more.  32 bytes of
	 * fixed overhead leaves some slack on top of that.
	 */
	Size		buflen = 32 + 12 * (Size) (nxids > 0 ? nxids : 0);
	char	   *str = palloc(buflen);
	Size		used = 0;
	int			written;
	int			i;

	str[0] = '\0';

	written = snprintf(str, buflen, "%u %d[", multi, nxids);
	if (written > 0 && (Size) written < buflen)
		used = (Size) written;

	for (i = 0; i < nxids; i++)
	{
		/* Every write is bounded by the space that is actually left */
		written = snprintf(str + used, buflen - used, "%s%u",
						   (i == 0) ? "" : ", ", xids[i]);
		if (written < 0 || (Size) written >= buflen - used)
			break;				/* cannot happen given the sizing above */
		used += (Size) written;
	}

	if (used + 1 < buflen)
	{
		str[used++] = ']';
		str[used] = '\0';
	}

	return str;
}
#endif
1249 :
1250 : /*
1251 : * AtEOXact_MultiXact
1252 : * Handle transaction end for MultiXact
1253 : *
1254 : * This is called at top transaction commit or abort (we don't care which).
1255 : */
1256 : void
1257 : AtEOXact_MultiXact(void)
1258 13673 : {
1259 : /*
1260 : * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
1261 : * which should only be valid while within a transaction.
1262 : *
1263 : * We assume that storing a MultiXactId is atomic and so we need not take
1264 : * MultiXactGenLock to do this.
1265 : */
1266 13673 : OldestMemberMXactId[MyBackendId] = InvalidMultiXactId;
1267 13673 : OldestVisibleMXactId[MyBackendId] = InvalidMultiXactId;
1268 :
1269 : /*
1270 : * Discard the local MultiXactId cache. Since MXactContext was created as
1271 : * a child of TopTransactionContext, we needn't delete it explicitly.
1272 : */
1273 13673 : MXactContext = NULL;
1274 13673 : MXactCache = NULL;
1275 13673 : }
1276 :
1277 : /*
1278 : * Initialization of shared memory for MultiXact. We use two SLRU areas,
1279 : * thus double memory. Also, reserve space for the shared MultiXactState
1280 : * struct and the per-backend MultiXactId arrays (two of those, too).
1281 : */
1282 : Size
1283 : MultiXactShmemSize(void)
1284 18 : {
1285 : Size size;
1286 :
1287 : #define SHARED_MULTIXACT_STATE_SIZE \
1288 : add_size(sizeof(MultiXactStateData), \
1289 : mul_size(sizeof(MultiXactId) * 2, MaxBackends))
1290 :
1291 18 : size = SHARED_MULTIXACT_STATE_SIZE;
1292 18 : size = add_size(size, SimpleLruShmemSize(NUM_MXACTOFFSET_BUFFERS, 0));
1293 18 : size = add_size(size, SimpleLruShmemSize(NUM_MXACTMEMBER_BUFFERS, 0));
1294 :
1295 18 : return size;
1296 : }
1297 :
1298 : void
1299 : MultiXactShmemInit(void)
1300 16 : {
1301 : bool found;
1302 :
1303 : debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
1304 :
1305 16 : MultiXactOffsetCtl->PagePrecedes = MultiXactOffsetPagePrecedes;
1306 16 : MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
1307 :
1308 16 : SimpleLruInit(MultiXactOffsetCtl,
1309 : "MultiXactOffset Ctl", NUM_MXACTOFFSET_BUFFERS, 0,
1310 : MultiXactOffsetControlLock, "pg_multixact/offsets");
1311 16 : SimpleLruInit(MultiXactMemberCtl,
1312 : "MultiXactMember Ctl", NUM_MXACTMEMBER_BUFFERS, 0,
1313 : MultiXactMemberControlLock, "pg_multixact/members");
1314 :
1315 : /* Initialize our shared state struct */
1316 16 : MultiXactState = ShmemInitStruct("Shared MultiXact State",
1317 : SHARED_MULTIXACT_STATE_SIZE,
1318 : &found);
1319 16 : if (!IsUnderPostmaster)
1320 : {
1321 : Assert(!found);
1322 :
1323 : /* Make sure we zero out the per-backend state */
1324 16 : MemSet(MultiXactState, 0, SHARED_MULTIXACT_STATE_SIZE);
1325 : }
1326 : else
1327 : Assert(found);
1328 :
1329 : /*
1330 : * Set up array pointers. Note that perBackendXactIds[0] is wasted space
1331 : * since we only use indexes 1..MaxBackends in each array.
1332 : */
1333 16 : OldestMemberMXactId = MultiXactState->perBackendXactIds;
1334 16 : OldestVisibleMXactId = OldestMemberMXactId + MaxBackends;
1335 16 : }
1336 :
1337 : /*
1338 : * This func must be called ONCE on system install. It creates the initial
1339 : * MultiXact segments. (The MultiXacts directories are assumed to have been
1340 : * created by initdb, and MultiXactShmemInit must have been called already.)
1341 : */
1342 : void
1343 : BootStrapMultiXact(void)
1344 1 : {
1345 : int slotno;
1346 :
1347 1 : LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
1348 :
1349 : /* Create and zero the first page of the offsets log */
1350 1 : slotno = ZeroMultiXactOffsetPage(0, false);
1351 :
1352 : /* Make sure it's written out */
1353 1 : SimpleLruWritePage(MultiXactOffsetCtl, slotno, NULL);
1354 : Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
1355 :
1356 1 : LWLockRelease(MultiXactOffsetControlLock);
1357 :
1358 1 : LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
1359 :
1360 : /* Create and zero the first page of the members log */
1361 1 : slotno = ZeroMultiXactMemberPage(0, false);
1362 :
1363 : /* Make sure it's written out */
1364 1 : SimpleLruWritePage(MultiXactMemberCtl, slotno, NULL);
1365 : Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
1366 :
1367 1 : LWLockRelease(MultiXactMemberControlLock);
1368 1 : }
1369 :
1370 : /*
1371 : * Initialize (or reinitialize) a page of MultiXactOffset to zeroes.
1372 : * If writeXlog is TRUE, also emit an XLOG record saying we did this.
1373 : *
1374 : * The page is not actually written, just set up in shared memory.
1375 : * The slot number of the new page is returned.
1376 : *
1377 : * Control lock must be held at entry, and will be held at exit.
1378 : */
1379 : static int
1380 : ZeroMultiXactOffsetPage(int pageno, bool writeXlog)
1381 1 : {
1382 : int slotno;
1383 :
1384 1 : slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
1385 :
1386 1 : if (writeXlog)
1387 0 : WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);
1388 :
1389 1 : return slotno;
1390 : }
1391 :
1392 : /*
1393 : * Ditto, for MultiXactMember
1394 : */
1395 : static int
1396 : ZeroMultiXactMemberPage(int pageno, bool writeXlog)
1397 1 : {
1398 : int slotno;
1399 :
1400 1 : slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
1401 :
1402 1 : if (writeXlog)
1403 0 : WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);
1404 :
1405 1 : return slotno;
1406 : }
1407 :
1408 : /*
1409 : * This must be called ONCE during postmaster or standalone-backend startup.
1410 : *
1411 : * StartupXLOG has already established nextMXact/nextOffset by calling
1412 : * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact. Note that we
1413 : * may already have replayed WAL data into the SLRU files.
1414 : *
1415 : * We don't need any locks here, really; the SLRU locks are taken
1416 : * only because slru.c expects to be called with locks held.
1417 : */
1418 : void
1419 : StartupMultiXact(void)
1420 14 : {
1421 14 : MultiXactId multi = MultiXactState->nextMXact;
1422 14 : MultiXactOffset offset = MultiXactState->nextOffset;
1423 : int pageno;
1424 : int entryno;
1425 :
1426 : /* Clean up offsets state */
1427 14 : LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
1428 :
1429 : /*
1430 : * Initialize our idea of the latest page number.
1431 : */
1432 14 : pageno = MultiXactIdToOffsetPage(multi);
1433 14 : MultiXactOffsetCtl->shared->latest_page_number = pageno;
1434 :
1435 : /*
1436 : * Zero out the remainder of the current offsets page. See notes in
1437 : * StartupCLOG() for motivation.
1438 : */
1439 14 : entryno = MultiXactIdToOffsetEntry(multi);
1440 14 : if (entryno != 0)
1441 : {
1442 : int slotno;
1443 : MultiXactOffset *offptr;
1444 :
1445 14 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
1446 14 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1447 14 : offptr += entryno;
1448 :
1449 14 : MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset)));
1450 :
1451 14 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
1452 : }
1453 :
1454 14 : LWLockRelease(MultiXactOffsetControlLock);
1455 :
1456 : /* And the same for members */
1457 14 : LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
1458 :
1459 : /*
1460 : * Initialize our idea of the latest page number.
1461 : */
1462 14 : pageno = MXOffsetToMemberPage(offset);
1463 14 : MultiXactMemberCtl->shared->latest_page_number = pageno;
1464 :
1465 : /*
1466 : * Zero out the remainder of the current members page. See notes in
1467 : * StartupCLOG() for motivation.
1468 : */
1469 14 : entryno = MXOffsetToMemberEntry(offset);
1470 14 : if (entryno != 0)
1471 : {
1472 : int slotno;
1473 : TransactionId *xidptr;
1474 :
1475 0 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
1476 0 : xidptr = (TransactionId *) MultiXactMemberCtl->shared->page_buffer[slotno];
1477 0 : xidptr += entryno;
1478 :
1479 0 : MemSet(xidptr, 0, BLCKSZ - (entryno * sizeof(TransactionId)));
1480 :
1481 0 : MultiXactMemberCtl->shared->page_dirty[slotno] = true;
1482 : }
1483 :
1484 14 : LWLockRelease(MultiXactMemberControlLock);
1485 :
1486 : /*
1487 : * Initialize lastTruncationPoint to invalid, ensuring that the first
1488 : * checkpoint will try to do truncation.
1489 : */
1490 14 : MultiXactState->lastTruncationPoint = InvalidMultiXactId;
1491 14 : }
1492 :
1493 : /*
1494 : * This must be called ONCE during postmaster or standalone-backend shutdown
1495 : */
1496 : void
1497 : ShutdownMultiXact(void)
1498 13 : {
1499 : /* Flush dirty MultiXact pages to disk */
1500 13 : SimpleLruFlush(MultiXactOffsetCtl, false);
1501 13 : SimpleLruFlush(MultiXactMemberCtl, false);
1502 13 : }
1503 :
1504 : /*
1505 : * Get the next MultiXactId and offset to save in a checkpoint record
1506 : */
1507 : void
1508 : MultiXactGetCheckptMulti(bool is_shutdown,
1509 : MultiXactId *nextMulti,
1510 : MultiXactOffset *nextMultiOffset)
1511 19 : {
1512 19 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
1513 :
1514 19 : *nextMulti = MultiXactState->nextMXact;
1515 19 : *nextMultiOffset = MultiXactState->nextOffset;
1516 :
1517 19 : LWLockRelease(MultiXactGenLock);
1518 :
1519 : debug_elog4(DEBUG2, "MultiXact: checkpoint is nextMulti %u, nextOffset %u",
1520 : *nextMulti, *nextMultiOffset);
1521 19 : }
1522 :
1523 : /*
1524 : * Perform a checkpoint --- either during shutdown, or on-the-fly
1525 : */
1526 : void
1527 : CheckPointMultiXact(void)
1528 19 : {
1529 : /* Flush dirty MultiXact pages to disk */
1530 19 : SimpleLruFlush(MultiXactOffsetCtl, true);
1531 19 : SimpleLruFlush(MultiXactMemberCtl, true);
1532 :
1533 : /*
1534 : * Truncate the SLRU files. This could be done at any time, but
1535 : * checkpoint seems a reasonable place for it. There is one exception: if
1536 : * we are called during xlog recovery, then shared->latest_page_number
1537 : * isn't valid (because StartupMultiXact hasn't been called yet) and so
1538 : * SimpleLruTruncate would get confused. It seems best not to risk
1539 : * removing any data during recovery anyway, so don't truncate.
1540 : */
1541 19 : if (!InRecovery)
1542 19 : TruncateMultiXact();
1543 19 : }
1544 :
1545 : /*
1546 : * Set the next-to-be-assigned MultiXactId and offset
1547 : *
1548 : * This is used when we can determine the correct next ID/offset exactly
1549 : * from a checkpoint record. We need no locking since it is only called
1550 : * during bootstrap and XLog replay.
1551 : */
1552 : void
1553 : MultiXactSetNextMXact(MultiXactId nextMulti,
1554 : MultiXactOffset nextMultiOffset)
1555 15 : {
1556 : debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
1557 : nextMulti, nextMultiOffset);
1558 15 : MultiXactState->nextMXact = nextMulti;
1559 15 : MultiXactState->nextOffset = nextMultiOffset;
1560 15 : }
1561 :
1562 : /*
1563 : * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
1564 : * and similarly nextOffset is at least minMultiOffset
1565 : *
1566 : * This is used when we can determine minimum safe values from an XLog
1567 : * record (either an on-line checkpoint or an mxact creation log entry).
1568 : * We need no locking since it is only called during XLog replay.
1569 : */
1570 : void
1571 : MultiXactAdvanceNextMXact(MultiXactId minMulti,
1572 : MultiXactOffset minMultiOffset)
1573 0 : {
1574 0 : if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti))
1575 : {
1576 : debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
1577 0 : MultiXactState->nextMXact = minMulti;
1578 : }
1579 0 : if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
1580 : {
1581 : debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
1582 : minMultiOffset);
1583 0 : MultiXactState->nextOffset = minMultiOffset;
1584 : }
1585 0 : }
1586 :
1587 : /*
1588 : * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
1589 : *
1590 : * NB: this is called while holding MultiXactGenLock. We want it to be very
1591 : * fast most of the time; even when it's not so fast, no actual I/O need
1592 : * happen unless we're forced to write out a dirty log or xlog page to make
1593 : * room in shared memory.
1594 : */
1595 : static void
1596 : ExtendMultiXactOffset(MultiXactId multi)
1597 0 : {
1598 : int pageno;
1599 :
1600 : /*
1601 : * No work except at first MultiXactId of a page. But beware: just after
1602 : * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
1603 : */
1604 0 : if (MultiXactIdToOffsetEntry(multi) != 0 &&
1605 : multi != FirstMultiXactId)
1606 0 : return;
1607 :
1608 0 : pageno = MultiXactIdToOffsetPage(multi);
1609 :
1610 0 : LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
1611 :
1612 : /* Zero the page and make an XLOG entry about it */
1613 0 : ZeroMultiXactOffsetPage(pageno, true);
1614 :
1615 0 : LWLockRelease(MultiXactOffsetControlLock);
1616 : }
1617 :
1618 : /*
1619 : * Make sure that MultiXactMember has room for the members of a newly-
1620 : * allocated MultiXactId.
1621 : *
1622 : * Like the above routine, this is called while holding MultiXactGenLock;
1623 : * same comments apply.
1624 : */
1625 : static void
1626 : ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
1627 0 : {
1628 : /*
1629 : * It's possible that the members span more than one page of the members
1630 : * file, so we loop to ensure we consider each page. The coding is not
1631 : * optimal if the members span several pages, but that seems unusual
1632 : * enough to not worry much about.
1633 : */
1634 0 : while (nmembers > 0)
1635 : {
1636 : int entryno;
1637 :
1638 : /*
1639 : * Only zero when at first entry of a page.
1640 : */
1641 0 : entryno = MXOffsetToMemberEntry(offset);
1642 0 : if (entryno == 0)
1643 : {
1644 : int pageno;
1645 :
1646 0 : pageno = MXOffsetToMemberPage(offset);
1647 :
1648 0 : LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
1649 :
1650 : /* Zero the page and make an XLOG entry about it */
1651 0 : ZeroMultiXactMemberPage(pageno, true);
1652 :
1653 0 : LWLockRelease(MultiXactMemberControlLock);
1654 : }
1655 :
1656 : /* Advance to next page (OK if nmembers goes negative) */
1657 0 : offset += (MULTIXACT_MEMBERS_PER_PAGE - entryno);
1658 0 : nmembers -= (MULTIXACT_MEMBERS_PER_PAGE - entryno);
1659 : }
1660 0 : }
1661 :
1662 : /*
1663 : * Remove all MultiXactOffset and MultiXactMember segments before the oldest
1664 : * ones still of interest.
1665 : *
1666 : * This is called only during checkpoints. We assume no more than one
1667 : * backend does this at a time.
1668 : *
1669 : * XXX do we have any issues with needing to checkpoint here?
1670 : */
1671 : static void
1672 : TruncateMultiXact(void)
1673 19 : {
1674 : MultiXactId nextMXact;
1675 : MultiXactOffset nextOffset;
1676 : MultiXactId oldestMXact;
1677 : MultiXactOffset oldestOffset;
1678 : int cutoffPage;
1679 : int i;
1680 :
1681 : /*
1682 : * First, compute where we can safely truncate. Per notes above, this is
1683 : * the oldest valid value among all the OldestMemberMXactId[] and
1684 : * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
1685 : */
1686 19 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
1687 :
1688 : /*
1689 : * We have to beware of the possibility that nextMXact is in the
1690 : * wrapped-around state. We don't fix the counter itself here, but we
1691 : * must be sure to use a valid value in our calculation.
1692 : */
1693 19 : nextMXact = MultiXactState->nextMXact;
1694 19 : if (nextMXact < FirstMultiXactId)
1695 0 : nextMXact = FirstMultiXactId;
1696 :
1697 19 : oldestMXact = nextMXact;
1698 1976 : for (i = 1; i <= MaxBackends; i++)
1699 : {
1700 : MultiXactId thisoldest;
1701 :
1702 1957 : thisoldest = OldestMemberMXactId[i];
1703 1957 : if (MultiXactIdIsValid(thisoldest) &&
1704 : MultiXactIdPrecedes(thisoldest, oldestMXact))
1705 0 : oldestMXact = thisoldest;
1706 1957 : thisoldest = OldestVisibleMXactId[i];
1707 1957 : if (MultiXactIdIsValid(thisoldest) &&
1708 : MultiXactIdPrecedes(thisoldest, oldestMXact))
1709 0 : oldestMXact = thisoldest;
1710 : }
1711 :
1712 : /* Save the current nextOffset too */
1713 19 : nextOffset = MultiXactState->nextOffset;
1714 :
1715 19 : LWLockRelease(MultiXactGenLock);
1716 :
1717 : debug_elog3(DEBUG2, "MultiXact: truncation point = %u", oldestMXact);
1718 :
1719 : /*
1720 : * If we already truncated at this point, do nothing. This saves time
1721 : * when no MultiXacts are getting used, which is probably not uncommon.
1722 : */
1723 19 : if (MultiXactState->lastTruncationPoint == oldestMXact)
1724 5 : return;
1725 :
1726 : /*
1727 : * We need to determine where to truncate MultiXactMember. If we found a
1728 : * valid oldest MultiXactId, read its starting offset; otherwise we use
1729 : * the nextOffset value we saved above.
1730 : */
1731 14 : if (oldestMXact == nextMXact)
1732 14 : oldestOffset = nextOffset;
1733 : else
1734 : {
1735 : int pageno;
1736 : int slotno;
1737 : int entryno;
1738 : MultiXactOffset *offptr;
1739 :
1740 : /* lock is acquired by SimpleLruReadPage_ReadOnly */
1741 :
1742 0 : pageno = MultiXactIdToOffsetPage(oldestMXact);
1743 0 : entryno = MultiXactIdToOffsetEntry(oldestMXact);
1744 :
1745 0 : slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, oldestMXact);
1746 0 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1747 0 : offptr += entryno;
1748 0 : oldestOffset = *offptr;
1749 :
1750 0 : LWLockRelease(MultiXactOffsetControlLock);
1751 : }
1752 :
1753 : /*
1754 : * The cutoff point is the start of the segment containing oldestMXact. We
1755 : * pass the *page* containing oldestMXact to SimpleLruTruncate.
1756 : */
1757 14 : cutoffPage = MultiXactIdToOffsetPage(oldestMXact);
1758 :
1759 14 : SimpleLruTruncate(MultiXactOffsetCtl, cutoffPage);
1760 :
1761 : /*
1762 : * Also truncate MultiXactMember at the previously determined offset.
1763 : */
1764 14 : cutoffPage = MXOffsetToMemberPage(oldestOffset);
1765 :
1766 14 : SimpleLruTruncate(MultiXactMemberCtl, cutoffPage);
1767 :
1768 : /*
1769 : * Set the last known truncation point. We don't need a lock for this
1770 : * since only one backend does checkpoints at a time.
1771 : */
1772 14 : MultiXactState->lastTruncationPoint = oldestMXact;
1773 : }
1774 :
1775 : /*
1776 : * Decide which of two MultiXactOffset page numbers is "older" for truncation
1777 : * purposes.
1778 : *
1779 : * We need to use comparison of MultiXactId here in order to do the right
1780 : * thing with wraparound. However, if we are asked about page number zero, we
1781 : * don't want to hand InvalidMultiXactId to MultiXactIdPrecedes: it'll get
1782 : * weird. So, offset both multis by FirstMultiXactId to avoid that.
1783 : * (Actually, the current implementation doesn't do anything weird with
1784 : * InvalidMultiXactId, but there's no harm in leaving this code like this.)
1785 : */
1786 : static bool
1787 : MultiXactOffsetPagePrecedes(int page1, int page2)
1788 42 : {
1789 : MultiXactId multi1;
1790 : MultiXactId multi2;
1791 :
1792 42 : multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE;
1793 42 : multi1 += FirstMultiXactId;
1794 42 : multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE;
1795 42 : multi2 += FirstMultiXactId;
1796 :
1797 42 : return MultiXactIdPrecedes(multi1, multi2);
1798 : }
1799 :
1800 : /*
1801 : * Decide which of two MultiXactMember page numbers is "older" for truncation
1802 : * purposes. There is no "invalid offset number" so use the numbers verbatim.
1803 : */
1804 : static bool
1805 : MultiXactMemberPagePrecedes(int page1, int page2)
1806 29 : {
1807 : MultiXactOffset offset1;
1808 : MultiXactOffset offset2;
1809 :
1810 29 : offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
1811 29 : offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
1812 :
1813 29 : return MultiXactOffsetPrecedes(offset1, offset2);
1814 : }
1815 :
1816 : /*
1817 : * Decide which of two MultiXactIds is earlier.
1818 : *
1819 : * XXX do we need to do something special for InvalidMultiXactId?
1820 : * (Doesn't look like it.)
1821 : */
1822 : static bool
1823 : MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
1824 42 : {
1825 42 : int32 diff = (int32) (multi1 - multi2);
1826 :
1827 42 : return (diff < 0);
1828 : }
1829 :
1830 : /*
1831 : * Decide which of two offsets is earlier.
1832 : */
1833 : static bool
1834 : MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
1835 29 : {
1836 29 : int32 diff = (int32) (offset1 - offset2);
1837 :
1838 29 : return (diff < 0);
1839 : }
1840 :
1841 :
1842 : /*
1843 : * Write an xlog record reflecting the zeroing of either a MEMBERs or
1844 : * OFFSETs page (info shows which)
1845 : */
1846 : static void
1847 : WriteMZeroPageXlogRec(int pageno, uint8 info)
1848 0 : {
1849 : XLogRecData rdata;
1850 :
1851 0 : rdata.data = (char *) (&pageno);
1852 0 : rdata.len = sizeof(int);
1853 0 : rdata.buffer = InvalidBuffer;
1854 0 : rdata.next = NULL;
1855 0 : (void) XLogInsert(RM_MULTIXACT_ID, info, &rdata);
1856 0 : }
1857 :
1858 : /*
1859 : * MULTIXACT resource manager's routines
1860 : */
1861 : void
1862 : multixact_redo(XLogRecPtr lsn, XLogRecord *record)
1863 0 : {
1864 0 : uint8 info = record->xl_info & ~XLR_INFO_MASK;
1865 :
1866 0 : if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
1867 : {
1868 : int pageno;
1869 : int slotno;
1870 :
1871 0 : memcpy(&pageno, XLogRecGetData(record), sizeof(int));
1872 :
1873 0 : LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
1874 :
1875 0 : slotno = ZeroMultiXactOffsetPage(pageno, false);
1876 0 : SimpleLruWritePage(MultiXactOffsetCtl, slotno, NULL);
1877 : Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
1878 :
1879 0 : LWLockRelease(MultiXactOffsetControlLock);
1880 : }
1881 0 : else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
1882 : {
1883 : int pageno;
1884 : int slotno;
1885 :
1886 0 : memcpy(&pageno, XLogRecGetData(record), sizeof(int));
1887 :
1888 0 : LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
1889 :
1890 0 : slotno = ZeroMultiXactMemberPage(pageno, false);
1891 0 : SimpleLruWritePage(MultiXactMemberCtl, slotno, NULL);
1892 : Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
1893 :
1894 0 : LWLockRelease(MultiXactMemberControlLock);
1895 : }
1896 0 : else if (info == XLOG_MULTIXACT_CREATE_ID)
1897 : {
1898 0 : xl_multixact_create *xlrec = (xl_multixact_create *) XLogRecGetData(record);
1899 0 : TransactionId *xids = xlrec->xids;
1900 : TransactionId max_xid;
1901 : int i;
1902 :
1903 : /* Store the data back into the SLRU files */
1904 0 : RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nxids, xids);
1905 :
1906 : /* Make sure nextMXact/nextOffset are beyond what this record has */
1907 0 : MultiXactAdvanceNextMXact(xlrec->mid + 1, xlrec->moff + xlrec->nxids);
1908 :
1909 : /*
1910 : * Make sure nextXid is beyond any XID mentioned in the record. This
1911 : * should be unnecessary, since any XID found here ought to have other
1912 : * evidence in the XLOG, but let's be safe.
1913 : */
1914 0 : max_xid = record->xl_xid;
1915 0 : for (i = 0; i < xlrec->nxids; i++)
1916 : {
1917 0 : if (TransactionIdPrecedes(max_xid, xids[i]))
1918 0 : max_xid = xids[i];
1919 : }
1920 0 : if (TransactionIdFollowsOrEquals(max_xid,
1921 : ShmemVariableCache->nextXid))
1922 : {
1923 0 : ShmemVariableCache->nextXid = max_xid;
1924 0 : TransactionIdAdvance(ShmemVariableCache->nextXid);
1925 : }
1926 : }
1927 : else
1928 0 : elog(PANIC, "multixact_redo: unknown op code %u", info);
1929 0 : }
1930 :
1931 : void
1932 : multixact_desc(StringInfo buf, uint8 xl_info, char *rec)
1933 0 : {
1934 0 : uint8 info = xl_info & ~XLR_INFO_MASK;
1935 :
1936 0 : if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
1937 : {
1938 : int pageno;
1939 :
1940 0 : memcpy(&pageno, rec, sizeof(int));
1941 0 : appendStringInfo(buf, "zero offsets page: %d", pageno);
1942 : }
1943 0 : else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
1944 : {
1945 : int pageno;
1946 :
1947 0 : memcpy(&pageno, rec, sizeof(int));
1948 0 : appendStringInfo(buf, "zero members page: %d", pageno);
1949 : }
1950 0 : else if (info == XLOG_MULTIXACT_CREATE_ID)
1951 : {
1952 0 : xl_multixact_create *xlrec = (xl_multixact_create *) rec;
1953 : int i;
1954 :
1955 0 : appendStringInfo(buf, "create multixact %u offset %u:",
1956 : xlrec->mid, xlrec->moff);
1957 0 : for (i = 0; i < xlrec->nxids; i++)
1958 0 : appendStringInfo(buf, " %u", xlrec->xids[i]);
1959 : }
1960 : else
1961 0 : appendStringInfo(buf, "UNKNOWN");
1962 0 : }
|