1 : /*-------------------------------------------------------------------------
2 : *
3 : * pruneheap.c
4 : * heap page pruning and HOT-chain management code
5 : *
6 : * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.5 2007/11/15 22:25:15 momjian Exp $
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/heapam.h"
18 : #include "access/transam.h"
19 : #include "miscadmin.h"
20 : #include "pgstat.h"
21 : #include "utils/inval.h"
22 :
23 :
24 : /* Local functions */
25 : static int heap_prune_chain(Relation relation, Buffer buffer,
26 : OffsetNumber rootoffnum,
27 : TransactionId OldestXmin,
28 : OffsetNumber *redirected, int *nredirected,
29 : OffsetNumber *nowdead, int *ndead,
30 : OffsetNumber *nowunused, int *nunused,
31 : bool redirect_move);
32 : static void heap_prune_record_redirect(OffsetNumber *redirected,
33 : int *nredirected,
34 : OffsetNumber offnum,
35 : OffsetNumber rdoffnum);
36 : static void heap_prune_record_dead(OffsetNumber *nowdead, int *ndead,
37 : OffsetNumber offnum);
38 : static void heap_prune_record_unused(OffsetNumber *nowunused, int *nunused,
39 : OffsetNumber offnum);
40 :
41 :
42 : /*
43 : * Optionally prune and repair fragmentation in the specified page.
44 : *
45 : * This is an opportunistic function. It will perform housekeeping
46 : * only if the page heuristically looks like a candidate for pruning and we
47 : * can acquire buffer cleanup lock without blocking.
48 : *
49 : * Note: this is called quite often. It's important that it fall out quickly
50 : * if there's not any use in pruning.
51 : *
52 : * Caller must have pin on the buffer, and must *not* have a lock on it.
53 : *
54 : * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
55 : * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
56 : */
57 : void
58 : heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
59 157000 : {
60 157000 : PageHeader dp = (PageHeader) BufferGetPage(buffer);
61 : Size minfree;
62 :
63 : /*
64 : * Let's see if we really need pruning.
65 : *
66 : * Forget it if page is not hinted to contain something prunable that's
67 : * older than OldestXmin.
68 : */
69 157000 : if (!PageIsPrunable(dp, OldestXmin))
70 135085 : return;
71 :
72 : /*
73 : * We prune when a previous UPDATE failed to find enough space on the page
74 : * for a new tuple version, or when free space falls below the relation's
75 : * fill-factor target (but not less than 10%).
76 : *
77 : * Checking free space here is questionable since we aren't holding any
78 : * lock on the buffer; in the worst case we could get a bogus answer. It's
79 : * unlikely to be *seriously* wrong, though, since reading either pd_lower
80 : * or pd_upper is probably atomic. Avoiding taking a lock seems better
81 : * than sometimes getting a wrong answer in what is after all just a
82 : * heuristic estimate.
83 : */
84 21915 : minfree = RelationGetTargetPageFreeSpace(relation,
85 : HEAP_DEFAULT_FILLFACTOR);
86 21915 : minfree = Max(minfree, BLCKSZ / 10); /* never target less than 10% of a page */
87 :
88 21915 : if (PageIsFull(dp) || PageGetHeapFreeSpace((Page) dp) < minfree)
89 : {
90 : /* OK, try to get exclusive buffer lock */
91 769 : if (!ConditionalLockBufferForCleanup(buffer)) /* don't block: pruning is purely opportunistic */
92 12 : return;
93 :
94 : /*
95 : * Now that we have buffer lock, get accurate information about the
96 : * page's free space, and recheck the heuristic about whether to
97 : * prune. (We needn't recheck PageIsPrunable, since no one else could
98 : * have pruned while we hold pin.)
99 : */
100 757 : if (PageIsFull(dp) || PageGetHeapFreeSpace((Page) dp) < minfree)
101 : {
102 : /* OK to prune (though not to remove redirects) */
103 757 : (void) heap_page_prune(relation, buffer, OldestXmin, false, true);
104 : }
105 :
106 : /* And release buffer lock */
107 757 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
108 : }
109 : }
110 :
111 :
112 : /*
113 : * Prune and repair fragmentation in the specified page.
114 : *
115 : * Caller must have pin and buffer cleanup lock on the page.
116 : *
117 : * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
118 : * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
119 : *
120 : * If redirect_move is set, we remove redirecting line pointers by
121 : * updating the root line pointer to point directly to the first non-dead
122 : * tuple in the chain. NOTE: eliminating the redirect changes the first
123 : * tuple's effective CTID, and is therefore unsafe except within VACUUM FULL.
124 : * The only reason we support this capability at all is that by using it,
125 : * VACUUM FULL need not cope with LP_REDIRECT items at all; which seems a
126 : * good thing since VACUUM FULL is overly complicated already.
127 : *
128 : * If report_stats is true then we send the number of reclaimed heap-only
129 : * tuples to pgstats. (This must be FALSE during vacuum, since vacuum will
130 : * send its own new total to pgstats, and we don't want this delta applied
131 : * on top of that.)
132 : *
133 : * Returns the number of tuples deleted from the page.
134 : */
135 : int
136 : heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
137 : bool redirect_move, bool report_stats)
138 3238 : {
139 3238 : int ndeleted = 0;
140 3238 : Page page = BufferGetPage(buffer);
141 : OffsetNumber offnum,
142 : maxoff;
143 : OffsetNumber redirected[MaxHeapTuplesPerPage * 2]; /* entries come in (from,to) pairs */
144 : OffsetNumber nowdead[MaxHeapTuplesPerPage];
145 : OffsetNumber nowunused[MaxHeapTuplesPerPage];
146 3238 : int nredirected = 0;
147 3238 : int ndead = 0;
148 3238 : int nunused = 0;
149 3238 : bool page_was_full = false;
150 : TransactionId save_prune_xid;
151 :
152 3238 : START_CRIT_SECTION(); /* page modifications and WAL record must be atomic */
153 :
154 : /*
155 : * Save the current pd_prune_xid and mark the page as clear of prunable
156 : * tuples. If we find a tuple which may soon become prunable, we shall set
157 : * the hint again.
158 : */
159 3238 : save_prune_xid = ((PageHeader) page)->pd_prune_xid;
160 3238 : PageClearPrunable(page);
161 :
162 : /*
163 : * Also clear the "page is full" flag if it is set, since there's no point
164 : * in repeating the prune/defrag process until something else happens to
165 : * the page.
166 : */
167 3238 : if (PageIsFull(page))
168 : {
169 120 : PageClearFull(page);
170 120 : page_was_full = true; /* remember so we can dirty the buffer even if nothing pruned */
171 : }
172 :
173 : /* Scan the page */
174 3238 : maxoff = PageGetMaxOffsetNumber(page);
175 3238 : for (offnum = FirstOffsetNumber;
176 262495 : offnum <= maxoff;
177 256019 : offnum = OffsetNumberNext(offnum))
178 : {
179 256019 : ItemId itemid = PageGetItemId(page, offnum);
180 :
181 : /* Nothing to do if slot is empty or already dead */
182 256019 : if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
183 26246 : continue;
184 :
185 : /* Process this item or chain of items */
186 229773 : ndeleted += heap_prune_chain(relation, buffer, offnum,
187 : OldestXmin,
188 : redirected, &nredirected,
189 : nowdead, &ndead,
190 : nowunused, &nunused,
191 : redirect_move);
192 : }
193 :
194 : /* Have we pruned any items? */
195 3238 : if (nredirected > 0 || ndead > 0 || nunused > 0)
196 : {
197 : /*
198 : * Repair page fragmentation, and update the page's hint bit about
199 : * whether it has free line pointers.
200 : */
201 862 : PageRepairFragmentation((Page) page);
202 :
203 862 : MarkBufferDirty(buffer);
204 :
205 : /*
206 : * Emit a WAL HEAP_CLEAN or HEAP_CLEAN_MOVE record showing what we did
207 : */
208 862 : if (!relation->rd_istemp) /* temp relations are never WAL-logged */
209 : {
210 : XLogRecPtr recptr;
211 :
212 862 : recptr = log_heap_clean(relation, buffer,
213 : redirected, nredirected,
214 : nowdead, ndead,
215 : nowunused, nunused,
216 : redirect_move);
217 862 : PageSetTLI(BufferGetPage(buffer), ThisTimeLineID);
218 862 : PageSetLSN(BufferGetPage(buffer), recptr); /* stamp page with the clean record's WAL position */
219 : }
220 : }
221 : else
222 : {
223 : /*
224 : * If we didn't prune anything, but have updated either the
225 : * pd_prune_xid field or the "page is full" flag, mark the buffer
226 : * dirty. This is treated as a non-WAL-logged hint.
227 : */
228 2376 : if (((PageHeader) page)->pd_prune_xid != save_prune_xid ||
229 : page_was_full)
230 0 : SetBufferCommitInfoNeedsSave(buffer);
231 : }
232 :
233 3238 : END_CRIT_SECTION();
234 :
235 : /*
236 : * If requested, report the number of tuples reclaimed to pgstats. This is
237 : * ndeleted minus ndead, because we don't want to count a now-DEAD root
238 : * item as a deletion for this purpose.
239 : */
240 3238 : if (report_stats && ndeleted > ndead)
241 237 : pgstat_update_heap_dead_tuples(relation, ndeleted - ndead);
242 :
243 : /*
244 : * XXX Should we update the FSM information of this page ?
245 : *
246 : * There are two schools of thought here. We may not want to update FSM
247 : * information so that the page is not used for unrelated UPDATEs/INSERTs
248 : * and any free space in this page will remain available for further
249 : * UPDATEs in *this* page, thus improving chances for doing HOT updates.
250 : *
251 : * But for a large table and where a page does not receive further UPDATEs
252 : * for a long time, we might waste this space by not updating the FSM
253 : * information. The relation may get extended and fragmented further.
254 : *
255 : * One possibility is to leave "fillfactor" worth of space in this page
256 : * and update FSM with the remaining space.
257 : *
258 : * In any case, the current FSM implementation doesn't accept
259 : * one-page-at-a-time updates, so this is all academic for now.
260 : */
261 :
262 3238 : return ndeleted;
263 : }
264 :
265 :
266 : /*
267 : * Prune specified item pointer or a HOT chain originating at that item.
268 : *
269 : * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
270 : * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
271 : * chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
272 : * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
273 : * DEAD, the OldestXmin test is just too coarse to detect it.
274 : *
275 : * The root line pointer is redirected to the tuple immediately after the
276 : * latest DEAD tuple. If all tuples in the chain are DEAD, the root line
277 : * pointer is marked LP_DEAD. (This includes the case of a DEAD simple
278 : * tuple, which we treat as a chain of length 1.)
279 : *
280 : * OldestXmin is the cutoff XID used to identify dead tuples.
281 : *
282 : * Redirected items are added to the redirected[] array (two entries per
283 : * redirection); items set to LP_DEAD state are added to nowdead[]; and
284 : * items set to LP_UNUSED state are added to nowunused[]. (These arrays
285 : * will be used to generate a WAL record after all chains are pruned.)
286 : *
287 : * If redirect_move is true, we get rid of redirecting line pointers.
288 : *
289 : * Returns the number of tuples deleted from the page.
290 : */
291 : static int
292 : heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
293 : TransactionId OldestXmin,
294 : OffsetNumber *redirected, int *nredirected,
295 : OffsetNumber *nowdead, int *ndead,
296 : OffsetNumber *nowunused, int *nunused,
297 : bool redirect_move)
298 229773 : {
299 229773 : int ndeleted = 0;
300 229773 : Page dp = (Page) BufferGetPage(buffer);
301 229773 : TransactionId priorXmax = InvalidTransactionId; /* xmax of previous chain member; next tuple's xmin must match */
302 : ItemId rootlp;
303 : HeapTupleHeader htup;
304 229773 : OffsetNumber latestdead = InvalidOffsetNumber,
305 229773 : maxoff = PageGetMaxOffsetNumber(dp),
306 : offnum;
307 : OffsetNumber chainitems[MaxHeapTuplesPerPage]; /* offsets of chain members, in chain order */
308 229773 : int nchain = 0,
309 : i;
310 :
311 229773 : rootlp = PageGetItemId(dp, rootoffnum);
312 :
313 : /*
314 : * If it's a heap-only tuple, then it is not the start of a HOT chain.
315 : */
316 229773 : if (ItemIdIsNormal(rootlp))
317 : {
318 228416 : htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
319 228416 : if (HeapTupleHeaderIsHeapOnly(htup))
320 : {
321 : /*
322 : * If the tuple is DEAD and doesn't chain to anything else, mark
323 : * it unused immediately. (If it does chain, we can only remove
324 : * it as part of pruning its chain.)
325 : *
326 : * We need this primarily to handle aborted HOT updates, that is,
327 : * XMIN_INVALID heap-only tuples. Those might not be linked to by
328 : * any chain, since the parent tuple might be re-updated before
329 : * any pruning occurs. So we have to be able to reap them
330 : * separately from chain-pruning.
331 : *
332 : * Note that we might first arrive at a dead heap-only tuple
333 : * either here or while following a chain below. Whichever path
334 : * gets there first will mark the tuple unused.
335 : */
336 1929 : if (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer)
337 : == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
338 : {
339 84 : ItemIdSetUnused(rootlp);
340 84 : heap_prune_record_unused(nowunused, nunused, rootoffnum);
341 84 : ndeleted++;
342 : }
343 :
344 : /* Nothing more to do */
345 1929 : return ndeleted;
346 : }
347 : }
348 :
349 : /* Start from the root tuple */
350 227844 : offnum = rootoffnum;
351 :
352 : /* while not end of the chain */
353 : for (;;)
354 : {
355 : ItemId lp;
356 : bool tupdead,
357 : recent_dead;
358 :
359 : /* Some sanity checks */
360 230947 : if (offnum < FirstOffsetNumber || offnum > maxoff)
361 0 : break;
362 :
363 230947 : lp = PageGetItemId(dp, offnum);
364 :
365 230947 : if (!ItemIdIsUsed(lp))
366 26 : break;
367 :
368 : /*
369 : * If we are looking at the redirected root line pointer, jump to the
370 : * first normal tuple in the chain. If we find a redirect somewhere
371 : * else, stop --- it must not be same chain.
372 : */
373 230921 : if (ItemIdIsRedirected(lp))
374 : {
375 1357 : if (nchain > 0)
376 0 : break; /* not at start of chain */
377 1357 : chainitems[nchain++] = offnum;
378 1357 : offnum = ItemIdGetRedirect(rootlp); /* lp == rootlp here, since a redirect is only legal at chain start */
379 1357 : continue;
380 : }
381 :
382 : /*
383 : * Likewise, a dead item pointer can't be part of the chain. (We
384 : * already eliminated the case of dead root tuple outside this
385 : * function.)
386 : */
387 229564 : if (ItemIdIsDead(lp))
388 0 : break;
389 :
390 : Assert(ItemIdIsNormal(lp));
391 229564 : htup = (HeapTupleHeader) PageGetItem(dp, lp);
392 :
393 : /*
394 : * Check the tuple XMIN against prior XMAX, if any
395 : */
396 229564 : if (TransactionIdIsValid(priorXmax) &&
397 : !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
398 0 : break; /* broken chain: t_ctid pointed at an unrelated tuple */
399 :
400 : /*
401 : * OK, this tuple is indeed a member of the chain.
402 : */
403 229564 : chainitems[nchain++] = offnum;
404 :
405 : /*
406 : * Check tuple's visibility status.
407 : */
408 229564 : tupdead = recent_dead = false;
409 :
410 229564 : switch (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer))
411 : {
412 : case HEAPTUPLE_DEAD:
413 22241 : tupdead = true;
414 22241 : break;
415 :
416 : case HEAPTUPLE_RECENTLY_DEAD:
417 524 : recent_dead = true;
418 :
419 : /*
420 : * This tuple may soon become DEAD. Update the hint field so
421 : * that the page is reconsidered for pruning in future.
422 : */
423 524 : PageSetPrunable(dp, HeapTupleHeaderGetXmax(htup));
424 : break;
425 :
426 : case HEAPTUPLE_DELETE_IN_PROGRESS:
427 :
428 : /*
429 : * This tuple may soon become DEAD. Update the hint field so
430 : * that the page is reconsidered for pruning in future.
431 : */
432 104 : PageSetPrunable(dp, HeapTupleHeaderGetXmax(htup));
433 : break;
434 :
435 : case HEAPTUPLE_LIVE:
436 : case HEAPTUPLE_INSERT_IN_PROGRESS:
437 :
438 : /*
439 : * If we wanted to optimize for aborts, we might consider
440 : * marking the page prunable when we see INSERT_IN_PROGRESS.
441 : * But we don't. See related decisions about when to mark the
442 : * page prunable in heapam.c.
443 : */
444 : break;
445 :
446 : default:
447 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
448 : break;
449 : }
450 :
451 : /*
452 : * Remember the last DEAD tuple seen. We will advance past
453 : * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
454 : * but we can't advance past anything else. (XXX is it really worth
455 : * continuing to scan beyond RECENTLY_DEAD? The case where we will
456 : * find another DEAD tuple is a fairly unusual corner case.)
457 : */
458 229564 : if (tupdead)
459 22241 : latestdead = offnum;
460 207323 : else if (!recent_dead)
461 206799 : break;
462 :
463 : /*
464 : * If the tuple is not HOT-updated, then we are at the end of this
465 : * HOT-update chain.
466 : */
467 22765 : if (!HeapTupleHeaderIsHotUpdated(htup))
468 : break;
469 :
470 : /*
471 : * Advance to next chain member.
472 : */
473 : Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
474 : BufferGetBlockNumber(buffer));
475 1746 : offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
476 1746 : priorXmax = HeapTupleHeaderGetXmax(htup);
477 : }
478 :
479 : /*
480 : * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
481 : * the DEAD tuples at the start of the chain are removed and the root line
482 : * pointer is appropriately redirected.
483 : */
484 227844 : if (OffsetNumberIsValid(latestdead))
485 : {
486 : /*
487 : * Mark as unused each intermediate item that we are able to remove
488 : * from the chain.
489 : *
490 : * When the previous item is the last dead tuple seen, we are at the
491 : * right candidate for redirection.
492 : */
493 22520 : for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
494 : {
495 1329 : ItemId lp = PageGetItemId(dp, chainitems[i]);
496 :
497 1329 : ItemIdSetUnused(lp);
498 1329 : heap_prune_record_unused(nowunused, nunused, chainitems[i]);
499 1329 : ndeleted++;
500 : }
501 :
502 : /*
503 : * If the root entry had been a normal tuple, we are deleting it, so
504 : * count it in the result. But changing a redirect (even to DEAD
505 : * state) doesn't count.
506 : */
507 21191 : if (ItemIdIsNormal(rootlp))
508 20912 : ndeleted++;
509 :
510 : /*
511 : * If the DEAD tuple is at the end of the chain, the entire chain is
512 : * dead and the root line pointer can be marked dead. Otherwise just
513 : * redirect the root to the correct chain member.
514 : */
515 21191 : if (i >= nchain)
516 : {
517 20570 : ItemIdSetDead(rootlp);
518 20570 : heap_prune_record_dead(nowdead, ndead, rootoffnum);
519 : }
520 : else
521 : {
522 621 : ItemIdSetRedirect(rootlp, chainitems[i]);
523 621 : heap_prune_record_redirect(redirected, nredirected,
524 : rootoffnum,
525 : chainitems[i]);
526 : }
527 : }
528 206653 : else if (nchain < 2 && ItemIdIsRedirected(rootlp))
529 : {
530 : /*
531 : * We found a redirect item that doesn't point to a valid follow-on
532 : * item. This can happen if the loop in heap_page_prune caused us to
533 : * visit the dead successor of a redirect item before visiting the
534 : * redirect item. We can clean up by setting the redirect item to
535 : * DEAD state.
536 : */
537 6 : ItemIdSetDead(rootlp);
538 6 : heap_prune_record_dead(nowdead, ndead, rootoffnum);
539 : }
540 :
541 : /*
542 : * If requested, eliminate LP_REDIRECT items by moving tuples. Note that
543 : * if the root item is LP_REDIRECT and doesn't point to a valid follow-on
544 : * item, we already killed it above.
545 : */
546 227844 : if (redirect_move && ItemIdIsRedirected(rootlp))
547 : {
548 100 : OffsetNumber firstoffnum = ItemIdGetRedirect(rootlp);
549 100 : ItemId firstlp = PageGetItemId(dp, firstoffnum);
550 : HeapTupleData firsttup;
551 :
552 : Assert(ItemIdIsNormal(firstlp));
553 : /* Set up firsttup to reference the tuple at its existing CTID */
554 100 : firsttup.t_data = (HeapTupleHeader) PageGetItem(dp, firstlp);
555 100 : firsttup.t_len = ItemIdGetLength(firstlp);
556 100 : ItemPointerSet(&firsttup.t_self,
557 : BufferGetBlockNumber(buffer),
558 : firstoffnum);
559 100 : firsttup.t_tableOid = RelationGetRelid(relation);
560 :
561 : /*
562 : * Mark the tuple for invalidation. Needed because we're changing its
563 : * CTID.
564 : */
565 100 : CacheInvalidateHeapTuple(relation, &firsttup);
566 :
567 : /*
568 : * Change heap-only status of the tuple because after the line pointer
569 : * manipulation, it's no longer a heap-only tuple, but is directly
570 : * pointed to by index entries.
571 : */
572 : Assert(HeapTupleIsHeapOnly(&firsttup));
573 100 : HeapTupleClearHeapOnly(&firsttup);
574 :
575 : /* Now move the item pointer */
576 100 : *rootlp = *firstlp; /* copy line pointer: root now points directly at the tuple */
577 100 : ItemIdSetUnused(firstlp);
578 :
579 : /*
580 : * If latestdead is valid, we have already recorded the redirection
581 : * above. Otherwise, do it now.
582 : *
583 : * We don't record firstlp in the nowunused[] array, since the
584 : * redirection entry is enough to tell heap_xlog_clean what to do.
585 : */
586 100 : if (!OffsetNumberIsValid(latestdead))
587 37 : heap_prune_record_redirect(redirected, nredirected, rootoffnum,
588 : firstoffnum);
589 : }
590 :
591 227844 : return ndeleted;
592 : }
593 :
594 :
595 : /* Record newly-redirected item pointer */
596 : static void
597 : heap_prune_record_redirect(OffsetNumber *redirected, int *nredirected,
598 : OffsetNumber offnum, OffsetNumber rdoffnum)
599 658 : {
600 : Assert(*nredirected < MaxHeapTuplesPerPage);
601 658 : redirected[*nredirected * 2] = offnum; /* source: the (root) item being redirected */
602 658 : redirected[*nredirected * 2 + 1] = rdoffnum; /* destination: first surviving chain member */
603 658 : (*nredirected)++; /* counts pairs, so redirected[] needs 2*MaxHeapTuplesPerPage slots */
604 658 : }
605 :
606 : /* Record newly-dead item pointer */
607 : static void
608 : heap_prune_record_dead(OffsetNumber *nowdead, int *ndead,
609 : OffsetNumber offnum)
610 20576 : {
611 : Assert(*ndead < MaxHeapTuplesPerPage);
612 20576 : nowdead[*ndead] = offnum; /* remember item set to LP_DEAD, for the WAL clean record */
613 20576 : (*ndead)++;
614 20576 : }
615 :
616 : /* Record newly-unused item pointer */
617 : static void
618 : heap_prune_record_unused(OffsetNumber *nowunused, int *nunused,
619 : OffsetNumber offnum)
620 1413 : {
621 : Assert(*nunused < MaxHeapTuplesPerPage);
622 1413 : nowunused[*nunused] = offnum; /* remember item set to LP_UNUSED, for the WAL clean record */
623 1413 : (*nunused)++;
624 1413 : }
625 :
626 :
627 : /*
628 : * For all items in this page, find their respective root line pointers.
629 : * If item k is part of a HOT-chain with root at item j, then we set
630 : * root_offsets[k - 1] = j.
631 : *
632 : * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
633 : * We zero out all unused entries.
634 : *
635 : * The function must be called with at least share lock on the buffer, to
636 : * prevent concurrent prune operations.
637 : *
638 : * Note: The information collected here is valid only as long as the caller
639 : * holds a pin on the buffer. Once pin is released, a tuple might be pruned
640 : * and reused by a completely unrelated tuple.
641 : */
642 : void
643 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
644 4482 : {
645 : OffsetNumber offnum,
646 : maxoff;
647 :
648 4482 : MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber)); /* zero == InvalidOffsetNumber: no root known */
649 :
650 4482 : maxoff = PageGetMaxOffsetNumber(page);
651 233831 : for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum++)
652 : {
653 229349 : ItemId lp = PageGetItemId(page, offnum);
654 : HeapTupleHeader htup;
655 : OffsetNumber nextoffnum;
656 : TransactionId priorXmax;
657 :
658 : /* skip unused and dead items */
659 229349 : if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
660 40 : continue;
661 :
662 229309 : if (ItemIdIsNormal(lp))
663 : {
664 229309 : htup = (HeapTupleHeader) PageGetItem(page, lp);
665 :
666 : /*
667 : * Check if this tuple is part of a HOT-chain rooted at some other
668 : * tuple. If so, skip it for now; we'll process it when we find
669 : * its root.
670 : */
671 229309 : if (HeapTupleHeaderIsHeapOnly(htup))
672 16 : continue;
673 :
674 : /*
675 : * This is either a plain tuple or the root of a HOT-chain.
676 : * Remember it in the mapping.
677 : */
678 229293 : root_offsets[offnum - 1] = offnum;
679 :
680 : /* If it's not the start of a HOT-chain, we're done with it */
681 229293 : if (!HeapTupleHeaderIsHotUpdated(htup))
682 : continue;
683 :
684 : /* Set up to scan the HOT-chain */
685 8 : nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
686 8 : priorXmax = HeapTupleHeaderGetXmax(htup);
687 : }
688 : else
689 : {
690 : /* Must be a redirect item. We do not set its root_offsets entry */
691 : Assert(ItemIdIsRedirected(lp));
692 : /* Set up to scan the HOT-chain */
693 0 : nextoffnum = ItemIdGetRedirect(lp);
694 0 : priorXmax = InvalidTransactionId;
695 : }
696 :
697 : /*
698 : * Now follow the HOT-chain and collect other tuples in the chain.
699 : *
700 : * Note: Even though this is a nested loop, the complexity of the
701 : * function is O(N) because a tuple in the page should be visited not
702 : * more than twice, once in the outer loop and once in HOT-chain
703 : * chases.
704 : */
705 : for (;;)
706 : {
707 16 : lp = PageGetItemId(page, nextoffnum);
708 :
709 : /* Check for broken chains */
710 16 : if (!ItemIdIsNormal(lp))
711 0 : break;
712 :
713 16 : htup = (HeapTupleHeader) PageGetItem(page, lp);
714 :
715 16 : if (TransactionIdIsValid(priorXmax) &&
716 : !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
717 0 : break; /* xmin/xmax mismatch: t_ctid points at an unrelated tuple */
718 :
719 : /* Remember the root line pointer for this item */
720 16 : root_offsets[nextoffnum - 1] = offnum;
721 :
722 : /* Advance to next chain member, if any */
723 16 : if (!HeapTupleHeaderIsHotUpdated(htup))
724 : break;
725 :
726 8 : nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
727 8 : priorXmax = HeapTupleHeaderGetXmax(htup);
728 8 : }
729 : }
730 4482 : }
|