1 : /*-------------------------------------------------------------------------
2 : *
3 : * heapam.c
4 : * heap access method code
5 : *
6 : * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.248 2008/01/14 01:39:09 tgl Exp $
12 : *
13 : *
14 : * INTERFACE ROUTINES
15 : * relation_open - open any relation by relation OID
16 : * relation_openrv - open any relation specified by a RangeVar
17 : * relation_close - close any relation
18 : * heap_open - open a heap relation by relation OID
19 : * heap_openrv - open a heap relation specified by a RangeVar
20 : * heap_close - (now just a macro for relation_close)
21 : * heap_beginscan - begin relation scan
22 : * heap_rescan - restart a relation scan
23 : * heap_endscan - end relation scan
24 : * heap_getnext - retrieve next tuple in scan
25 : * heap_fetch - retrieve tuple with given tid
26 : * heap_insert - insert tuple into a relation
27 : * heap_delete - delete a tuple from a relation
28 : * heap_update - replace a tuple in a relation with another tuple
29 : * heap_markpos - mark scan position
30 : * heap_restrpos - restore position to marked location
31 : * heap_sync - sync heap, for when no WAL has been written
32 : *
33 : * NOTES
34 : * This file contains the heap_ routines which implement
35 : * the POSTGRES heap access method used for all POSTGRES
36 : * relations.
37 : *
38 : *-------------------------------------------------------------------------
39 : */
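/*
 * Illustrative sketch (not part of the original file): a typical caller
 * drives a sequential scan through the interface routines listed above.
 * Lock mode, snapshot choice, and error handling are simplified here.
 *
 *		Relation	rel = heap_open(relid, AccessShareLock);
 *		HeapScanDesc scan = heap_beginscan(rel, GetTransactionSnapshot(),
 *										   0, NULL);
 *		HeapTuple	tup;
 *
 *		while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
 *		{
 *			... examine tup; it is only valid until the next heap_getnext ...
 *		}
 *
 *		heap_endscan(scan);
 *		heap_close(rel, AccessShareLock);
 */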
40 : #include "postgres.h"
41 :
42 : #include "access/heapam.h"
43 : #include "access/hio.h"
44 : #include "access/multixact.h"
45 : #include "access/transam.h"
46 : #include "access/tuptoaster.h"
47 : #include "access/valid.h"
48 : #include "access/xact.h"
49 : #include "catalog/catalog.h"
50 : #include "catalog/namespace.h"
51 : #include "miscadmin.h"
52 : #include "pgstat.h"
53 : #include "storage/procarray.h"
54 : #include "storage/smgr.h"
55 : #include "utils/datum.h"
56 : #include "utils/inval.h"
57 : #include "utils/lsyscache.h"
58 : #include "utils/relcache.h"
59 : #include "utils/syscache.h"
60 :
61 :
62 : /* GUC variable */
63 : bool synchronize_seqscans = true;
64 :
65 :
66 : static HeapScanDesc heap_beginscan_internal(Relation relation,
67 : Snapshot snapshot,
68 : int nkeys, ScanKey key,
69 : bool allow_strat, bool allow_sync,
70 : bool is_bitmapscan);
71 : static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
72 : ItemPointerData from, Buffer newbuf, HeapTuple newtup, bool move);
73 : static bool HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs,
74 : HeapTuple oldtup, HeapTuple newtup);
75 :
76 :
77 : /* ----------------------------------------------------------------
78 : * heap support routines
79 : * ----------------------------------------------------------------
80 : */
81 :
82 : /* ----------------
83 : * initscan - scan code common to heap_beginscan and heap_rescan
84 : * ----------------
85 : */
86 : static void
87 : initscan(HeapScanDesc scan, ScanKey key)
88 26662 : {
89 : bool allow_strat;
90 : bool allow_sync;
91 :
92 : /*
93 : * Determine the number of blocks we have to scan.
94 : *
95 : * It is sufficient to do this once at scan start, since any tuples added
96 : * while the scan is in progress will be invisible to my snapshot anyway.
97 : * (That is not true when using a non-MVCC snapshot. However, we couldn't
98 : * guarantee to return tuples added after scan start anyway, since they
99 : * might go into pages we already scanned. To guarantee consistent
100 : * results for a non-MVCC snapshot, the caller must hold some higher-level
101 : * lock that ensures the interesting tuple(s) won't change.)
102 : */
103 26662 : scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
104 :
105 : /*
106 : * If the table is large relative to NBuffers, use a bulk-read access
107 : * strategy and enable synchronized scanning (see syncscan.c). Although
108 : * the thresholds for these features could be different, we make them the
109 : * same so that there are only two behaviors to tune rather than four.
110 : * (However, some callers need to be able to disable one or both of
111 : * these behaviors, independently of the size of the table; also there
112 : * is a GUC variable that can disable synchronized scanning.)
113 : *
114 : * During a rescan, don't make a new strategy object if we don't have to.
115 : */
116 26662 : if (!scan->rs_rd->rd_istemp &&
117 : scan->rs_nblocks > NBuffers / 4)
118 : {
119 0 : allow_strat = scan->rs_allow_strat;
120 0 : allow_sync = scan->rs_allow_sync;
121 : }
122 : else
123 26662 : allow_strat = allow_sync = false;
124 :
125 26662 : if (allow_strat)
126 : {
127 0 : if (scan->rs_strategy == NULL)
128 0 : scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
129 : }
130 : else
131 : {
132 26662 : if (scan->rs_strategy != NULL)
133 0 : FreeAccessStrategy(scan->rs_strategy);
134 26662 : scan->rs_strategy = NULL;
135 : }
136 :
137 26662 : if (allow_sync && synchronize_seqscans)
138 : {
139 0 : scan->rs_syncscan = true;
140 0 : scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks);
141 : }
142 : else
143 : {
144 26662 : scan->rs_syncscan = false;
145 26662 : scan->rs_startblock = 0;
146 : }
147 :
148 26662 : scan->rs_inited = false;
149 26662 : scan->rs_ctup.t_data = NULL;
150 26662 : ItemPointerSetInvalid(&scan->rs_ctup.t_self);
151 26662 : scan->rs_cbuf = InvalidBuffer;
152 26662 : scan->rs_cblock = InvalidBlockNumber;
153 :
154 : /* we don't have a marked position... */
155 26662 : ItemPointerSetInvalid(&(scan->rs_mctid));
156 :
157 : /* page-at-a-time fields are always invalid when not rs_inited */
158 :
159 : /*
160 : * copy the scan key, if appropriate
161 : */
162 26662 : if (key != NULL)
163 5898 : memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData));
164 :
165 : /*
166 : * Currently, we don't have a stats counter for bitmap heap scans (but the
167 : * underlying bitmap index scans will be counted).
168 : */
169 26662 : if (!scan->rs_bitmapscan)
170 26308 : pgstat_count_heap_scan(scan->rs_rd);
171 26662 : }
172 :
173 : /*
174 : * heapgetpage - subroutine for heapgettup()
175 : *
176 : * This routine reads and pins the specified page of the relation.
177 : * In page-at-a-time mode it performs additional work, namely determining
178 : * which tuples on the page are visible.
179 : */
180 : static void
181 : heapgetpage(HeapScanDesc scan, BlockNumber page)
182 183680 : {
183 : Buffer buffer;
184 : Snapshot snapshot;
185 : Page dp;
186 : int lines;
187 : int ntup;
188 : OffsetNumber lineoff;
189 : ItemId lpp;
190 :
191 : Assert(page < scan->rs_nblocks);
192 :
193 : /* release previous scan buffer, if any */
194 183680 : if (BufferIsValid(scan->rs_cbuf))
195 : {
196 161003 : ReleaseBuffer(scan->rs_cbuf);
197 161003 : scan->rs_cbuf = InvalidBuffer;
198 : }
199 :
200 : /* read page using selected strategy */
201 183680 : scan->rs_cbuf = ReadBufferWithStrategy(scan->rs_rd,
202 : page,
203 : scan->rs_strategy);
204 183680 : scan->rs_cblock = page;
205 :
206 183680 : if (!scan->rs_pageatatime)
207 149235 : return;
208 :
209 34445 : buffer = scan->rs_cbuf;
210 34445 : snapshot = scan->rs_snapshot;
211 :
212 : /*
213 : * Prune and repair fragmentation for the whole page, if possible.
214 : */
215 34445 : heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin);
216 :
217 : /*
218 : * We must hold share lock on the buffer content while examining tuple
219 : * visibility. Afterwards, however, the tuples we have found to be
220 : * visible are guaranteed good as long as we hold the buffer pin.
221 : */
222 34445 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
223 :
224 34445 : dp = (Page) BufferGetPage(buffer);
225 34445 : lines = PageGetMaxOffsetNumber(dp);
226 34445 : ntup = 0;
227 :
228 34445 : for (lineoff = FirstOffsetNumber, lpp = PageGetItemId(dp, lineoff);
229 1092949 : lineoff <= lines;
230 1024059 : lineoff++, lpp++)
231 : {
232 1024059 : if (ItemIdIsNormal(lpp))
233 : {
234 : HeapTupleData loctup;
235 : bool valid;
236 :
237 920976 : loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
238 920976 : loctup.t_len = ItemIdGetLength(lpp);
239 920976 : ItemPointerSet(&(loctup.t_self), page, lineoff);
240 :
241 920976 : valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
242 920976 : if (valid)
243 906232 : scan->rs_vistuples[ntup++] = lineoff;
244 : }
245 : }
246 :
247 34445 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
248 :
249 : Assert(ntup <= MaxHeapTuplesPerPage);
250 34445 : scan->rs_ntuples = ntup;
251 : }
252 :
253 : /* ----------------
254 : * heapgettup - fetch next heap tuple
255 : *
256 : * Initialize the scan if not already done; then advance to the next
257 : * tuple as indicated by "dir"; return the next tuple in scan->rs_ctup,
258 : * or set scan->rs_ctup.t_data = NULL if no more tuples.
259 : *
260 : * dir == NoMovementScanDirection means "re-fetch the tuple indicated
261 : * by scan->rs_ctup".
262 : *
263 : * Note: the reason nkeys/key are passed separately, even though they are
264 : * kept in the scan descriptor, is that the caller may not want us to check
265 : * the scankeys.
266 : *
267 : * Note: when we fall off the end of the scan in either direction, we
268 : * reset rs_inited. This means that a further request with the same
269 : * scan direction will restart the scan, which is a bit odd, but a
270 : * request with the opposite scan direction will start a fresh scan
271 : * in the proper direction. The latter is required behavior for cursors,
272 : * while the former case is generally undefined behavior in Postgres
273 : * so we don't care too much.
274 : * ----------------
275 : */
276 : static void
277 : heapgettup(HeapScanDesc scan,
278 : ScanDirection dir,
279 : int nkeys,
280 : ScanKey key)
281 254224 : {
282 254224 : HeapTuple tuple = &(scan->rs_ctup);
283 254224 : Snapshot snapshot = scan->rs_snapshot;
284 254224 : bool backward = ScanDirectionIsBackward(dir);
285 : BlockNumber page;
286 : bool finished;
287 : Page dp;
288 : int lines;
289 : OffsetNumber lineoff;
290 : int linesleft;
291 : ItemId lpp;
292 :
293 : /*
294 : * calculate next starting lineoff, given scan direction
295 : */
296 254224 : if (ScanDirectionIsForward(dir))
297 : {
298 254224 : if (!scan->rs_inited)
299 : {
300 : /*
301 : * return null immediately if relation is empty
302 : */
303 6769 : if (scan->rs_nblocks == 0)
304 : {
305 : Assert(!BufferIsValid(scan->rs_cbuf));
306 481 : tuple->t_data = NULL;
307 481 : return;
308 : }
309 6288 : page = scan->rs_startblock; /* first page */
310 6288 : heapgetpage(scan, page);
311 6288 : lineoff = FirstOffsetNumber; /* first offnum */
312 6288 : scan->rs_inited = true;
313 : }
314 : else
315 : {
316 : /* continue from previously returned page/tuple */
317 247455 : page = scan->rs_cblock; /* current page */
318 247455 : lineoff = /* next offnum */
319 : OffsetNumberNext(ItemPointerGetOffsetNumber(&(tuple->t_self)));
320 : }
321 :
322 253743 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
323 :
324 253743 : dp = (Page) BufferGetPage(scan->rs_cbuf);
325 253743 : lines = PageGetMaxOffsetNumber(dp);
326 : /* page and lineoff now reference the physically next tid */
327 :
328 253743 : linesleft = lines - lineoff + 1;
329 : }
330 0 : else if (backward)
331 : {
332 0 : if (!scan->rs_inited)
333 : {
334 : /*
335 : * return null immediately if relation is empty
336 : */
337 0 : if (scan->rs_nblocks == 0)
338 : {
339 : Assert(!BufferIsValid(scan->rs_cbuf));
340 0 : tuple->t_data = NULL;
341 0 : return;
342 : }
343 :
344 : /*
345 : * Disable reporting to syncscan logic in a backwards scan; it's
346 : * not very likely anyone else is doing the same thing at the same
347 : * time, and much more likely that we'll just bollix things for
348 : * forward scanners.
349 : */
350 0 : scan->rs_syncscan = false;
351 : /* start from last page of the scan */
352 0 : if (scan->rs_startblock > 0)
353 0 : page = scan->rs_startblock - 1;
354 : else
355 0 : page = scan->rs_nblocks - 1;
356 0 : heapgetpage(scan, page);
357 : }
358 : else
359 : {
360 : /* continue from previously returned page/tuple */
361 0 : page = scan->rs_cblock; /* current page */
362 : }
363 :
364 0 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
365 :
366 0 : dp = (Page) BufferGetPage(scan->rs_cbuf);
367 0 : lines = PageGetMaxOffsetNumber(dp);
368 :
369 0 : if (!scan->rs_inited)
370 : {
371 0 : lineoff = lines; /* final offnum */
372 0 : scan->rs_inited = true;
373 : }
374 : else
375 : {
376 0 : lineoff = /* previous offnum */
377 : OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self)));
378 : }
379 : /* page and lineoff now reference the physically previous tid */
380 :
381 0 : linesleft = lineoff;
382 : }
383 : else
384 : {
385 : /*
386 : * ``no movement'' scan direction: refetch prior tuple
387 : */
388 0 : if (!scan->rs_inited)
389 : {
390 : Assert(!BufferIsValid(scan->rs_cbuf));
391 0 : tuple->t_data = NULL;
392 0 : return;
393 : }
394 :
395 0 : page = ItemPointerGetBlockNumber(&(tuple->t_self));
396 0 : if (page != scan->rs_cblock)
397 0 : heapgetpage(scan, page);
398 :
399 : /* Since the tuple was previously fetched, needn't lock page here */
400 0 : dp = (Page) BufferGetPage(scan->rs_cbuf);
401 0 : lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
402 0 : lpp = PageGetItemId(dp, lineoff);
403 : Assert(ItemIdIsNormal(lpp));
404 :
405 0 : tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
406 0 : tuple->t_len = ItemIdGetLength(lpp);
407 :
408 0 : return;
409 : }
410 :
411 : /*
412 : * advance the scan until we find a qualifying tuple or run out of stuff
413 : * to scan
414 : */
415 253743 : lpp = PageGetItemId(dp, lineoff);
416 : for (;;)
417 : {
418 6833004 : while (linesleft > 0)
419 : {
420 6686787 : if (ItemIdIsNormal(lpp))
421 : {
422 : bool valid;
423 :
424 6660318 : tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
425 6660318 : tuple->t_len = ItemIdGetLength(lpp);
426 6660318 : ItemPointerSet(&(tuple->t_self), page, lineoff);
427 :
428 : /*
429 : * if current tuple qualifies, return it.
430 : */
431 6660318 : valid = HeapTupleSatisfiesVisibility(tuple,
432 : snapshot,
433 : scan->rs_cbuf);
434 :
435 6660318 : if (valid && key != NULL)
436 6412102 : HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd),
437 : nkeys, key, valid);
438 :
439 6660318 : if (valid)
440 : {
441 250473 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
442 250473 : return;
443 : }
444 : }
445 :
446 : /*
447 : * otherwise move to the next item on the page
448 : */
449 6436314 : --linesleft;
450 6436314 : if (backward)
451 : {
452 0 : --lpp; /* move back in this page's ItemId array */
453 0 : --lineoff;
454 : }
455 : else
456 : {
457 6436314 : ++lpp; /* move forward in this page's ItemId array */
458 6436314 : ++lineoff;
459 : }
460 : }
461 :
462 : /*
463 : * if we get here, it means we've exhausted the items on this page and
464 : * it's time to move to the next.
465 : */
466 146217 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
467 :
468 : /*
469 : * advance to next/prior page and detect end of scan
470 : */
471 146217 : if (backward)
472 : {
473 0 : finished = (page == scan->rs_startblock);
474 0 : if (page == 0)
475 0 : page = scan->rs_nblocks;
476 0 : page--;
477 : }
478 : else
479 : {
480 146217 : page++;
481 146217 : if (page >= scan->rs_nblocks)
482 3270 : page = 0;
483 146217 : finished = (page == scan->rs_startblock);
484 :
485 : /*
486 : * Report our new scan position for synchronization purposes. We
487 : * don't do that when moving backwards, however. That would just
488 : * mess up any other forward-moving scanners.
489 : *
490 : * Note: we do this before checking for end of scan so that the
491 : * final state of the position hint is back at the start of the
492 : * rel. That's not strictly necessary, but otherwise when you run
493 : * the same query multiple times the starting position would shift
494 : * a little bit backwards on every invocation, which is confusing.
495 : * We don't guarantee any specific ordering in general, though.
496 : */
497 146217 : if (scan->rs_syncscan)
498 0 : ss_report_location(scan->rs_rd, page);
499 : }
500 :
501 : /*
502 : * return NULL if we've exhausted all the pages
503 : */
504 146217 : if (finished)
505 : {
506 3270 : if (BufferIsValid(scan->rs_cbuf))
507 3270 : ReleaseBuffer(scan->rs_cbuf);
508 3270 : scan->rs_cbuf = InvalidBuffer;
509 3270 : scan->rs_cblock = InvalidBlockNumber;
510 3270 : tuple->t_data = NULL;
511 3270 : scan->rs_inited = false;
512 3270 : return;
513 : }
514 :
515 142947 : heapgetpage(scan, page);
516 :
517 142947 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
518 :
519 142947 : dp = (Page) BufferGetPage(scan->rs_cbuf);
520 142947 : lines = PageGetMaxOffsetNumber((Page) dp);
521 142947 : linesleft = lines;
522 142947 : if (backward)
523 : {
524 0 : lineoff = lines;
525 0 : lpp = PageGetItemId(dp, lines);
526 : }
527 : else
528 : {
529 142947 : lineoff = FirstOffsetNumber;
530 142947 : lpp = PageGetItemId(dp, FirstOffsetNumber);
531 : }
532 : }
533 : }
534 :
535 : /* ----------------
536 : * heapgettup_pagemode - fetch next heap tuple in page-at-a-time mode
537 : *
538 : * Same API as heapgettup, but used in page-at-a-time mode
539 : *
540 : * The internal logic is much the same as heapgettup's too, but there are some
541 : * differences: we do not take the buffer content lock (that only needs to
542 : * happen inside heapgetpage), and we iterate through just the tuples listed
543 : * in rs_vistuples[] rather than all tuples on the page. Notice that
544 : * lineindex is 0-based, whereas the corresponding loop variable lineoff in
545 : * heapgettup is 1-based.
546 : * ----------------
547 : */
548 : static void
549 : heapgettup_pagemode(HeapScanDesc scan,
550 : ScanDirection dir,
551 : int nkeys,
552 : ScanKey key)
553 840201 : {
554 840201 : HeapTuple tuple = &(scan->rs_ctup);
555 840201 : bool backward = ScanDirectionIsBackward(dir);
556 : BlockNumber page;
557 : bool finished;
558 : Page dp;
559 : int lines;
560 : int lineindex;
561 : OffsetNumber lineoff;
562 : int linesleft;
563 : ItemId lpp;
564 :
565 : /*
566 : * calculate next starting lineindex, given scan direction
567 : */
568 840201 : if (ScanDirectionIsForward(dir))
569 : {
570 840109 : if (!scan->rs_inited)
571 : {
572 : /*
573 : * return null immediately if relation is empty
574 : */
575 16470 : if (scan->rs_nblocks == 0)
576 : {
577 : Assert(!BufferIsValid(scan->rs_cbuf));
578 84 : tuple->t_data = NULL;
579 84 : return;
580 : }
581 16386 : page = scan->rs_startblock; /* first page */
582 16386 : heapgetpage(scan, page);
583 16386 : lineindex = 0;
584 16386 : scan->rs_inited = true;
585 : }
586 : else
587 : {
588 : /* continue from previously returned page/tuple */
589 823639 : page = scan->rs_cblock; /* current page */
590 823639 : lineindex = scan->rs_cindex + 1;
591 : }
592 :
593 840025 : dp = (Page) BufferGetPage(scan->rs_cbuf);
594 840025 : lines = scan->rs_ntuples;
595 : /* page and lineindex now reference the next visible tid */
596 :
597 840025 : linesleft = lines - lineindex;
598 : }
599 92 : else if (backward)
600 : {
601 92 : if (!scan->rs_inited)
602 : {
603 : /*
604 : * return null immediately if relation is empty
605 : */
606 3 : if (scan->rs_nblocks == 0)
607 : {
608 : Assert(!BufferIsValid(scan->rs_cbuf));
609 0 : tuple->t_data = NULL;
610 0 : return;
611 : }
612 :
613 : /*
614 : * Disable reporting to syncscan logic in a backwards scan; it's
615 : * not very likely anyone else is doing the same thing at the same
616 : * time, and much more likely that we'll just bollix things for
617 : * forward scanners.
618 : */
619 3 : scan->rs_syncscan = false;
620 : /* start from last page of the scan */
621 3 : if (scan->rs_startblock > 0)
622 0 : page = scan->rs_startblock - 1;
623 : else
624 3 : page = scan->rs_nblocks - 1;
625 3 : heapgetpage(scan, page);
626 : }
627 : else
628 : {
629 : /* continue from previously returned page/tuple */
630 89 : page = scan->rs_cblock; /* current page */
631 : }
632 :
633 92 : dp = (Page) BufferGetPage(scan->rs_cbuf);
634 92 : lines = scan->rs_ntuples;
635 :
636 92 : if (!scan->rs_inited)
637 : {
638 3 : lineindex = lines - 1;
639 3 : scan->rs_inited = true;
640 : }
641 : else
642 : {
643 89 : lineindex = scan->rs_cindex - 1;
644 : }
645 : /* page and lineindex now reference the previous visible tid */
646 :
647 92 : linesleft = lineindex + 1;
648 : }
649 : else
650 : {
651 : /*
652 : * ``no movement'' scan direction: refetch prior tuple
653 : */
654 0 : if (!scan->rs_inited)
655 : {
656 : Assert(!BufferIsValid(scan->rs_cbuf));
657 0 : tuple->t_data = NULL;
658 0 : return;
659 : }
660 :
661 0 : page = ItemPointerGetBlockNumber(&(tuple->t_self));
662 0 : if (page != scan->rs_cblock)
663 0 : heapgetpage(scan, page);
664 :
665 : /* Since the tuple was previously fetched, needn't lock page here */
666 0 : dp = (Page) BufferGetPage(scan->rs_cbuf);
667 0 : lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
668 0 : lpp = PageGetItemId(dp, lineoff);
669 : Assert(ItemIdIsNormal(lpp));
670 :
671 0 : tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
672 0 : tuple->t_len = ItemIdGetLength(lpp);
673 :
674 : /* check that rs_cindex is in sync */
675 : Assert(scan->rs_cindex < scan->rs_ntuples);
676 : Assert(lineoff == scan->rs_vistuples[scan->rs_cindex]);
677 :
678 0 : return;
679 : }
680 :
681 : /*
682 : * advance the scan until we find a qualifying tuple or run out of stuff
683 : * to scan
684 : */
685 : for (;;)
686 : {
687 858173 : while (linesleft > 0)
688 : {
689 835898 : lineoff = scan->rs_vistuples[lineindex];
690 835898 : lpp = PageGetItemId(dp, lineoff);
691 : Assert(ItemIdIsNormal(lpp));
692 :
693 835898 : tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
694 835898 : tuple->t_len = ItemIdGetLength(lpp);
695 835898 : ItemPointerSet(&(tuple->t_self), page, lineoff);
696 :
697 : /*
698 : * if current tuple qualifies, return it.
699 : */
700 835898 : if (key != NULL)
701 : {
702 : bool valid;
703 :
704 0 : HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd),
705 : nkeys, key, valid);
706 0 : if (valid)
707 : {
708 0 : scan->rs_cindex = lineindex;
709 0 : return;
710 : }
711 : }
712 : else
713 : {
714 835898 : scan->rs_cindex = lineindex;
715 835898 : return;
716 : }
717 :
718 : /*
719 : * otherwise move to the next item on the page
720 : */
721 0 : --linesleft;
722 0 : if (backward)
723 0 : --lineindex;
724 : else
725 0 : ++lineindex;
726 : }
727 :
728 : /*
729 : * if we get here, it means we've exhausted the items on this page and
730 : * it's time to move to the next.
731 : */
732 22275 : if (backward)
733 : {
734 11 : finished = (page == scan->rs_startblock);
735 11 : if (page == 0)
736 11 : page = scan->rs_nblocks;
737 11 : page--;
738 : }
739 : else
740 : {
741 22264 : page++;
742 22264 : if (page >= scan->rs_nblocks)
743 4208 : page = 0;
744 22264 : finished = (page == scan->rs_startblock);
745 :
746 : /*
747 : * Report our new scan position for synchronization purposes. We
748 : * don't do that when moving backwards, however. That would just
749 : * mess up any other forward-moving scanners.
750 : *
751 : * Note: we do this before checking for end of scan so that the
752 : * final state of the position hint is back at the start of the
753 : * rel. That's not strictly necessary, but otherwise when you run
754 : * the same query multiple times the starting position would shift
755 : * a little bit backwards on every invocation, which is confusing.
756 : * We don't guarantee any specific ordering in general, though.
757 : */
758 22264 : if (scan->rs_syncscan)
759 0 : ss_report_location(scan->rs_rd, page);
760 : }
761 :
762 : /*
763 : * return NULL if we've exhausted all the pages
764 : */
765 22275 : if (finished)
766 : {
767 4219 : if (BufferIsValid(scan->rs_cbuf))
768 4219 : ReleaseBuffer(scan->rs_cbuf);
769 4219 : scan->rs_cbuf = InvalidBuffer;
770 4219 : scan->rs_cblock = InvalidBlockNumber;
771 4219 : tuple->t_data = NULL;
772 4219 : scan->rs_inited = false;
773 4219 : return;
774 : }
775 :
776 18056 : heapgetpage(scan, page);
777 :
778 18056 : dp = (Page) BufferGetPage(scan->rs_cbuf);
779 18056 : lines = scan->rs_ntuples;
780 18056 : linesleft = lines;
781 18056 : if (backward)
782 0 : lineindex = lines - 1;
783 : else
784 18056 : lineindex = 0;
785 : }
786 : }
787 :
788 :
789 : #if defined(DISABLE_COMPLEX_MACRO)
790 : /*
791 : * This is formatted oddly so that the correspondence to the macro
792 : * definition in access/heapam.h is maintained.
793 : */
794 : Datum
795 : fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc,
796 : bool *isnull)
797 : {
798 : return (
799 : (attnum) > 0 ?
800 : (
801 : ((isnull) ? (*(isnull) = false) : (dummyret) NULL),
802 : HeapTupleNoNulls(tup) ?
803 : (
804 : (tupleDesc)->attrs[(attnum) - 1]->attcacheoff >= 0 ?
805 : (
806 : fetchatt((tupleDesc)->attrs[(attnum) - 1],
807 : (char *) (tup)->t_data + (tup)->t_data->t_hoff +
808 : (tupleDesc)->attrs[(attnum) - 1]->attcacheoff)
809 : )
810 : :
811 : nocachegetattr((tup), (attnum), (tupleDesc), (isnull))
812 : )
813 : :
814 : (
815 : att_isnull((attnum) - 1, (tup)->t_data->t_bits) ?
816 : (
817 : ((isnull) ? (*(isnull) = true) : (dummyret) NULL),
818 : (Datum) NULL
819 : )
820 : :
821 : (
822 : nocachegetattr((tup), (attnum), (tupleDesc), (isnull))
823 : )
824 : )
825 : )
826 : :
827 : (
828 : (Datum) NULL
829 : )
830 : );
831 : }
832 : #endif /* defined(DISABLE_COMPLEX_MACRO) */
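/*
 * Illustrative sketch (not part of the original file): fastgetattr is
 * normally reached through the heap_getattr() macro, e.g. to pull column 1
 * out of a tuple returned by heap_getnext():
 *
 *		bool		isnull;
 *		Datum		d = heap_getattr(tup, 1, RelationGetDescr(rel), &isnull);
 *
 *		if (!isnull)
 *			... apply the DatumGetXXX() macro matching the column's type ...
 */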
833 :
834 :
835 : /* ----------------------------------------------------------------
836 : * heap access method interface
837 : * ----------------------------------------------------------------
838 : */
839 :
840 : /* ----------------
841 : * relation_open - open any relation by relation OID
842 : *
843 : * If lockmode is not "NoLock", the specified kind of lock is
844 : * obtained on the relation. (Generally, NoLock should only be
845 : * used if the caller knows it has some appropriate lock on the
846 : * relation already.)
847 : *
848 : * An error is raised if the relation does not exist.
849 : *
850 : * NB: a "relation" is anything with a pg_class entry. The caller is
851 : * expected to check whether the relkind is something it can handle.
852 : * ----------------
853 : */
854 : Relation
855 : relation_open(Oid relationId, LOCKMODE lockmode)
856 333304 : {
857 : Relation r;
858 :
859 : Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
860 :
861 : /* Get the lock before trying to open the relcache entry */
862 333304 : if (lockmode != NoLock)
863 313172 : LockRelationOid(relationId, lockmode);
864 :
865 : /* The relcache does all the real work... */
866 333302 : r = RelationIdGetRelation(relationId);
867 :
868 333302 : if (!RelationIsValid(r))
869 0 : elog(ERROR, "could not open relation with OID %u", relationId);
870 :
871 333302 : pgstat_initstats(r);
872 :
873 333302 : return r;
874 : }
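/*
 * Illustrative sketch (not part of the original file): since relation_open
 * accepts anything with a pg_class entry, a caller that can handle only
 * plain tables checks the relkind itself:
 *
 *		Relation	rel = relation_open(relid, AccessShareLock);
 *
 *		if (rel->rd_rel->relkind != RELKIND_RELATION)
 *			elog(ERROR, "\"%s\" is not a table",
 *				 RelationGetRelationName(rel));
 *		...
 *		relation_close(rel, AccessShareLock);
 */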
875 :
876 : /* ----------------
877 : * try_relation_open - open any relation by relation OID
878 : *
879 : * Same as relation_open, except return NULL instead of failing
880 : * if the relation does not exist.
881 : * ----------------
882 : */
883 : Relation
884 : try_relation_open(Oid relationId, LOCKMODE lockmode)
885 406 : {
886 : Relation r;
887 :
888 : Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
889 :
890 : /* Get the lock first */
891 406 : if (lockmode != NoLock)
892 406 : LockRelationOid(relationId, lockmode);
893 :
894 : /*
895 : * Now that we have the lock, probe to see if the relation really exists
896 : * or not.
897 : */
898 406 : if (!SearchSysCacheExists(RELOID,
899 : ObjectIdGetDatum(relationId),
900 : 0, 0, 0))
901 : {
902 : /* Release useless lock */
903 0 : if (lockmode != NoLock)
904 0 : UnlockRelationOid(relationId, lockmode);
905 :
906 0 : return NULL;
907 : }
908 :
909 : /* Should be safe to do a relcache load */
910 406 : r = RelationIdGetRelation(relationId);
911 :
912 406 : if (!RelationIsValid(r))
913 0 : elog(ERROR, "could not open relation with OID %u", relationId);
914 :
915 406 : pgstat_initstats(r);
916 :
917 406 : return r;
918 : }
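/*
 * Illustrative sketch (not part of the original file): callers that might
 * race against a concurrent DROP test the result for NULL:
 *
 *		Relation	rel = try_relation_open(relid, AccessExclusiveLock);
 *
 *		if (rel == NULL)
 *			return;				... relation vanished; nothing to do ...
 *		...
 *		relation_close(rel, AccessExclusiveLock);
 */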
919 :
920 : /* ----------------
921 : * relation_open_nowait - open but don't wait for lock
922 : *
923 : * Same as relation_open, except throw an error instead of waiting
924 : * when the requested lock is not immediately obtainable.
925 : * ----------------
926 : */
927 : Relation
928 : relation_open_nowait(Oid relationId, LOCKMODE lockmode)
929 0 : {
930 : Relation r;
931 :
932 : Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
933 :
934 : /* Get the lock before trying to open the relcache entry */
935 0 : if (lockmode != NoLock)
936 : {
937 0 : if (!ConditionalLockRelationOid(relationId, lockmode))
938 : {
939 : /* try to throw error by name; relation could be deleted... */
940 0 : char *relname = get_rel_name(relationId);
941 :
942 0 : if (relname)
943 0 : ereport(ERROR,
944 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
945 : errmsg("could not obtain lock on relation \"%s\"",
946 : relname)));
947 : else
948 0 : ereport(ERROR,
949 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
950 : errmsg("could not obtain lock on relation with OID %u",
951 : relationId)));
952 : }
953 : }
954 :
955 : /* The relcache does all the real work... */
956 0 : r = RelationIdGetRelation(relationId);
957 :
958 0 : if (!RelationIsValid(r))
959 0 : elog(ERROR, "could not open relation with OID %u", relationId);
960 :
961 0 : pgstat_initstats(r);
962 :
963 0 : return r;
964 : }
965 :
966 : /* ----------------
967 : * relation_openrv - open any relation specified by a RangeVar
968 : *
969 : * Same as relation_open, but the relation is specified by a RangeVar.
970 : * ----------------
971 : */
972 : Relation
973 : relation_openrv(const RangeVar *relation, LOCKMODE lockmode)
974 8653 : {
975 : Oid relOid;
976 :
977 : /*
978 : * Check for shared-cache-inval messages before trying to open the
979 : * relation. This is needed to cover the case where the name identifies a
980 : * rel that has been dropped and recreated since the start of our
981 : * transaction: if we don't flush the old syscache entry then we'll latch
982 : * onto that entry and suffer an error when we do RelationIdGetRelation.
983 : * Note that relation_open does not need to do this, since a relation's
984 : * OID never changes.
985 : *
986 : * We skip this if asked for NoLock, on the assumption that the caller has
987 : * already ensured some appropriate lock is held.
988 : */
989 8653 : if (lockmode != NoLock)
990 8617 : AcceptInvalidationMessages();
991 :
992 : /* Look up the appropriate relation using namespace search */
993 8653 : relOid = RangeVarGetRelid(relation, false);
994 :
995 : /* Let relation_open do the rest */
996 8634 : return relation_open(relOid, lockmode);
997 : }
998 :
999 : /* ----------------
1000 : * relation_close - close any relation
1001 : *
1002 : * If lockmode is not "NoLock", we then release the specified lock.
1003 : *
1004 : * Note that it is often sensible to hold a lock beyond relation_close;
1005 : * in that case, the lock is released automatically at xact end.
1006 : * ----------------
1007 : */
1008 : void
1009 : relation_close(Relation relation, LOCKMODE lockmode)
1010 178170 : {
1011 178170 : LockRelId relid = relation->rd_lockInfo.lockRelId;
1012 :
1013 : Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
1014 :
1015 : /* The relcache does the real work... */
1016 178170 : RelationClose(relation);
1017 :
1018 178170 : if (lockmode != NoLock)
1019 128657 : UnlockRelationId(&relid, lockmode);
1020 178170 : }
1021 :
1022 :
1023 : /* ----------------
1024 : * heap_open - open a heap relation by relation OID
1025 : *
1026 : * This is essentially relation_open plus check that the relation
1027 : * is not an index nor a composite type. (The caller should also
1028 : * check that it's not a view before assuming it has storage.)
1029 : * ----------------
1030 : */
1031 : Relation
1032 : heap_open(Oid relationId, LOCKMODE lockmode)
1033 165008 : {
1034 : Relation r;
1035 :
1036 165008 : r = relation_open(relationId, lockmode);
1037 :
1038 165008 : if (r->rd_rel->relkind == RELKIND_INDEX)
1039 0 : ereport(ERROR,
1040 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1041 : errmsg("\"%s\" is an index",
1042 : RelationGetRelationName(r))));
1043 165008 : else if (r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
1044 0 : ereport(ERROR,
1045 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1046 : errmsg("\"%s\" is a composite type",
1047 : RelationGetRelationName(r))));
1048 :
1049 165008 : return r;
1050 : }
1051 :
1052 : /* ----------------
1053 : * heap_openrv - open a heap relation specified
1054 : * by a RangeVar node
1055 : *
1056 : * As above, but relation is specified by a RangeVar.
1057 : * ----------------
1058 : */
1059 : Relation
1060 : heap_openrv(const RangeVar *relation, LOCKMODE lockmode)
1061 7979 : {
1062 : Relation r;
1063 :
1064 7979 : r = relation_openrv(relation, lockmode);
1065 :
1066 7965 : if (r->rd_rel->relkind == RELKIND_INDEX)
1067 0 : ereport(ERROR,
1068 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1069 : errmsg("\"%s\" is an index",
1070 : RelationGetRelationName(r))));
1071 7965 : else if (r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
1072 0 : ereport(ERROR,
1073 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1074 : errmsg("\"%s\" is a composite type",
1075 : RelationGetRelationName(r))));
1076 :
1077 7965 : return r;
1078 : }
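/*
 * Illustrative sketch (not part of the original file): utility code
 * typically has a RangeVar from the parser and opens the target table with
 * an appropriate lock.  "stmt->relation" below stands for whatever RangeVar
 * the caller happens to hold; it is not a real field defined here.
 *
 *		Relation	rel = heap_openrv(stmt->relation, RowExclusiveLock);
 *
 *		... work on rel ...
 *		heap_close(rel, RowExclusiveLock);
 */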
1079 :
1080 :
1081 : /* ----------------
1082 : * heap_beginscan - begin relation scan
1083 : *
1084 : * heap_beginscan_strat offers an extended API that lets the caller control
1085 : * whether a nondefault buffer access strategy can be used, and whether
1086 : * syncscan can be chosen (possibly resulting in the scan not starting from
1087 : * block zero). Both of these default to TRUE with plain heap_beginscan.
1088 : *
1089 : * heap_beginscan_bm is an alternative entry point for setting up a
1090 : * HeapScanDesc for a bitmap heap scan. Although that scan technology is
1091 : * really quite unlike a standard seqscan, there is just enough commonality
1092 : * to make it worth using the same data structure.
1093 : * ----------------
1094 : */
1095 : HeapScanDesc
1096 : heap_beginscan(Relation relation, Snapshot snapshot,
1097 : int nkeys, ScanKey key)
1098 13876 : {
1099 13876 : return heap_beginscan_internal(relation, snapshot, nkeys, key,
1100 : true, true, false);
1101 : }
1102 :
1103 : HeapScanDesc
1104 : heap_beginscan_strat(Relation relation, Snapshot snapshot,
1105 : int nkeys, ScanKey key,
1106 : bool allow_strat, bool allow_sync)
1107 5 : {
1108 5 : return heap_beginscan_internal(relation, snapshot, nkeys, key,
1109 : allow_strat, allow_sync, false);
1110 : }
1111 :
1112 : HeapScanDesc
1113 : heap_beginscan_bm(Relation relation, Snapshot snapshot,
1114 : int nkeys, ScanKey key)
1115 196 : {
1116 196 : return heap_beginscan_internal(relation, snapshot, nkeys, key,
1117 : false, false, true);
1118 : }
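/*
 * Illustrative sketch (not part of the original file): a caller that needs
 * to see the rows in physical block order passes allow_sync = false, so the
 * scan always starts at block zero instead of joining a synchronized scan.
 * The two trailing booleans are allow_strat and allow_sync:
 *
 *		scan = heap_beginscan_strat(rel, snapshot, 0, NULL,
 *									true, false);
 */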
1119 :
1120 : static HeapScanDesc
1121 : heap_beginscan_internal(Relation relation, Snapshot snapshot,
1122 : int nkeys, ScanKey key,
1123 : bool allow_strat, bool allow_sync,
1124 : bool is_bitmapscan)
1125 14077 : {
1126 : HeapScanDesc scan;
1127 :
1128 : /*
1129 : * increment relation ref count while scanning relation
1130 : *
1131 : * This is just to make really sure the relcache entry won't go away while
1132 : * the scan has a pointer to it. Caller should be holding the rel open
1133 : * anyway, so this is redundant in all normal scenarios...
1134 : */
1135 14077 : RelationIncrementReferenceCount(relation);
1136 :
1137 : /*
1138 : * allocate and initialize scan descriptor
1139 : */
1140 14077 : scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
1141 :
1142 14077 : scan->rs_rd = relation;
1143 14077 : scan->rs_snapshot = snapshot;
1144 14077 : scan->rs_nkeys = nkeys;
1145 14077 : scan->rs_bitmapscan = is_bitmapscan;
1146 14077 : scan->rs_strategy = NULL; /* set in initscan */
1147 14077 : scan->rs_allow_strat = allow_strat;
1148 14077 : scan->rs_allow_sync = allow_sync;
1149 :
1150 : /*
1151 : * we can use page-at-a-time mode if it's an MVCC-safe snapshot
1152 : */
1153 14077 : scan->rs_pageatatime = IsMVCCSnapshot(snapshot);
1154 :
1155 : /* we only need to set this up once */
1156 14077 : scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1157 :
1158 : /*
1159 : * we do this here instead of in initscan() because heap_rescan also calls
1160 : * initscan() and we don't want to allocate memory again
1161 : */
1162 14077 : if (nkeys > 0)
1163 5898 : scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1164 : else
1165 8179 : scan->rs_key = NULL;
1166 :
1167 14077 : initscan(scan, key);
1168 :
1169 14077 : return scan;
1170 : }
1171 :
1172 : /* ----------------
1173 : * heap_rescan - restart a relation scan
1174 : * ----------------
1175 : */
1176 : void
1177 : heap_rescan(HeapScanDesc scan,
1178 : ScanKey key)
1179 12585 : {
1180 : /*
1181 : * unpin scan buffers
1182 : */
1183 12585 : if (BufferIsValid(scan->rs_cbuf))
1184 12126 : ReleaseBuffer(scan->rs_cbuf);
1185 :
1186 : /*
1187 : * reinitialize scan descriptor
1188 : */
1189 12585 : initscan(scan, key);
1190 12585 : }
1191 :
1192 : /* ----------------
1193 : * heap_endscan - end relation scan
1194 : *
1195 : * See how to integrate with index scans.
1196 : * Check handling of reldesc caching.
1197 : * ----------------
1198 : */
1199 : void
1200 : heap_endscan(HeapScanDesc scan)
1201 14019 : {
1202 : /* Note: no locking manipulations needed */
1203 :
1204 : /*
1205 : * unpin scan buffers
1206 : */
1207 14019 : if (BufferIsValid(scan->rs_cbuf))
1208 3328 : ReleaseBuffer(scan->rs_cbuf);
1209 :
1210 : /*
1211 : * decrement relation reference count and free scan descriptor storage
1212 : */
1213 14019 : RelationDecrementReferenceCount(scan->rs_rd);
1214 :
1215 14019 : if (scan->rs_key)
1216 5898 : pfree(scan->rs_key);
1217 :
1218 14019 : if (scan->rs_strategy != NULL)
1219 0 : FreeAccessStrategy(scan->rs_strategy);
1220 :
1221 14019 : pfree(scan);
1222 14019 : }
1223 :
1224 : /* ----------------
1225 : * heap_getnext - retrieve next tuple in scan
1226 : *
1227 : * Fix to work with index relations.
1228 : * We don't return the buffer anymore, but you can get it from the
1229 : * returned HeapTuple.
1230 : * ----------------
1231 : */
1232 :
1233 : #ifdef HEAPDEBUGALL
1234 : #define HEAPDEBUG_1 \
1235 : elog(DEBUG2, "heap_getnext([%s,nkeys=%d],dir=%d) called", \
1236 : RelationGetRelationName(scan->rs_rd), scan->rs_nkeys, (int) direction)
1237 : #define HEAPDEBUG_2 \
1238 : elog(DEBUG2, "heap_getnext returning EOS")
1239 : #define HEAPDEBUG_3 \
1240 : elog(DEBUG2, "heap_getnext returning tuple")
1241 : #else
1242 : #define HEAPDEBUG_1
1243 : #define HEAPDEBUG_2
1244 : #define HEAPDEBUG_3
1245 : #endif /* !defined(HEAPDEBUGALL) */
1246 :
1247 :
1248 : HeapTuple
1249 : heap_getnext(HeapScanDesc scan, ScanDirection direction)
1250 1094425 : {
1251 : /* Note: no locking manipulations needed */
1252 :
1253 : HEAPDEBUG_1; /* heap_getnext( info ) */
1254 :
1255 1094425 : if (scan->rs_pageatatime)
1256 840201 : heapgettup_pagemode(scan, direction,
1257 : scan->rs_nkeys, scan->rs_key);
1258 : else
1259 254224 : heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key);
1260 :
1261 1094425 : if (scan->rs_ctup.t_data == NULL)
1262 : {
1263 : HEAPDEBUG_2; /* heap_getnext returning EOS */
1264 8054 : return NULL;
1265 : }
1266 :
1267 : /*
1268 : * if we get here it means we have a new current scan tuple, so point to
1269 : * the proper return buffer and return the tuple.
1270 : */
1271 : HEAPDEBUG_3; /* heap_getnext returning tuple */
1272 :
1273 1086371 : pgstat_count_heap_getnext(scan->rs_rd);
1274 :
1275 1086371 : return &(scan->rs_ctup);
1276 : }
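/*
 * Illustrative sketch (not part of the original file): the returned tuple
 * points into the scan's current buffer, so a caller that needs it to
 * survive the next heap_getnext() or heap_endscan() call copies it first:
 *
 *		HeapTuple	copy = heap_copytuple(tup);
 */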
1277 :
1278 : /*
1279 : * heap_fetch - retrieve tuple with given tid
1280 : *
1281 : * On entry, tuple->t_self is the TID to fetch. We pin the buffer holding
1282 : * the tuple, fill in the remaining fields of *tuple, and check the tuple
1283 : * against the specified snapshot.
1284 : *
1285 : * If successful (tuple found and passes snapshot time qual), then *userbuf
1286 : * is set to the buffer holding the tuple and TRUE is returned. The caller
1287 : * must unpin the buffer when done with the tuple.
1288 : *
1289 : * If the tuple is not found (ie, item number references a deleted slot),
1290 : * then tuple->t_data is set to NULL and FALSE is returned.
1291 : *
1292 : * If the tuple is found but fails the time qual check, then FALSE is returned
1293 : * but tuple->t_data is left pointing to the tuple.
1294 : *
1295 : * keep_buf determines what is done with the buffer in the FALSE-result cases.
1296 : * When the caller specifies keep_buf = true, we retain the pin on the buffer
1297 : * and return it in *userbuf (so the caller must eventually unpin it); when
1298 : * keep_buf = false, the pin is released and *userbuf is set to InvalidBuffer.
1299 : *
1300 : * stats_relation is the relation to charge the heap_fetch operation against
1301 : * for statistical purposes. (This could be the heap rel itself, an
1302 : * associated index, or NULL to not count the fetch at all.)
1303 : *
1304 : * heap_fetch does not follow HOT chains: only the exact TID requested will
1305 : * be fetched.
1306 : *
1307 : * It is somewhat inconsistent that we ereport() on invalid block number but
1308 : * return false on invalid item number. There are a couple of reasons though.
1309 : * One is that the caller can relatively easily check the block number for
1310 : * validity, but cannot check the item number without reading the page
1311 : * himself. Another is that when we are following a t_ctid link, we can be
1312 : * reasonably confident that the page number is valid (since VACUUM shouldn't
1313 : * truncate off the destination page without having killed the referencing
1314 : * tuple first), but the item number might well not be good.
1315 : */
1316 : bool
1317 : heap_fetch(Relation relation,
1318 : Snapshot snapshot,
1319 : HeapTuple tuple,
1320 : Buffer *userbuf,
1321 : bool keep_buf,
1322 : Relation stats_relation)
1323 1078 : {
1324 : /* Assume *userbuf is undefined on entry */
1325 1078 : *userbuf = InvalidBuffer;
1326 1078 : return heap_release_fetch(relation, snapshot, tuple,
1327 : userbuf, keep_buf, stats_relation);
1328 : }
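/*
 * Illustrative sketch (not part of the original file): fetch a single tuple
 * by TID, then drop the buffer pin once done with its contents:
 *
 *		HeapTupleData tuple;
 *		Buffer		buf;
 *
 *		tuple.t_self = tid;				... the TID of interest ...
 *		if (heap_fetch(rel, snapshot, &tuple, &buf, false, NULL))
 *		{
 *			... use tuple.t_data while the pin on buf is held ...
 *			ReleaseBuffer(buf);
 *		}
 */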
1329 :
1330 : /*
1331 : * heap_release_fetch - retrieve tuple with given tid
1332 : *
1333 : * This has the same API as heap_fetch except that if *userbuf is not
1334 : * InvalidBuffer on entry, that buffer will be released before reading
1335 : * the new page. This saves a separate ReleaseBuffer step and hence
1336 : * one entry into the bufmgr when looping through multiple fetches.
1337 : * Also, if *userbuf is the same buffer that holds the target tuple,
1338 : * we avoid bufmgr manipulation altogether.
1339 : */
1340 : bool
1341 : heap_release_fetch(Relation relation,
1342 : Snapshot snapshot,
1343 : HeapTuple tuple,
1344 : Buffer *userbuf,
1345 : bool keep_buf,
1346 : Relation stats_relation)
1347 55760 : {
1348 55760 : ItemPointer tid = &(tuple->t_self);
1349 : ItemId lp;
1350 : Buffer buffer;
1351 : PageHeader dp;
1352 : OffsetNumber offnum;
1353 : bool valid;
1354 :
1355 : /*
1356 : * get the buffer from the relation descriptor. Note that this does a
1357 : * buffer pin, and releases the old *userbuf if not InvalidBuffer.
1358 : */
1359 55760 : buffer = ReleaseAndReadBuffer(*userbuf, relation,
1360 : ItemPointerGetBlockNumber(tid));
1361 :
1362 : /*
1363 : * Need share lock on buffer to examine tuple commit status.
1364 : */
1365 55760 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
1366 55760 : dp = (PageHeader) BufferGetPage(buffer);
1367 :
1368 : /*
1369 : * We'd better check for out-of-range offnum, in case the item has been
1370 : * removed by VACUUM since the TID was obtained.
1371 : */
1372 55760 : offnum = ItemPointerGetOffsetNumber(tid);
1373 55760 : if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
1374 : {
1375 0 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1376 0 : if (keep_buf)
1377 0 : *userbuf = buffer;
1378 : else
1379 : {
1380 0 : ReleaseBuffer(buffer);
1381 0 : *userbuf = InvalidBuffer;
1382 : }
1383 0 : tuple->t_data = NULL;
1384 0 : return false;
1385 : }
1386 :
1387 : /*
1388 : * get the item line pointer corresponding to the requested tid
1389 : */
1390 55760 : lp = PageGetItemId(dp, offnum);
1391 :
1392 : /*
1393 : * Must check for deleted tuple.
1394 : */
1395 55760 : if (!ItemIdIsNormal(lp))
1396 : {
1397 164 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1398 164 : if (keep_buf)
1399 164 : *userbuf = buffer;
1400 : else
1401 : {
1402 0 : ReleaseBuffer(buffer);
1403 0 : *userbuf = InvalidBuffer;
1404 : }
1405 164 : tuple->t_data = NULL;
1406 164 : return false;
1407 : }
1408 :
1409 : /*
1410 : * fill in *tuple fields
1411 : */
1412 55596 : tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
1413 55596 : tuple->t_len = ItemIdGetLength(lp);
1414 55596 : tuple->t_tableOid = RelationGetRelid(relation);
1415 :
1416 : /*
1417 : * check time qualification of tuple, then release lock
1418 : */
1419 55596 : valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1420 :
1421 55596 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1422 :
1423 55596 : if (valid)
1424 : {
1425 : /*
1426 : * All checks passed, so return the tuple as valid. Caller is now
1427 : * responsible for releasing the buffer.
1428 : */
1429 55455 : *userbuf = buffer;
1430 :
1431 : /* Count the successful fetch against appropriate rel, if any */
1432 55455 : if (stats_relation != NULL)
1433 0 : pgstat_count_heap_fetch(stats_relation);
1434 :
1435 55455 : return true;
1436 : }
1437 :
1438 : /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1439 141 : if (keep_buf)
1440 137 : *userbuf = buffer;
1441 : else
1442 : {
1443 4 : ReleaseBuffer(buffer);
1444 4 : *userbuf = InvalidBuffer;
1445 : }
1446 :
1447 141 : return false;
1448 : }
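/*
 * Illustrative sketch (not part of the original file): when looping over
 * many TIDs, handing the previous buffer back in via *userbuf saves a
 * bufmgr round trip whenever consecutive TIDs fall on the same page:
 *
 *		Buffer		buf = InvalidBuffer;
 *
 *		for each TID of interest:
 *			tuple.t_self = tid;
 *			if (heap_release_fetch(rel, snapshot, &tuple, &buf, true, NULL))
 *				... process tuple ...
 *
 *		if (BufferIsValid(buf))
 *			ReleaseBuffer(buf);
 */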
1449 :
1450 : /*
1451 : * heap_hot_search_buffer - search HOT chain for tuple satisfying snapshot
1452 : *
1453 : * On entry, *tid is the TID of a tuple (either a simple tuple, or the root
1454 : * of a HOT chain), and buffer is the buffer holding this tuple. We search
1455 : * for the first chain member satisfying the given snapshot. If one is
1456 : * found, we update *tid to reference that tuple's offset number, and
1457 : * return TRUE. If no match, return FALSE without modifying *tid.
1458 : *
1459 : * If all_dead is not NULL, we check non-visible tuples to see if they are
1460 : * globally dead; *all_dead is set TRUE if all members of the HOT chain
1461 : * are vacuumable, FALSE if not.
1462 : *
1463 : * Unlike heap_fetch, the caller must already have pin and (at least) share
1464 : * lock on the buffer; it is still pinned/locked at exit. Also unlike
1465 : * heap_fetch, we do not report any pgstats count; caller may do so if wanted.
1466 : */
1467 : bool
1468 : heap_hot_search_buffer(ItemPointer tid, Buffer buffer, Snapshot snapshot,
1469 : bool *all_dead)
1470 4024 : {
1471 4024 : Page dp = (Page) BufferGetPage(buffer);
1472 4024 : TransactionId prev_xmax = InvalidTransactionId;
1473 : OffsetNumber offnum;
1474 : bool at_chain_start;
1475 :
1476 4024 : if (all_dead)
1477 1301 : *all_dead = true;
1478 :
1479 : Assert(ItemPointerGetBlockNumber(tid) == BufferGetBlockNumber(buffer));
1480 4024 : offnum = ItemPointerGetOffsetNumber(tid);
1481 4024 : at_chain_start = true;
1482 :
1483 : /* Scan through possible multiple members of HOT-chain */
1484 : for (;;)
1485 : {
1486 : ItemId lp;
1487 : HeapTupleData heapTuple;
1488 :
1489 : /* check for bogus TID */
1490 4745 : if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
1491 : break;
1492 :
1493 4745 : lp = PageGetItemId(dp, offnum);
1494 :
1495 : /* check for unused, dead, or redirected items */
1496 4745 : if (!ItemIdIsNormal(lp))
1497 : {
1498 : /* We should only see a redirect at start of chain */
1499 113 : if (ItemIdIsRedirected(lp) && at_chain_start)
1500 : {
1501 : /* Follow the redirect */
1502 105 : offnum = ItemIdGetRedirect(lp);
1503 105 : at_chain_start = false;
1504 105 : continue;
1505 : }
1506 : /* else must be end of chain */
1507 : break;
1508 : }
1509 :
1510 4632 : heapTuple.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
1511 4632 : heapTuple.t_len = ItemIdGetLength(lp);
1512 :
1513 : /*
1514 : * Shouldn't see a HEAP_ONLY tuple at chain start.
1515 : */
1516 4632 : if (at_chain_start && HeapTupleIsHeapOnly(&heapTuple))
1517 0 : break;
1518 :
1519 : /*
1520 : * The xmin should match the previous xmax value, else chain is
1521 : * broken.
1522 : */
1523 4632 : if (TransactionIdIsValid(prev_xmax) &&
1524 : !TransactionIdEquals(prev_xmax,
1525 : HeapTupleHeaderGetXmin(heapTuple.t_data)))
1526 0 : break;
1527 :
1528 : /* If it's visible per the snapshot, we must return it */
1529 4632 : if (HeapTupleSatisfiesVisibility(&heapTuple, snapshot, buffer))
1530 : {
1531 2715 : ItemPointerSetOffsetNumber(tid, offnum);
1532 2715 : if (all_dead)
1533 22 : *all_dead = false;
1534 2715 : return true;
1535 : }
1536 :
1537 : /*
1538 : * If we can't see it, maybe no one else can either. At caller
1539 : * request, check whether all chain members are dead to all
1540 : * transactions.
1541 : */
1542 1917 : if (all_dead && *all_dead &&
1543 : HeapTupleSatisfiesVacuum(heapTuple.t_data, RecentGlobalXmin,
1544 : buffer) != HEAPTUPLE_DEAD)
1545 1247 : *all_dead = false;
1546 :
1547 : /*
1548 : * Check to see if HOT chain continues past this tuple; if so fetch
1549 : * the next offnum and loop around.
1550 : */
1551 1917 : if (HeapTupleIsHotUpdated(&heapTuple))
1552 : {
1553 : Assert(ItemPointerGetBlockNumber(&heapTuple.t_data->t_ctid) ==
1554 : ItemPointerGetBlockNumber(tid));
1555 616 : offnum = ItemPointerGetOffsetNumber(&heapTuple.t_data->t_ctid);
1556 616 : at_chain_start = false;
1557 616 : prev_xmax = HeapTupleHeaderGetXmax(heapTuple.t_data);
1558 : }
1559 : else
1560 : break; /* end of chain */
1561 : }
1562 :
1563 1309 : return false;
1564 : }
1565 :
1566 : /*
1567 : * heap_hot_search - search HOT chain for tuple satisfying snapshot
1568 : *
1569 : * This has the same API as heap_hot_search_buffer, except that the caller
1570 : * does not provide the buffer containing the page, rather we access it
1571 : * locally.
1572 : */
1573 : bool
1574 : heap_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot,
1575 : bool *all_dead)
1576 1323 : {
1577 : bool result;
1578 : Buffer buffer;
1579 :
1580 1323 : buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1581 1323 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
1582 1323 : result = heap_hot_search_buffer(tid, buffer, snapshot, all_dead);
1583 1323 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1584 1323 : ReleaseBuffer(buffer);
1585 1323 : return result;
1586 : }
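/*
 * Illustrative sketch (not part of the original file): an index AM caller
 * can use the all_dead flag to decide whether the index entry's whole HOT
 * chain is vacuumable:
 *
 *		bool		all_dead;
 *
 *		if (heap_hot_search(&tid, heapRel, snapshot, &all_dead))
 *			... tid now points at the chain member visible to snapshot ...
 *		else if (all_dead)
 *			... no member is visible to anyone; the entry can be killed ...
 */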
1587 :
1588 : /*
1589 : * heap_get_latest_tid - get the latest tid of a specified tuple
1590 : *
1591 : * Actually, this gets the latest version that is visible according to
1592 : * the passed snapshot. You can pass SnapshotDirty to get the very latest,
1593 : * possibly uncommitted version.
1594 : *
1595 : * *tid is both an input and an output parameter: it is updated to
1596 : * show the latest version of the row. Note that it will not be changed
1597 : * if no version of the row passes the snapshot test.
1598 : */
1599 : void
1600 : heap_get_latest_tid(Relation relation,
1601 : Snapshot snapshot,
1602 : ItemPointer tid)
1603 15 : {
1604 : BlockNumber blk;
1605 : ItemPointerData ctid;
1606 : TransactionId priorXmax;
1607 :
1608 : /* this is to avoid Assert failures on bad input */
1609 15 : if (!ItemPointerIsValid(tid))
1610 0 : return;
1611 :
1612 : /*
1613 : * Since this can be called with user-supplied TID, don't trust the input
1614 : * too much. (RelationGetNumberOfBlocks is an expensive check, so we
1615 : * don't repeat it for each t_ctid link we follow. Note that it would not
1616 : * do to call it just once and save the result, either.)
1617 : */
1618 15 : blk = ItemPointerGetBlockNumber(tid);
1619 15 : if (blk >= RelationGetNumberOfBlocks(relation))
1620 0 : elog(ERROR, "block number %u is out of range for relation \"%s\"",
1621 : blk, RelationGetRelationName(relation));
1622 :
1623 : /*
1624 : * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1625 : * need to examine, and *tid is the TID we will return if ctid turns out
1626 : * to be bogus.
1627 : *
1628 : * Note that we will loop until we reach the end of the t_ctid chain.
1629 : * Depending on the snapshot passed, there might be at most one visible
1630 : * version of the row, but we don't try to optimize for that.
1631 : */
1632 15 : ctid = *tid;
1633 15 : priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1634 : for (;;)
1635 : {
1636 : Buffer buffer;
1637 : PageHeader dp;
1638 : OffsetNumber offnum;
1639 : ItemId lp;
1640 : HeapTupleData tp;
1641 : bool valid;
1642 :
1643 : /*
1644 : * Read, pin, and lock the page.
1645 : */
1646 29 : buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1647 29 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
1648 29 : dp = (PageHeader) BufferGetPage(buffer);
1649 :
1650 : /*
1651 : * Check for bogus item number. This is not treated as an error
1652 : * condition because it can happen while following a t_ctid link. We
1653 : * just assume that the prior tid is OK and return it unchanged.
1654 : */
1655 29 : offnum = ItemPointerGetOffsetNumber(&ctid);
1656 29 : if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
1657 : {
1658 0 : UnlockReleaseBuffer(buffer);
1659 0 : break;
1660 : }
1661 29 : lp = PageGetItemId(dp, offnum);
1662 29 : if (!ItemIdIsNormal(lp))
1663 : {
1664 0 : UnlockReleaseBuffer(buffer);
1665 0 : break;
1666 : }
1667 :
1668 : /* OK to access the tuple */
1669 29 : tp.t_self = ctid;
1670 29 : tp.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
1671 29 : tp.t_len = ItemIdGetLength(lp);
1672 :
1673 : /*
1674 : * After following a t_ctid link, we might arrive at an unrelated
1675 : * tuple. Check for XMIN match.
1676 : */
1677 29 : if (TransactionIdIsValid(priorXmax) &&
1678 : !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1679 : {
1680 0 : UnlockReleaseBuffer(buffer);
1681 0 : break;
1682 : }
1683 :
1684 : /*
1685 : * Check time qualification of tuple; if visible, set it as the new
1686 : * result candidate.
1687 : */
1688 29 : valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1689 29 : if (valid)
1690 11 : *tid = ctid;
1691 :
1692 : /*
1693 : * If there's a valid t_ctid link, follow it, else we're done.
1694 : */
1695 29 : if ((tp.t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED)) ||
1696 : ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1697 : {
1698 15 : UnlockReleaseBuffer(buffer);
1699 15 : break;
1700 : }
1701 :
1702 14 : ctid = tp.t_data->t_ctid;
1703 14 : priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
1704 14 : UnlockReleaseBuffer(buffer);
1705 14 : } /* end of loop */
1706 : }
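/*
 * Illustrative sketch (not part of the original file): given a possibly
 * stale TID, chase it to the newest version visible to the snapshot:
 *
 *		ItemPointerData tid = ... some previously obtained TID ...;
 *
 *		heap_get_latest_tid(rel, snapshot, &tid);
 *		... tid now names the latest visible version, or is unchanged
 *		... if no version passes the snapshot test
 */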
1707 :
1708 :
1709 : /*
1710 : * UpdateXmaxHintBits - update tuple hint bits after xmax transaction ends
1711 : *
1712 : * This is called after we have waited for the XMAX transaction to terminate.
1713 : * If the transaction aborted, we guarantee the XMAX_INVALID hint bit will
1714 : * be set on exit. If the transaction committed, we set the XMAX_COMMITTED
1715 : * hint bit if possible --- but beware that that may not yet be possible,
1716 : * if the transaction committed asynchronously. Hence callers should look
1717 : * only at XMAX_INVALID.
1718 : */
1719 : static void
1720 : UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
1721 0 : {
1722 : Assert(TransactionIdEquals(HeapTupleHeaderGetXmax(tuple), xid));
1723 :
1724 0 : if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID)))
1725 : {
1726 0 : if (TransactionIdDidCommit(xid))
1727 0 : HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1728 : xid);
1729 : else
1730 0 : HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1731 : InvalidTransactionId);
1732 : }
1733 0 : }
1734 :
1735 :
1736 : /*
1737 : * heap_insert - insert tuple into a heap
1738 : *
1739 : * The new tuple is stamped with current transaction ID and the specified
1740 : * command ID.
1741 : *
1742 : * If use_wal is false, the new tuple is not logged in WAL, even for a
1743 : * non-temp relation. Safe usage of this behavior requires that we arrange
1744 : * that all new tuples go into new pages not containing any tuples from other
1745 : * transactions, and that the relation gets fsync'd before commit.
1746 : * (See also heap_sync() comments)
1747 : *
1748 : * use_fsm is passed directly to RelationGetBufferForTuple, which see for
1749 : * more info.
1750 : *
1751 : * Note that use_wal and use_fsm will be applied when inserting into the
1752 : * heap's TOAST table, too, if the tuple requires any out-of-line data.
1753 : *
1754 : * The return value is the OID assigned to the tuple (either here or by the
1755 : * caller), or InvalidOid if no OID. The header fields of *tup are updated
1756 : * to match the stored tuple; in particular tup->t_self receives the actual
1757 : * TID where the tuple was stored. But note that any toasting of fields
1758 : * within the tuple data is NOT reflected into *tup.
1759 : */
1760 : Oid
1761 : heap_insert(Relation relation, HeapTuple tup, CommandId cid,
1762 : bool use_wal, bool use_fsm)
1763 201605 : {
1764 201605 : TransactionId xid = GetCurrentTransactionId();
1765 : HeapTuple heaptup;
1766 : Buffer buffer;
1767 :
1768 201605 : if (relation->rd_rel->relhasoids)
1769 : {
1770 : #ifdef NOT_USED
1771 : /* this is redundant with an Assert in HeapTupleSetOid */
1772 : Assert(tup->t_data->t_infomask & HEAP_HASOID);
1773 : #endif
1774 :
1775 : /*
1776 : * If the object id of this tuple has already been assigned, trust the
1777 : * caller. There are a couple of ways this can happen. At initial db
1778 : * creation, the backend program sets oids for tuples. When we define
1779 : * an index, we set the oid. Finally, in the future, we may allow
1780 : * users to set their own object ids in order to support a persistent
1781 : * object store (objects need to contain pointers to one another).
1782 : */
1783 27892 : if (!OidIsValid(HeapTupleGetOid(tup)))
1784 22282 : HeapTupleSetOid(tup, GetNewOid(relation));
1785 : }
1786 : else
1787 : {
1788 : /* check there is no space for an OID */
1789 : Assert(!(tup->t_data->t_infomask & HEAP_HASOID));
1790 : }
1791 :
1792 201605 : tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
1793 201605 : tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
1794 201605 : tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
1795 201605 : HeapTupleHeaderSetXmin(tup->t_data, xid);
1796 201605 : HeapTupleHeaderSetCmin(tup->t_data, cid);
1797 201605 : HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */
1798 201605 : tup->t_tableOid = RelationGetRelid(relation);
1799 :
1800 : /*
1801 : * If the new tuple is too big for storage or contains already toasted
1802 : * out-of-line attributes from some other relation, invoke the toaster.
1803 : *
1804 : * Note: below this point, heaptup is the data we actually intend to store
1805 : * into the relation; tup is the caller's original untoasted data.
1806 : */
1807 201605 : if (relation->rd_rel->relkind != RELKIND_RELATION)
1808 : {
1809 : /* toast table entries should never be recursively toasted */
1810 : Assert(!HeapTupleHasExternal(tup));
1811 3541 : heaptup = tup;
1812 : }
1813 198856 : else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
1814 792 : heaptup = toast_insert_or_update(relation, tup, NULL,
1815 : use_wal, use_fsm);
1816 : else
1817 197272 : heaptup = tup;
1818 :
1819 : /* Find buffer to insert this tuple into */
1820 201605 : buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
1821 : InvalidBuffer, use_fsm);
1822 :
1823 : /* NO EREPORT(ERROR) from here till changes are logged */
1824 201605 : START_CRIT_SECTION();
1825 :
1826 201605 : RelationPutHeapTuple(relation, buffer, heaptup);
1827 :
1828 : /*
1829 : * XXX Should we set PageSetPrunable on this page?
1830 : *
1831 : * The inserting transaction may eventually abort, thus making this tuple
1832 : * DEAD and hence available for pruning. Though we don't want to optimize
1833 : * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
1834 : * aborted tuple will never be pruned until the next vacuum is triggered.
1835 : *
1836 : * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
1837 : */
1838 :
1839 201605 : MarkBufferDirty(buffer);
1840 :
1841 : /* XLOG stuff */
1842 201605 : if (use_wal && !relation->rd_istemp)
1843 : {
1844 : xl_heap_insert xlrec;
1845 : xl_heap_header xlhdr;
1846 : XLogRecPtr recptr;
1847 : XLogRecData rdata[3];
1848 178130 : Page page = BufferGetPage(buffer);
1849 178130 : uint8 info = XLOG_HEAP_INSERT;
1850 :
1851 178130 : xlrec.target.node = relation->rd_node;
1852 178130 : xlrec.target.tid = heaptup->t_self;
1853 178130 : rdata[0].data = (char *) &xlrec;
1854 178130 : rdata[0].len = SizeOfHeapInsert;
1855 178130 : rdata[0].buffer = InvalidBuffer;
1856 178130 : rdata[0].next = &(rdata[1]);
1857 :
1858 178130 : xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
1859 178130 : xlhdr.t_infomask = heaptup->t_data->t_infomask;
1860 178130 : xlhdr.t_hoff = heaptup->t_data->t_hoff;
1861 :
1862 : /*
1863 : * note we mark rdata[1] as belonging to buffer; if XLogInsert decides
1864 : * to write the whole page to the xlog, we don't need to store
1865 : * xl_heap_header in the xlog.
1866 : */
1867 178130 : rdata[1].data = (char *) &xlhdr;
1868 178130 : rdata[1].len = SizeOfHeapHeader;
1869 178130 : rdata[1].buffer = buffer;
1870 178130 : rdata[1].buffer_std = true;
1871 178130 : rdata[1].next = &(rdata[2]);
1872 :
1873 : /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
1874 178130 : rdata[2].data = (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits);
1875 178130 : rdata[2].len = heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits);
1876 178130 : rdata[2].buffer = buffer;
1877 178130 : rdata[2].buffer_std = true;
1878 178130 : rdata[2].next = NULL;
1879 :
1880 : /*
1881 : * If this is the first and only tuple on the page, we can reinit the
1882 : * page instead of restoring the whole thing. Set flag, and hide
1883 : * buffer references from XLogInsert.
1884 : */
1885 178130 : if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
1886 : PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
1887 : {
1888 2574 : info |= XLOG_HEAP_INIT_PAGE;
1889 2574 : rdata[1].buffer = rdata[2].buffer = InvalidBuffer;
1890 : }
1891 :
1892 178130 : recptr = XLogInsert(RM_HEAP_ID, info, rdata);
1893 :
1894 178130 : PageSetLSN(page, recptr);
1895 178130 : PageSetTLI(page, ThisTimeLineID);
1896 : }
1897 :
1898 201605 : END_CRIT_SECTION();
1899 :
1900 201605 : UnlockReleaseBuffer(buffer);
1901 :
1902 : /*
1903 : * If tuple is cachable, mark it for invalidation from the caches in case
1904 : * we abort. Note it is OK to do this after releasing the buffer, because
1905 : * the heaptup data structure is all in local memory, not in the shared
1906 : * buffer.
1907 : */
1908 201605 : CacheInvalidateHeapTuple(relation, heaptup);
1909 :
1910 201605 : pgstat_count_heap_insert(relation);
1911 :
1912 : /*
1913 : * If heaptup is a private copy, release it. Don't forget to copy t_self
1914 : * back to the caller's image, too.
1915 : */
1916 201605 : if (heaptup != tup)
1917 : {
1918 792 : tup->t_self = heaptup->t_self;
1919 792 : heap_freetuple(heaptup);
1920 : }
1921 :
1922 201605 : return HeapTupleGetOid(tup);
1923 : }
1924 :
1925 : /*
1926 : * simple_heap_insert - insert a tuple
1927 : *
1928 : * Currently, this routine differs from heap_insert only in supplying
1929 : * a default command ID and not allowing access to the speedup options.
1930 : *
1931 : * This should be used rather than using heap_insert directly in most places
1932 : * where we are modifying system catalogs.
1933 : */
1934 : Oid
1935 : simple_heap_insert(Relation relation, HeapTuple tup)
1936 28679 : {
1937 28679 : return heap_insert(relation, tup, GetCurrentCommandId(true), true, true);
1938 : }
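
#ifdef NOT_USED
/*
 * Editor's sketch (not part of the original file): a minimal, hypothetical
 * caller of simple_heap_insert.  It assumes "rel" is an already opened and
 * suitably locked heap relation with exactly two non-null attributes whose
 * Datum values are supplied by the caller; index maintenance and error
 * handling are omitted.
 */
static void
example_simple_insert(Relation rel, Datum val1, Datum val2)
{
	Datum		values[2];
	bool		nulls[2] = {false, false};
	HeapTuple	tup;

	values[0] = val1;
	values[1] = val2;

	/* build a palloc'd tuple from the datums, then insert it */
	tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);
	(void) simple_heap_insert(rel, tup);

	/* tup->t_self now contains the TID where the tuple was stored */
	heap_freetuple(tup);
}
#endif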
1939 :
1940 : /*
1941 : * heap_delete - delete a tuple
1942 : *
1943 : * NB: do not call this directly unless you are prepared to deal with
1944 : * concurrent-update conditions. Use simple_heap_delete instead.
1945 : *
1946 : * relation - table to be modified (caller must hold suitable lock)
1947 : * tid - TID of tuple to be deleted
1948 : * ctid - output parameter, used only for failure case (see below)
1949 : * update_xmax - output parameter, used only for failure case (see below)
1950 : * cid - delete command ID (used for visibility test, and stored into
1951 : * cmax if successful)
1952 : * crosscheck - if not InvalidSnapshot, also check tuple against this
1953 : * wait - true if should wait for any conflicting update to commit/abort
1954 : *
1955 : * Normal, successful return value is HeapTupleMayBeUpdated, which
1956 : * actually means we did delete it. Failure return codes are
1957 : * HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated
1958 : * (the last only possible if wait == false).
1959 : *
1960 : * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
1961 : * If t_ctid is the same as tid, the tuple was deleted; if different, the
1962 : * tuple was updated, and t_ctid is the location of the replacement tuple.
1963 : * (t_xmax is needed to verify that the replacement tuple matches.)
1964 : */
1965 : HTSU_Result
1966 : heap_delete(Relation relation, ItemPointer tid,
1967 : ItemPointer ctid, TransactionId *update_xmax,
1968 : CommandId cid, Snapshot crosscheck, bool wait)
1969 21381 : {
1970 : HTSU_Result result;
1971 21381 : TransactionId xid = GetCurrentTransactionId();
1972 : ItemId lp;
1973 : HeapTupleData tp;
1974 : PageHeader dp;
1975 : Buffer buffer;
1976 21381 : bool have_tuple_lock = false;
1977 : bool iscombo;
1978 :
1979 : Assert(ItemPointerIsValid(tid));
1980 :
1981 21381 : buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1982 21381 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
1983 :
1984 21381 : dp = (PageHeader) BufferGetPage(buffer);
1985 21381 : lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid));
1986 : Assert(ItemIdIsNormal(lp));
1987 :
1988 21381 : tp.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
1989 21381 : tp.t_len = ItemIdGetLength(lp);
1990 21381 : tp.t_self = *tid;
1991 :
1992 21381 : l1:
1993 21381 : result = HeapTupleSatisfiesUpdate(tp.t_data, cid, buffer);
1994 :
1995 21381 : if (result == HeapTupleInvisible)
1996 : {
1997 0 : UnlockReleaseBuffer(buffer);
1998 0 : elog(ERROR, "attempted to delete invisible tuple");
1999 : }
2000 21381 : else if (result == HeapTupleBeingUpdated && wait)
2001 : {
2002 : TransactionId xwait;
2003 : uint16 infomask;
2004 :
2005 : /* must copy state data before unlocking buffer */
2006 0 : xwait = HeapTupleHeaderGetXmax(tp.t_data);
2007 0 : infomask = tp.t_data->t_infomask;
2008 :
2009 0 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2010 :
2011 : /*
2012 : * Acquire tuple lock to establish our priority for the tuple (see
2013 : * heap_lock_tuple). LockTuple will release us when we are
2014 : * next-in-line for the tuple.
2015 : *
2016 : * If we are forced to "start over" below, we keep the tuple lock;
2017 : * this arranges that we stay at the head of the line while rechecking
2018 : * tuple state.
2019 : */
2020 0 : if (!have_tuple_lock)
2021 : {
2022 0 : LockTuple(relation, &(tp.t_self), ExclusiveLock);
2023 0 : have_tuple_lock = true;
2024 : }
2025 :
2026 : /*
2027 : * Sleep until concurrent transaction ends. Note that we don't care
2028 : * if the locker has an exclusive or shared lock, because we need
2029 : * exclusive.
2030 : */
2031 :
2032 0 : if (infomask & HEAP_XMAX_IS_MULTI)
2033 : {
2034 : /* wait for multixact */
2035 0 : MultiXactIdWait((MultiXactId) xwait);
2036 0 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2037 :
2038 : /*
2039 : * If xwait had just locked the tuple then some other xact could
2040 : * update this tuple before we get to this point. Check for xmax
2041 : * change, and start over if so.
2042 : */
2043 0 : if (!(tp.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
2044 : !TransactionIdEquals(HeapTupleHeaderGetXmax(tp.t_data),
2045 : xwait))
2046 : goto l1;
2047 :
2048 : /*
2049 : * You might think the multixact is necessarily done here, but not
2050 : * so: it could have surviving members, namely our own xact or
2051 : * other subxacts of this backend. It is legal for us to delete
2052 : * the tuple in either case, however (the latter case is
2053 : * essentially a situation of upgrading our former shared lock to
2054 : * exclusive). We don't bother changing the on-disk hint bits
2055 : * since we are about to overwrite the xmax altogether.
2056 : */
2057 : }
2058 : else
2059 : {
2060 : /* wait for regular transaction to end */
2061 0 : XactLockTableWait(xwait);
2062 0 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2063 :
2064 : /*
2065 : * xwait is done, but if xwait had just locked the tuple then some
2066 : * other xact could update this tuple before we get to this point.
2067 : * Check for xmax change, and start over if so.
2068 : */
2069 0 : if ((tp.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
2070 : !TransactionIdEquals(HeapTupleHeaderGetXmax(tp.t_data),
2071 : xwait))
2072 : goto l1;
2073 :
2074 : /* Otherwise check if it committed or aborted */
2075 0 : UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2076 : }
2077 :
2078 : /*
2079 : * We may overwrite if previous xmax aborted, or if it committed but
2080 : * only locked the tuple without updating it.
2081 : */
2082 0 : if (tp.t_data->t_infomask & (HEAP_XMAX_INVALID |
2083 : HEAP_IS_LOCKED))
2084 0 : result = HeapTupleMayBeUpdated;
2085 : else
2086 0 : result = HeapTupleUpdated;
2087 : }
2088 :
2089 21381 : if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
2090 : {
2091 : /* Perform additional check for serializable RI updates */
2092 0 : if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2093 0 : result = HeapTupleUpdated;
2094 : }
2095 :
2096 21381 : if (result != HeapTupleMayBeUpdated)
2097 : {
2098 : Assert(result == HeapTupleSelfUpdated ||
2099 : result == HeapTupleUpdated ||
2100 : result == HeapTupleBeingUpdated);
2101 : Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
2102 0 : *ctid = tp.t_data->t_ctid;
2103 0 : *update_xmax = HeapTupleHeaderGetXmax(tp.t_data);
2104 0 : UnlockReleaseBuffer(buffer);
2105 0 : if (have_tuple_lock)
2106 0 : UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
2107 0 : return result;
2108 : }
2109 :
2110 : /* replace cid with a combo cid if necessary */
2111 21381 : HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2112 :
2113 21381 : START_CRIT_SECTION();
2114 :
2115 : /*
2116 : * If this transaction commits, the tuple will become DEAD sooner or
2117 : * later. Set flag that this page is a candidate for pruning once our xid
2118 : * falls below the OldestXmin horizon. If the transaction finally aborts,
2119 : * the subsequent page pruning will be a no-op and the hint will be
2120 : * cleared.
2121 : */
2122 21381 : PageSetPrunable(dp, xid);
2123 :
2124 : /* store transaction information of xact deleting the tuple */
2125 21381 : tp.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
2126 : HEAP_XMAX_INVALID |
2127 : HEAP_XMAX_IS_MULTI |
2128 : HEAP_IS_LOCKED |
2129 : HEAP_MOVED);
2130 21381 : HeapTupleHeaderClearHotUpdated(tp.t_data);
2131 21381 : HeapTupleHeaderSetXmax(tp.t_data, xid);
2132 21381 : HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
2133 : /* Make sure there is no forward chain link in t_ctid */
2134 21381 : tp.t_data->t_ctid = tp.t_self;
2135 :
2136 21381 : MarkBufferDirty(buffer);
2137 :
2138 : /* XLOG stuff */
2139 21381 : if (!relation->rd_istemp)
2140 : {
2141 : xl_heap_delete xlrec;
2142 : XLogRecPtr recptr;
2143 : XLogRecData rdata[2];
2144 :
2145 21359 : xlrec.target.node = relation->rd_node;
2146 21359 : xlrec.target.tid = tp.t_self;
2147 21359 : rdata[0].data = (char *) &xlrec;
2148 21359 : rdata[0].len = SizeOfHeapDelete;
2149 21359 : rdata[0].buffer = InvalidBuffer;
2150 21359 : rdata[0].next = &(rdata[1]);
2151 :
2152 21359 : rdata[1].data = NULL;
2153 21359 : rdata[1].len = 0;
2154 21359 : rdata[1].buffer = buffer;
2155 21359 : rdata[1].buffer_std = true;
2156 21359 : rdata[1].next = NULL;
2157 :
2158 21359 : recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata);
2159 :
2160 21359 : PageSetLSN(dp, recptr);
2161 21359 : PageSetTLI(dp, ThisTimeLineID);
2162 : }
2163 :
2164 21381 : END_CRIT_SECTION();
2165 :
2166 21381 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2167 :
2168 : /*
2169 : * If the tuple has toasted out-of-line attributes, we need to delete
2170 : * those items too. We have to do this before releasing the buffer
2171 : * because we need to look at the contents of the tuple, but it's OK to
2172 : * release the content lock on the buffer first.
2173 : */
2174 21381 : if (relation->rd_rel->relkind != RELKIND_RELATION)
2175 : {
2176 : /* toast table entries should never be recursively toasted */
2177 : Assert(!HeapTupleHasExternal(&tp));
2178 : }
2179 21381 : else if (HeapTupleHasExternal(&tp))
2180 0 : toast_delete(relation, &tp);
2181 :
2182 : /*
2183 : * Mark tuple for invalidation from system caches at next command
2184 : * boundary. We have to do this before releasing the buffer because we
2185 : * need to look at the contents of the tuple.
2186 : */
2187 21381 : CacheInvalidateHeapTuple(relation, &tp);
2188 :
2189 : /* Now we can release the buffer */
2190 21381 : ReleaseBuffer(buffer);
2191 :
2192 : /*
2193 : * Release the lmgr tuple lock, if we had it.
2194 : */
2195 21381 : if (have_tuple_lock)
2196 0 : UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
2197 :
2198 21381 : pgstat_count_heap_delete(relation);
2199 :
2200 21381 : return HeapTupleMayBeUpdated;
2201 : }
2202 :
2203 : /*
2204 : * simple_heap_delete - delete a tuple
2205 : *
2206 : * This routine may be used to delete a tuple when concurrent updates of
2207 : * the target tuple are not expected (for example, because we have a lock
2208 : * on the relation associated with the tuple). Any failure is reported
2209 : * via ereport().
2210 : */
2211 : void
2212 : simple_heap_delete(Relation relation, ItemPointer tid)
2213 13271 : {
2214 : HTSU_Result result;
2215 : ItemPointerData update_ctid;
2216 : TransactionId update_xmax;
2217 :
2218 13271 : result = heap_delete(relation, tid,
2219 : &update_ctid, &update_xmax,
2220 : GetCurrentCommandId(true), InvalidSnapshot,
2221 : true /* wait for commit */ );
2222 13271 : switch (result)
2223 : {
2224 : case HeapTupleSelfUpdated:
2225 : /* Tuple was already updated in current command? */
2226 0 : elog(ERROR, "tuple already updated by self");
2227 0 : break;
2228 :
2229 : case HeapTupleMayBeUpdated:
2230 : /* done successfully */
2231 : break;
2232 :
2233 : case HeapTupleUpdated:
2234 0 : elog(ERROR, "tuple concurrently updated");
2235 0 : break;
2236 :
2237 : default:
2238 0 : elog(ERROR, "unrecognized heap_delete status: %u", result);
2239 : break;
2240 : }
2241 13271 : }
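
#ifdef NOT_USED
/*
 * Editor's sketch (not part of the original file): a hypothetical caller
 * that is prepared for concurrent updates and therefore calls heap_delete
 * directly, interpreting the failure outputs as described in heap_delete's
 * header comment.  A real caller (e.g. the executor) would go on to
 * re-fetch and re-check the replacement version; that part is omitted.
 */
static void
example_concurrent_delete(Relation rel, ItemPointer tid)
{
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	result = heap_delete(rel, tid, &update_ctid, &update_xmax,
						 GetCurrentCommandId(true), InvalidSnapshot,
						 true /* wait */ );

	if (result == HeapTupleUpdated)
	{
		if (ItemPointerEquals(tid, &update_ctid))
		{
			/* the tuple was deleted by a concurrent transaction */
		}
		else
		{
			/*
			 * The tuple was updated concurrently: update_ctid points at
			 * the replacement version, and update_xmax identifies the
			 * updating transaction so the chain can be verified.
			 */
		}
	}
}
#endif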
2242 :
2243 : /*
2244 : * heap_update - replace a tuple
2245 : *
2246 : * NB: do not call this directly unless you are prepared to deal with
2247 : * concurrent-update conditions. Use simple_heap_update instead.
2248 : *
2249 : * relation - table to be modified (caller must hold suitable lock)
2250 : * otid - TID of old tuple to be replaced
2251 : * newtup - newly constructed tuple data to store
2252 : * ctid - output parameter, used only for failure case (see below)
2253 : * update_xmax - output parameter, used only for failure case (see below)
2254 : * cid - update command ID (used for visibility test, and stored into
2255 : * cmax/cmin if successful)
2256 : * crosscheck - if not InvalidSnapshot, also check old tuple against this
2257 : * wait - true if should wait for any conflicting update to commit/abort
2258 : *
2259 : * Normal, successful return value is HeapTupleMayBeUpdated, which
2260 : * actually means we *did* update it. Failure return codes are
2261 : * HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated
2262 : * (the last only possible if wait == false).
2263 : *
2264 : * On success, the header fields of *newtup are updated to match the new
2265 : * stored tuple; in particular, newtup->t_self is set to the TID where the
2266 : * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
2267 : * update was done. However, any TOAST changes in the new tuple's
2268 : * data are not reflected into *newtup.
2269 : *
2270 : * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
2271 : * If t_ctid is the same as otid, the tuple was deleted; if different, the
2272 : * tuple was updated, and t_ctid is the location of the replacement tuple.
2273 : * (t_xmax is needed to verify that the replacement tuple matches.)
2274 : */
2275 : HTSU_Result
2276 : heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
2277 : ItemPointer ctid, TransactionId *update_xmax,
2278 : CommandId cid, Snapshot crosscheck, bool wait)
2279 4588 : {
2280 : HTSU_Result result;
2281 4588 : TransactionId xid = GetCurrentTransactionId();
2282 : Bitmapset *hot_attrs;
2283 : ItemId lp;
2284 : HeapTupleData oldtup;
2285 : HeapTuple heaptup;
2286 : PageHeader dp;
2287 : Buffer buffer,
2288 : newbuf;
2289 : bool need_toast,
2290 : already_marked;
2291 : Size newtupsize,
2292 : pagefree;
2293 4588 : bool have_tuple_lock = false;
2294 : bool iscombo;
2295 4588 : bool use_hot_update = false;
2296 :
2297 : Assert(ItemPointerIsValid(otid));
2298 :
2299 : /*
2300 : * Fetch the list of attributes to be checked for HOT update. This is
2301 : * wasted effort if we fail to update or have to put the new tuple on a
2302 : * different page. But we must compute the list before obtaining buffer
2303 : * lock --- in the worst case, if we are doing an update on one of the
2304 : * relevant system catalogs, we could deadlock if we try to fetch the list
2305 : * later. In any case, the relcache caches the data so this is usually
2306 : * pretty cheap.
2307 : *
2308 : * Note that we get a copy here, so we need not worry about relcache flush
2309 : * happening midway through.
2310 : */
2311 4588 : hot_attrs = RelationGetIndexAttrBitmap(relation);
2312 :
2313 4588 : buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(otid));
2314 4588 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2315 :
2316 4588 : dp = (PageHeader) BufferGetPage(buffer);
2317 4588 : lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(otid));
2318 : Assert(ItemIdIsNormal(lp));
2319 :
2320 4588 : oldtup.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
2321 4588 : oldtup.t_len = ItemIdGetLength(lp);
2322 4588 : oldtup.t_self = *otid;
2323 :
2324 : /*
2325 : * Note: beyond this point, use oldtup not otid to refer to old tuple.
2326 : * otid may very well point at newtup->t_self, which we will overwrite
2327 : * with the new tuple's location, so there's great risk of confusion if we
2328 : * use otid anymore.
2329 : */
2330 :
2331 4588 : l2:
2332 4588 : result = HeapTupleSatisfiesUpdate(oldtup.t_data, cid, buffer);
2333 :
2334 4588 : if (result == HeapTupleInvisible)
2335 : {
2336 0 : UnlockReleaseBuffer(buffer);
2337 0 : elog(ERROR, "attempted to update invisible tuple");
2338 : }
2339 4588 : else if (result == HeapTupleBeingUpdated && wait)
2340 : {
2341 : TransactionId xwait;
2342 : uint16 infomask;
2343 :
2344 : /* must copy state data before unlocking buffer */
2345 0 : xwait = HeapTupleHeaderGetXmax(oldtup.t_data);
2346 0 : infomask = oldtup.t_data->t_infomask;
2347 :
2348 0 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2349 :
2350 : /*
2351 : * Acquire tuple lock to establish our priority for the tuple (see
2352 : * heap_lock_tuple). LockTuple will release us when we are
2353 : * next-in-line for the tuple.
2354 : *
2355 : * If we are forced to "start over" below, we keep the tuple lock;
2356 : * this arranges that we stay at the head of the line while rechecking
2357 : * tuple state.
2358 : */
2359 0 : if (!have_tuple_lock)
2360 : {
2361 0 : LockTuple(relation, &(oldtup.t_self), ExclusiveLock);
2362 0 : have_tuple_lock = true;
2363 : }
2364 :
2365 : /*
2366 : * Sleep until concurrent transaction ends. Note that we don't care
2367 : * if the locker has an exclusive or shared lock, because we need
2368 : * exclusive.
2369 : */
2370 :
2371 0 : if (infomask & HEAP_XMAX_IS_MULTI)
2372 : {
2373 : /* wait for multixact */
2374 0 : MultiXactIdWait((MultiXactId) xwait);
2375 0 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2376 :
2377 : /*
2378 : * If xwait had just locked the tuple then some other xact could
2379 : * update this tuple before we get to this point. Check for xmax
2380 : * change, and start over if so.
2381 : */
2382 0 : if (!(oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
2383 : !TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
2384 : xwait))
2385 : goto l2;
2386 :
2387 : /*
2388 : * You might think the multixact is necessarily done here, but not
2389 : * so: it could have surviving members, namely our own xact or
2390 : * other subxacts of this backend. It is legal for us to update
2391 : * the tuple in either case, however (the latter case is
2392 : * essentially a situation of upgrading our former shared lock to
2393 : * exclusive). We don't bother changing the on-disk hint bits
2394 : * since we are about to overwrite the xmax altogether.
2395 : */
2396 : }
2397 : else
2398 : {
2399 : /* wait for regular transaction to end */
2400 0 : XactLockTableWait(xwait);
2401 0 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2402 :
2403 : /*
2404 : * xwait is done, but if xwait had just locked the tuple then some
2405 : * other xact could update this tuple before we get to this point.
2406 : * Check for xmax change, and start over if so.
2407 : */
2408 0 : if ((oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
2409 : !TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
2410 : xwait))
2411 : goto l2;
2412 :
2413 : /* Otherwise check if it committed or aborted */
2414 0 : UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
2415 : }
2416 :
2417 : /*
2418 : * We may overwrite if previous xmax aborted, or if it committed but
2419 : * only locked the tuple without updating it.
2420 : */
2421 0 : if (oldtup.t_data->t_infomask & (HEAP_XMAX_INVALID |
2422 : HEAP_IS_LOCKED))
2423 0 : result = HeapTupleMayBeUpdated;
2424 : else
2425 0 : result = HeapTupleUpdated;
2426 : }
2427 :
2428 4588 : if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
2429 : {
2430 : /* Perform additional check for serializable RI updates */
2431 0 : if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
2432 0 : result = HeapTupleUpdated;
2433 : }
2434 :
2435 4588 : if (result != HeapTupleMayBeUpdated)
2436 : {
2437 : Assert(result == HeapTupleSelfUpdated ||
2438 : result == HeapTupleUpdated ||
2439 : result == HeapTupleBeingUpdated);
2440 : Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
2441 4 : *ctid = oldtup.t_data->t_ctid;
2442 4 : *update_xmax = HeapTupleHeaderGetXmax(oldtup.t_data);
2443 4 : UnlockReleaseBuffer(buffer);
2444 4 : if (have_tuple_lock)
2445 0 : UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
2446 4 : bms_free(hot_attrs);
2447 4 : return result;
2448 : }
2449 :
2450 : /* Fill in OID and transaction status data for newtup */
2451 4584 : if (relation->rd_rel->relhasoids)
2452 : {
2453 : #ifdef NOT_USED
2454 : /* this is redundant with an Assert in HeapTupleSetOid */
2455 : Assert(newtup->t_data->t_infomask & HEAP_HASOID);
2456 : #endif
2457 1718 : HeapTupleSetOid(newtup, HeapTupleGetOid(&oldtup));
2458 : }
2459 : else
2460 : {
2461 : /* check there is no space for an OID */
2462 : Assert(!(newtup->t_data->t_infomask & HEAP_HASOID));
2463 : }
2464 :
2465 4584 : newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
2466 4584 : newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
2467 4584 : newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
2468 4584 : HeapTupleHeaderSetXmin(newtup->t_data, xid);
2469 4584 : HeapTupleHeaderSetCmin(newtup->t_data, cid);
2470 4584 : HeapTupleHeaderSetXmax(newtup->t_data, 0); /* for cleanliness */
2471 :
2472 : /*
2473 : * Replace cid with a combo cid if necessary. Note that we already put
2474 : * the plain cid into the new tuple.
2475 : */
2476 4584 : HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
2477 :
2478 : /*
2479 : * If the toaster needs to be activated, OR if the new tuple will not fit
2480 : * on the same page as the old, then we need to release the content lock
2481 : * (but not the pin!) on the old tuple's buffer while we are off doing
2482 : * TOAST and/or table-file-extension work. We must mark the old tuple to
2483 : * show that it's already being updated, else other processes may try to
2484 : * update it themselves.
2485 : *
2486 : * We need to invoke the toaster if there are already any out-of-line
2487 : * toasted values present, or if the new tuple is over-threshold.
2488 : */
2489 4584 : if (relation->rd_rel->relkind != RELKIND_RELATION)
2490 : {
2491 : /* toast table entries should never be recursively toasted */
2492 : Assert(!HeapTupleHasExternal(&oldtup));
2493 : Assert(!HeapTupleHasExternal(newtup));
2494 0 : need_toast = false;
2495 : }
2496 : else
2497 4584 : need_toast = (HeapTupleHasExternal(&oldtup) ||
2498 : HeapTupleHasExternal(newtup) ||
2499 : newtup->t_len > TOAST_TUPLE_THRESHOLD);
2500 :
2501 4584 : pagefree = PageGetHeapFreeSpace((Page) dp);
2502 :
2503 4584 : newtupsize = MAXALIGN(newtup->t_len);
2504 :
2505 4584 : if (need_toast || newtupsize > pagefree)
2506 : {
2507 : /* Clear obsolete visibility flags ... */
2508 2204 : oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
2509 : HEAP_XMAX_INVALID |
2510 : HEAP_XMAX_IS_MULTI |
2511 : HEAP_IS_LOCKED |
2512 : HEAP_MOVED);
2513 2204 : HeapTupleClearHotUpdated(&oldtup);
2514 : /* ... and store info about transaction updating this tuple */
2515 2204 : HeapTupleHeaderSetXmax(oldtup.t_data, xid);
2516 2204 : HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
2517 : /* temporarily make it look not-updated */
2518 2204 : oldtup.t_data->t_ctid = oldtup.t_self;
2519 2204 : already_marked = true;
2520 2204 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2521 :
2522 : /*
2523 : * Let the toaster do its thing, if needed.
2524 : *
2525 : * Note: below this point, heaptup is the data we actually intend to
2526 : * store into the relation; newtup is the caller's original untoasted
2527 : * data.
2528 : */
2529 2204 : if (need_toast)
2530 : {
2531 : /* Note we always use WAL and FSM during updates */
2532 5 : heaptup = toast_insert_or_update(relation, newtup, &oldtup,
2533 : true, true);
2534 5 : newtupsize = MAXALIGN(heaptup->t_len);
2535 : }
2536 : else
2537 2199 : heaptup = newtup;
2538 :
2539 : /*
2540 : * Now, do we need a new page for the tuple, or not? This is a bit
2541 : * tricky since someone else could have added tuples to the page while
2542 : * we weren't looking. We have to recheck the available space after
2543 : * reacquiring the buffer lock. But don't bother to do that if the
2544 : * former amount of free space is still not enough; it's unlikely
2545 : * there's more free now than before.
2546 : *
2547 : * What's more, if we need to get a new page, we will need to acquire
2548 : * buffer locks on both old and new pages. To avoid deadlock against
2549 : * some other backend trying to get the same two locks in the other
2550 : * order, we must be consistent about the order we get the locks in.
2551 : * We use the rule "lock the lower-numbered page of the relation
2552 : * first". To implement this, we must do RelationGetBufferForTuple
2553 : * while not holding the lock on the old page, and we must rely on it
2554 : * to get the locks on both pages in the correct order.
2555 : */
2556 2204 : if (newtupsize > pagefree)
2557 : {
2558 : /* Assume there's no chance to put heaptup on same page. */
2559 2199 : newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
2560 : buffer, true);
2561 : }
2562 : else
2563 : {
2564 : /* Re-acquire the lock on the old tuple's page. */
2565 5 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2566 : /* Re-check using the up-to-date free space */
2567 5 : pagefree = PageGetHeapFreeSpace((Page) dp);
2568 5 : if (newtupsize > pagefree)
2569 : {
2570 : /*
2571 : * Rats, it doesn't fit anymore. We must now unlock and
2572 : * relock to avoid deadlock. Fortunately, this path should
2573 : * seldom be taken.
2574 : */
2575 0 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2576 0 : newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
2577 : buffer, true);
2578 : }
2579 : else
2580 : {
2581 : /* OK, it fits here, so we're done. */
2582 5 : newbuf = buffer;
2583 : }
2584 : }
2585 : }
2586 : else
2587 : {
2588 : /* No TOAST work needed, and it'll fit on same page */
2589 2380 : already_marked = false;
2590 2380 : newbuf = buffer;
2591 2380 : heaptup = newtup;
2592 : }
2593 :
2594 : /*
2595 : * At this point newbuf and buffer are both pinned and locked, and newbuf
2596 : * has enough space for the new tuple. If they are the same buffer, only
2597 : * one pin is held.
2598 : */
2599 :
2600 4584 : if (newbuf == buffer)
2601 : {
2602 : /*
2603 : * Since the new tuple is going into the same page, we might be able
2604 : * to do a HOT update. Check if any of the index columns have been
2605 : * changed. If not, then HOT update is possible.
2606 : */
2607 2385 : if (HeapSatisfiesHOTUpdate(relation, hot_attrs, &oldtup, heaptup))
2608 2196 : use_hot_update = true;
2609 : }
2610 : else
2611 : {
2612 : /* Set a hint that the old page could use prune/defrag */
2613 2199 : PageSetFull(dp);
2614 : }
2615 :
2616 : /* NO EREPORT(ERROR) from here till changes are logged */
2617 4584 : START_CRIT_SECTION();
2618 :
2619 : /*
2620 : * If this transaction commits, the old tuple will become DEAD sooner or
2621 : * later. Set flag that this page is a candidate for pruning once our xid
2622 : * falls below the OldestXmin horizon. If the transaction finally aborts,
2623 : * the subsequent page pruning will be a no-op and the hint will be
2624 : * cleared.
2625 : *
2626 : * XXX Should we set hint on newbuf as well? If the transaction aborts,
2627 : * there would be a prunable tuple in the newbuf; but for now we choose
2628 : * not to optimize for aborts. Note that heap_xlog_update must be kept in
2629 : * sync if this decision changes.
2630 : */
2631 4584 : PageSetPrunable(dp, xid);
2632 :
2633 4584 : if (use_hot_update)
2634 : {
2635 : /* Mark the old tuple as HOT-updated */
2636 2196 : HeapTupleSetHotUpdated(&oldtup);
2637 : /* And mark the new tuple as heap-only */
2638 2196 : HeapTupleSetHeapOnly(heaptup);
2639 : /* Mark the caller's copy too, in case different from heaptup */
2640 2196 : HeapTupleSetHeapOnly(newtup);
2641 : }
2642 : else
2643 : {
2644 : /* Make sure tuples are correctly marked as not-HOT */
2645 2388 : HeapTupleClearHotUpdated(&oldtup);
2646 2388 : HeapTupleClearHeapOnly(heaptup);
2647 2388 : HeapTupleClearHeapOnly(newtup);
2648 : }
2649 :
2650 4584 : RelationPutHeapTuple(relation, newbuf, heaptup); /* insert new tuple */
2651 :
2652 4584 : if (!already_marked)
2653 : {
2654 : /* Clear obsolete visibility flags ... */
2655 2380 : oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
2656 : HEAP_XMAX_INVALID |
2657 : HEAP_XMAX_IS_MULTI |
2658 : HEAP_IS_LOCKED |
2659 : HEAP_MOVED);
2660 : /* ... and store info about transaction updating this tuple */
2661 2380 : HeapTupleHeaderSetXmax(oldtup.t_data, xid);
2662 2380 : HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
2663 : }
2664 :
2665 : /* record address of new tuple in t_ctid of old one */
2666 4584 : oldtup.t_data->t_ctid = heaptup->t_self;
2667 :
2668 4584 : if (newbuf != buffer)
2669 2199 : MarkBufferDirty(newbuf);
2670 4584 : MarkBufferDirty(buffer);
2671 :
2672 : /* XLOG stuff */
2673 4584 : if (!relation->rd_istemp)
2674 : {
2675 : XLogRecPtr recptr = log_heap_update(relation, buffer, oldtup.t_self,
2676 4521 : newbuf, heaptup, false);
2677 :
2678 4521 : if (newbuf != buffer)
2679 : {
2680 2199 : PageSetLSN(BufferGetPage(newbuf), recptr);
2681 2199 : PageSetTLI(BufferGetPage(newbuf), ThisTimeLineID);
2682 : }
2683 4521 : PageSetLSN(BufferGetPage(buffer), recptr);
2684 4521 : PageSetTLI(BufferGetPage(buffer), ThisTimeLineID);
2685 : }
2686 :
2687 4584 : END_CRIT_SECTION();
2688 :
2689 4584 : if (newbuf != buffer)
2690 2199 : LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
2691 4584 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2692 :
2693 : /*
2694 : * Mark old tuple for invalidation from system caches at next command
2695 : * boundary. We have to do this before releasing the buffer because we
2696 : * need to look at the contents of the tuple.
2697 : */
2698 4584 : CacheInvalidateHeapTuple(relation, &oldtup);
2699 :
2700 : /* Now we can release the buffer(s) */
2701 4584 : if (newbuf != buffer)
2702 2199 : ReleaseBuffer(newbuf);
2703 4584 : ReleaseBuffer(buffer);
2704 :
2705 : /*
2706 : * If new tuple is cachable, mark it for invalidation from the caches in
2707 : * case we abort. Note it is OK to do this after releasing the buffer,
2708 : * because the heaptup data structure is all in local memory, not in the
2709 : * shared buffer.
2710 : */
2711 4584 : CacheInvalidateHeapTuple(relation, heaptup);
2712 :
2713 : /*
2714 : * Release the lmgr tuple lock, if we had it.
2715 : */
2716 4584 : if (have_tuple_lock)
2717 0 : UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
2718 :
2719 4584 : pgstat_count_heap_update(relation, use_hot_update);
2720 :
2721 : /*
2722 : * If heaptup is a private copy, release it. Don't forget to copy t_self
2723 : * back to the caller's image, too.
2724 : */
2725 4584 : if (heaptup != newtup)
2726 : {
2727 5 : newtup->t_self = heaptup->t_self;
2728 5 : heap_freetuple(heaptup);
2729 : }
2730 :
2731 4584 : bms_free(hot_attrs);
2732 :
2733 4584 : return HeapTupleMayBeUpdated;
2734 : }
2735 :
2736 : /*
2737 : * Check if the specified attribute's value is same in both given tuples.
2738 : * Subroutine for HeapSatisfiesHOTUpdate.
2739 : */
2740 : static bool
2741 : heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
2742 : HeapTuple tup1, HeapTuple tup2)
2743 5525 : {
2744 : Datum value1,
2745 : value2;
2746 : bool isnull1,
2747 : isnull2;
2748 : Form_pg_attribute att;
2749 :
2750 : /*
2751 : * If it's a whole-tuple reference, say "not equal". It's not really
2752 : * worth supporting this case, since it could only succeed after a no-op
2753 : * update, which is hardly a case worth optimizing for.
2754 : */
2755 5525 : if (attrnum == 0)
2756 0 : return false;
2757 :
2758 : /*
2759 : * Likewise, automatically say "not equal" for any system attribute other
2760 : * than OID and tableOID; we cannot expect these to be consistent in a HOT
2761 : * chain, or even to be set correctly yet in the new tuple.
2762 : */
2763 5525 : if (attrnum < 0)
2764 : {
2765 1502 : if (attrnum != ObjectIdAttributeNumber &&
2766 : attrnum != TableOidAttributeNumber)
2767 0 : return false;
2768 : }
2769 :
2770 : /*
2771 : * Extract the corresponding values. XXX this is pretty inefficient if
2772 : * there are many indexed columns. Should HeapSatisfiesHOTUpdate do a
2773 : * single heap_deform_tuple call on each tuple, instead? But that doesn't
2774 : * work for system columns ...
2775 : */
2776 5525 : value1 = heap_getattr(tup1, attrnum, tupdesc, &isnull1);
2777 5525 : value2 = heap_getattr(tup2, attrnum, tupdesc, &isnull2);
2778 :
2779 : /*
2780 : * If one value is NULL and the other is not, then they are certainly
2781 : * not equal.
2782 : */
2783 5525 : if (isnull1 != isnull2)
2784 0 : return false;
2785 :
2786 : /*
2787 : * If both are NULL, they can be considered equal.
2788 : */
2789 5525 : if (isnull1)
2790 0 : return true;
2791 :
2792 : /*
2793 : * We do simple binary comparison of the two datums. This may be overly
2794 : * strict because there can be multiple binary representations for the
2795 : * same logical value. But we should be OK as long as there are no false
2796 : * positives. Using a type-specific equality operator is messy because
2797 : * there could be multiple notions of equality in different operator
2798 : * classes; furthermore, we cannot safely invoke user-defined functions
2799 : * while holding exclusive buffer lock.
2800 : */
2801 5525 : if (attrnum <= 0)
2802 : {
2803 : /* The only allowed system columns are OIDs, so do this */
2804 1502 : return (DatumGetObjectId(value1) == DatumGetObjectId(value2));
2805 : }
2806 : else
2807 : {
2808 : Assert(attrnum <= tupdesc->natts);
2809 4023 : att = tupdesc->attrs[attrnum - 1];
2810 4023 : return datumIsEqual(value1, value2, att->attbyval, att->attlen);
2811 : }
2812 : }
2813 :
2814 : /*
2815 : * Check if the old and new tuples represent a HOT-safe update. To be able
2816 : * to do a HOT update, we must not have changed any columns used in index
2817 : * definitions.
2818 : *
2819 : * The set of attributes to be checked is passed in (we dare not try to
2820 : * compute it while holding exclusive buffer lock...) NOTE that hot_attrs
2821 : * is destructively modified! That is OK since this is invoked at most once
2822 : * by heap_update().
2823 : *
2824 : * Returns true if safe to do HOT update.
2825 : */
2826 : static bool
2827 : HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs,
2828 : HeapTuple oldtup, HeapTuple newtup)
2829 2385 : {
2830 : int attrnum;
2831 :
2832 10106 : while ((attrnum = bms_first_member(hot_attrs)) >= 0)
2833 : {
2834 : /* Adjust for system attributes */
2835 5525 : attrnum += FirstLowInvalidHeapAttributeNumber;
2836 :
2837 : /* If the attribute value has changed, we can't do HOT update */
2838 5525 : if (!heap_tuple_attr_equals(RelationGetDescr(relation), attrnum,
2839 : oldtup, newtup))
2840 189 : return false;
2841 : }
2842 :
2843 2196 : return true;
2844 : }
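
#ifdef NOT_USED
/*
 * Editor's sketch (not part of the original file): the Bitmapset returned
 * by RelationGetIndexAttrBitmap stores attribute numbers offset by
 * FirstLowInvalidHeapAttributeNumber, so that system attributes (which
 * have negative attnums) can be represented; the loop above reverses that
 * offset.  A hypothetical stand-alone membership test would look like this:
 */
static bool
example_attr_is_indexed(Relation rel, AttrNumber attnum)
{
	Bitmapset  *attrs = RelationGetIndexAttrBitmap(rel);
	bool		result;

	result = bms_is_member(attnum - FirstLowInvalidHeapAttributeNumber,
						   attrs);
	bms_free(attrs);
	return result;
}
#endif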
2845 :
2846 : /*
2847 : * simple_heap_update - replace a tuple
2848 : *
2849 : * This routine may be used to update a tuple when concurrent updates of
2850 : * the target tuple are not expected (for example, because we have a lock
2851 : * on the relation associated with the tuple). Any failure is reported
2852 : * via ereport().
2853 : */
2854 : void
2855 : simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
2856 1848 : {
2857 : HTSU_Result result;
2858 : ItemPointerData update_ctid;
2859 : TransactionId update_xmax;
2860 :
2861 1848 : result = heap_update(relation, otid, tup,
2862 : &update_ctid, &update_xmax,
2863 : GetCurrentCommandId(true), InvalidSnapshot,
2864 : true /* wait for commit */ );
2865 1848 : switch (result)
2866 : {
2867 : case HeapTupleSelfUpdated:
2868 : /* Tuple was already updated in current command? */
2869 0 : elog(ERROR, "tuple already updated by self");
2870 0 : break;
2871 :
2872 : case HeapTupleMayBeUpdated:
2873 : /* done successfully */
2874 : break;
2875 :
2876 : case HeapTupleUpdated:
2877 0 : elog(ERROR, "tuple concurrently updated");
2878 0 : break;
2879 :
2880 : default:
2881 0 : elog(ERROR, "unrecognized heap_update status: %u", result);
2882 : break;
2883 : }
2884 1848 : }
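
#ifdef NOT_USED
/*
 * Editor's sketch (not part of the original file): the typical in-place
 * catalog-update pattern built on simple_heap_update, replacing a single
 * (hypothetical) attribute number "attnum" of an existing tuple.  Index
 * maintenance and error handling are omitted.
 */
static void
example_simple_update(Relation rel, HeapTuple oldtup, int attnum, Datum newval)
{
	TupleDesc	tupdesc = RelationGetDescr(rel);
	Datum	   *values = (Datum *) palloc0(tupdesc->natts * sizeof(Datum));
	bool	   *nulls = (bool *) palloc0(tupdesc->natts * sizeof(bool));
	bool	   *replace = (bool *) palloc0(tupdesc->natts * sizeof(bool));
	HeapTuple	newtup;

	/* attnum is 1-based; only that column is marked for replacement */
	values[attnum - 1] = newval;
	replace[attnum - 1] = true;

	newtup = heap_modify_tuple(oldtup, tupdesc, values, nulls, replace);
	simple_heap_update(rel, &oldtup->t_self, newtup);

	heap_freetuple(newtup);
	pfree(values);
	pfree(nulls);
	pfree(replace);
}
#endif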
2885 :
2886 : /*
2887 : * heap_lock_tuple - lock a tuple in shared or exclusive mode
2888 : *
2889 : * Note that this acquires a buffer pin, which the caller must release.
2890 : *
2891 : * Input parameters:
2892 : * relation: relation containing tuple (caller must hold suitable lock)
2893 : * tuple->t_self: TID of tuple to lock (rest of struct need not be valid)
2894 : * cid: current command ID (used for visibility test, and stored into
2895 : * tuple's cmax if lock is successful)
2896 : * mode: indicates if shared or exclusive tuple lock is desired
2897 : * nowait: if true, ereport rather than blocking if lock not available
2898 : *
2899 : * Output parameters:
2900 : * *tuple: all fields filled in
2901 : * *buffer: set to buffer holding tuple (pinned but not locked at exit)
2902 : * *ctid: set to tuple's t_ctid, but only in failure cases
2903 : * *update_xmax: set to tuple's xmax, but only in failure cases
2904 : *
2905 : * Function result may be:
2906 : * HeapTupleMayBeUpdated: lock was successfully acquired
2907 : * HeapTupleSelfUpdated: lock failed because tuple updated by self
2908 : * HeapTupleUpdated: lock failed because tuple updated by other xact
2909 : *
2910 : * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
2911 : * If t_ctid is the same as t_self, the tuple was deleted; if different, the
2912 : * tuple was updated, and t_ctid is the location of the replacement tuple.
2913 : * (t_xmax is needed to verify that the replacement tuple matches.)
2914 : *
2915 : *
2916 : * NOTES: because the shared-memory lock table is of finite size, but users
2917 : * could reasonably want to lock large numbers of tuples, we do not rely on
2918 : * the standard lock manager to store tuple-level locks over the long term.
2919 : * Instead, a tuple is marked as locked by setting the current transaction's
2920 : * XID as its XMAX, and setting additional infomask bits to distinguish this
2921 : * usage from the more normal case of having deleted the tuple. When
2922 : * multiple transactions concurrently share-lock a tuple, the first locker's
2923 : * XID is replaced in XMAX with a MultiTransactionId representing the set of
2924 : * XIDs currently holding share-locks.
2925 : *
2926 : * When it is necessary to wait for a tuple-level lock to be released, the
2927 : * basic delay is provided by XactLockTableWait or MultiXactIdWait on the
2928 : * contents of the tuple's XMAX. However, that mechanism will release all
2929 : * waiters concurrently, so there would be a race condition as to which
2930 : * waiter gets the tuple, potentially leading to indefinite starvation of
2931 : * some waiters. The possibility of share-locking makes the problem much
2932 : * worse --- a steady stream of share-lockers can easily block an exclusive
2933 : * locker forever. To provide more reliable semantics about who gets a
2934 : * tuple-level lock first, we use the standard lock manager. The protocol
2935 : * for waiting for a tuple-level lock is really
2936 : * LockTuple()
2937 : * XactLockTableWait()
2938 : * mark tuple as locked by me
2939 : * UnlockTuple()
2940 : * When there are multiple waiters, arbitration of who is to get the lock next
2941 : * is provided by LockTuple(). However, at most one tuple-level lock will
2942 : * be held or awaited per backend at any time, so we don't risk overflow
2943 : * of the lock table. Note that incoming share-lockers are required to
2944 : * do LockTuple as well, if there is any conflict, to ensure that they don't
2945 : * starve out waiting exclusive-lockers. However, if there is not any active
2946 : * conflict for a tuple, we don't incur any extra overhead.
2947 : */
2948 : HTSU_Result
2949 : heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer,
2950 : ItemPointer ctid, TransactionId *update_xmax,
2951 : CommandId cid, LockTupleMode mode, bool nowait)
2952 263 : {
2953 : HTSU_Result result;
2954 263 : ItemPointer tid = &(tuple->t_self);
2955 : ItemId lp;
2956 : PageHeader dp;
2957 : TransactionId xid;
2958 : TransactionId xmax;
2959 : uint16 old_infomask;
2960 : uint16 new_infomask;
2961 : LOCKMODE tuple_lock_type;
2962 263 : bool have_tuple_lock = false;
2963 :
2964 263 : tuple_lock_type = (mode == LockTupleShared) ? ShareLock : ExclusiveLock;
2965 :
2966 263 : *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
2967 263 : LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
2968 :
2969 263 : dp = (PageHeader) BufferGetPage(*buffer);
2970 263 : lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid));
2971 : Assert(ItemIdIsNormal(lp));
2972 :
2973 263 : tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2974 263 : tuple->t_len = ItemIdGetLength(lp);
2975 263 : tuple->t_tableOid = RelationGetRelid(relation);
2976 :
2977 263 : l3:
2978 263 : result = HeapTupleSatisfiesUpdate(tuple->t_data, cid, *buffer);
2979 :
2980 263 : if (result == HeapTupleInvisible)
2981 : {
2982 0 : UnlockReleaseBuffer(*buffer);
2983 0 : elog(ERROR, "attempted to lock invisible tuple");
2984 : }
2985 263 : else if (result == HeapTupleBeingUpdated)
2986 : {
2987 : TransactionId xwait;
2988 : uint16 infomask;
2989 :
2990 : /* must copy state data before unlocking buffer */
2991 0 : xwait = HeapTupleHeaderGetXmax(tuple->t_data);
2992 0 : infomask = tuple->t_data->t_infomask;
2993 :
2994 0 : LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
2995 :
2996 : /*
2997 : * If we wish to acquire share lock, and the tuple is already
2998 : * share-locked by a multixact that includes any subtransaction of the
2999 : * current top transaction, then we effectively hold the desired lock
3000 : * already. We *must* succeed without trying to take the tuple lock,
3001 : * else we will deadlock against anyone waiting to acquire exclusive
3002 : * lock. We don't need to make any state changes in this case.
3003 : */
3004 0 : if (mode == LockTupleShared &&
3005 : (infomask & HEAP_XMAX_IS_MULTI) &&
3006 : MultiXactIdIsCurrent((MultiXactId) xwait))
3007 : {
3008 : Assert(infomask & HEAP_XMAX_SHARED_LOCK);
3009 : /* Probably can't hold tuple lock here, but may as well check */
3010 0 : if (have_tuple_lock)
3011 0 : UnlockTuple(relation, tid, tuple_lock_type);
3012 0 : return HeapTupleMayBeUpdated;
3013 : }
3014 :
3015 : /*
3016 : * Acquire tuple lock to establish our priority for the tuple.
3017 : * LockTuple will release us when we are next-in-line for the tuple.
3018 : * We must do this even if we are share-locking.
3019 : *
3020 : * If we are forced to "start over" below, we keep the tuple lock;
3021 : * this arranges that we stay at the head of the line while rechecking
3022 : * tuple state.
3023 : */
3024 0 : if (!have_tuple_lock)
3025 : {
3026 0 : if (nowait)
3027 : {
3028 0 : if (!ConditionalLockTuple(relation, tid, tuple_lock_type))
3029 0 : ereport(ERROR,
3030 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
3031 : errmsg("could not obtain lock on row in relation \"%s\"",
3032 : RelationGetRelationName(relation))));
3033 : }
3034 : else
3035 0 : LockTuple(relation, tid, tuple_lock_type);
3036 0 : have_tuple_lock = true;
3037 : }
3038 :
3039 0 : if (mode == LockTupleShared && (infomask & HEAP_XMAX_SHARED_LOCK))
3040 : {
3041 : /*
3042 : * Acquiring sharelock when there's at least one sharelocker
3043 : * already. We need not wait for them to complete.
3044 : */
3045 0 : LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
3046 :
3047 : /*
3048 : * Make sure it's still a shared lock, else start over. (It's OK
3049 : * if the ownership of the shared lock has changed, though.)
3050 : */
3051 0 : if (!(tuple->t_data->t_infomask & HEAP_XMAX_SHARED_LOCK))
3052 0 : goto l3;
3053 : }
3054 0 : else if (infomask & HEAP_XMAX_IS_MULTI)
3055 : {
3056 : /* wait for multixact to end */
3057 0 : if (nowait)
3058 : {
3059 0 : if (!ConditionalMultiXactIdWait((MultiXactId) xwait))
3060 0 : ereport(ERROR,
3061 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
3062 : errmsg("could not obtain lock on row in relation \"%s\"",
3063 : RelationGetRelationName(relation))));
3064 : }
3065 : else
3066 0 : MultiXactIdWait((MultiXactId) xwait);
3067 :
3068 0 : LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
3069 :
3070 : /*
3071 : * If xwait had just locked the tuple then some other xact could
3072 : * update this tuple before we get to this point. Check for xmax
3073 : * change, and start over if so.
3074 : */
3075 0 : if (!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
3076 : !TransactionIdEquals(HeapTupleHeaderGetXmax(tuple->t_data),
3077 : xwait))
3078 : goto l3;
3079 :
3080 : /*
3081 : * You might think the multixact is necessarily done here, but not
3082 : * so: it could have surviving members, namely our own xact or
3083 : * other subxacts of this backend. It is legal for us to lock the
3084 : * tuple in either case, however. We don't bother changing the
3085 : * on-disk hint bits since we are about to overwrite the xmax
3086 : * altogether.
3087 : */
3088 : }
3089 : else
3090 : {
3091 : /* wait for regular transaction to end */
3092 0 : if (nowait)
3093 : {
3094 0 : if (!ConditionalXactLockTableWait(xwait))
3095 0 : ereport(ERROR,
3096 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
3097 : errmsg("could not obtain lock on row in relation \"%s\"",
3098 : RelationGetRelationName(relation))));
3099 : }
3100 : else
3101 0 : XactLockTableWait(xwait);
3102 :
3103 0 : LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
3104 :
3105 : /*
3106 : * xwait is done, but if xwait had just locked the tuple then some
3107 : * other xact could update this tuple before we get to this point.
3108 : * Check for xmax change, and start over if so.
3109 : */
3110 0 : if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
3111 : !TransactionIdEquals(HeapTupleHeaderGetXmax(tuple->t_data),
3112 : xwait))
3113 : goto l3;
3114 :
3115 : /* Otherwise check if it committed or aborted */
3116 0 : UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
3117 : }
3118 :
3119 : /*
3120 : * We may lock if previous xmax aborted, or if it committed but only
3121 : * locked the tuple without updating it. The case where we didn't
3122 : * wait because we are joining an existing shared lock is correctly
3123 : * handled, too.
3124 : */
3125 0 : if (tuple->t_data->t_infomask & (HEAP_XMAX_INVALID |
3126 : HEAP_IS_LOCKED))
3127 0 : result = HeapTupleMayBeUpdated;
3128 : else
3129 0 : result = HeapTupleUpdated;
3130 : }
3131 :
3132 263 : if (result != HeapTupleMayBeUpdated)
3133 : {
3134 : Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
3135 : Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
3136 0 : *ctid = tuple->t_data->t_ctid;
3137 0 : *update_xmax = HeapTupleHeaderGetXmax(tuple->t_data);
3138 0 : LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
3139 0 : if (have_tuple_lock)
3140 0 : UnlockTuple(relation, tid, tuple_lock_type);
3141 0 : return result;
3142 : }
3143 :
3144 : /*
3145 : * We might already hold the desired lock (or stronger), possibly under a
3146 : * different subtransaction of the current top transaction. If so, there
3147 : * is no need to change state or issue a WAL record. We already handled
3148 : * the case where this is true for xmax being a MultiXactId, so now check
3149 : * for cases where it is a plain TransactionId.
3150 : *
3151 : * Note in particular that this covers the case where we already hold
3152 : * exclusive lock on the tuple and the caller only wants shared lock. It
3153 : * would certainly not do to give up the exclusive lock.
3154 : */
3155 263 : xmax = HeapTupleHeaderGetXmax(tuple->t_data);
3156 263 : old_infomask = tuple->t_data->t_infomask;
3157 :
3158 263 : if (!(old_infomask & (HEAP_XMAX_INVALID |
3159 : HEAP_XMAX_COMMITTED |
3160 : HEAP_XMAX_IS_MULTI)) &&
3161 : (mode == LockTupleShared ?
3162 : (old_infomask & HEAP_IS_LOCKED) :
3163 : (old_infomask & HEAP_XMAX_EXCL_LOCK)) &&
3164 : TransactionIdIsCurrentTransactionId(xmax))
3165 : {
3166 5 : LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
3167 : /* Probably can't hold tuple lock here, but may as well check */
3168 5 : if (have_tuple_lock)
3169 0 : UnlockTuple(relation, tid, tuple_lock_type);
3170 5 : return HeapTupleMayBeUpdated;
3171 : }
3172 :
3173 : /*
3174 : * Compute the new xmax and infomask to store into the tuple. Note we do
3175 : * not modify the tuple just yet, because that would leave it in the wrong
3176 : * state if multixact.c elogs.
3177 : */
3178 258 : xid = GetCurrentTransactionId();
3179 :
3180 258 : new_infomask = old_infomask & ~(HEAP_XMAX_COMMITTED |
3181 : HEAP_XMAX_INVALID |
3182 : HEAP_XMAX_IS_MULTI |
3183 : HEAP_IS_LOCKED |
3184 : HEAP_MOVED);
3185 :
3186 258 : if (mode == LockTupleShared)
3187 : {
3188 : /*
3189 : * If this is the first acquisition of a shared lock in the current
3190 : * transaction, set my per-backend OldestMemberMXactId setting. We can
3191 : * be certain that the transaction will never become a member of any
3192 : * older MultiXactIds than that. (We have to do this even if we end
3193 : * up just using our own TransactionId below, since some other backend
3194 : * could incorporate our XID into a MultiXact immediately afterwards.)
3195 : */
3196 111 : MultiXactIdSetOldestMember();
3197 :
3198 111 : new_infomask |= HEAP_XMAX_SHARED_LOCK;
3199 :
3200 : /*
3201 : * Check to see if we need a MultiXactId because there are multiple
3202 : * lockers.
3203 : *
3204 : * HeapTupleSatisfiesUpdate will have set the HEAP_XMAX_INVALID bit if
3205 : * the xmax was a MultiXactId but it was not running anymore. There is
3206 : * a race condition, which is that the MultiXactId may have finished
3207 : * since then, but that uncommon case is handled within
3208 : * MultiXactIdExpand.
3209 : *
3210 : * There is a similar race condition possible when the old xmax was a
3211 : * regular TransactionId. We test TransactionIdIsInProgress again
3212 : * just to narrow the window, but it's still possible to end up
3213 : * creating an unnecessary MultiXactId. Fortunately this is harmless.
3214 : */
3215 111 : if (!(old_infomask & (HEAP_XMAX_INVALID | HEAP_XMAX_COMMITTED)))
3216 : {
3217 0 : if (old_infomask & HEAP_XMAX_IS_MULTI)
3218 : {
3219 : /*
3220 : * If the XMAX is already a MultiXactId, then we need to
3221 : * expand it to include our own TransactionId.
3222 : */
3223 0 : xid = MultiXactIdExpand((MultiXactId) xmax, xid);
3224 0 : new_infomask |= HEAP_XMAX_IS_MULTI;
3225 : }
3226 0 : else if (TransactionIdIsInProgress(xmax))
3227 : {
3228 : /*
3229 : * If the XMAX is a valid TransactionId, then we need to
3230 : * create a new MultiXactId that includes both the old locker
3231 : * and our own TransactionId.
3232 : */
3233 0 : xid = MultiXactIdCreate(xmax, xid);
3234 0 : new_infomask |= HEAP_XMAX_IS_MULTI;
3235 : }
3236 : else
3237 : {
3238 : /*
3239 : * Can get here iff HeapTupleSatisfiesUpdate saw the old xmax
3240 : * as running, but it finished before
3241 : * TransactionIdIsInProgress() got to run. Treat it like
3242 : * there's no locker in the tuple.
3243 : */
3244 : }
3245 : }
3246 : else
3247 : {
3248 : /*
3249 : * There was no previous locker, so just insert our own
3250 : * TransactionId.
3251 : */
3252 : }
3253 : }
3254 : else
3255 : {
3256 : /* We want an exclusive lock on the tuple */
3257 147 : new_infomask |= HEAP_XMAX_EXCL_LOCK;
3258 : }
3259 :
3260 258 : START_CRIT_SECTION();
3261 :
3262 : /*
3263 : * Store transaction information of xact locking the tuple.
3264 : *
3265 : * Note: Cmax is meaningless in this context, so don't set it; this avoids
3266 : * possibly generating a useless combo CID.
3267 : */
3268 258 : tuple->t_data->t_infomask = new_infomask;
3269 258 : HeapTupleHeaderClearHotUpdated(tuple->t_data);
3270 258 : HeapTupleHeaderSetXmax(tuple->t_data, xid);
3271 : /* Make sure there is no forward chain link in t_ctid */
3272 258 : tuple->t_data->t_ctid = *tid;
3273 :
3274 258 : MarkBufferDirty(*buffer);
3275 :
3276 : /*
3277 : * XLOG stuff. You might think that we don't need an XLOG record because
3278 : * there is no state change worth restoring after a crash. You would be
3279 : * wrong however: we have just written either a TransactionId or a
3280 : * MultiXactId that may never have been seen on disk before, and we need
3281 : * to make sure that there are XLOG entries covering those ID numbers.
3282 : * Else the same IDs might be re-used after a crash, which would be
3283 : * disastrous if this page made it to disk before the crash. Essentially
3284 : * we have to enforce the WAL log-before-data rule even in this case.
3285 : * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
3286 : * entries for everything anyway.)
3287 : */
3288 258 : if (!relation->rd_istemp)
3289 : {
3290 : xl_heap_lock xlrec;
3291 : XLogRecPtr recptr;
3292 : XLogRecData rdata[2];
3293 :
3294 244 : xlrec.target.node = relation->rd_node;
3295 244 : xlrec.target.tid = tuple->t_self;
3296 244 : xlrec.locking_xid = xid;
3297 244 : xlrec.xid_is_mxact = ((new_infomask & HEAP_XMAX_IS_MULTI) != 0);
3298 244 : xlrec.shared_lock = (mode == LockTupleShared);
3299 244 : rdata[0].data = (char *) &xlrec;
3300 244 : rdata[0].len = SizeOfHeapLock;
3301 244 : rdata[0].buffer = InvalidBuffer;
3302 244 : rdata[0].next = &(rdata[1]);
3303 :
3304 244 : rdata[1].data = NULL;
3305 244 : rdata[1].len = 0;
3306 244 : rdata[1].buffer = *buffer;
3307 244 : rdata[1].buffer_std = true;
3308 244 : rdata[1].next = NULL;
3309 :
3310 244 : recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK, rdata);
3311 :
3312 244 : PageSetLSN(dp, recptr);
3313 244 : PageSetTLI(dp, ThisTimeLineID);
3314 : }
3315 :
3316 258 : END_CRIT_SECTION();
3317 :
3318 258 : LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
3319 :
3320 : /*
3321 : * Now that we have successfully marked the tuple as locked, we can
3322 : * release the lmgr tuple lock, if we had it.
3323 : */
3324 258 : if (have_tuple_lock)
3325 0 : UnlockTuple(relation, tid, tuple_lock_type);
3326 :
3327 258 : return HeapTupleMayBeUpdated;
3328 : }
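
/*
 * Illustrative sketch (compiled out, and the helper name is made up): the
 * test above, restated as a standalone predicate.  Given a tuple header and
 * the requested lock mode, this is roughly how we recognize that the current
 * transaction already holds a lock at least as strong as the one requested,
 * in which case no infomask change and no WAL record are needed.
 */
#ifdef NOT_USED
static bool
xact_already_holds_tuple_lock(HeapTupleHeader htup, LockTupleMode mode)
{
	uint16		infomask = htup->t_infomask;
	TransactionId xmax = HeapTupleHeaderGetXmax(htup);

	/* xmax must be a live, plain TransactionId, not a MultiXactId */
	if (infomask & (HEAP_XMAX_INVALID | HEAP_XMAX_COMMITTED | HEAP_XMAX_IS_MULTI))
		return false;

	/* it must carry at least the requested lock strength ... */
	if (mode == LockTupleShared)
	{
		if (!(infomask & HEAP_IS_LOCKED))
			return false;
	}
	else if (!(infomask & HEAP_XMAX_EXCL_LOCK))
		return false;

	/* ... and the locker must be our own transaction */
	return TransactionIdIsCurrentTransactionId(xmax);
}
#endif   /* NOT_USED */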
3329 :
3330 :
3331 : /*
3332 : * heap_inplace_update - update a tuple "in place" (ie, overwrite it)
3333 : *
3334 : * Overwriting violates both MVCC and transactional safety, so the uses
3335 : * of this function in Postgres are extremely limited. Nonetheless we
3336 : * find some places to use it.
3337 : *
3338 : * The tuple cannot change size, and therefore it's reasonable to assume
3339 : * that its null bitmap (if any) doesn't change either. So we just
3340 : * overwrite the data portion of the tuple without touching the null
3341 : * bitmap or any of the header fields.
3342 : *
3343 : * tuple is an in-memory tuple structure containing the data to be written
3344 : * over the target tuple. Also, tuple->t_self identifies the target tuple.
3345 : */
3346 : void
3347 : heap_inplace_update(Relation relation, HeapTuple tuple)
3348 1297 : {
3349 : Buffer buffer;
3350 : Page page;
3351 : OffsetNumber offnum;
3352 1297 : ItemId lp = NULL;
3353 : HeapTupleHeader htup;
3354 : uint32 oldlen;
3355 : uint32 newlen;
3356 :
3357 1297 : buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
3358 1297 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3359 1297 : page = (Page) BufferGetPage(buffer);
3360 :
3361 1297 : offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
3362 1297 : if (PageGetMaxOffsetNumber(page) >= offnum)
3363 1297 : lp = PageGetItemId(page, offnum);
3364 :
3365 1297 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
3366 0 : elog(ERROR, "heap_inplace_update: invalid lp");
3367 :
3368 1297 : htup = (HeapTupleHeader) PageGetItem(page, lp);
3369 :
3370 1297 : oldlen = ItemIdGetLength(lp) - htup->t_hoff;
3371 1297 : newlen = tuple->t_len - tuple->t_data->t_hoff;
3372 1297 : if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
3373 0 : elog(ERROR, "heap_inplace_update: wrong tuple length");
3374 :
3375 : /* NO EREPORT(ERROR) from here till changes are logged */
3376 1297 : START_CRIT_SECTION();
3377 :
3378 1297 : memcpy((char *) htup + htup->t_hoff,
3379 : (char *) tuple->t_data + tuple->t_data->t_hoff,
3380 : newlen);
3381 :
3382 1297 : MarkBufferDirty(buffer);
3383 :
3384 : /* XLOG stuff */
3385 1297 : if (!relation->rd_istemp)
3386 : {
3387 : xl_heap_inplace xlrec;
3388 : XLogRecPtr recptr;
3389 : XLogRecData rdata[2];
3390 :
3391 1297 : xlrec.target.node = relation->rd_node;
3392 1297 : xlrec.target.tid = tuple->t_self;
3393 :
3394 1297 : rdata[0].data = (char *) &xlrec;
3395 1297 : rdata[0].len = SizeOfHeapInplace;
3396 1297 : rdata[0].buffer = InvalidBuffer;
3397 1297 : rdata[0].next = &(rdata[1]);
3398 :
3399 1297 : rdata[1].data = (char *) htup + htup->t_hoff;
3400 1297 : rdata[1].len = newlen;
3401 1297 : rdata[1].buffer = buffer;
3402 1297 : rdata[1].buffer_std = true;
3403 1297 : rdata[1].next = NULL;
3404 :
3405 1297 : recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE, rdata);
3406 :
3407 1297 : PageSetLSN(page, recptr);
3408 1297 : PageSetTLI(page, ThisTimeLineID);
3409 : }
3410 :
3411 1297 : END_CRIT_SECTION();
3412 :
3413 1297 : UnlockReleaseBuffer(buffer);
3414 :
3415 : /* Send out shared cache inval if necessary */
3416 1297 : if (!IsBootstrapProcessingMode())
3417 1157 : CacheInvalidateHeapTuple(relation, tuple);
3418 1297 : }
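
/*
 * Minimal usage sketch (compiled out; caller and field edits are
 * hypothetical): heap_inplace_update expects an in-memory copy of the target
 * tuple with exactly the same data length, and with t_self still identifying
 * the on-disk tuple.  This mirrors the way VACUUM overwrites fixed-width
 * statistics columns of a pg_class row in place.
 */
#ifdef NOT_USED
static void
example_inplace_overwrite(Relation rel, HeapTuple oldtup)
{
	HeapTuple	newtup = heap_copytuple(oldtup);

	/*
	 * ... modify fixed-width, non-null fields of newtup->t_data here; the
	 * tuple's length and null bitmap must not change ...
	 */

	/* heap_copytuple preserves t_self, so it still names the target tuple */
	heap_inplace_update(rel, newtup);
	heap_freetuple(newtup);
}
#endif   /* NOT_USED */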
3419 :
3420 :
3421 : /*
3422 : * heap_freeze_tuple
3423 : *
3424 : * Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
3425 : * are older than the specified cutoff XID. If so, replace them with
3426 : * FrozenTransactionId or InvalidTransactionId as appropriate, and return
3427 : * TRUE. Return FALSE if nothing was changed.
3428 : *
3429 : * It is assumed that the caller has checked the tuple with
3430 : * HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD
3431 : * (else we should be removing the tuple, not freezing it).
3432 : *
3433 : * NB: cutoff_xid *must* be <= the current global xmin, to ensure that any
3434 : * XID older than it could neither be running nor seen as running by any
3435 : * open transaction. This ensures that the replacement will not change
3436 : * anyone's idea of the tuple state. Also, since we assume the tuple is
3437 : * not HEAPTUPLE_DEAD, the fact that an XID is not still running allows us
3438 : * to assume that it is either committed good or aborted, as appropriate;
3439 : * so we need no external state checks to decide what to do. (This is good
3440 : * because this function is applied during WAL recovery, when we don't have
3441 : * access to any such state, and can't depend on the hint bits to be set.)
3442 : *
3443 : * In lazy VACUUM, we call this while initially holding only a shared lock
3444 : * on the tuple's buffer. If any change is needed, we trade that in for an
3445 : * exclusive lock before making the change. Caller should pass the buffer ID
3446 : * if shared lock is held, InvalidBuffer if exclusive lock is already held.
3447 : *
3448 : * Note: it might seem we could make the changes without exclusive lock, since
3449 : * TransactionId read/write is assumed atomic anyway. However there is a race
3450 : * condition: someone who just fetched an old XID that we overwrite here could
3451 : * conceivably not finish checking the XID against pg_clog before we finish
3452 : * the VACUUM and perhaps truncate off the part of pg_clog he needs. Getting
3453 : * exclusive lock ensures no other backend is in process of checking the
3454 : * tuple status. Also, getting exclusive lock makes it safe to adjust the
3455 : * infomask bits.
3456 : */
3457 : bool
3458 : heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
3459 : Buffer buf)
3460 184152 : {
3461 184152 : bool changed = false;
3462 : TransactionId xid;
3463 :
3464 184152 : xid = HeapTupleHeaderGetXmin(tuple);
3465 184152 : if (TransactionIdIsNormal(xid) &&
3466 : TransactionIdPrecedes(xid, cutoff_xid))
3467 : {
3468 10023 : if (buf != InvalidBuffer)
3469 : {
3470 : /* trade in share lock for exclusive lock */
3471 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
3472 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
3473 0 : buf = InvalidBuffer;
3474 : }
3475 10023 : HeapTupleHeaderSetXmin(tuple, FrozenTransactionId);
3476 :
3477 : /*
3478 : * Might as well fix the hint bits too; usually XMIN_COMMITTED will
3479 : * already be set here, but there's a small chance not.
3480 : */
3481 : Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
3482 10023 : tuple->t_infomask |= HEAP_XMIN_COMMITTED;
3483 10023 : changed = true;
3484 : }
3485 :
3486 : /*
3487 : * When we release shared lock, it's possible for someone else to change
3488 : * xmax before we get the lock back, so repeat the check after acquiring
3489 : * exclusive lock. (We don't need this pushup for xmin, because only
3490 : * VACUUM could be interested in changing an existing tuple's xmin, and
3491 : * there's only one VACUUM allowed on a table at a time.)
3492 : */
3493 184152 : recheck_xmax:
3494 184152 : if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
3495 : {
3496 184152 : xid = HeapTupleHeaderGetXmax(tuple);
3497 184152 : if (TransactionIdIsNormal(xid) &&
3498 : TransactionIdPrecedes(xid, cutoff_xid))
3499 : {
3500 0 : if (buf != InvalidBuffer)
3501 : {
3502 : /* trade in share lock for exclusive lock */
3503 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
3504 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
3505 0 : buf = InvalidBuffer;
3506 0 : goto recheck_xmax; /* see comment above */
3507 : }
3508 0 : HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
3509 :
3510 : /*
3511 : * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED
3512 : * + LOCKED. Normalize to INVALID just to be sure no one gets
3513 : * confused.
3514 : */
3515 0 : tuple->t_infomask &= ~HEAP_XMAX_COMMITTED;
3516 0 : tuple->t_infomask |= HEAP_XMAX_INVALID;
3517 0 : HeapTupleHeaderClearHotUpdated(tuple);
3518 0 : changed = true;
3519 : }
3520 : }
3521 : else
3522 : {
3523 : /*----------
3524 : * XXX perhaps someday we should zero out very old MultiXactIds here?
3525 : *
3526 : * The only way a stale MultiXactId could pose a problem is if a
3527 : * tuple, having once been multiply-share-locked, is not touched by
3528 : * any vacuum or attempted lock or deletion for just over 4G MultiXact
3529 : * creations, and then in the probably-narrow window where its xmax
3530 : * is again a live MultiXactId, someone tries to lock or delete it.
3531 : * Even then, another share-lock attempt would work fine. An
3532 : * exclusive-lock or delete attempt would face unexpected delay, or
3533 : * in the very worst case get a deadlock error. This seems an
3534 : * extremely low-probability scenario with minimal downside even if
3535 : * it does happen, so for now we don't do the extra bookkeeping that
3536 : * would be needed to clean out MultiXactIds.
3537 : *----------
3538 : */
3539 : }
3540 :
3541 : /*
3542 : * Although xvac per se could only be set by VACUUM, it shares physical
3543 : * storage space with cmax, and so could be wiped out by someone setting
3544 : * xmax. Hence recheck after changing lock, same as for xmax itself.
3545 : */
3546 184152 : recheck_xvac:
3547 184152 : if (tuple->t_infomask & HEAP_MOVED)
3548 : {
3549 0 : xid = HeapTupleHeaderGetXvac(tuple);
3550 0 : if (TransactionIdIsNormal(xid) &&
3551 : TransactionIdPrecedes(xid, cutoff_xid))
3552 : {
3553 0 : if (buf != InvalidBuffer)
3554 : {
3555 : /* trade in share lock for exclusive lock */
3556 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
3557 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
3558 0 : buf = InvalidBuffer;
3559 0 : goto recheck_xvac; /* see comment above */
3560 : }
3561 :
3562 : /*
3563 : * If a MOVED_OFF tuple is not dead, the xvac transaction must
3564 : * have failed; whereas a non-dead MOVED_IN tuple must mean the
3565 : * xvac transaction succeeded.
3566 : */
3567 0 : if (tuple->t_infomask & HEAP_MOVED_OFF)
3568 0 : HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
3569 : else
3570 0 : HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
3571 :
3572 : /*
3573 : * Might as well fix the hint bits too; usually XMIN_COMMITTED
3574 : * will already be set here, but there's a small chance not.
3575 : */
3576 : Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
3577 0 : tuple->t_infomask |= HEAP_XMIN_COMMITTED;
3578 0 : changed = true;
3579 : }
3580 : }
3581 :
3582 184152 : return changed;
3583 : }
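
/*
 * Illustrative sketch (compiled out): the per-XID test applied above to
 * xmin, xmax and xvac.  Only "normal" XIDs -- not Invalid, Bootstrap or
 * Frozen -- that precede the cutoff get replaced.
 */
#ifdef NOT_USED
static bool
xid_is_freezable(TransactionId xid, TransactionId cutoff_xid)
{
	return TransactionIdIsNormal(xid) &&
		TransactionIdPrecedes(xid, cutoff_xid);
}
#endif   /* NOT_USED */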
3584 :
3585 :
3586 : /* ----------------
3587 : * heap_markpos - mark scan position
3588 : * ----------------
3589 : */
3590 : void
3591 : heap_markpos(HeapScanDesc scan)
3592 0 : {
3593 : /* Note: no locking manipulations needed */
3594 :
3595 0 : if (scan->rs_ctup.t_data != NULL)
3596 : {
3597 0 : scan->rs_mctid = scan->rs_ctup.t_self;
3598 0 : if (scan->rs_pageatatime)
3599 0 : scan->rs_mindex = scan->rs_cindex;
3600 : }
3601 : else
3602 0 : ItemPointerSetInvalid(&scan->rs_mctid);
3603 0 : }
3604 :
3605 : /* ----------------
3606 : * heap_restrpos - restore position to marked location
3607 : * ----------------
3608 : */
3609 : void
3610 : heap_restrpos(HeapScanDesc scan)
3611 0 : {
3612 : /* XXX no amrestrpos checking that ammarkpos called */
3613 :
3614 0 : if (!ItemPointerIsValid(&scan->rs_mctid))
3615 : {
3616 0 : scan->rs_ctup.t_data = NULL;
3617 :
3618 : /*
3619 : * unpin scan buffers
3620 : */
3621 0 : if (BufferIsValid(scan->rs_cbuf))
3622 0 : ReleaseBuffer(scan->rs_cbuf);
3623 0 : scan->rs_cbuf = InvalidBuffer;
3624 0 : scan->rs_cblock = InvalidBlockNumber;
3625 0 : scan->rs_inited = false;
3626 : }
3627 : else
3628 : {
3629 : /*
3630 : * If we reached end of scan, rs_inited will now be false. We must
3631 : * reset it to true to keep heapgettup from doing the wrong thing.
3632 : */
3633 0 : scan->rs_inited = true;
3634 0 : scan->rs_ctup.t_self = scan->rs_mctid;
3635 0 : if (scan->rs_pageatatime)
3636 : {
3637 0 : scan->rs_cindex = scan->rs_mindex;
3638 0 : heapgettup_pagemode(scan,
3639 : NoMovementScanDirection,
3640 : 0, /* needn't recheck scan keys */
3641 : NULL);
3642 : }
3643 : else
3644 0 : heapgettup(scan,
3645 : NoMovementScanDirection,
3646 : 0, /* needn't recheck scan keys */
3647 : NULL);
3648 : }
3649 0 : }
3650 :
3651 : /*
3652 : * Perform XLogInsert for a heap-clean operation. Caller must already
3653 : * have modified the buffer and marked it dirty.
3654 : *
3655 : * Note: prior to Postgres 8.3, the entries in the nowunused[] array were
3656 : * zero-based tuple indexes. Now they are one-based like other uses
3657 : * of OffsetNumber.
3658 : */
3659 : XLogRecPtr
3660 : log_heap_clean(Relation reln, Buffer buffer,
3661 : OffsetNumber *redirected, int nredirected,
3662 : OffsetNumber *nowdead, int ndead,
3663 : OffsetNumber *nowunused, int nunused,
3664 : bool redirect_move)
3665 1048 : {
3666 : xl_heap_clean xlrec;
3667 : uint8 info;
3668 : XLogRecPtr recptr;
3669 : XLogRecData rdata[4];
3670 :
3671 : /* Caller should not call me on a temp relation */
3672 : Assert(!reln->rd_istemp);
3673 :
3674 1048 : xlrec.node = reln->rd_node;
3675 1048 : xlrec.block = BufferGetBlockNumber(buffer);
3676 1048 : xlrec.nredirected = nredirected;
3677 1048 : xlrec.ndead = ndead;
3678 :
3679 1048 : rdata[0].data = (char *) &xlrec;
3680 1048 : rdata[0].len = SizeOfHeapClean;
3681 1048 : rdata[0].buffer = InvalidBuffer;
3682 1048 : rdata[0].next = &(rdata[1]);
3683 :
3684 : /*
3685 : * The OffsetNumber arrays are not actually in the buffer, but we pretend
3686 : * that they are. When XLogInsert stores the whole buffer, the offset
3687 : * arrays need not be stored too. Note that even if all three arrays are
3688 : * empty, we want to expose the buffer as a candidate for whole-page
3689 : * storage, since this record type implies a defragmentation operation
3690 : * even if no item pointers changed state.
3691 : */
3692 1048 : if (nredirected > 0)
3693 : {
3694 200 : rdata[1].data = (char *) redirected;
3695 200 : rdata[1].len = nredirected * sizeof(OffsetNumber) * 2;
3696 : }
3697 : else
3698 : {
3699 848 : rdata[1].data = NULL;
3700 848 : rdata[1].len = 0;
3701 : }
3702 1048 : rdata[1].buffer = buffer;
3703 1048 : rdata[1].buffer_std = true;
3704 1048 : rdata[1].next = &(rdata[2]);
3705 :
3706 1048 : if (ndead > 0)
3707 : {
3708 753 : rdata[2].data = (char *) nowdead;
3709 753 : rdata[2].len = ndead * sizeof(OffsetNumber);
3710 : }
3711 : else
3712 : {
3713 295 : rdata[2].data = NULL;
3714 295 : rdata[2].len = 0;
3715 : }
3716 1048 : rdata[2].buffer = buffer;
3717 1048 : rdata[2].buffer_std = true;
3718 1048 : rdata[2].next = &(rdata[3]);
3719 :
3720 1048 : if (nunused > 0)
3721 : {
3722 383 : rdata[3].data = (char *) nowunused;
3723 383 : rdata[3].len = nunused * sizeof(OffsetNumber);
3724 : }
3725 : else
3726 : {
3727 665 : rdata[3].data = NULL;
3728 665 : rdata[3].len = 0;
3729 : }
3730 1048 : rdata[3].buffer = buffer;
3731 1048 : rdata[3].buffer_std = true;
3732 1048 : rdata[3].next = NULL;
3733 :
3734 1048 : info = redirect_move ? XLOG_HEAP2_CLEAN_MOVE : XLOG_HEAP2_CLEAN;
3735 1048 : recptr = XLogInsert(RM_HEAP2_ID, info, rdata);
3736 :
3737 1048 : return recptr;
3738 : }
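
/*
 * Sketch of the rdata idiom used above and in the other log_heap_* routines
 * (compiled out; the record layout here is a placeholder, not a real WAL
 * record type): payload that logically lives in a page is attached to that
 * page's buffer, so XLogInsert can omit it whenever it emits a full-page
 * image instead, while the fixed-size header is attached to InvalidBuffer
 * and is therefore always written.
 */
#ifdef NOT_USED
static XLogRecPtr
log_example_buffer_record(uint8 info, Buffer buffer,
						  char *hdr, int hdrlen,
						  char *payload, int payloadlen)
{
	XLogRecData rdata[2];

	rdata[0].data = hdr;			/* always stored in the record */
	rdata[0].len = hdrlen;
	rdata[0].buffer = InvalidBuffer;
	rdata[0].next = &(rdata[1]);

	rdata[1].data = payload;		/* dropped if the whole page is stored */
	rdata[1].len = payloadlen;
	rdata[1].buffer = buffer;
	rdata[1].buffer_std = true;		/* page uses the standard layout */
	rdata[1].next = NULL;

	return XLogInsert(RM_HEAP_ID, info, rdata);
}
#endif   /* NOT_USED */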
3739 :
3740 : /*
3741 : * Perform XLogInsert for a heap-freeze operation. Caller must already
3742 : * have modified the buffer and marked it dirty.
3743 : */
3744 : XLogRecPtr
3745 : log_heap_freeze(Relation reln, Buffer buffer,
3746 : TransactionId cutoff_xid,
3747 : OffsetNumber *offsets, int offcnt)
3748 142 : {
3749 : xl_heap_freeze xlrec;
3750 : XLogRecPtr recptr;
3751 : XLogRecData rdata[2];
3752 :
3753 : /* Caller should not call me on a temp relation */
3754 : Assert(!reln->rd_istemp);
3755 :
3756 142 : xlrec.node = reln->rd_node;
3757 142 : xlrec.block = BufferGetBlockNumber(buffer);
3758 142 : xlrec.cutoff_xid = cutoff_xid;
3759 :
3760 142 : rdata[0].data = (char *) &xlrec;
3761 142 : rdata[0].len = SizeOfHeapFreeze;
3762 142 : rdata[0].buffer = InvalidBuffer;
3763 142 : rdata[0].next = &(rdata[1]);
3764 :
3765 : /*
3766 : * The tuple-offsets array is not actually in the buffer, but we pretend
3767 : * that it is. When XLogInsert stores the whole buffer, the offsets array
3768 : * need not be stored too.
3769 : */
3770 142 : if (offcnt > 0)
3771 : {
3772 142 : rdata[1].data = (char *) offsets;
3773 142 : rdata[1].len = offcnt * sizeof(OffsetNumber);
3774 : }
3775 : else
3776 : {
3777 0 : rdata[1].data = NULL;
3778 0 : rdata[1].len = 0;
3779 : }
3780 142 : rdata[1].buffer = buffer;
3781 142 : rdata[1].buffer_std = true;
3782 142 : rdata[1].next = NULL;
3783 :
3784 142 : recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE, rdata);
3785 :
3786 142 : return recptr;
3787 : }
3788 :
3789 : /*
3790 : * Perform XLogInsert for a heap-update operation. Caller must already
3791 : * have modified the buffer(s) and marked them dirty.
3792 : */
3793 : static XLogRecPtr
3794 : log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
3795 : Buffer newbuf, HeapTuple newtup, bool move)
3796 4632 : {
3797 : /*
3798 : * Note: xlhdr is declared to have adequate size and correct alignment for
3799 : * an xl_heap_header. However the two tids, if present at all, will be
3800 : * packed in with no wasted space after the xl_heap_header; they aren't
3801 : * necessarily aligned as implied by this struct declaration.
3802 : */
3803 : struct
3804 : {
3805 : xl_heap_header hdr;
3806 : TransactionId tid1;
3807 : TransactionId tid2;
3808 : } xlhdr;
3809 4632 : int hsize = SizeOfHeapHeader;
3810 : xl_heap_update xlrec;
3811 : uint8 info;
3812 : XLogRecPtr recptr;
3813 : XLogRecData rdata[4];
3814 4632 : Page page = BufferGetPage(newbuf);
3815 :
3816 : /* Caller should not call me on a temp relation */
3817 : Assert(!reln->rd_istemp);
3818 :
3819 4632 : if (move)
3820 : {
3821 : Assert(!HeapTupleIsHeapOnly(newtup));
3822 111 : info = XLOG_HEAP_MOVE;
3823 : }
3824 4521 : else if (HeapTupleIsHeapOnly(newtup))
3825 2138 : info = XLOG_HEAP_HOT_UPDATE;
3826 : else
3827 2383 : info = XLOG_HEAP_UPDATE;
3828 :
3829 4632 : xlrec.target.node = reln->rd_node;
3830 4632 : xlrec.target.tid = from;
3831 4632 : xlrec.newtid = newtup->t_self;
3832 :
3833 4632 : rdata[0].data = (char *) &xlrec;
3834 4632 : rdata[0].len = SizeOfHeapUpdate;
3835 4632 : rdata[0].buffer = InvalidBuffer;
3836 4632 : rdata[0].next = &(rdata[1]);
3837 :
3838 4632 : rdata[1].data = NULL;
3839 4632 : rdata[1].len = 0;
3840 4632 : rdata[1].buffer = oldbuf;
3841 4632 : rdata[1].buffer_std = true;
3842 4632 : rdata[1].next = &(rdata[2]);
3843 :
3844 4632 : xlhdr.hdr.t_infomask2 = newtup->t_data->t_infomask2;
3845 4632 : xlhdr.hdr.t_infomask = newtup->t_data->t_infomask;
3846 4632 : xlhdr.hdr.t_hoff = newtup->t_data->t_hoff;
3847 4632 : if (move) /* remember xmax & xmin */
3848 : {
3849 : TransactionId xid[2]; /* xmax, xmin */
3850 :
3851 111 : if (newtup->t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED))
3852 111 : xid[0] = InvalidTransactionId;
3853 : else
3854 0 : xid[0] = HeapTupleHeaderGetXmax(newtup->t_data);
3855 111 : xid[1] = HeapTupleHeaderGetXmin(newtup->t_data);
3856 111 : memcpy((char *) &xlhdr + hsize,
3857 : (char *) xid,
3858 : 2 * sizeof(TransactionId));
3859 111 : hsize += 2 * sizeof(TransactionId);
3860 : }
3861 :
3862 : /*
3863 : * As with insert records, we need not store the rdata[2] segment if we
3864 : * decide to store the whole buffer instead.
3865 : */
3866 4632 : rdata[2].data = (char *) &xlhdr;
3867 4632 : rdata[2].len = hsize;
3868 4632 : rdata[2].buffer = newbuf;
3869 4632 : rdata[2].buffer_std = true;
3870 4632 : rdata[2].next = &(rdata[3]);
3871 :
3872 : /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
3873 4632 : rdata[3].data = (char *) newtup->t_data + offsetof(HeapTupleHeaderData, t_bits);
3874 4632 : rdata[3].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits);
3875 4632 : rdata[3].buffer = newbuf;
3876 4632 : rdata[3].buffer_std = true;
3877 4632 : rdata[3].next = NULL;
3878 :
3879 : /* If the new tuple is the first and only tuple on the page... */
3880 4632 : if (ItemPointerGetOffsetNumber(&(newtup->t_self)) == FirstOffsetNumber &&
3881 : PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
3882 : {
3883 55 : info |= XLOG_HEAP_INIT_PAGE;
3884 55 : rdata[2].buffer = rdata[3].buffer = InvalidBuffer;
3885 : }
3886 :
3887 4632 : recptr = XLogInsert(RM_HEAP_ID, info, rdata);
3888 :
3889 4632 : return recptr;
3890 : }
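
/*
 * For reference, the record body assembled above is laid out as:
 *
 *		xl_heap_update					(rdata[0], always present)
 *		xl_heap_header					(rdata[2])
 *		xmax, xmin						(XLOG_HEAP_MOVE only, packed unaligned)
 *		tuple data starting at t_bits	(rdata[3])
 *
 * The buffer-attached pieces may be omitted in favor of a full-page backup
 * image, which is why heap_xlog_update bails out when the XLR_BKP_BLOCK
 * flags are set.
 */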
3891 :
3892 : /*
3893 : * Perform XLogInsert for a heap-move operation. Caller must already
3894 : * have modified the buffers and marked them dirty.
3895 : */
3896 : XLogRecPtr
3897 : log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from,
3898 : Buffer newbuf, HeapTuple newtup)
3899 111 : {
3900 111 : return log_heap_update(reln, oldbuf, from, newbuf, newtup, true);
3901 : }
3902 :
3903 : /*
3904 : * Perform XLogInsert of a HEAP_NEWPAGE record to WAL. Caller is responsible
3905 : * for writing the page to disk after calling this routine.
3906 : *
3907 : * Note: all current callers build pages in private memory and write them
3908 : * directly to smgr, rather than using bufmgr. Therefore there is no need
3909 : * to pass a buffer ID to XLogInsert, nor to perform MarkBufferDirty within
3910 : * the critical section.
3911 : *
3912 : * Note: the NEWPAGE log record is used for both heaps and indexes, so do
3913 : * not do anything that assumes we are touching a heap.
3914 : */
3915 : XLogRecPtr
3916 : log_newpage(RelFileNode *rnode, BlockNumber blkno, Page page)
3917 0 : {
3918 : xl_heap_newpage xlrec;
3919 : XLogRecPtr recptr;
3920 : XLogRecData rdata[2];
3921 :
3922 : /* NO ELOG(ERROR) from here till newpage op is logged */
3923 0 : START_CRIT_SECTION();
3924 :
3925 0 : xlrec.node = *rnode;
3926 0 : xlrec.blkno = blkno;
3927 :
3928 0 : rdata[0].data = (char *) &xlrec;
3929 0 : rdata[0].len = SizeOfHeapNewpage;
3930 0 : rdata[0].buffer = InvalidBuffer;
3931 0 : rdata[0].next = &(rdata[1]);
3932 :
3933 0 : rdata[1].data = (char *) page;
3934 0 : rdata[1].len = BLCKSZ;
3935 0 : rdata[1].buffer = InvalidBuffer;
3936 0 : rdata[1].next = NULL;
3937 :
3938 0 : recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
3939 :
3940 0 : PageSetLSN(page, recptr);
3941 0 : PageSetTLI(page, ThisTimeLineID);
3942 :
3943 0 : END_CRIT_SECTION();
3944 :
3945 0 : return recptr;
3946 : }
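
/*
 * Usage sketch (compiled out; the caller is hypothetical): per the note
 * above, pages fed to log_newpage are built in private memory and handed
 * straight to smgr, bypassing the buffer manager.  log_newpage has already
 * stamped the page's LSN and TLI, so the caller only needs to do the write.
 */
#ifdef NOT_USED
static void
example_write_prebuilt_page(Relation rel, BlockNumber blkno, Page page)
{
	if (!rel->rd_istemp)
		(void) log_newpage(&rel->rd_node, blkno, page);

	RelationOpenSmgr(rel);
	smgrextend(rel->rd_smgr, blkno, (char *) page, rel->rd_istemp);
}
#endif   /* NOT_USED */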
3947 :
3948 : /*
3949 : * Handles CLEAN and CLEAN_MOVE record types
3950 : */
3951 : static void
3952 : heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
3953 0 : {
3954 0 : xl_heap_clean *xlrec = (xl_heap_clean *) XLogRecGetData(record);
3955 : Relation reln;
3956 : Buffer buffer;
3957 : Page page;
3958 : OffsetNumber *offnum;
3959 : OffsetNumber *end;
3960 : int nredirected;
3961 : int ndead;
3962 : int i;
3963 :
3964 0 : if (record->xl_info & XLR_BKP_BLOCK_1)
3965 0 : return;
3966 :
3967 0 : reln = XLogOpenRelation(xlrec->node);
3968 0 : buffer = XLogReadBuffer(reln, xlrec->block, false);
3969 0 : if (!BufferIsValid(buffer))
3970 : return;
3971 0 : page = (Page) BufferGetPage(buffer);
3972 :
3973 0 : if (XLByteLE(lsn, PageGetLSN(page)))
3974 : {
3975 0 : UnlockReleaseBuffer(buffer);
3976 0 : return;
3977 : }
3978 :
3979 0 : nredirected = xlrec->nredirected;
3980 0 : ndead = xlrec->ndead;
3981 0 : offnum = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
3982 0 : end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
3983 :
3984 : /* Update all redirected or moved line pointers */
3985 0 : for (i = 0; i < nredirected; i++)
3986 : {
3987 0 : OffsetNumber fromoff = *offnum++;
3988 0 : OffsetNumber tooff = *offnum++;
3989 0 : ItemId fromlp = PageGetItemId(page, fromoff);
3990 :
3991 0 : if (clean_move)
3992 : {
3993 : /* Physically move the "to" item to the "from" slot */
3994 0 : ItemId tolp = PageGetItemId(page, tooff);
3995 : HeapTupleHeader htup;
3996 :
3997 0 : *fromlp = *tolp;
3998 0 : ItemIdSetUnused(tolp);
3999 :
4000 : /* We also have to clear the tuple's heap-only bit */
4001 : Assert(ItemIdIsNormal(fromlp));
4002 0 : htup = (HeapTupleHeader) PageGetItem(page, fromlp);
4003 : Assert(HeapTupleHeaderIsHeapOnly(htup));
4004 0 : HeapTupleHeaderClearHeapOnly(htup);
4005 : }
4006 : else
4007 : {
4008 : /* Just insert a REDIRECT link at fromoff */
4009 0 : ItemIdSetRedirect(fromlp, tooff);
4010 : }
4011 : }
4012 :
4013 : /* Update all now-dead line pointers */
4014 0 : for (i = 0; i < ndead; i++)
4015 : {
4016 0 : OffsetNumber off = *offnum++;
4017 0 : ItemId lp = PageGetItemId(page, off);
4018 :
4019 0 : ItemIdSetDead(lp);
4020 : }
4021 :
4022 : /* Update all now-unused line pointers */
4023 0 : while (offnum < end)
4024 : {
4025 0 : OffsetNumber off = *offnum++;
4026 0 : ItemId lp = PageGetItemId(page, off);
4027 :
4028 0 : ItemIdSetUnused(lp);
4029 : }
4030 :
4031 : /*
4032 : * Finally, repair any fragmentation, and update the page's hint bit about
4033 : * whether it has free pointers.
4034 : */
4035 0 : PageRepairFragmentation(page);
4036 :
4037 0 : PageSetLSN(page, lsn);
4038 0 : PageSetTLI(page, ThisTimeLineID);
4039 0 : MarkBufferDirty(buffer);
4040 0 : UnlockReleaseBuffer(buffer);
4041 : }
4042 :
4043 : static void
4044 : heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
4045 0 : {
4046 0 : xl_heap_freeze *xlrec = (xl_heap_freeze *) XLogRecGetData(record);
4047 0 : TransactionId cutoff_xid = xlrec->cutoff_xid;
4048 : Relation reln;
4049 : Buffer buffer;
4050 : Page page;
4051 :
4052 0 : if (record->xl_info & XLR_BKP_BLOCK_1)
4053 0 : return;
4054 :
4055 0 : reln = XLogOpenRelation(xlrec->node);
4056 0 : buffer = XLogReadBuffer(reln, xlrec->block, false);
4057 0 : if (!BufferIsValid(buffer))
4058 : return;
4059 0 : page = (Page) BufferGetPage(buffer);
4060 :
4061 0 : if (XLByteLE(lsn, PageGetLSN(page)))
4062 : {
4063 0 : UnlockReleaseBuffer(buffer);
4064 0 : return;
4065 : }
4066 :
4067 0 : if (record->xl_len > SizeOfHeapFreeze)
4068 : {
4069 : OffsetNumber *offsets;
4070 : OffsetNumber *offsets_end;
4071 :
4072 0 : offsets = (OffsetNumber *) ((char *) xlrec + SizeOfHeapFreeze);
4073 0 : offsets_end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
4074 :
4075 0 : while (offsets < offsets_end)
4076 : {
4077 : /* offsets[] entries are one-based */
4078 0 : ItemId lp = PageGetItemId(page, *offsets);
4079 0 : HeapTupleHeader tuple = (HeapTupleHeader) PageGetItem(page, lp);
4080 :
4081 0 : (void) heap_freeze_tuple(tuple, cutoff_xid, InvalidBuffer);
4082 0 : offsets++;
4083 : }
4084 : }
4085 :
4086 0 : PageSetLSN(page, lsn);
4087 0 : PageSetTLI(page, ThisTimeLineID);
4088 0 : MarkBufferDirty(buffer);
4089 0 : UnlockReleaseBuffer(buffer);
4090 : }
4091 :
4092 : static void
4093 : heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
4094 0 : {
4095 0 : xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record);
4096 : Relation reln;
4097 : Buffer buffer;
4098 : Page page;
4099 :
4100 : /*
4101 : * Note: the NEWPAGE log record is used for both heaps and indexes, so do
4102 : * not do anything that assumes we are touching a heap.
4103 : */
4104 0 : reln = XLogOpenRelation(xlrec->node);
4105 0 : buffer = XLogReadBuffer(reln, xlrec->blkno, true);
4106 : Assert(BufferIsValid(buffer));
4107 0 : page = (Page) BufferGetPage(buffer);
4108 :
4109 : Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ);
4110 0 : memcpy(page, (char *) xlrec + SizeOfHeapNewpage, BLCKSZ);
4111 :
4112 0 : PageSetLSN(page, lsn);
4113 0 : PageSetTLI(page, ThisTimeLineID);
4114 0 : MarkBufferDirty(buffer);
4115 0 : UnlockReleaseBuffer(buffer);
4116 0 : }
4117 :
4118 : static void
4119 : heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
4120 0 : {
4121 0 : xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
4122 : Relation reln;
4123 : Buffer buffer;
4124 : Page page;
4125 : OffsetNumber offnum;
4126 0 : ItemId lp = NULL;
4127 : HeapTupleHeader htup;
4128 :
4129 0 : if (record->xl_info & XLR_BKP_BLOCK_1)
4130 0 : return;
4131 :
4132 0 : reln = XLogOpenRelation(xlrec->target.node);
4133 0 : buffer = XLogReadBuffer(reln,
4134 : ItemPointerGetBlockNumber(&(xlrec->target.tid)),
4135 : false);
4136 0 : if (!BufferIsValid(buffer))
4137 : return;
4138 0 : page = (Page) BufferGetPage(buffer);
4139 :
4140 0 : if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
4141 : {
4142 0 : UnlockReleaseBuffer(buffer);
4143 0 : return;
4144 : }
4145 :
4146 0 : offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
4147 0 : if (PageGetMaxOffsetNumber(page) >= offnum)
4148 0 : lp = PageGetItemId(page, offnum);
4149 :
4150 0 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
4151 0 : elog(PANIC, "heap_delete_redo: invalid lp");
4152 :
4153 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
4154 :
4155 0 : htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
4156 : HEAP_XMAX_INVALID |
4157 : HEAP_XMAX_IS_MULTI |
4158 : HEAP_IS_LOCKED |
4159 : HEAP_MOVED);
4160 0 : HeapTupleHeaderClearHotUpdated(htup);
4161 0 : HeapTupleHeaderSetXmax(htup, record->xl_xid);
4162 0 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
4163 :
4164 : /* Mark the page as a candidate for pruning */
4165 0 : PageSetPrunable(page, record->xl_xid);
4166 :
4167 : /* Make sure there is no forward chain link in t_ctid */
4168 0 : htup->t_ctid = xlrec->target.tid;
4169 0 : PageSetLSN(page, lsn);
4170 0 : PageSetTLI(page, ThisTimeLineID);
4171 0 : MarkBufferDirty(buffer);
4172 0 : UnlockReleaseBuffer(buffer);
4173 : }
4174 :
4175 : static void
4176 : heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
4177 0 : {
4178 0 : xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
4179 : Relation reln;
4180 : Buffer buffer;
4181 : Page page;
4182 : OffsetNumber offnum;
4183 : struct
4184 : {
4185 : HeapTupleHeaderData hdr;
4186 : char data[MaxHeapTupleSize];
4187 : } tbuf;
4188 : HeapTupleHeader htup;
4189 : xl_heap_header xlhdr;
4190 : uint32 newlen;
4191 :
4192 0 : if (record->xl_info & XLR_BKP_BLOCK_1)
4193 0 : return;
4194 :
4195 0 : reln = XLogOpenRelation(xlrec->target.node);
4196 :
4197 0 : if (record->xl_info & XLOG_HEAP_INIT_PAGE)
4198 : {
4199 0 : buffer = XLogReadBuffer(reln,
4200 : ItemPointerGetBlockNumber(&(xlrec->target.tid)),
4201 : true);
4202 : Assert(BufferIsValid(buffer));
4203 0 : page = (Page) BufferGetPage(buffer);
4204 :
4205 0 : PageInit(page, BufferGetPageSize(buffer), 0);
4206 : }
4207 : else
4208 : {
4209 0 : buffer = XLogReadBuffer(reln,
4210 : ItemPointerGetBlockNumber(&(xlrec->target.tid)),
4211 : false);
4212 0 : if (!BufferIsValid(buffer))
4213 : return;
4214 0 : page = (Page) BufferGetPage(buffer);
4215 :
4216 0 : if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
4217 : {
4218 0 : UnlockReleaseBuffer(buffer);
4219 0 : return;
4220 : }
4221 : }
4222 :
4223 0 : offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
4224 0 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
4225 0 : elog(PANIC, "heap_insert_redo: invalid max offset number");
4226 :
4227 0 : newlen = record->xl_len - SizeOfHeapInsert - SizeOfHeapHeader;
4228 : Assert(newlen <= MaxHeapTupleSize);
4229 0 : memcpy((char *) &xlhdr,
4230 : (char *) xlrec + SizeOfHeapInsert,
4231 : SizeOfHeapHeader);
4232 0 : htup = &tbuf.hdr;
4233 0 : MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
4234 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
4235 0 : memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
4236 : (char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader,
4237 : newlen);
4238 0 : newlen += offsetof(HeapTupleHeaderData, t_bits);
4239 0 : htup->t_infomask2 = xlhdr.t_infomask2;
4240 0 : htup->t_infomask = xlhdr.t_infomask;
4241 0 : htup->t_hoff = xlhdr.t_hoff;
4242 0 : HeapTupleHeaderSetXmin(htup, record->xl_xid);
4243 0 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
4244 0 : htup->t_ctid = xlrec->target.tid;
4245 :
4246 0 : offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
4247 0 : if (offnum == InvalidOffsetNumber)
4248 0 : elog(PANIC, "heap_insert_redo: failed to add tuple");
4249 0 : PageSetLSN(page, lsn);
4250 0 : PageSetTLI(page, ThisTimeLineID);
4251 0 : MarkBufferDirty(buffer);
4252 0 : UnlockReleaseBuffer(buffer);
4253 : }
4254 :
4255 : /*
4256 : * Handles UPDATE, HOT_UPDATE & MOVE
4257 : */
4258 : static void
4259 : heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update)
4260 0 : {
4261 0 : xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
4262 0 : Relation reln = XLogOpenRelation(xlrec->target.node);
4263 : Buffer buffer;
4264 : bool samepage = (ItemPointerGetBlockNumber(&(xlrec->newtid)) ==
4265 0 : ItemPointerGetBlockNumber(&(xlrec->target.tid)));
4266 : Page page;
4267 : OffsetNumber offnum;
4268 0 : ItemId lp = NULL;
4269 : HeapTupleHeader htup;
4270 : struct
4271 : {
4272 : HeapTupleHeaderData hdr;
4273 : char data[MaxHeapTupleSize];
4274 : } tbuf;
4275 : xl_heap_header xlhdr;
4276 : int hsize;
4277 : uint32 newlen;
4278 :
4279 0 : if (record->xl_info & XLR_BKP_BLOCK_1)
4280 : {
4281 0 : if (samepage)
4282 0 : return; /* backup block covered both changes */
4283 : goto newt;
4284 : }
4285 :
4286 : /* Deal with old tuple version */
4287 :
4288 0 : buffer = XLogReadBuffer(reln,
4289 : ItemPointerGetBlockNumber(&(xlrec->target.tid)),
4290 : false);
4291 0 : if (!BufferIsValid(buffer))
4292 : goto newt;
4293 0 : page = (Page) BufferGetPage(buffer);
4294 :
4295 0 : if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
4296 : {
4297 0 : UnlockReleaseBuffer(buffer);
4298 0 : if (samepage)
4299 0 : return;
4300 : goto newt;
4301 : }
4302 :
4303 0 : offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
4304 0 : if (PageGetMaxOffsetNumber(page) >= offnum)
4305 0 : lp = PageGetItemId(page, offnum);
4306 :
4307 0 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
4308 0 : elog(PANIC, "heap_update_redo: invalid lp");
4309 :
4310 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
4311 :
4312 0 : if (move)
4313 : {
4314 0 : htup->t_infomask &= ~(HEAP_XMIN_COMMITTED |
4315 : HEAP_XMIN_INVALID |
4316 : HEAP_MOVED_IN);
4317 0 : htup->t_infomask |= HEAP_MOVED_OFF;
4318 0 : HeapTupleHeaderClearHotUpdated(htup);
4319 0 : HeapTupleHeaderSetXvac(htup, record->xl_xid);
4320 : /* Make sure there is no forward chain link in t_ctid */
4321 0 : htup->t_ctid = xlrec->target.tid;
4322 : }
4323 : else
4324 : {
4325 0 : htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
4326 : HEAP_XMAX_INVALID |
4327 : HEAP_XMAX_IS_MULTI |
4328 : HEAP_IS_LOCKED |
4329 : HEAP_MOVED);
4330 0 : if (hot_update)
4331 0 : HeapTupleHeaderSetHotUpdated(htup);
4332 : else
4333 0 : HeapTupleHeaderClearHotUpdated(htup);
4334 0 : HeapTupleHeaderSetXmax(htup, record->xl_xid);
4335 0 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
4336 : /* Set forward chain link in t_ctid */
4337 0 : htup->t_ctid = xlrec->newtid;
4338 : }
4339 :
4340 : /* Mark the page as a candidate for pruning */
4341 0 : PageSetPrunable(page, record->xl_xid);
4342 :
4343 : /*
4344 : * this test is ugly, but necessary to avoid concluding that the insert-side
4345 : * change has already been applied
4346 : */
4347 0 : if (samepage)
4348 0 : goto newsame;
4349 0 : PageSetLSN(page, lsn);
4350 0 : PageSetTLI(page, ThisTimeLineID);
4351 0 : MarkBufferDirty(buffer);
4352 0 : UnlockReleaseBuffer(buffer);
4353 :
4354 : /* Deal with new tuple */
4355 :
4356 0 : newt:;
4357 :
4358 0 : if (record->xl_info & XLR_BKP_BLOCK_2)
4359 0 : return;
4360 :
4361 0 : if (record->xl_info & XLOG_HEAP_INIT_PAGE)
4362 : {
4363 0 : buffer = XLogReadBuffer(reln,
4364 : ItemPointerGetBlockNumber(&(xlrec->newtid)),
4365 : true);
4366 : Assert(BufferIsValid(buffer));
4367 0 : page = (Page) BufferGetPage(buffer);
4368 :
4369 0 : PageInit(page, BufferGetPageSize(buffer), 0);
4370 : }
4371 : else
4372 : {
4373 0 : buffer = XLogReadBuffer(reln,
4374 : ItemPointerGetBlockNumber(&(xlrec->newtid)),
4375 : false);
4376 0 : if (!BufferIsValid(buffer))
4377 : return;
4378 0 : page = (Page) BufferGetPage(buffer);
4379 :
4380 0 : if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
4381 : {
4382 0 : UnlockReleaseBuffer(buffer);
4383 0 : return;
4384 : }
4385 : }
4386 :
4387 0 : newsame:;
4388 :
4389 0 : offnum = ItemPointerGetOffsetNumber(&(xlrec->newtid));
4390 0 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
4391 0 : elog(PANIC, "heap_update_redo: invalid max offset number");
4392 :
4393 0 : hsize = SizeOfHeapUpdate + SizeOfHeapHeader;
4394 0 : if (move)
4395 0 : hsize += (2 * sizeof(TransactionId));
4396 :
4397 0 : newlen = record->xl_len - hsize;
4398 : Assert(newlen <= MaxHeapTupleSize);
4399 0 : memcpy((char *) &xlhdr,
4400 : (char *) xlrec + SizeOfHeapUpdate,
4401 : SizeOfHeapHeader);
4402 0 : htup = &tbuf.hdr;
4403 0 : MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
4404 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
4405 0 : memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
4406 : (char *) xlrec + hsize,
4407 : newlen);
4408 0 : newlen += offsetof(HeapTupleHeaderData, t_bits);
4409 0 : htup->t_infomask2 = xlhdr.t_infomask2;
4410 0 : htup->t_infomask = xlhdr.t_infomask;
4411 0 : htup->t_hoff = xlhdr.t_hoff;
4412 :
4413 0 : if (move)
4414 : {
4415 : TransactionId xid[2]; /* xmax, xmin */
4416 :
4417 0 : memcpy((char *) xid,
4418 : (char *) xlrec + SizeOfHeapUpdate + SizeOfHeapHeader,
4419 : 2 * sizeof(TransactionId));
4420 0 : HeapTupleHeaderSetXmin(htup, xid[1]);
4421 0 : HeapTupleHeaderSetXmax(htup, xid[0]);
4422 0 : HeapTupleHeaderSetXvac(htup, record->xl_xid);
4423 : }
4424 : else
4425 : {
4426 0 : HeapTupleHeaderSetXmin(htup, record->xl_xid);
4427 0 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
4428 : }
4429 : /* Make sure there is no forward chain link in t_ctid */
4430 0 : htup->t_ctid = xlrec->newtid;
4431 :
4432 0 : offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
4433 0 : if (offnum == InvalidOffsetNumber)
4434 0 : elog(PANIC, "heap_update_redo: failed to add tuple");
4435 0 : PageSetLSN(page, lsn);
4436 0 : PageSetTLI(page, ThisTimeLineID);
4437 0 : MarkBufferDirty(buffer);
4438 0 : UnlockReleaseBuffer(buffer);
4439 : }
4440 :
4441 : static void
4442 : heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
4443 0 : {
4444 0 : xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
4445 : Relation reln;
4446 : Buffer buffer;
4447 : Page page;
4448 : OffsetNumber offnum;
4449 0 : ItemId lp = NULL;
4450 : HeapTupleHeader htup;
4451 :
4452 0 : if (record->xl_info & XLR_BKP_BLOCK_1)
4453 0 : return;
4454 :
4455 0 : reln = XLogOpenRelation(xlrec->target.node);
4456 0 : buffer = XLogReadBuffer(reln,
4457 : ItemPointerGetBlockNumber(&(xlrec->target.tid)),
4458 : false);
4459 0 : if (!BufferIsValid(buffer))
4460 : return;
4461 0 : page = (Page) BufferGetPage(buffer);
4462 :
4463 0 : if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
4464 : {
4465 0 : UnlockReleaseBuffer(buffer);
4466 0 : return;
4467 : }
4468 :
4469 0 : offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
4470 0 : if (PageGetMaxOffsetNumber(page) >= offnum)
4471 0 : lp = PageGetItemId(page, offnum);
4472 :
4473 0 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
4474 0 : elog(PANIC, "heap_lock_redo: invalid lp");
4475 :
4476 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
4477 :
4478 0 : htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
4479 : HEAP_XMAX_INVALID |
4480 : HEAP_XMAX_IS_MULTI |
4481 : HEAP_IS_LOCKED |
4482 : HEAP_MOVED);
4483 0 : if (xlrec->xid_is_mxact)
4484 0 : htup->t_infomask |= HEAP_XMAX_IS_MULTI;
4485 0 : if (xlrec->shared_lock)
4486 0 : htup->t_infomask |= HEAP_XMAX_SHARED_LOCK;
4487 : else
4488 0 : htup->t_infomask |= HEAP_XMAX_EXCL_LOCK;
4489 0 : HeapTupleHeaderClearHotUpdated(htup);
4490 0 : HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
4491 0 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
4492 : /* Make sure there is no forward chain link in t_ctid */
4493 0 : htup->t_ctid = xlrec->target.tid;
4494 0 : PageSetLSN(page, lsn);
4495 0 : PageSetTLI(page, ThisTimeLineID);
4496 0 : MarkBufferDirty(buffer);
4497 0 : UnlockReleaseBuffer(buffer);
4498 : }
4499 :
4500 : static void
4501 : heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
4502 0 : {
4503 0 : xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
4504 0 : Relation reln = XLogOpenRelation(xlrec->target.node);
4505 : Buffer buffer;
4506 : Page page;
4507 : OffsetNumber offnum;
4508 0 : ItemId lp = NULL;
4509 : HeapTupleHeader htup;
4510 : uint32 oldlen;
4511 : uint32 newlen;
4512 :
4513 0 : if (record->xl_info & XLR_BKP_BLOCK_1)
4514 0 : return;
4515 :
4516 0 : buffer = XLogReadBuffer(reln,
4517 : ItemPointerGetBlockNumber(&(xlrec->target.tid)),
4518 : false);
4519 0 : if (!BufferIsValid(buffer))
4520 : return;
4521 0 : page = (Page) BufferGetPage(buffer);
4522 :
4523 0 : if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
4524 : {
4525 0 : UnlockReleaseBuffer(buffer);
4526 0 : return;
4527 : }
4528 :
4529 0 : offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
4530 0 : if (PageGetMaxOffsetNumber(page) >= offnum)
4531 0 : lp = PageGetItemId(page, offnum);
4532 :
4533 0 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
4534 0 : elog(PANIC, "heap_inplace_redo: invalid lp");
4535 :
4536 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
4537 :
4538 0 : oldlen = ItemIdGetLength(lp) - htup->t_hoff;
4539 0 : newlen = record->xl_len - SizeOfHeapInplace;
4540 0 : if (oldlen != newlen)
4541 0 : elog(PANIC, "heap_inplace_redo: wrong tuple length");
4542 :
4543 0 : memcpy((char *) htup + htup->t_hoff,
4544 : (char *) xlrec + SizeOfHeapInplace,
4545 : newlen);
4546 :
4547 0 : PageSetLSN(page, lsn);
4548 0 : PageSetTLI(page, ThisTimeLineID);
4549 0 : MarkBufferDirty(buffer);
4550 0 : UnlockReleaseBuffer(buffer);
4551 : }
4552 :
4553 : void
4554 : heap_redo(XLogRecPtr lsn, XLogRecord *record)
4555 0 : {
4556 0 : uint8 info = record->xl_info & ~XLR_INFO_MASK;
4557 :
4558 0 : switch (info & XLOG_HEAP_OPMASK)
4559 : {
4560 : case XLOG_HEAP_INSERT:
4561 0 : heap_xlog_insert(lsn, record);
4562 0 : break;
4563 : case XLOG_HEAP_DELETE:
4564 0 : heap_xlog_delete(lsn, record);
4565 0 : break;
4566 : case XLOG_HEAP_UPDATE:
4567 0 : heap_xlog_update(lsn, record, false, false);
4568 0 : break;
4569 : case XLOG_HEAP_MOVE:
4570 0 : heap_xlog_update(lsn, record, true, false);
4571 0 : break;
4572 : case XLOG_HEAP_HOT_UPDATE:
4573 0 : heap_xlog_update(lsn, record, false, true);
4574 0 : break;
4575 : case XLOG_HEAP_NEWPAGE:
4576 0 : heap_xlog_newpage(lsn, record);
4577 0 : break;
4578 : case XLOG_HEAP_LOCK:
4579 0 : heap_xlog_lock(lsn, record);
4580 0 : break;
4581 : case XLOG_HEAP_INPLACE:
4582 0 : heap_xlog_inplace(lsn, record);
4583 0 : break;
4584 : default:
4585 0 : elog(PANIC, "heap_redo: unknown op code %u", info);
4586 : }
4587 0 : }
4588 :
4589 : void
4590 : heap2_redo(XLogRecPtr lsn, XLogRecord *record)
4591 0 : {
4592 0 : uint8 info = record->xl_info & ~XLR_INFO_MASK;
4593 :
4594 0 : switch (info & XLOG_HEAP_OPMASK)
4595 : {
4596 : case XLOG_HEAP2_FREEZE:
4597 0 : heap_xlog_freeze(lsn, record);
4598 0 : break;
4599 : case XLOG_HEAP2_CLEAN:
4600 0 : heap_xlog_clean(lsn, record, false);
4601 0 : break;
4602 : case XLOG_HEAP2_CLEAN_MOVE:
4603 0 : heap_xlog_clean(lsn, record, true);
4604 0 : break;
4605 : default:
4606 0 : elog(PANIC, "heap2_redo: unknown op code %u", info);
4607 : }
4608 0 : }
4609 :
4610 : static void
4611 : out_target(StringInfo buf, xl_heaptid *target)
4612 0 : {
4613 0 : appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
4614 : target->node.spcNode, target->node.dbNode, target->node.relNode,
4615 : ItemPointerGetBlockNumber(&(target->tid)),
4616 : ItemPointerGetOffsetNumber(&(target->tid)));
4617 0 : }
4618 :
4619 : void
4620 : heap_desc(StringInfo buf, uint8 xl_info, char *rec)
4621 0 : {
4622 0 : uint8 info = xl_info & ~XLR_INFO_MASK;
4623 :
4624 0 : info &= XLOG_HEAP_OPMASK;
4625 0 : if (info == XLOG_HEAP_INSERT)
4626 : {
4627 0 : xl_heap_insert *xlrec = (xl_heap_insert *) rec;
4628 :
4629 0 : if (xl_info & XLOG_HEAP_INIT_PAGE)
4630 0 : appendStringInfo(buf, "insert(init): ");
4631 : else
4632 0 : appendStringInfo(buf, "insert: ");
4633 0 : out_target(buf, &(xlrec->target));
4634 : }
4635 0 : else if (info == XLOG_HEAP_DELETE)
4636 : {
4637 0 : xl_heap_delete *xlrec = (xl_heap_delete *) rec;
4638 :
4639 0 : appendStringInfo(buf, "delete: ");
4640 0 : out_target(buf, &(xlrec->target));
4641 : }
4642 0 : else if (info == XLOG_HEAP_UPDATE)
4643 : {
4644 0 : xl_heap_update *xlrec = (xl_heap_update *) rec;
4645 :
4646 0 : if (xl_info & XLOG_HEAP_INIT_PAGE)
4647 0 : appendStringInfo(buf, "update(init): ");
4648 : else
4649 0 : appendStringInfo(buf, "update: ");
4650 0 : out_target(buf, &(xlrec->target));
4651 0 : appendStringInfo(buf, "; new %u/%u",
4652 : ItemPointerGetBlockNumber(&(xlrec->newtid)),
4653 : ItemPointerGetOffsetNumber(&(xlrec->newtid)));
4654 : }
4655 0 : else if (info == XLOG_HEAP_MOVE)
4656 : {
4657 0 : xl_heap_update *xlrec = (xl_heap_update *) rec;
4658 :
4659 0 : if (xl_info & XLOG_HEAP_INIT_PAGE)
4660 0 : appendStringInfo(buf, "move(init): ");
4661 : else
4662 0 : appendStringInfo(buf, "move: ");
4663 0 : out_target(buf, &(xlrec->target));
4664 0 : appendStringInfo(buf, "; new %u/%u",
4665 : ItemPointerGetBlockNumber(&(xlrec->newtid)),
4666 : ItemPointerGetOffsetNumber(&(xlrec->newtid)));
4667 : }
4668 0 : else if (info == XLOG_HEAP_HOT_UPDATE)
4669 : {
4670 0 : xl_heap_update *xlrec = (xl_heap_update *) rec;
4671 :
4672 0 : if (xl_info & XLOG_HEAP_INIT_PAGE) /* can this case happen? */
4673 0 : appendStringInfo(buf, "hot_update(init): ");
4674 : else
4675 0 : appendStringInfo(buf, "hot_update: ");
4676 0 : out_target(buf, &(xlrec->target));
4677 0 : appendStringInfo(buf, "; new %u/%u",
4678 : ItemPointerGetBlockNumber(&(xlrec->newtid)),
4679 : ItemPointerGetOffsetNumber(&(xlrec->newtid)));
4680 : }
4681 0 : else if (info == XLOG_HEAP_NEWPAGE)
4682 : {
4683 0 : xl_heap_newpage *xlrec = (xl_heap_newpage *) rec;
4684 :
4685 0 : appendStringInfo(buf, "newpage: rel %u/%u/%u; blk %u",
4686 : xlrec->node.spcNode, xlrec->node.dbNode,
4687 : xlrec->node.relNode, xlrec->blkno);
4688 : }
4689 0 : else if (info == XLOG_HEAP_LOCK)
4690 : {
4691 0 : xl_heap_lock *xlrec = (xl_heap_lock *) rec;
4692 :
4693 0 : if (xlrec->shared_lock)
4694 0 : appendStringInfo(buf, "shared_lock: ");
4695 : else
4696 0 : appendStringInfo(buf, "exclusive_lock: ");
4697 0 : if (xlrec->xid_is_mxact)
4698 0 : appendStringInfo(buf, "mxid ");
4699 : else
4700 0 : appendStringInfo(buf, "xid ");
4701 0 : appendStringInfo(buf, "%u ", xlrec->locking_xid);
4702 0 : out_target(buf, &(xlrec->target));
4703 : }
4704 0 : else if (info == XLOG_HEAP_INPLACE)
4705 : {
4706 0 : xl_heap_inplace *xlrec = (xl_heap_inplace *) rec;
4707 :
4708 0 : appendStringInfo(buf, "inplace: ");
4709 0 : out_target(buf, &(xlrec->target));
4710 : }
4711 : else
4712 0 : appendStringInfo(buf, "UNKNOWN");
4713 0 : }
4714 :
4715 : void
4716 : heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
4717 0 : {
4718 0 : uint8 info = xl_info & ~XLR_INFO_MASK;
4719 :
4720 0 : info &= XLOG_HEAP_OPMASK;
4721 0 : if (info == XLOG_HEAP2_FREEZE)
4722 : {
4723 0 : xl_heap_freeze *xlrec = (xl_heap_freeze *) rec;
4724 :
4725 0 : appendStringInfo(buf, "freeze: rel %u/%u/%u; blk %u; cutoff %u",
4726 : xlrec->node.spcNode, xlrec->node.dbNode,
4727 : xlrec->node.relNode, xlrec->block,
4728 : xlrec->cutoff_xid);
4729 : }
4730 0 : else if (info == XLOG_HEAP2_CLEAN)
4731 : {
4732 0 : xl_heap_clean *xlrec = (xl_heap_clean *) rec;
4733 :
4734 0 : appendStringInfo(buf, "clean: rel %u/%u/%u; blk %u",
4735 : xlrec->node.spcNode, xlrec->node.dbNode,
4736 : xlrec->node.relNode, xlrec->block);
4737 : }
4738 0 : else if (info == XLOG_HEAP2_CLEAN_MOVE)
4739 : {
4740 0 : xl_heap_clean *xlrec = (xl_heap_clean *) rec;
4741 :
4742 0 : appendStringInfo(buf, "clean_move: rel %u/%u/%u; blk %u",
4743 : xlrec->node.spcNode, xlrec->node.dbNode,
4744 : xlrec->node.relNode, xlrec->block);
4745 : }
4746 : else
4747 0 : appendStringInfo(buf, "UNKNOWN");
4748 0 : }
4749 :
4750 : /*
4751 : * heap_sync - sync a heap, for use when no WAL has been written
4752 : *
4753 : * This forces the heap contents (including TOAST heap if any) down to disk.
4754 : * If we skipped using WAL, and it's not a temp relation, we must force the
4755 : * relation down to disk before it's safe to commit the transaction. This
4756 : * requires writing out any dirty buffers and then doing a forced fsync.
4757 : *
4758 : * Indexes are not touched. (Currently, index operations associated with
4759 : * the commands that use this are WAL-logged and so do not need fsync.
4760 : * That behavior might change someday, but in any case it's likely that
4761 : * any fsync decisions required would be per-index and hence not appropriate
4762 : * to be done here.)
4763 : */
4764 : void
4765 : heap_sync(Relation rel)
4766 36 : {
4767 : /* temp tables never need fsync */
4768 36 : if (rel->rd_istemp)
4769 10 : return;
4770 :
4771 : /* main heap */
4772 26 : FlushRelationBuffers(rel);
4773 : /* FlushRelationBuffers will have opened rd_smgr */
4774 26 : smgrimmedsync(rel->rd_smgr);
4775 :
4776 : /* toast heap, if any */
4777 26 : if (OidIsValid(rel->rd_rel->reltoastrelid))
4778 : {
4779 : Relation toastrel;
4780 :
4781 2 : toastrel = heap_open(rel->rd_rel->reltoastrelid, AccessShareLock);
4782 2 : FlushRelationBuffers(toastrel);
4783 2 : smgrimmedsync(toastrel->rd_smgr);
4784 2 : heap_close(toastrel, AccessShareLock);
4785 : }
4786 : }
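
/*
 * Usage sketch (compiled out; the bulk-load caller is hypothetical): when
 * tuples are inserted with use_wal = false -- legitimate only if the
 * relation was created or truncated in the current transaction -- the data
 * must be forced to disk with heap_sync before commit, since there is no
 * WAL from which to recreate it.
 */
#ifdef NOT_USED
static void
example_finish_unlogged_load(Relation rel, bool used_wal)
{
	if (!used_wal)
		heap_sync(rel);			/* also covers the TOAST heap, if any */
}
#endif   /* NOT_USED */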