/*-------------------------------------------------------------------------
 *
 * hashovfl.c
 *    Overflow page management code for the Postgres hash access method
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.61 2007/11/15 21:14:32 momjian Exp $
 *
 * NOTES
 *    Overflow pages look like ordinary relation pages.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/hash.h"


static Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
static uint32 _hash_firstfreebit(uint32 map);


/*
 * Convert overflow page bit number (its index in the free-page bitmaps)
 * to block number within the index.
 */
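/*
 * Illustrative example (assumes a small index where buckets 0-3 exist,
 * hashm_ovflpoint = 2, and the only page charged to the spares counts so far
 * is the primary bitmap page, so hashm_spares[] = {0, 1, 1}): bit number 1
 * becomes 1-based page number 2, the loop advances past i = 1 because
 * 2 > hashm_spares[1] and stops at i = 2 (the current ovflpoint), giving
 * (1 << 2) + 2 = block 6.  That is the first block after the metapage
 * (block 0), buckets 0-3 (blocks 1, 2, 4, 5) and the primary bitmap page
 * (block 3, which is overflow bit 0).
 */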
static BlockNumber
bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
{
    uint32 splitnum = metap->hashm_ovflpoint;
    uint32 i;

    /* Convert zero-based bitnumber to 1-based page number */
    ovflbitnum += 1;

    /* Determine the split number for this page (must be >= 1) */
    for (i = 1;
         i < splitnum && ovflbitnum > metap->hashm_spares[i];
         i++)
        /* loop */ ;

    /*
     * Convert to absolute page number by adding the number of bucket pages
     * that exist before this split point.
     */
    return (BlockNumber) ((1 << i) + ovflbitnum);
}

/*
 * Convert overflow page block number to bit number for free-page bitmap.
 */
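/*
 * Illustrative example (continuing the layout sketched above, after the new
 * page has been charged to split point 2 so that hashm_spares[] = {0, 1, 2}):
 * for block 6 the i = 1 iteration fails (bitnum = 6 - 2 = 4 > hashm_spares[1]),
 * but at i = 2 we get bitnum = 6 - 4 = 2 <= hashm_spares[2], so the function
 * returns bit number 1, matching the forward mapping above.
 */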
static uint32
blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
{
    uint32 splitnum = metap->hashm_ovflpoint;
    uint32 i;
    uint32 bitnum;

    /* Determine the split number containing this page */
    for (i = 1; i <= splitnum; i++)
    {
        if (ovflblkno <= (BlockNumber) (1 << i))
            break;              /* oops */
        bitnum = ovflblkno - (1 << i);
        if (bitnum <= metap->hashm_spares[i])
            return bitnum - 1;  /* -1 to convert 1-based to 0-based */
    }

    elog(ERROR, "invalid overflow block number %u", ovflblkno);
    return 0;                   /* keep compiler quiet */
}

/*
 * _hash_addovflpage
 *
 * Add an overflow page to the bucket whose last page is pointed to by 'buf'.
 *
 * On entry, the caller must hold a pin but no lock on 'buf'. The pin is
 * dropped before exiting (we assume the caller is not interested in 'buf'
 * anymore). The returned overflow page will be pinned and write-locked;
 * it is guaranteed to be empty.
 *
 * The caller must hold a pin, but no lock, on the metapage buffer.
 * That buffer is returned in the same state.
 *
 * The caller must hold at least share lock on the bucket, to ensure that
 * no one else tries to compact the bucket meanwhile. This guarantees that
 * 'buf' won't stop being part of the bucket while it's unlocked.
 *
 * NB: since this could be executed concurrently by multiple processes,
 * one should not assume that the returned overflow page will be the
 * immediate successor of the originally passed 'buf'. Additional overflow
 * pages might have been added to the bucket chain in between.
 */
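/*
 * Sketch of the typical call pattern (hypothetical caller code, not taken
 * from this file): an inserter that finds no room on the bucket's last page
 * does roughly
 *
 *      _hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK);
 *      buf = _hash_addovflpage(rel, metabuf, buf);
 *      ... PageAddItem() the new index tuple into BufferGetPage(buf) ...
 *      _hash_wrtbuf(rel, buf);
 *
 * i.e. it gives up its lock on the old tail page (the pin is dropped inside
 * _hash_addovflpage) and continues the insertion on the returned overflow
 * page.
 */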
Buffer
_hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
{
    Buffer      ovflbuf;
    Page        page;
    Page        ovflpage;
    HashPageOpaque pageopaque;
    HashPageOpaque ovflopaque;

    /* allocate and lock an empty overflow page */
    ovflbuf = _hash_getovflpage(rel, metabuf);

    /*
     * Write-lock the tail page. It is okay to hold two buffer locks here
     * since there cannot be anyone else contending for access to ovflbuf.
     */
    _hash_chgbufaccess(rel, buf, HASH_NOLOCK, HASH_WRITE);

    /* probably redundant... */
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);

    /* loop to find current tail page, in case someone else inserted too */
    for (;;)
    {
        BlockNumber nextblkno;

        page = BufferGetPage(buf);
        pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
        nextblkno = pageopaque->hasho_nextblkno;

        if (!BlockNumberIsValid(nextblkno))
            break;

        /* we assume we do not need to write the unmodified page */
        _hash_relbuf(rel, buf);

        buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
    }

    /* now that we have correct backlink, initialize new overflow page */
    ovflpage = BufferGetPage(ovflbuf);
    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
    ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
    ovflopaque->hasho_nextblkno = InvalidBlockNumber;
    ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
    ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
    ovflopaque->hasho_page_id = HASHO_PAGE_ID;

    MarkBufferDirty(ovflbuf);

    /* logically chain overflow page to previous page */
    pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
    _hash_wrtbuf(rel, buf);

    return ovflbuf;
}

/*
 * _hash_getovflpage()
 *
 * Find an available overflow page and return it. The returned buffer
 * is pinned and write-locked, and has had _hash_pageinit() applied,
 * but it is caller's responsibility to fill the special space.
 *
 * The caller must hold a pin, but no lock, on the metapage buffer.
 * That buffer is left in the same state at exit.
 */
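/*
 * Illustrative example of the search-start arithmetic (assuming the default
 * 8 kB block size, for which each bitmap page holds BMPGSZ_BIT = 32768 bits
 * and BITS_PER_MAP = 32): if hashm_firstfree = 33000, then
 * first_page = 33000 >> 15 = 1, the bit offset within that bitmap page is
 * 33000 & 32767 = 232, the word scan starts at j = 232 / 32 = 7, and 'bit'
 * is rounded down to 224, the first bit of that word.
 */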
static Buffer
_hash_getovflpage(Relation rel, Buffer metabuf)
{
    HashMetaPage metap;
    Buffer      mapbuf = 0;
    Buffer      newbuf;
    BlockNumber blkno;
    uint32      orig_firstfree;
    uint32      splitnum;
    uint32     *freep = NULL;
    uint32      max_ovflpg;
    uint32      bit;
    uint32      first_page;
    uint32      last_bit;
    uint32      last_page;
    uint32      i,
                j;

    /* Get exclusive lock on the meta page */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    _hash_checkpage(rel, metabuf, LH_META_PAGE);
    metap = (HashMetaPage) BufferGetPage(metabuf);

    /* start search at hashm_firstfree */
    orig_firstfree = metap->hashm_firstfree;
    first_page = orig_firstfree >> BMPG_SHIFT(metap);
    bit = orig_firstfree & BMPG_MASK(metap);
    i = first_page;
    j = bit / BITS_PER_MAP;
    bit &= ~(BITS_PER_MAP - 1);

    /* outer loop iterates once per bitmap page */
    for (;;)
    {
        BlockNumber mapblkno;
        Page        mappage;
        uint32      last_inpage;

        /* want to end search with the last existing overflow page */
        splitnum = metap->hashm_ovflpoint;
        max_ovflpg = metap->hashm_spares[splitnum] - 1;
        last_page = max_ovflpg >> BMPG_SHIFT(metap);
        last_bit = max_ovflpg & BMPG_MASK(metap);

        if (i > last_page)
            break;

        Assert(i < metap->hashm_nmaps);
        mapblkno = metap->hashm_mapp[i];

        if (i == last_page)
            last_inpage = last_bit;
        else
            last_inpage = BMPGSZ_BIT(metap) - 1;

        /* Release exclusive lock on metapage while reading bitmap page */
        _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);

        mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
        mappage = BufferGetPage(mapbuf);
        freep = HashPageGetBitmap(mappage);

        for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
        {
            if (freep[j] != ALL_SET)
                goto found;
        }

        /* No free space here, try to advance to next map page */
        _hash_relbuf(rel, mapbuf);
        i++;
        j = 0;                  /* scan from start of next map page */
        bit = 0;

        /* Reacquire exclusive lock on the meta page */
        _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
    }

    /*
     * No free pages --- have to extend the relation to add an overflow page.
     * First, check to see if we have to add a new bitmap page too.
     */
    if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
    {
        /*
         * We create the new bitmap page with all pages marked "in use".
         * Actually two pages in the new bitmap's range will exist
         * immediately: the bitmap page itself, and the following page which
         * is the one we return to the caller. Both of these are correctly
         * marked "in use". Subsequent pages do not exist yet, but it is
         * convenient to pre-mark them as "in use" too.
         */
        bit = metap->hashm_spares[splitnum];
        _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
        metap->hashm_spares[splitnum]++;
    }
    else
    {
        /*
         * Nothing to do here; since the page will be past the last used page,
         * we know its bitmap bit was preinitialized to "in use".
         */
    }

    /* Calculate address of the new overflow page */
    bit = metap->hashm_spares[splitnum];
    blkno = bitno_to_blkno(metap, bit);

    /*
     * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
     * relation length stays in sync with ours. XXX It's annoying to do this
     * with metapage write lock held; would be better to use a lock that
     * doesn't block incoming searches.
     */
    newbuf = _hash_getnewbuf(rel, blkno);

    metap->hashm_spares[splitnum]++;

    /*
     * Adjust hashm_firstfree to avoid redundant searches. But don't risk
     * changing it if someone moved it while we were searching bitmap pages.
     */
    if (metap->hashm_firstfree == orig_firstfree)
        metap->hashm_firstfree = bit + 1;

    /* Write updated metapage and release lock, but not pin */
    _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);

    return newbuf;

found:
    /* convert bit to bit number within page */
    bit += _hash_firstfreebit(freep[j]);

    /* mark page "in use" in the bitmap */
    SETBIT(freep, bit);
    _hash_wrtbuf(rel, mapbuf);

    /* Reacquire exclusive lock on the meta page */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    /* convert bit to absolute bit number */
    bit += (i << BMPG_SHIFT(metap));

    /* Calculate address of the recycled overflow page */
    blkno = bitno_to_blkno(metap, bit);

    /*
     * Adjust hashm_firstfree to avoid redundant searches. But don't risk
     * changing it if someone moved it while we were searching bitmap pages.
     */
    if (metap->hashm_firstfree == orig_firstfree)
    {
        metap->hashm_firstfree = bit + 1;

        /* Write updated metapage and release lock, but not pin */
        _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
    }
    else
    {
        /* We didn't change the metapage, so no need to write */
        _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
    }

    /* Fetch, init, and return the recycled page */
    return _hash_getinitbuf(rel, blkno);
}

/*
 * _hash_firstfreebit()
 *
 * Return the number of the first bit that is not set in the word 'map'.
 */
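/*
 * For example, _hash_firstfreebit(0x0000000F) returns 4: bits 0 through 3
 * are set, and bit 4 is the lowest clear bit.
 */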
static uint32
_hash_firstfreebit(uint32 map)
{
    uint32      i,
                mask;

    mask = 0x1;
    for (i = 0; i < BITS_PER_MAP; i++)
    {
        if (!(mask & map))
            return i;
        mask <<= 1;
    }

    elog(ERROR, "firstfreebit found no free bit");

    return 0;                   /* keep compiler quiet */
}

/*
 * _hash_freeovflpage() -
 *
 * Remove this overflow page from its bucket's chain, and mark the page as
 * free. On entry, ovflbuf is write-locked; it is released before exiting.
 *
 * Since this function is invoked in VACUUM, we provide an access strategy
 * parameter that controls fetches of the bucket pages.
 *
 * Returns the block number of the page that followed the given page
 * in the bucket, or InvalidBlockNumber if no following page.
 *
 * NB: caller must not hold lock on metapage, nor on either page that's
 * adjacent in the bucket chain. The caller had better hold exclusive lock
 * on the bucket, too.
 */
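/*
 * Illustrative example (same small-index layout as sketched above for
 * bitno_to_blkno): freeing the overflow page at block 6 clears bit number 1,
 * which lives on bitmap page 0 (the primary bitmap page at block 3) at
 * bitmapbit 1; if hashm_firstfree was larger than 1, it is pulled back to 1
 * so that the next _hash_getovflpage() call can recycle the page.
 */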
BlockNumber
_hash_freeovflpage(Relation rel, Buffer ovflbuf,
                   BufferAccessStrategy bstrategy)
{
    HashMetaPage metap;
    Buffer      metabuf;
    Buffer      mapbuf;
    BlockNumber ovflblkno;
    BlockNumber prevblkno;
    BlockNumber blkno;
    BlockNumber nextblkno;
    HashPageOpaque ovflopaque;
    Page        ovflpage;
    Page        mappage;
    uint32     *freep;
    uint32      ovflbitno;
    int32       bitmappage,
                bitmapbit;
    Bucket      bucket;

    /* Get information from the doomed page */
    _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
    ovflblkno = BufferGetBlockNumber(ovflbuf);
    ovflpage = BufferGetPage(ovflbuf);
    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
    nextblkno = ovflopaque->hasho_nextblkno;
    prevblkno = ovflopaque->hasho_prevblkno;
    bucket = ovflopaque->hasho_bucket;

    /*
     * Zero the page for debugging's sake; then write and release it. (Note:
     * if we failed to zero the page here, we'd have problems with the Assert
     * in _hash_pageinit() when the page is reused.)
     */
    MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
    _hash_wrtbuf(rel, ovflbuf);

    /*
     * Fix up the bucket chain. This is a doubly-linked list, so we must fix
     * up the bucket chain members behind and ahead of the overflow page being
     * deleted. No concurrency issues since we hold exclusive lock on the
     * entire bucket.
     */
    if (BlockNumberIsValid(prevblkno))
    {
        Buffer      prevbuf = _hash_getbuf_with_strategy(rel,
                                                         prevblkno,
                                                         HASH_WRITE,
                                            LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
                                                         bstrategy);
        Page        prevpage = BufferGetPage(prevbuf);
        HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);

        Assert(prevopaque->hasho_bucket == bucket);
        prevopaque->hasho_nextblkno = nextblkno;
        _hash_wrtbuf(rel, prevbuf);
    }
    if (BlockNumberIsValid(nextblkno))
    {
        Buffer      nextbuf = _hash_getbuf_with_strategy(rel,
                                                         nextblkno,
                                                         HASH_WRITE,
                                                         LH_OVERFLOW_PAGE,
                                                         bstrategy);
        Page        nextpage = BufferGetPage(nextbuf);
        HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);

        Assert(nextopaque->hasho_bucket == bucket);
        nextopaque->hasho_prevblkno = prevblkno;
        _hash_wrtbuf(rel, nextbuf);
    }

    /* Note: bstrategy is intentionally not used for metapage and bitmap */

    /* Read the metapage so we can determine which bitmap page to use */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
    metap = (HashMetaPage) BufferGetPage(metabuf);

    /* Identify which bit to clear */
    ovflbitno = blkno_to_bitno(metap, ovflblkno);

    bitmappage = ovflbitno >> BMPG_SHIFT(metap);
    bitmapbit = ovflbitno & BMPG_MASK(metap);

    if (bitmappage >= metap->hashm_nmaps)
        elog(ERROR, "invalid overflow bit number %u", ovflbitno);
    blkno = metap->hashm_mapp[bitmappage];

    /* Release metapage lock while we access the bitmap page */
    _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);

    /* Clear the bitmap bit to indicate that this overflow page is free */
    mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
    mappage = BufferGetPage(mapbuf);
    freep = HashPageGetBitmap(mappage);
    Assert(ISSET(freep, bitmapbit));
    CLRBIT(freep, bitmapbit);
    _hash_wrtbuf(rel, mapbuf);

    /* Get write-lock on metapage to update firstfree */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    /* if this is now the first free page, update hashm_firstfree */
    if (ovflbitno < metap->hashm_firstfree)
    {
        metap->hashm_firstfree = ovflbitno;
        _hash_wrtbuf(rel, metabuf);
    }
    else
    {
        /* no need to change metapage */
        _hash_relbuf(rel, metabuf);
    }

    return nextblkno;
}


/*
 * _hash_initbitmap()
 *
 * Initialize a new bitmap page. The metapage has a write-lock upon
 * entering the function, and must be written by caller after return.
 *
 * 'blkno' is the block number of the new bitmap page.
 *
 * All bits in the new bitmap page are set to "1", indicating "in use".
 */
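/*
 * Assuming the default 8 kB block size, each bitmap page covers
 * BMPGSZ_BIT(metap) = 32768 overflow pages, so _hash_getovflpage() has to
 * call this routine only once per 32768 overflow-page allocations (the
 * "last_bit == BMPGSZ_BIT - 1" test above).
 */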
void
_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
{
    Buffer      buf;
    Page        pg;
    HashPageOpaque op;
    uint32     *freep;

    /*
     * It is okay to write-lock the new bitmap page while holding metapage
     * write lock, because no one else could be contending for the new page.
     * Also, the metapage lock makes it safe to extend the index using
     * _hash_getnewbuf.
     *
     * There is some loss of concurrency in possibly doing I/O for the new
     * page while holding the metapage lock, but this path is taken so seldom
     * that it's not worth worrying about.
     */
    buf = _hash_getnewbuf(rel, blkno);
    pg = BufferGetPage(buf);

    /* initialize the page's special space */
    op = (HashPageOpaque) PageGetSpecialPointer(pg);
    op->hasho_prevblkno = InvalidBlockNumber;
    op->hasho_nextblkno = InvalidBlockNumber;
    op->hasho_bucket = -1;
    op->hasho_flag = LH_BITMAP_PAGE;
    op->hasho_page_id = HASHO_PAGE_ID;

    /* set all of the bits to 1 */
    freep = HashPageGetBitmap(pg);
    MemSet(freep, 0xFF, BMPGSZ_BYTE(metap));

    /* write out the new bitmap page (releasing write lock and pin) */
    _hash_wrtbuf(rel, buf);

    /* add the new bitmap page to the metapage's list of bitmaps */
    /* metapage already has a write lock */
    if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("out of overflow pages in hash index \"%s\"",
                        RelationGetRelationName(rel))));

    metap->hashm_mapp[metap->hashm_nmaps] = blkno;

    metap->hashm_nmaps++;
}


/*
 * _hash_squeezebucket(rel, bucket, bucket_blkno, bstrategy)
 *
 * Try to squeeze the tuples onto pages occurring earlier in the
 * bucket chain in an attempt to free overflow pages. When we start
 * the "squeezing", the page from which we start taking tuples (the
 * "read" page) is the last page in the bucket chain and the page
 * onto which we start squeezing tuples (the "write" page) is the
 * first page in the bucket chain. The read page works backward and
 * the write page works forward; the procedure terminates when the
 * read page and write page are the same page.
 *
 * At completion of this procedure, it is guaranteed that all pages in
 * the bucket are nonempty, unless the bucket is totally empty (in
 * which case all overflow pages will be freed). The original implementation
 * required that to be true on entry as well, but it's a lot easier for
 * callers to leave empty overflow pages and let this routine clean them up.
 *
 * Caller must hold exclusive lock on the target bucket. This allows
 * us to safely lock multiple pages in the bucket.
 *
 * Since this function is invoked in VACUUM, we provide an access strategy
 * parameter that controls fetches of the bucket pages.
 */
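/*
 * Illustrative sketch of the process: for a chain bucket -> O1 -> O2 -> O3,
 * tuples are pulled from O3 (the "read" page) and packed into the bucket
 * page (the "write" page); when O3 empties it is freed and reading continues
 * from O2, while writing advances to O1 once the bucket page fills.  The
 * loop ends when the read and write positions meet.
 */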
void
_hash_squeezebucket(Relation rel,
                    Bucket bucket,
                    BlockNumber bucket_blkno,
                    BufferAccessStrategy bstrategy)
{
    Buffer      wbuf;
    Buffer      rbuf = 0;
    BlockNumber wblkno;
    BlockNumber rblkno;
    Page        wpage;
    Page        rpage;
    HashPageOpaque wopaque;
    HashPageOpaque ropaque;
    OffsetNumber woffnum;
    OffsetNumber roffnum;
    IndexTuple  itup;
    Size        itemsz;

    /*
     * start squeezing into the base bucket page.
     */
    wblkno = bucket_blkno;
    wbuf = _hash_getbuf_with_strategy(rel,
                                      wblkno,
                                      HASH_WRITE,
                                      LH_BUCKET_PAGE,
                                      bstrategy);
    wpage = BufferGetPage(wbuf);
    wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);

    /*
     * if there aren't any overflow pages, there's nothing to squeeze.
     */
    if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
    {
        _hash_relbuf(rel, wbuf);
        return;
    }

    /*
     * Find the last page in the bucket chain by starting at the base bucket
     * page and working forward. Note: we assume that a hash bucket chain is
     * usually smaller than the buffer ring being used by VACUUM, else using
     * the access strategy here would be counterproductive.
     */
    ropaque = wopaque;
    do
    {
        rblkno = ropaque->hasho_nextblkno;
        if (ropaque != wopaque)
            _hash_relbuf(rel, rbuf);
        rbuf = _hash_getbuf_with_strategy(rel,
                                          rblkno,
                                          HASH_WRITE,
                                          LH_OVERFLOW_PAGE,
                                          bstrategy);
        rpage = BufferGetPage(rbuf);
        ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
        Assert(ropaque->hasho_bucket == bucket);
    } while (BlockNumberIsValid(ropaque->hasho_nextblkno));

    /*
     * squeeze the tuples.
     */
    roffnum = FirstOffsetNumber;
    for (;;)
    {
        /* this test is needed in case page is empty on entry */
        if (roffnum <= PageGetMaxOffsetNumber(rpage))
        {
            itup = (IndexTuple) PageGetItem(rpage,
                                            PageGetItemId(rpage, roffnum));
            itemsz = IndexTupleDSize(*itup);
            itemsz = MAXALIGN(itemsz);

            /*
             * Walk up the bucket chain, looking for a page big enough for
             * this item. Exit if we reach the read page.
             */
            while (PageGetFreeSpace(wpage) < itemsz)
            {
                Assert(!PageIsEmpty(wpage));

                wblkno = wopaque->hasho_nextblkno;
                Assert(BlockNumberIsValid(wblkno));

                _hash_wrtbuf(rel, wbuf);

                if (rblkno == wblkno)
                {
                    /* wbuf is already released */
                    _hash_wrtbuf(rel, rbuf);
                    return;
                }

                wbuf = _hash_getbuf_with_strategy(rel,
                                                  wblkno,
                                                  HASH_WRITE,
                                                  LH_OVERFLOW_PAGE,
                                                  bstrategy);
                wpage = BufferGetPage(wbuf);
                wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
                Assert(wopaque->hasho_bucket == bucket);
            }

            /*
             * we have found room so insert on the "write" page.
             */
            woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage));
            if (PageAddItem(wpage, (Item) itup, itemsz, woffnum, false, false)
                == InvalidOffsetNumber)
                elog(ERROR, "failed to add index item to \"%s\"",
                     RelationGetRelationName(rel));

            /*
             * delete the tuple from the "read" page. PageIndexTupleDelete
             * repacks the ItemId array, so 'roffnum' will be "advanced" to
             * the "next" ItemId.
             */
            PageIndexTupleDelete(rpage, roffnum);
        }

        /*
         * if the "read" page is now empty because of the deletion (or because
         * it was empty when we got to it), free it.
         *
         * Tricky point here: if our read and write pages are adjacent in the
         * bucket chain, our write lock on wbuf will conflict with
         * _hash_freeovflpage's attempt to update the sibling links of the
         * removed page. However, in that case we are done anyway, so we can
         * simply drop the write lock before calling _hash_freeovflpage.
         */
        if (PageIsEmpty(rpage))
        {
            rblkno = ropaque->hasho_prevblkno;
            Assert(BlockNumberIsValid(rblkno));

            /* are we freeing the page adjacent to wbuf? */
            if (rblkno == wblkno)
            {
                /* yes, so release wbuf lock first */
                _hash_wrtbuf(rel, wbuf);
                /* free this overflow page (releases rbuf) */
                _hash_freeovflpage(rel, rbuf, bstrategy);
                /* done */
                return;
            }

            /* free this overflow page, then get the previous one */
            _hash_freeovflpage(rel, rbuf, bstrategy);

            rbuf = _hash_getbuf_with_strategy(rel,
                                              rblkno,
                                              HASH_WRITE,
                                              LH_OVERFLOW_PAGE,
                                              bstrategy);
            rpage = BufferGetPage(rbuf);
            ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
            Assert(ropaque->hasho_bucket == bucket);

            roffnum = FirstOffsetNumber;
        }
    }

    /* NOTREACHED */
}