1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginentrypage.c
4 : * page utilities routines for the postgres inverted index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.11 2007/11/15 21:14:31 momjian Exp $
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 : #include "access/gin.h"
17 : #include "access/tuptoaster.h"
18 :
19 : /*
20 : * forms tuple for entry tree. On leaf page, Index tuple has
21 : * non-traditional layout. Tuple may contain posting list or
22 : * root blocknumber of posting tree. Macros GinIsPostingTre: (itup) / GinSetPostingTree(itup, blkno)
23 : * 1) Posting list
24 : * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
25 : * - ItemPointerGetBlockNumber(&itup->t_tid) contains original
26 : * size of tuple (without posting list).
27 : * Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
28 : * - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
29 : * of elements in posting list (number of heap itempointer)
30 : * Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n)
31 : * - After usual part of tuple there is a posting list
32 : * Macros: GinGetPosting(itup)
33 : * 2) Posting tree
34 : * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
35 : * - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
36 : * root of posting tree
37 : * - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number GIN_TREE_POSTING
38 : */
39 : IndexTuple
40 : GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd)
41 2765 : {
42 2765 : bool isnull = FALSE;
43 : IndexTuple itup;
44 :
45 2765 : itup = index_form_tuple(ginstate->tupdesc, &key, &isnull);
46 :
47 2765 : GinSetOrigSizePosting(itup, IndexTupleSize(itup));
48 :
49 2765 : if (nipd > 0)
50 : {
51 2746 : uint32 newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData) * nipd);
52 :
53 2746 : if (newsize >= INDEX_SIZE_MASK)
54 0 : return NULL;
55 :
56 2746 : if (newsize > TOAST_INDEX_TARGET && nipd > 1)
57 19 : return NULL;
58 :
59 2727 : itup = repalloc(itup, newsize);
60 :
61 : /* set new size */
62 2727 : itup->t_info &= ~INDEX_SIZE_MASK;
63 2727 : itup->t_info |= newsize;
64 :
65 2727 : if (ipd)
66 1434 : memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd);
67 2727 : GinSetNPosting(itup, nipd);
68 : }
69 : else
70 : {
71 19 : GinSetNPosting(itup, 0);
72 : }
73 2746 : return itup;
74 : }
75 :
76 : /*
77 : * Entry tree is a "static", ie tuple never deletes from it,
78 : * so we don't use right bound, we use rightest key instead.
79 : */
80 : static IndexTuple
81 : getRightMostTuple(Page page)
82 38 : {
83 38 : OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
84 :
85 38 : return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff));
86 : }
87 :
88 : Datum
89 : ginGetHighKey(GinState *ginstate, Page page)
90 34 : {
91 : IndexTuple itup;
92 : bool isnull;
93 :
94 34 : itup = getRightMostTuple(page);
95 :
96 34 : return index_getattr(itup, FirstOffsetNumber, ginstate->tupdesc, &isnull);
97 : }
98 :
99 : static bool
100 : entryIsMoveRight(GinBtree btree, Page page)
101 986 : {
102 : Datum highkey;
103 :
104 986 : if (GinPageRightMost(page))
105 952 : return FALSE;
106 :
107 34 : highkey = ginGetHighKey(btree->ginstate, page);
108 :
109 34 : if (compareEntries(btree->ginstate, btree->entryValue, highkey) > 0)
110 0 : return TRUE;
111 :
112 34 : return FALSE;
113 : }
114 :
115 : /*
116 : * Find correct tuple in non-leaf page. It supposed that
117 : * page correctly choosen and searching value SHOULD be on page
118 : */
119 : static BlockNumber
120 : entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
121 986 : {
122 : OffsetNumber low,
123 : high,
124 : maxoff;
125 986 : IndexTuple itup = NULL;
126 : int result;
127 986 : Page page = BufferGetPage(stack->buffer);
128 :
129 : Assert(!GinPageIsLeaf(page));
130 : Assert(!GinPageIsData(page));
131 :
132 986 : if (btree->fullScan)
133 : {
134 0 : stack->off = FirstOffsetNumber;
135 0 : stack->predictNumber *= PageGetMaxOffsetNumber(page);
136 0 : return btree->getLeftMostPage(btree, page);
137 : }
138 :
139 986 : low = FirstOffsetNumber;
140 986 : maxoff = high = PageGetMaxOffsetNumber(page);
141 : Assert(high >= low);
142 :
143 986 : high++;
144 :
145 5991 : while (high > low)
146 : {
147 4019 : OffsetNumber mid = low + ((high - low) / 2);
148 :
149 4979 : if (mid == maxoff && GinPageRightMost(page))
150 : /* Right infinity */
151 960 : result = -1;
152 : else
153 : {
154 : bool isnull;
155 :
156 3059 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
157 3059 : result = compareEntries(btree->ginstate, btree->entryValue,
158 : index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
159 : }
160 :
161 4019 : if (result == 0)
162 : {
163 0 : stack->off = mid;
164 : Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
165 0 : return GinItemPointerGetBlockNumber(&(itup)->t_tid);
166 : }
167 4019 : else if (result > 0)
168 2955 : low = mid + 1;
169 : else
170 1064 : high = mid;
171 : }
172 :
173 : Assert(high >= FirstOffsetNumber && high <= maxoff);
174 :
175 986 : stack->off = high;
176 986 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high));
177 : Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
178 986 : return GinItemPointerGetBlockNumber(&(itup)->t_tid);
179 : }
180 :
181 : /*
182 : * Searches correct position for value on leaf page.
183 : * Page should be corrrectly choosen.
184 : * Returns true if value found on page.
185 : */
186 : static bool
187 : entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
188 1490 : {
189 1490 : Page page = BufferGetPage(stack->buffer);
190 : OffsetNumber low,
191 : high;
192 : IndexTuple itup;
193 :
194 : Assert(GinPageIsLeaf(page));
195 : Assert(!GinPageIsData(page));
196 :
197 1490 : if (btree->fullScan)
198 : {
199 0 : stack->off = FirstOffsetNumber;
200 0 : return TRUE;
201 : }
202 :
203 1490 : low = FirstOffsetNumber;
204 1490 : high = PageGetMaxOffsetNumber(page);
205 :
206 1490 : if (high < low)
207 : {
208 3 : stack->off = FirstOffsetNumber;
209 3 : return false;
210 : }
211 :
212 1487 : high++;
213 :
214 10320 : while (high > low)
215 : {
216 7400 : OffsetNumber mid = low + ((high - low) / 2);
217 : bool isnull;
218 : int result;
219 :
220 7400 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
221 7400 : result = compareEntries(btree->ginstate, btree->entryValue,
222 : index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
223 :
224 7400 : if (result == 0)
225 : {
226 54 : stack->off = mid;
227 54 : return true;
228 : }
229 7346 : else if (result > 0)
230 7190 : low = mid + 1;
231 : else
232 156 : high = mid;
233 : }
234 :
235 1433 : stack->off = high;
236 1433 : return false;
237 : }
238 :
239 : static OffsetNumber
240 : entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
241 40 : {
242 : OffsetNumber i,
243 40 : maxoff = PageGetMaxOffsetNumber(page);
244 : IndexTuple itup;
245 :
246 : Assert(!GinPageIsLeaf(page));
247 : Assert(!GinPageIsData(page));
248 :
249 : /* if page isn't changed, we returns storedOff */
250 40 : if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
251 : {
252 40 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff));
253 40 : if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
254 40 : return storedOff;
255 :
256 : /*
257 : * we hope, that needed pointer goes to right. It's true if there
258 : * wasn't a deletion
259 : */
260 0 : for (i = storedOff + 1; i <= maxoff; i++)
261 : {
262 0 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
263 0 : if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
264 0 : return i;
265 : }
266 0 : maxoff = storedOff - 1;
267 : }
268 :
269 : /* last chance */
270 0 : for (i = FirstOffsetNumber; i <= maxoff; i++)
271 : {
272 0 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
273 0 : if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
274 0 : return i;
275 : }
276 :
277 0 : return InvalidOffsetNumber;
278 : }
279 :
280 : static BlockNumber
281 : entryGetLeftMostPage(GinBtree btree, Page page)
282 0 : {
283 : IndexTuple itup;
284 :
285 : Assert(!GinPageIsLeaf(page));
286 : Assert(!GinPageIsData(page));
287 : Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
288 :
289 0 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
290 0 : return GinItemPointerGetBlockNumber(&(itup)->t_tid);
291 : }
292 :
293 : static bool
294 : entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
295 1476 : {
296 1476 : Size itupsz = 0;
297 1476 : Page page = BufferGetPage(buf);
298 :
299 : Assert(btree->entry);
300 : Assert(!GinPageIsData(page));
301 :
302 1476 : if (btree->isDelete)
303 : {
304 2 : IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
305 :
306 2 : itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
307 : }
308 :
309 1476 : if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData))
310 1434 : return true;
311 :
312 42 : return false;
313 : }
314 :
315 : /*
316 : * Delete tuple on leaf page if tuples was existed and we
317 : * should update it, update old child blkno to new right page
318 : * if child split is occured
319 : */
320 : static BlockNumber
321 : entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
322 1476 : {
323 1476 : BlockNumber ret = InvalidBlockNumber;
324 :
325 : Assert(btree->entry);
326 : Assert(!GinPageIsData(page));
327 :
328 1476 : if (btree->isDelete)
329 : {
330 : Assert(GinPageIsLeaf(page));
331 2 : PageIndexTupleDelete(page, off);
332 : }
333 :
334 1476 : if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
335 : {
336 40 : IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
337 :
338 40 : ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber);
339 40 : ret = btree->rightblkno;
340 : }
341 :
342 1476 : btree->rightblkno = InvalidBlockNumber;
343 :
344 1476 : return ret;
345 : }
346 :
347 : /*
348 : * Place tuple on page and fills WAL record
349 : */
350 : static void
351 : entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
352 1434 : {
353 1434 : Page page = BufferGetPage(buf);
354 : static XLogRecData rdata[3];
355 : OffsetNumber placed;
356 : static ginxlogInsert data;
357 1434 : int cnt = 0;
358 :
359 1434 : *prdata = rdata;
360 1434 : data.updateBlkno = entryPreparePage(btree, page, off);
361 :
362 1434 : placed = PageAddItem(page, (Item) btree->entry, IndexTupleSize(btree->entry), off, false, false);
363 1434 : if (placed != off)
364 0 : elog(ERROR, "failed to add item to index page in \"%s\"",
365 : RelationGetRelationName(btree->index));
366 :
367 1434 : data.node = btree->index->rd_node;
368 1434 : data.blkno = BufferGetBlockNumber(buf);
369 1434 : data.offset = off;
370 1434 : data.nitem = 1;
371 1434 : data.isDelete = btree->isDelete;
372 1434 : data.isData = false;
373 1434 : data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
374 :
375 : /*
376 : * Prevent full page write if child's split occurs. That is needed to
377 : * remove incomplete splits while replaying WAL
378 : *
379 : * data.updateBlkno contains new block number (of newly created right
380 : * page) for recently splited page.
381 : */
382 1434 : if (data.updateBlkno == InvalidBlockNumber)
383 : {
384 1394 : rdata[0].buffer = buf;
385 1394 : rdata[0].buffer_std = TRUE;
386 1394 : rdata[0].data = NULL;
387 1394 : rdata[0].len = 0;
388 1394 : rdata[0].next = &rdata[1];
389 1394 : cnt++;
390 : }
391 :
392 1434 : rdata[cnt].buffer = InvalidBuffer;
393 1434 : rdata[cnt].data = (char *) &data;
394 1434 : rdata[cnt].len = sizeof(ginxlogInsert);
395 1434 : rdata[cnt].next = &rdata[cnt + 1];
396 1434 : cnt++;
397 :
398 1434 : rdata[cnt].buffer = InvalidBuffer;
399 1434 : rdata[cnt].data = (char *) btree->entry;
400 1434 : rdata[cnt].len = IndexTupleSize(btree->entry);
401 1434 : rdata[cnt].next = NULL;
402 :
403 1434 : btree->entry = NULL;
404 1434 : }
405 :
406 : /*
407 : * Returns new tuple with copied value from source tuple.
408 : * New tuple will not store posting list
409 : */
410 : static IndexTuple
411 : copyIndexTuple(IndexTuple itup, Page page)
412 46 : {
413 : IndexTuple nitup;
414 :
415 92 : if (GinPageIsLeaf(page) && !GinIsPostingTree(itup))
416 : {
417 46 : nitup = (IndexTuple) palloc(MAXALIGN(GinGetOrigSizePosting(itup)));
418 46 : memcpy(nitup, itup, GinGetOrigSizePosting(itup));
419 46 : nitup->t_info &= ~INDEX_SIZE_MASK;
420 46 : nitup->t_info |= GinGetOrigSizePosting(itup);
421 : }
422 : else
423 : {
424 0 : nitup = (IndexTuple) palloc(MAXALIGN(IndexTupleSize(itup)));
425 0 : memcpy(nitup, itup, IndexTupleSize(itup));
426 : }
427 :
428 46 : return nitup;
429 : }
430 :
431 : /*
432 : * Place tuple and split page, original buffer(lbuf) leaves untouched,
433 : * returns shadow page of lbuf filled new data.
434 : * Tuples are distributed between pages by equal size on its, not
435 : * an equal number!
436 : */
437 : static Page
438 : entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
439 42 : {
440 : static XLogRecData rdata[2];
441 : OffsetNumber i,
442 : maxoff,
443 42 : separator = InvalidOffsetNumber;
444 42 : Size totalsize = 0;
445 42 : Size lsize = 0,
446 : size;
447 : static char tupstore[2 * BLCKSZ];
448 : char *ptr;
449 : IndexTuple itup,
450 42 : leftrightmost = NULL;
451 : static ginxlogSplit data;
452 : Page page;
453 42 : Page lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
454 42 : Page rpage = BufferGetPage(rbuf);
455 42 : Size pageSize = PageGetPageSize(lpage);
456 :
457 42 : *prdata = rdata;
458 42 : data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
459 : InvalidOffsetNumber : GinItemPointerGetBlockNumber(&(btree->entry->t_tid));
460 42 : data.updateBlkno = entryPreparePage(btree, lpage, off);
461 :
462 42 : maxoff = PageGetMaxOffsetNumber(lpage);
463 42 : ptr = tupstore;
464 :
465 2196 : for (i = FirstOffsetNumber; i <= maxoff; i++)
466 : {
467 2154 : if (i == off)
468 : {
469 0 : size = MAXALIGN(IndexTupleSize(btree->entry));
470 0 : memcpy(ptr, btree->entry, size);
471 0 : ptr += size;
472 0 : totalsize += size + sizeof(ItemIdData);
473 : }
474 :
475 2154 : itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i));
476 2154 : size = MAXALIGN(IndexTupleSize(itup));
477 2154 : memcpy(ptr, itup, size);
478 2154 : ptr += size;
479 2154 : totalsize += size + sizeof(ItemIdData);
480 : }
481 :
482 42 : if (off == maxoff + 1)
483 : {
484 42 : size = MAXALIGN(IndexTupleSize(btree->entry));
485 42 : memcpy(ptr, btree->entry, size);
486 42 : ptr += size;
487 42 : totalsize += size + sizeof(ItemIdData);
488 : }
489 :
490 42 : GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
491 42 : GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
492 :
493 42 : ptr = tupstore;
494 42 : maxoff++;
495 42 : lsize = 0;
496 :
497 42 : page = lpage;
498 2238 : for (i = FirstOffsetNumber; i <= maxoff; i++)
499 : {
500 2196 : itup = (IndexTuple) ptr;
501 :
502 2196 : if (lsize > totalsize / 2)
503 : {
504 1009 : if (separator == InvalidOffsetNumber)
505 42 : separator = i - 1;
506 1009 : page = rpage;
507 : }
508 : else
509 : {
510 1187 : leftrightmost = itup;
511 1187 : lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
512 : }
513 :
514 2196 : if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
515 0 : elog(ERROR, "failed to add item to index page in \"%s\"",
516 : RelationGetRelationName(btree->index));
517 2196 : ptr += MAXALIGN(IndexTupleSize(itup));
518 : }
519 :
520 42 : btree->entry = copyIndexTuple(leftrightmost, lpage);
521 42 : ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber(lbuf), InvalidOffsetNumber);
522 :
523 42 : btree->rightblkno = BufferGetBlockNumber(rbuf);
524 :
525 42 : data.node = btree->index->rd_node;
526 42 : data.rootBlkno = InvalidBlockNumber;
527 42 : data.lblkno = BufferGetBlockNumber(lbuf);
528 42 : data.rblkno = BufferGetBlockNumber(rbuf);
529 42 : data.separator = separator;
530 42 : data.nitem = maxoff;
531 42 : data.isData = FALSE;
532 42 : data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
533 42 : data.isRootSplit = FALSE;
534 :
535 42 : rdata[0].buffer = InvalidBuffer;
536 42 : rdata[0].data = (char *) &data;
537 42 : rdata[0].len = sizeof(ginxlogSplit);
538 42 : rdata[0].next = &rdata[1];
539 :
540 42 : rdata[1].buffer = InvalidBuffer;
541 42 : rdata[1].data = tupstore;
542 42 : rdata[1].len = MAXALIGN(totalsize);
543 42 : rdata[1].next = NULL;
544 :
545 42 : return lpage;
546 : }
547 :
548 : /*
549 : * return newly allocate rightmost tuple
550 : */
551 : IndexTuple
552 : ginPageGetLinkItup(Buffer buf)
553 4 : {
554 : IndexTuple itup,
555 : nitup;
556 4 : Page page = BufferGetPage(buf);
557 :
558 4 : itup = getRightMostTuple(page);
559 4 : nitup = copyIndexTuple(itup, page);
560 4 : ItemPointerSet(&nitup->t_tid, BufferGetBlockNumber(buf), InvalidOffsetNumber);
561 :
562 4 : return nitup;
563 : }
564 :
565 : /*
566 : * Fills new root by rightest values from child.
567 : * Also called from ginxlog, should not use btree
568 : */
569 : void
570 : entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
571 2 : {
572 : Page page;
573 : IndexTuple itup;
574 :
575 2 : page = BufferGetPage(root);
576 :
577 2 : itup = ginPageGetLinkItup(lbuf);
578 2 : if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
579 0 : elog(ERROR, "failed to add item to index root page");
580 :
581 2 : itup = ginPageGetLinkItup(rbuf);
582 2 : if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
583 0 : elog(ERROR, "failed to add item to index root page");
584 2 : }
585 :
586 : void
587 : prepareEntryScan(GinBtree btree, Relation index, Datum value, GinState *ginstate)
588 1490 : {
589 1490 : memset(btree, 0, sizeof(GinBtreeData));
590 :
591 1490 : btree->isMoveRight = entryIsMoveRight;
592 1490 : btree->findChildPage = entryLocateEntry;
593 1490 : btree->findItem = entryLocateLeafEntry;
594 1490 : btree->findChildPtr = entryFindChildPtr;
595 1490 : btree->getLeftMostPage = entryGetLeftMostPage;
596 1490 : btree->isEnoughSpace = entryIsEnoughSpace;
597 1490 : btree->placeToPage = entryPlaceToPage;
598 1490 : btree->splitPage = entrySplitPage;
599 1490 : btree->fillRoot = entryFillRoot;
600 :
601 1490 : btree->index = index;
602 1490 : btree->ginstate = ginstate;
603 1490 : btree->entryValue = value;
604 :
605 1490 : btree->isDelete = FALSE;
606 1490 : btree->searchMode = FALSE;
607 1490 : btree->fullScan = FALSE;
608 1490 : btree->isBuild = FALSE;
609 1490 : }
|