1 : /*-------------------------------------------------------------------------
2 : *
3 : * gist.c
4 : * interface routines for the postgres GiST index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.148 2007/11/15 21:14:31 momjian Exp $
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/genam.h"
18 : #include "access/gist_private.h"
19 : #include "catalog/index.h"
20 : #include "miscadmin.h"
21 : #include "utils/memutils.h"
22 :
/*
 * Placeholder LSN stamped on pages of temporary indexes.  The routines in
 * this file write no WAL record when rd_istemp is set, so they store this
 * fixed value with PageSetLSN() instead of a real XLogInsert() result.
 */
const XLogRecPtr XLogRecPtrForTemp = {1, 1};
24 :
/* Working state for gistbuild and its callback */
typedef struct
{
	GISTSTATE	giststate;		/* filled in by initGISTstate() in gistbuild */
	int			numindexattrs;	/* copied from indexInfo->ii_NumIndexAttrs */
	double		indtuples;		/* incremented once per tuple inserted */
	MemoryContext tmpCtx;		/* scratch context, reset after every tuple */
} GISTBuildState;
33 :
34 :
/* non-export function prototypes */

/* per-tuple callback that gistbuild passes to IndexBuildHeapScan */
static void gistbuildCallback(Relation index,
				  HeapTuple htup,
				  Datum *values,
				  bool *isnull,
				  bool tupleIsAlive,
				  void *state);

/* insert a single index tuple, starting the descent at the root block */
static void gistdoinsert(Relation r,
			 IndexTuple itup,
			 Size freespace,
			 GISTSTATE *GISTstate);

/* walk down from state->stack to a leaf page and leave it X-locked */
static void gistfindleaf(GISTInsertState *state,
			 GISTSTATE *giststate);
48 :
49 :
/*
 * ROTATEDIST(d) -- push a new SplitedPageLayout onto the front of the singly
 * linked list whose head pointer is "d".
 *
 * The new element is zero-filled except for block.blkno, which is set to
 * InvalidBlockNumber, and buffer, which is set to InvalidBuffer.  On exit
 * "d" points at the new element and its "next" is the previous head.
 *
 * "d" must be an lvalue and is evaluated twice.  The macro-local variable
 * uses a deliberately unusual name so that it cannot capture an argument
 * the caller happens to call "tmp".
 */
#define ROTATEDIST(d) do { \
	SplitedPageLayout *rotatedist_new_ = \
		(SplitedPageLayout *) palloc0(sizeof(SplitedPageLayout)); \
	rotatedist_new_->block.blkno = InvalidBlockNumber; \
	rotatedist_new_->buffer = InvalidBuffer; \
	rotatedist_new_->next = (d); \
	(d) = rotatedist_new_; \
} while(0)
58 :
59 :
60 : /*
61 : * Create and return a temporary memory context for use by GiST. We
62 : * _always_ invoke user-provided methods in a temporary memory
63 : * context, so that memory leaks in those functions cannot cause
64 : * problems. Also, we use some additional temporary contexts in the
65 : * GiST code itself, to avoid the need to do some awkward manual
66 : * memory management.
67 : */
68 : MemoryContext
69 : createTempGistContext(void)
70 24 : {
71 24 : return AllocSetContextCreate(CurrentMemoryContext,
72 : "GiST temporary context",
73 : ALLOCSET_DEFAULT_MINSIZE,
74 : ALLOCSET_DEFAULT_INITSIZE,
75 : ALLOCSET_DEFAULT_MAXSIZE);
76 : }
77 :
78 : /*
79 : * Routine to build an index. Basically calls insert over and over.
80 : *
81 : * XXX: it would be nice to implement some sort of bulk-loading
82 : * algorithm, but it is not clear how to do that.
83 : */
84 : Datum
85 : gistbuild(PG_FUNCTION_ARGS)
86 7 : {
87 7 : Relation heap = (Relation) PG_GETARG_POINTER(0);
88 7 : Relation index = (Relation) PG_GETARG_POINTER(1);
89 7 : IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
90 : IndexBuildResult *result;
91 : double reltuples;
92 : GISTBuildState buildstate;
93 : Buffer buffer;
94 : Page page;
95 :
96 : /*
97 : * We expect to be called exactly once for any index relation. If that's
98 : * not the case, big trouble's what we have.
99 : */
100 7 : if (RelationGetNumberOfBlocks(index) != 0)
101 0 : elog(ERROR, "index \"%s\" already contains data",
102 : RelationGetRelationName(index));
103 :
104 : /* no locking is needed */
105 7 : initGISTstate(&buildstate.giststate, index);
106 :
107 : /* initialize the root page */
108 7 : buffer = gistNewBuffer(index);
109 : Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
110 7 : page = BufferGetPage(buffer);
111 :
112 7 : START_CRIT_SECTION();
113 :
114 7 : GISTInitBuffer(buffer, F_LEAF);
115 :
116 7 : MarkBufferDirty(buffer);
117 :
118 7 : if (!index->rd_istemp)
119 : {
120 : XLogRecPtr recptr;
121 : XLogRecData rdata;
122 :
123 5 : rdata.data = (char *) &(index->rd_node);
124 5 : rdata.len = sizeof(RelFileNode);
125 5 : rdata.buffer = InvalidBuffer;
126 5 : rdata.next = NULL;
127 :
128 5 : recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
129 5 : PageSetLSN(page, recptr);
130 5 : PageSetTLI(page, ThisTimeLineID);
131 : }
132 : else
133 2 : PageSetLSN(page, XLogRecPtrForTemp);
134 :
135 7 : UnlockReleaseBuffer(buffer);
136 :
137 7 : END_CRIT_SECTION();
138 :
139 : /* build the index */
140 7 : buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs;
141 7 : buildstate.indtuples = 0;
142 :
143 : /*
144 : * create a temporary memory context that is reset once for each tuple
145 : * inserted into the index
146 : */
147 7 : buildstate.tmpCtx = createTempGistContext();
148 :
149 : /* do the heap scan */
150 7 : reltuples = IndexBuildHeapScan(heap, index, indexInfo,
151 : gistbuildCallback, (void *) &buildstate);
152 :
153 : /* okay, all heap tuples are indexed */
154 7 : MemoryContextDelete(buildstate.tmpCtx);
155 :
156 7 : freeGISTstate(&buildstate.giststate);
157 :
158 : /*
159 : * Return statistics
160 : */
161 7 : result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
162 :
163 7 : result->heap_tuples = reltuples;
164 7 : result->index_tuples = buildstate.indtuples;
165 :
166 7 : PG_RETURN_POINTER(result);
167 : }
168 :
169 : /*
170 : * Per-tuple callback from IndexBuildHeapScan
171 : */
172 : static void
173 : gistbuildCallback(Relation index,
174 : HeapTuple htup,
175 : Datum *values,
176 : bool *isnull,
177 : bool tupleIsAlive,
178 : void *state)
179 10656 : {
180 10656 : GISTBuildState *buildstate = (GISTBuildState *) state;
181 : IndexTuple itup;
182 : MemoryContext oldCtx;
183 :
184 10656 : oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
185 :
186 : /* form an index tuple and point it at the heap tuple */
187 10656 : itup = gistFormTuple(&buildstate->giststate, index,
188 : values, isnull, true /* size is currently bogus */ );
189 10656 : itup->t_tid = htup->t_self;
190 :
191 : /*
192 : * Since we already have the index relation locked, we call gistdoinsert
193 : * directly. Normal access method calls dispatch through gistinsert,
194 : * which locks the relation for write. This is the right thing to do if
195 : * you're inserting single tups, but not when you're initializing the
196 : * whole index at once.
197 : *
198 : * In this path we respect the fillfactor setting, whereas insertions
199 : * after initial build do not.
200 : */
201 10656 : gistdoinsert(index, itup,
202 : RelationGetTargetPageFreeSpace(index, GIST_DEFAULT_FILLFACTOR),
203 : &buildstate->giststate);
204 :
205 10656 : buildstate->indtuples += 1;
206 10656 : MemoryContextSwitchTo(oldCtx);
207 10656 : MemoryContextReset(buildstate->tmpCtx);
208 10656 : }
209 :
210 : /*
211 : * gistinsert -- wrapper for GiST tuple insertion.
212 : *
213 : * This is the public interface routine for tuple insertion in GiSTs.
214 : * It doesn't do any work; just locks the relation and passes the buck.
215 : */
216 : Datum
217 : gistinsert(PG_FUNCTION_ARGS)
218 0 : {
219 0 : Relation r = (Relation) PG_GETARG_POINTER(0);
220 0 : Datum *values = (Datum *) PG_GETARG_POINTER(1);
221 0 : bool *isnull = (bool *) PG_GETARG_POINTER(2);
222 0 : ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
223 :
224 : #ifdef NOT_USED
225 : Relation heapRel = (Relation) PG_GETARG_POINTER(4);
226 : bool checkUnique = PG_GETARG_BOOL(5);
227 : #endif
228 : IndexTuple itup;
229 : GISTSTATE giststate;
230 : MemoryContext oldCtx;
231 : MemoryContext insertCtx;
232 :
233 0 : insertCtx = createTempGistContext();
234 0 : oldCtx = MemoryContextSwitchTo(insertCtx);
235 :
236 0 : initGISTstate(&giststate, r);
237 :
238 0 : itup = gistFormTuple(&giststate, r,
239 : values, isnull, true /* size is currently bogus */ );
240 0 : itup->t_tid = *ht_ctid;
241 :
242 0 : gistdoinsert(r, itup, 0, &giststate);
243 :
244 : /* cleanup */
245 0 : freeGISTstate(&giststate);
246 0 : MemoryContextSwitchTo(oldCtx);
247 0 : MemoryContextDelete(insertCtx);
248 :
249 0 : PG_RETURN_BOOL(true);
250 : }
251 :
252 :
253 : /*
254 : * Workhouse routine for doing insertion into a GiST index. Note that
255 : * this routine assumes it is invoked in a short-lived memory context,
256 : * so it does not bother releasing palloc'd allocations.
257 : */
258 : static void
259 : gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
260 10656 : {
261 : GISTInsertState state;
262 :
263 10656 : memset(&state, 0, sizeof(GISTInsertState));
264 :
265 10656 : state.itup = (IndexTuple *) palloc(sizeof(IndexTuple));
266 10656 : state.itup[0] = (IndexTuple) palloc(IndexTupleSize(itup));
267 10656 : memcpy(state.itup[0], itup, IndexTupleSize(itup));
268 10656 : state.ituplen = 1;
269 10656 : state.freespace = freespace;
270 10656 : state.r = r;
271 10656 : state.key = itup->t_tid;
272 10656 : state.needInsertComplete = true;
273 :
274 10656 : state.stack = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
275 10656 : state.stack->blkno = GIST_ROOT_BLKNO;
276 :
277 10656 : gistfindleaf(&state, giststate);
278 10656 : gistmakedeal(&state, giststate);
279 10656 : }
280 :
/*
 * gistplacetopage -- put state->itup[0 .. ituplen-1] onto the page at the
 * top of the insert stack (state->stack), splitting the page if
 * gistnospace() reports that the tuples do not fit.
 *
 * On an inner page the tuple at state->stack->childoffnum is replaced by
 * the new tuples; on a leaf the new tuples are simply added.
 *
 * On return state->itup / state->ituplen hold the tuple(s) to be applied
 * to the parent page: one tuple per resulting page after a split,
 * otherwise a single tuple.
 *
 * Returns true if the page was split, false otherwise.
 */
static bool
gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
{
	bool		is_splitted = false;
	bool		is_leaf = (GistPageIsLeaf(state->stack->page)) ? true : false;

	/*
	 * if (!is_leaf) remove old key: This node's key has been modified, either
	 * because a child split occurred or because we needed to adjust our key
	 * for an insert in a child node. Therefore, remove the old version of
	 * this node's key.
	 *
	 * for WAL replay, in the non-split case we handle this by setting up a
	 * one-element todelete array; in the split case, it's handled implicitly
	 * because the tuple vector passed to gistSplit won't include this tuple.
	 *
	 * XXX: If we want to change fillfactors between node and leaf, fillfactor
	 * = (is_leaf ? state->leaf_fillfactor : state->node_fillfactor)
	 */
	if (gistnospace(state->stack->page, state->itup, state->ituplen,
					is_leaf ? InvalidOffsetNumber : state->stack->childoffnum,
					state->freespace))
	{
		/* no space for insertion: the page has to be split */
		IndexTuple *itvec;
		int			tlen;
		SplitedPageLayout *dist = NULL,
				   *ptr;
		BlockNumber rrlink = InvalidBlockNumber;
		GistNSN		oldnsn;

		is_splitted = true;

		/*
		 * Form the vector of index tuples to split: take everything on the
		 * page, drop the old tuple if required, and append the new tuples.
		 */
		itvec = gistextractpage(state->stack->page, &tlen);
		if (!is_leaf)
		{
			/* on an inner page the old downlink tuple must be removed */
			int			pos = state->stack->childoffnum - FirstOffsetNumber;

			tlen--;
			if (pos != tlen)
				memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
		}
		itvec = gistjoinvector(itvec, &tlen, state->itup, state->ituplen);
		dist = gistSplit(state->r, state->stack->page, itvec, tlen, giststate);

		/* from here on state->itup collects one tuple per resulting page */
		state->itup = (IndexTuple *) palloc(sizeof(IndexTuple) * tlen);
		state->ituplen = 0;

		if (state->stack->blkno != GIST_ROOT_BLKNO)
		{
			/*
			 * For a non-root split the first resulting page reuses the
			 * existing buffer, so no new buffer is allocated for it; the
			 * new contents are assembled on a temporary page.
			 */
			dist->buffer = state->stack->buffer;
			dist->page = PageGetTempPage(BufferGetPage(dist->buffer), sizeof(GISTPageOpaqueData));

			/* clear all flags except F_LEAF */
			GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
		}

		/* obtain the new pages and fill them */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			int			i;
			char	   *data;

			/* entries without a buffer yet get a freshly allocated page */
			if (ptr->buffer == InvalidBuffer)
			{
				ptr->buffer = gistNewBuffer(state->r);
				GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
				ptr->page = BufferGetPage(ptr->buffer);
			}
			ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);

			/*
			 * Fill the page.  This is safe because all of these pages are
			 * new (i.e. not yet linked into the tree) or masked by the temp
			 * page.  ptr->list is walked as ptr->block.num tuples stored
			 * back to back.
			 */
			data = (char *) (ptr->list);
			for (i = 0; i < ptr->block.num; i++)
			{
				if (PageAddItem(ptr->page, (Item) data, IndexTupleSize((IndexTuple) data), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
					elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(state->r));
				data += IndexTupleSize((IndexTuple) data);
			}

			/* point the page's tuple at its block and remember it for the parent */
			ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
			state->itup[state->ituplen] = ptr->itup;
			state->ituplen++;
		}

		/* save the old rightlink of the page being split */
		if (state->stack->blkno != GIST_ROOT_BLKNO)
			rrlink = GistPageGetOpaque(dist->page)->rightlink;

		START_CRIT_SECTION();

		/*
		 * must mark buffers dirty before XLogInsert, even though we'll still
		 * be changing their opaque fields below. set up right links: each
		 * page points at the next one in the list, and the last page
		 * inherits the saved rightlink.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			MarkBufferDirty(ptr->buffer);
			GistPageGetOpaque(ptr->page)->rightlink = (ptr->next) ?
				ptr->next->block.blkno : rrlink;
		}

		/* copy the temp page back over the split non-root page */
		if (state->stack->blkno != GIST_ROOT_BLKNO)
		{
			PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
			dist->page = BufferGetPage(dist->buffer);
		}

		if (!state->r->rd_istemp)
		{
			XLogRecPtr	recptr;
			XLogRecData *rdata;

			rdata = formSplitRdata(state->r->rd_node, state->stack->blkno,
								   is_leaf, &(state->key), dist);

			recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);

			/* every page produced by the split carries the record's LSN */
			for (ptr = dist; ptr; ptr = ptr->next)
			{
				PageSetLSN(ptr->page, recptr);
				PageSetTLI(ptr->page, ThisTimeLineID);
			}
		}
		else
		{
			/* temporary index: no WAL, stamp the placeholder LSN */
			for (ptr = dist; ptr; ptr = ptr->next)
			{
				PageSetLSN(ptr->page, XLogRecPtrForTemp);
			}
		}

		/* set up NSN */
		oldnsn = GistPageGetOpaque(dist->page)->nsn;
		if (state->stack->blkno == GIST_ROOT_BLKNO)
			/* on a root split there is no previous value; start from the LSN */
			oldnsn = PageGetLSN(dist->page);

		for (ptr = dist; ptr; ptr = ptr->next)
		{
			/* only the last page gets oldnsn; the others get their own LSN */
			GistPageGetOpaque(ptr->page)->nsn = (ptr->next) ?
				PageGetLSN(ptr->page) : oldnsn;
		}

		/*
		 * Release buffers.  On a root split every buffer in the list was
		 * allocated here, so release them all; otherwise skip the first
		 * one, which is the caller's state->stack->buffer.
		 */
		ptr = (state->stack->blkno == GIST_ROOT_BLKNO) ? dist : dist->next;
		for (; ptr; ptr = ptr->next)
			UnlockReleaseBuffer(ptr->buffer);

		if (state->stack->blkno == GIST_ROOT_BLKNO)
		{
			/* rebuild the root as an inner page holding the new downlinks */
			gistnewroot(state->r, state->stack->buffer, state->itup, state->ituplen, &(state->key));
			state->needInsertComplete = false;
		}

		END_CRIT_SECTION();
	}
	else
	{
		/* enough space: update the page in place */
		START_CRIT_SECTION();

		if (!is_leaf)
			PageIndexTupleDelete(state->stack->page, state->stack->childoffnum);
		gistfillbuffer(state->r, state->stack->page, state->itup, state->ituplen, InvalidOffsetNumber);

		MarkBufferDirty(state->stack->buffer);

		if (!state->r->rd_istemp)
		{
			/* offs[] is only read when noffs is set to 1 below */
			OffsetNumber noffs = 0,
						offs[1];
			XLogRecPtr	recptr;
			XLogRecData *rdata;

			if (!is_leaf)
			{
				/* only on an inner page is the previous version deleted */
				offs[0] = state->stack->childoffnum;
				noffs = 1;
			}

			rdata = formUpdateRdata(state->r->rd_node, state->stack->buffer,
									offs, noffs,
									state->itup, state->ituplen,
									&(state->key));

			recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata);
			PageSetLSN(state->stack->page, recptr);
			PageSetTLI(state->stack->page, ThisTimeLineID);
		}
		else
			PageSetLSN(state->stack->page, XLogRecPtrForTemp);

		/* an in-place update of the root clears needInsertComplete */
		if (state->stack->blkno == GIST_ROOT_BLKNO)
			state->needInsertComplete = false;

		END_CRIT_SECTION();

		if (state->ituplen > 1)
		{						/* previous is_splitted==true */

			/*
			 * The child was split, so form the union of its tuples for
			 * insertion into the parent.
			 */
			IndexTuple	newtup = gistunion(state->r, state->itup, state->ituplen, giststate);

			ItemPointerSetBlockNumber(&(newtup->t_tid), state->stack->blkno);
			state->itup[0] = newtup;
			state->ituplen = 1;
		}
		else if (is_leaf)
		{
			/*
			 * itup[0] stores the key used to adjust the parent; mark it
			 * valid so that the GistTupleIsInvalid check in
			 * gistgetadjusted() behaves correctly.
			 */
			ItemPointerSetBlockNumber(&(state->itup[0]->t_tid), state->stack->blkno);
			GistTupleSetValid(state->itup[0]);
		}
	}
	return is_splitted;
}
524 :
/*
 * gistfindleaf -- descend from state->stack to the leaf page into which
 * state->itup[0] should be inserted.
 *
 * On return state->stack is the stack entry for that leaf, linked to its
 * ancestors through ->parent.  Every page on the stack is pinned; the leaf
 * is exclusively locked and the ancestors have been unlocked again.
 */

static void
gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
{
	ItemId		iid;
	IndexTuple	idxtuple;
	GISTPageOpaque opaque;

	/*
	 * Walk down.  Pages are not kept locked for long, so we must be ready
	 * to recheck the path in a bad case.  Remember that page->lsn should
	 * never be invalid.
	 */
	for (;;)
	{
		/*
		 * The buffer is read only while the entry's lsn is still invalid.
		 * Entries we come back to after stepping up to the parent already
		 * had their lsn set below and keep their buffer.
		 */
		if (XLogRecPtrIsInvalid(state->stack->lsn))
			state->stack->buffer = ReadBuffer(state->r, state->stack->blkno);
		LockBuffer(state->stack->buffer, GIST_SHARE);
		gistcheckpage(state->r, state->stack->buffer);

		state->stack->page = (Page) BufferGetPage(state->stack->buffer);
		opaque = GistPageGetOpaque(state->stack->page);

		state->stack->lsn = PageGetLSN(state->stack->page);
		Assert(state->r->rd_istemp || !XLogRecPtrIsInvalid(state->stack->lsn));

		if (state->stack->blkno != GIST_ROOT_BLKNO &&
			XLByteLT(state->stack->parent->lsn, opaque->nsn))
		{
			/*
			 * The page's NSN is newer than the LSN we remembered for its
			 * parent: a split of this non-root page is detected.  Go back
			 * up to the parent to choose the best child again.
			 */
			UnlockReleaseBuffer(state->stack->buffer);
			state->stack = state->stack->parent;
			continue;
		}

		if (!GistPageIsLeaf(state->stack->page))
		{
			/*
			 * This is an internal page, so continue to walk down the tree:
			 * pick the child via gistchoose(), push a new stack entry for
			 * it, and loop.  The key stored in this page may need to be
			 * adjusted later, either because the child splits or because
			 * of the newly inserted key below us; that is handled on the
			 * way back up by gistmakedeal(), not here.
			 */
			GISTInsertStack *item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));

			state->stack->childoffnum = gistchoose(state->r, state->stack->page, state->itup[0], giststate);

			iid = PageGetItemId(state->stack->page, state->stack->childoffnum);
			idxtuple = (IndexTuple) PageGetItem(state->stack->page, iid);
			item->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));

			/* drop the lock but keep the pin on the internal page */
			LockBuffer(state->stack->buffer, GIST_UNLOCK);

			item->parent = state->stack;
			item->child = NULL;
			/* NOTE(review): state->stack was dereferenced above, so this test is always true */
			if (state->stack)
				state->stack->child = item;
			state->stack = item;
		}
		else
		{
			/*
			 * Be careful: the share lock is dropped before the exclusive
			 * lock is taken, so the page may change in between.
			 */
			LockBuffer(state->stack->buffer, GIST_UNLOCK);
			LockBuffer(state->stack->buffer, GIST_EXCLUSIVE);
			state->stack->page = (Page) BufferGetPage(state->stack->buffer);
			opaque = GistPageGetOpaque(state->stack->page);

			if (state->stack->blkno == GIST_ROOT_BLKNO)
			{
				/*
				 * The only page that can turn from a leaf into an inner
				 * page is the root, so recheck it here.
				 */
				if (!GistPageIsLeaf(state->stack->page))
				{
					/*
					 * Very rare situation: while the page was unlocked, a
					 * one-page index grew.  Unlock and process the root
					 * again as an inner page.
					 */
					LockBuffer(state->stack->buffer, GIST_UNLOCK);
					continue;
				}

				/*
				 * No separate root-split check is needed: testing
				 * leaf/inner is enough to recognize a split of the root.
				 */

			}
			else if (XLByteLT(state->stack->parent->lsn, opaque->nsn))
			{
				/*
				 * A split happened during the unlock/lock window, so go
				 * back to the parent to find a better child.
				 */

				/* forget buffer */
				UnlockReleaseBuffer(state->stack->buffer);

				state->stack = state->stack->parent;
				continue;
			}

			/* refresh the remembered LSN now that we hold the X lock */
			state->stack->lsn = PageGetLSN(state->stack->page);

			/* ok, we found a leaf page and it is X-locked */
			break;
		}
	}

	/* now state->stack->(page, buffer and blkno) points to leaf page */
}
645 :
646 : /*
647 : * Traverse the tree to find path from root page to specified "child" block.
648 : *
649 : * returns from the beginning of closest parent;
650 : *
651 : * To prevent deadlocks, this should lock only one page simultaneously.
652 : */
653 : GISTInsertStack *
654 : gistFindPath(Relation r, BlockNumber child)
655 0 : {
656 : Page page;
657 : Buffer buffer;
658 : OffsetNumber i,
659 : maxoff;
660 : ItemId iid;
661 : IndexTuple idxtuple;
662 : GISTInsertStack *top,
663 : *tail,
664 : *ptr;
665 : BlockNumber blkno;
666 :
667 0 : top = tail = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
668 0 : top->blkno = GIST_ROOT_BLKNO;
669 :
670 0 : while (top && top->blkno != child)
671 : {
672 0 : buffer = ReadBuffer(r, top->blkno);
673 0 : LockBuffer(buffer, GIST_SHARE);
674 0 : gistcheckpage(r, buffer);
675 0 : page = (Page) BufferGetPage(buffer);
676 :
677 0 : if (GistPageIsLeaf(page))
678 : {
679 : /* we can safety go away, follows only leaf pages */
680 0 : UnlockReleaseBuffer(buffer);
681 0 : return NULL;
682 : }
683 :
684 0 : top->lsn = PageGetLSN(page);
685 :
686 0 : if (top->parent && XLByteLT(top->parent->lsn, GistPageGetOpaque(page)->nsn) &&
687 : GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
688 : {
689 : /* page splited while we thinking of... */
690 0 : ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
691 0 : ptr->blkno = GistPageGetOpaque(page)->rightlink;
692 0 : ptr->childoffnum = InvalidOffsetNumber;
693 0 : ptr->parent = top;
694 0 : ptr->next = NULL;
695 0 : tail->next = ptr;
696 0 : tail = ptr;
697 : }
698 :
699 0 : maxoff = PageGetMaxOffsetNumber(page);
700 :
701 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
702 : {
703 0 : iid = PageGetItemId(page, i);
704 0 : idxtuple = (IndexTuple) PageGetItem(page, iid);
705 0 : blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
706 0 : if (blkno == child)
707 : {
708 0 : OffsetNumber poff = InvalidOffsetNumber;
709 :
710 : /* make childs links */
711 0 : ptr = top;
712 0 : while (ptr->parent)
713 : {
714 : /* set child link */
715 0 : ptr->parent->child = ptr;
716 : /* move childoffnum.. */
717 0 : if (ptr == top)
718 : {
719 : /* first iteration */
720 0 : poff = ptr->parent->childoffnum;
721 0 : ptr->parent->childoffnum = ptr->childoffnum;
722 : }
723 : else
724 : {
725 0 : OffsetNumber tmp = ptr->parent->childoffnum;
726 :
727 0 : ptr->parent->childoffnum = poff;
728 0 : poff = tmp;
729 : }
730 0 : ptr = ptr->parent;
731 : }
732 0 : top->childoffnum = i;
733 0 : UnlockReleaseBuffer(buffer);
734 0 : return top;
735 : }
736 : else
737 : {
738 : /* Install next inner page to the end of stack */
739 0 : ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
740 0 : ptr->blkno = blkno;
741 0 : ptr->childoffnum = i; /* set offsetnumber of child to child
742 : * !!! */
743 0 : ptr->parent = top;
744 0 : ptr->next = NULL;
745 0 : tail->next = ptr;
746 0 : tail = ptr;
747 : }
748 : }
749 :
750 0 : UnlockReleaseBuffer(buffer);
751 0 : top = top->next;
752 : }
753 :
754 0 : return NULL;
755 : }
756 :
757 :
758 : /*
759 : * Returns X-locked parent of stack page
760 : */
761 :
762 : static void
763 : gistFindCorrectParent(Relation r, GISTInsertStack *child)
764 10078 : {
765 10078 : GISTInsertStack *parent = child->parent;
766 :
767 10078 : LockBuffer(parent->buffer, GIST_EXCLUSIVE);
768 10078 : gistcheckpage(r, parent->buffer);
769 10078 : parent->page = (Page) BufferGetPage(parent->buffer);
770 :
771 : /* here we don't need to distinguish between split and page update */
772 10078 : if (parent->childoffnum == InvalidOffsetNumber || !XLByteEQ(parent->lsn, PageGetLSN(parent->page)))
773 : {
774 : /* parent is changed, look child in right links until found */
775 : OffsetNumber i,
776 : maxoff;
777 : ItemId iid;
778 : IndexTuple idxtuple;
779 : GISTInsertStack *ptr;
780 :
781 : while (true)
782 : {
783 0 : maxoff = PageGetMaxOffsetNumber(parent->page);
784 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
785 : {
786 0 : iid = PageGetItemId(parent->page, i);
787 0 : idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
788 0 : if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
789 : {
790 : /* yes!!, found */
791 0 : parent->childoffnum = i;
792 0 : return;
793 : }
794 : }
795 :
796 0 : parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
797 0 : UnlockReleaseBuffer(parent->buffer);
798 0 : if (parent->blkno == InvalidBlockNumber)
799 :
800 : /*
801 : * end of chain and still didn't found parent, It's very-very
802 : * rare situation when root splited
803 : */
804 0 : break;
805 0 : parent->buffer = ReadBuffer(r, parent->blkno);
806 0 : LockBuffer(parent->buffer, GIST_EXCLUSIVE);
807 0 : gistcheckpage(r, parent->buffer);
808 0 : parent->page = (Page) BufferGetPage(parent->buffer);
809 0 : }
810 :
811 : /*
812 : * awful!!, we need search tree to find parent ... , but before we
813 : * should release all old parent
814 : */
815 :
816 0 : ptr = child->parent->parent; /* child->parent already released
817 : * above */
818 0 : while (ptr)
819 : {
820 0 : ReleaseBuffer(ptr->buffer);
821 0 : ptr = ptr->parent;
822 : }
823 :
824 : /* ok, find new path */
825 0 : ptr = parent = gistFindPath(r, child->blkno);
826 : Assert(ptr != NULL);
827 :
828 : /* read all buffers as expected by caller */
829 : /* note we don't lock them or gistcheckpage them here! */
830 0 : while (ptr)
831 : {
832 0 : ptr->buffer = ReadBuffer(r, ptr->blkno);
833 0 : ptr->page = (Page) BufferGetPage(ptr->buffer);
834 0 : ptr = ptr->parent;
835 : }
836 :
837 : /* install new chain of parents to stack */
838 0 : child->parent = parent;
839 0 : parent->child = child;
840 :
841 : /* make recursive call to normal processing */
842 0 : gistFindCorrectParent(r, child);
843 : }
844 :
845 : return;
846 : }
847 :
848 : void
849 : gistmakedeal(GISTInsertState *state, GISTSTATE *giststate)
850 11601 : {
851 : int is_splitted;
852 : ItemId iid;
853 : IndexTuple oldtup,
854 : newtup;
855 :
856 : /* walk up */
857 : while (true)
858 : {
859 : /*
860 : * After this call: 1. if child page was splited, then itup contains
861 : * keys for each page 2. if child page wasn't splited, then itup
862 : * contains additional for adjustment of current key
863 : */
864 :
865 11601 : if (state->stack->parent)
866 : {
867 : /*
868 : * X-lock parent page before proceed child, gistFindCorrectParent
869 : * should find and lock it
870 : */
871 10078 : gistFindCorrectParent(state->r, state->stack);
872 : }
873 11601 : is_splitted = gistplacetopage(state, giststate);
874 :
875 : /* parent locked above, so release child buffer */
876 11601 : UnlockReleaseBuffer(state->stack->buffer);
877 :
878 : /* pop parent page from stack */
879 11601 : state->stack = state->stack->parent;
880 :
881 : /* stack is void */
882 11601 : if (!state->stack)
883 1523 : break;
884 :
885 : /*
886 : * child did not split, so we can check is it needed to update parent
887 : * tuple
888 : */
889 10078 : if (!is_splitted)
890 : {
891 : /* parent's tuple */
892 9973 : iid = PageGetItemId(state->stack->page, state->stack->childoffnum);
893 9973 : oldtup = (IndexTuple) PageGetItem(state->stack->page, iid);
894 9973 : newtup = gistgetadjusted(state->r, oldtup, state->itup[0], giststate);
895 :
896 9973 : if (!newtup)
897 : { /* not need to update key */
898 9133 : LockBuffer(state->stack->buffer, GIST_UNLOCK);
899 9133 : break;
900 : }
901 :
902 840 : state->itup[0] = newtup;
903 : }
904 : } /* while */
905 :
906 : /* release all parent buffers */
907 19789 : while (state->stack)
908 : {
909 9133 : ReleaseBuffer(state->stack->buffer);
910 9133 : state->stack = state->stack->parent;
911 : }
912 :
913 : /* say to xlog that insert is completed */
914 10656 : if (state->needInsertComplete && !state->r->rd_istemp)
915 3080 : gistxlogInsertCompletion(state->r->rd_node, &(state->key), 1);
916 10656 : }
917 :
918 : /*
919 : * gistSplit -- split a page in the tree and fill struct
920 : * used for XLOG and real writes buffers. Function is recursive, ie
921 : * it will split page until keys will fit in every page.
922 : */
923 : SplitedPageLayout *
924 : gistSplit(Relation r,
925 : Page page,
926 : IndexTuple *itup, /* contains compressed entry */
927 : int len,
928 : GISTSTATE *giststate)
929 109 : {
930 : IndexTuple *lvectup,
931 : *rvectup;
932 : GistSplitVector v;
933 : GistEntryVector *entryvec;
934 : int i;
935 109 : SplitedPageLayout *res = NULL;
936 :
937 : /* generate the item array */
938 109 : entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
939 109 : entryvec->n = len + 1;
940 :
941 109 : memset(v.spl_lisnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
942 109 : memset(v.spl_risnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
943 109 : gistSplitByKey(r, page, itup, len, giststate,
944 : &v, entryvec, 0);
945 :
946 : /* form left and right vector */
947 109 : lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
948 109 : rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
949 :
950 7993 : for (i = 0; i < v.splitVector.spl_nleft; i++)
951 7884 : lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
952 :
953 7995 : for (i = 0; i < v.splitVector.spl_nright; i++)
954 7886 : rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
955 :
956 : /* finalize splitting (may need another split) */
957 109 : if (!gistfitpage(rvectup, v.splitVector.spl_nright))
958 : {
959 0 : res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
960 : }
961 : else
962 : {
963 109 : ROTATEDIST(res);
964 109 : res->block.num = v.splitVector.spl_nright;
965 109 : res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
966 109 : res->itup = (v.spl_rightvalid) ? gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false)
967 : : gist_form_invalid_tuple(GIST_ROOT_BLKNO);
968 : }
969 :
970 109 : if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
971 : {
972 : SplitedPageLayout *resptr,
973 : *subres;
974 :
975 0 : resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
976 :
977 : /* install on list's tail */
978 0 : while (resptr->next)
979 0 : resptr = resptr->next;
980 :
981 0 : resptr->next = res;
982 0 : res = subres;
983 : }
984 : else
985 : {
986 109 : ROTATEDIST(res);
987 109 : res->block.num = v.splitVector.spl_nleft;
988 109 : res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
989 109 : res->itup = (v.spl_leftvalid) ? gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false)
990 : : gist_form_invalid_tuple(GIST_ROOT_BLKNO);
991 : }
992 :
993 109 : return res;
994 : }
995 :
996 : /*
997 : * buffer must be pinned and locked by caller
998 : */
999 : void
1000 : gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer key)
1001 4 : {
1002 : Page page;
1003 :
1004 : Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
1005 4 : page = BufferGetPage(buffer);
1006 :
1007 4 : START_CRIT_SECTION();
1008 :
1009 4 : GISTInitBuffer(buffer, 0);
1010 4 : gistfillbuffer(r, page, itup, len, FirstOffsetNumber);
1011 :
1012 4 : MarkBufferDirty(buffer);
1013 :
1014 4 : if (!r->rd_istemp)
1015 : {
1016 : XLogRecPtr recptr;
1017 : XLogRecData *rdata;
1018 :
1019 2 : rdata = formUpdateRdata(r->rd_node, buffer,
1020 : NULL, 0,
1021 : itup, len, key);
1022 :
1023 2 : recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_NEW_ROOT, rdata);
1024 2 : PageSetLSN(page, recptr);
1025 2 : PageSetTLI(page, ThisTimeLineID);
1026 : }
1027 : else
1028 2 : PageSetLSN(page, XLogRecPtrForTemp);
1029 :
1030 4 : END_CRIT_SECTION();
1031 4 : }
1032 :
1033 : void
1034 : initGISTstate(GISTSTATE *giststate, Relation index)
1035 24 : {
1036 : int i;
1037 :
1038 24 : if (index->rd_att->natts > INDEX_MAX_KEYS)
1039 0 : elog(ERROR, "numberOfAttributes %d > %d",
1040 : index->rd_att->natts, INDEX_MAX_KEYS);
1041 :
1042 24 : giststate->tupdesc = index->rd_att;
1043 :
1044 48 : for (i = 0; i < index->rd_att->natts; i++)
1045 : {
1046 24 : fmgr_info_copy(&(giststate->consistentFn[i]),
1047 : index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
1048 : CurrentMemoryContext);
1049 24 : fmgr_info_copy(&(giststate->unionFn[i]),
1050 : index_getprocinfo(index, i + 1, GIST_UNION_PROC),
1051 : CurrentMemoryContext);
1052 24 : fmgr_info_copy(&(giststate->compressFn[i]),
1053 : index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
1054 : CurrentMemoryContext);
1055 24 : fmgr_info_copy(&(giststate->decompressFn[i]),
1056 : index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
1057 : CurrentMemoryContext);
1058 24 : fmgr_info_copy(&(giststate->penaltyFn[i]),
1059 : index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
1060 : CurrentMemoryContext);
1061 24 : fmgr_info_copy(&(giststate->picksplitFn[i]),
1062 : index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
1063 : CurrentMemoryContext);
1064 24 : fmgr_info_copy(&(giststate->equalFn[i]),
1065 : index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
1066 : CurrentMemoryContext);
1067 : }
1068 24 : }
1069 :
/*
 * freeGISTstate -- counterpart of initGISTstate.
 *
 * Currently a no-op: the function does nothing with "giststate".
 */
void
freeGISTstate(GISTSTATE *giststate)
{
	/* no work */
}
|