1 : /*-------------------------------------------------------------------------
2 : *
3 : * gistget.c
4 : * fetch tuples from a GiST scan.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.68 2007/11/15 21:14:31 momjian Exp $
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/gist_private.h"
18 : #include "executor/execdebug.h"
19 : #include "pgstat.h"
20 : #include "utils/memutils.h"
21 :
22 :
23 : static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n,
24 : ScanDirection dir);
25 : static int gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples);
26 : static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan,
27 : OffsetNumber offset);
28 :
29 : static void
30 : killtuple(Relation r, GISTScanOpaque so, ItemPointer iptr)
31 0 : {
32 0 : Buffer buffer = so->curbuf;
33 :
34 : for (;;)
35 : {
36 : Page p;
37 : BlockNumber blkno;
38 : OffsetNumber offset,
39 : maxoff;
40 :
41 0 : LockBuffer(buffer, GIST_SHARE);
42 0 : gistcheckpage(r, buffer);
43 0 : p = (Page) BufferGetPage(buffer);
44 :
45 0 : if (buffer == so->curbuf && XLByteEQ(so->stack->lsn, PageGetLSN(p)))
46 : {
47 : /* page unchanged, so all is simple */
48 0 : offset = ItemPointerGetOffsetNumber(iptr);
49 0 : ItemIdMarkDead(PageGetItemId(p, offset));
50 0 : SetBufferCommitInfoNeedsSave(buffer);
51 0 : LockBuffer(buffer, GIST_UNLOCK);
52 0 : break;
53 : }
54 :
55 0 : maxoff = PageGetMaxOffsetNumber(p);
56 :
57 0 : for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
58 : {
59 0 : IndexTuple ituple = (IndexTuple) PageGetItem(p, PageGetItemId(p, offset));
60 :
61 0 : if (ItemPointerEquals(&(ituple->t_tid), iptr))
62 : {
63 : /* found */
64 0 : ItemIdMarkDead(PageGetItemId(p, offset));
65 0 : SetBufferCommitInfoNeedsSave(buffer);
66 0 : LockBuffer(buffer, GIST_UNLOCK);
67 0 : if (buffer != so->curbuf)
68 0 : ReleaseBuffer(buffer);
69 : return;
70 : }
71 : }
72 :
73 : /* follow right link */
74 :
75 : /*
76 : * ??? is it good? if tuple dropped by concurrent vacuum, we will read
77 : * all leaf pages...
78 : */
79 0 : blkno = GistPageGetOpaque(p)->rightlink;
80 0 : LockBuffer(buffer, GIST_UNLOCK);
81 0 : if (buffer != so->curbuf)
82 0 : ReleaseBuffer(buffer);
83 :
84 0 : if (blkno == InvalidBlockNumber)
85 : /* can't found, dropped by somebody else */
86 0 : return;
87 0 : buffer = ReadBuffer(r, blkno);
88 0 : }
89 : }
90 :
91 : /*
92 : * gistgettuple() -- Get the next tuple in the scan
93 : */
94 : Datum
95 : gistgettuple(PG_FUNCTION_ARGS)
96 363 : {
97 363 : IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
98 363 : ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
99 : GISTScanOpaque so;
100 : ItemPointerData tid;
101 : bool res;
102 :
103 363 : so = (GISTScanOpaque) scan->opaque;
104 :
105 : /*
106 : * If we have produced an index tuple in the past and the executor has
107 : * informed us we need to mark it as "killed", do so now.
108 : */
109 363 : if (scan->kill_prior_tuple && ItemPointerIsValid(&(so->curpos)))
110 0 : killtuple(scan->indexRelation, so, &(so->curpos));
111 :
112 : /*
113 : * Get the next tuple that matches the search key. If asked to skip killed
114 : * tuples, continue looping until we find a non-killed tuple that matches
115 : * the search key.
116 : */
117 363 : res = (gistnext(scan, dir, &tid, 1, scan->ignore_killed_tuples)) ? true : false;
118 :
119 363 : PG_RETURN_BOOL(res);
120 : }
121 :
122 : Datum
123 : gistgetmulti(PG_FUNCTION_ARGS)
124 5 : {
125 5 : IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
126 5 : ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
127 5 : int32 max_tids = PG_GETARG_INT32(2);
128 5 : int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
129 :
130 5 : *returned_tids = gistnext(scan, ForwardScanDirection, tids, max_tids, false);
131 :
132 5 : PG_RETURN_BOOL(*returned_tids == max_tids);
133 : }
134 :
135 : /*
136 : * Fetch a tuples that matchs the search key; this can be invoked
137 : * either to fetch the first such tuple or subsequent matching
138 : * tuples. Returns true iff a matching tuple was found.
139 : */
140 : static int
141 : gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids,
142 : int maxtids, bool ignore_killed_tuples)
143 368 : {
144 : Page p;
145 : OffsetNumber n;
146 : GISTScanOpaque so;
147 : GISTSearchStack *stk;
148 : IndexTuple it;
149 : GISTPageOpaque opaque;
150 368 : bool resetoffset = false;
151 368 : int ntids = 0;
152 :
153 368 : so = (GISTScanOpaque) scan->opaque;
154 :
155 368 : if (ItemPointerIsValid(&so->curpos) == false)
156 : {
157 : /* Being asked to fetch the first entry, so start at the root */
158 : Assert(so->curbuf == InvalidBuffer);
159 : Assert(so->stack == NULL);
160 :
161 17 : so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO);
162 :
163 17 : stk = so->stack = (GISTSearchStack *) palloc0(sizeof(GISTSearchStack));
164 :
165 17 : stk->next = NULL;
166 17 : stk->block = GIST_ROOT_BLKNO;
167 :
168 17 : pgstat_count_index_scan(scan->indexRelation);
169 : }
170 351 : else if (so->curbuf == InvalidBuffer)
171 : {
172 0 : return 0;
173 : }
174 :
175 : for (;;)
176 : {
177 : /* First of all, we need lock buffer */
178 : Assert(so->curbuf != InvalidBuffer);
179 520 : LockBuffer(so->curbuf, GIST_SHARE);
180 520 : gistcheckpage(scan->indexRelation, so->curbuf);
181 520 : p = BufferGetPage(so->curbuf);
182 520 : opaque = GistPageGetOpaque(p);
183 520 : resetoffset = false;
184 :
185 520 : if (XLogRecPtrIsInvalid(so->stack->lsn) || !XLByteEQ(so->stack->lsn, PageGetLSN(p)))
186 : {
187 : /* page changed from last visit or visit first time , reset offset */
188 169 : so->stack->lsn = PageGetLSN(p);
189 169 : resetoffset = true;
190 :
191 : /* check page split, occured from last visit or visit to parent */
192 169 : if (!XLogRecPtrIsInvalid(so->stack->parentlsn) &&
193 : XLByteLT(so->stack->parentlsn, opaque->nsn) &&
194 : opaque->rightlink != InvalidBlockNumber /* sanity check */ &&
195 : (so->stack->next == NULL || so->stack->next->block != opaque->rightlink) /* check if already
196 : added */ )
197 : {
198 : /* detect page split, follow right link to add pages */
199 :
200 0 : stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
201 0 : stk->next = so->stack->next;
202 0 : stk->block = opaque->rightlink;
203 0 : stk->parentlsn = so->stack->parentlsn;
204 0 : memset(&(stk->lsn), 0, sizeof(GistNSN));
205 0 : so->stack->next = stk;
206 : }
207 : }
208 :
209 : /* if page is empty, then just skip it */
210 520 : if (PageIsEmpty(p))
211 : {
212 0 : LockBuffer(so->curbuf, GIST_UNLOCK);
213 0 : stk = so->stack->next;
214 0 : pfree(so->stack);
215 0 : so->stack = stk;
216 :
217 0 : if (so->stack == NULL)
218 : {
219 0 : ReleaseBuffer(so->curbuf);
220 0 : so->curbuf = InvalidBuffer;
221 0 : return ntids;
222 : }
223 :
224 0 : so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
225 : stk->block);
226 0 : continue;
227 : }
228 :
229 520 : if (!GistPageIsLeaf(p) || resetoffset ||
230 : !ItemPointerIsValid(&so->curpos))
231 : {
232 169 : if (ScanDirectionIsBackward(dir))
233 0 : n = PageGetMaxOffsetNumber(p);
234 : else
235 169 : n = FirstOffsetNumber;
236 : }
237 : else
238 : {
239 351 : n = ItemPointerGetOffsetNumber(&(so->curpos));
240 :
241 351 : if (ScanDirectionIsBackward(dir))
242 0 : n = OffsetNumberPrev(n);
243 : else
244 351 : n = OffsetNumberNext(n);
245 : }
246 :
247 : /* wonderful, we can look at page */
248 :
249 : for (;;)
250 : {
251 958 : n = gistfindnext(scan, n, dir);
252 :
253 958 : if (!OffsetNumberIsValid(n))
254 : {
255 : /*
256 : * We ran out of matching index entries on the current page,
257 : * so pop the top stack entry and use it to continue the
258 : * search.
259 : */
260 169 : LockBuffer(so->curbuf, GIST_UNLOCK);
261 169 : stk = so->stack->next;
262 169 : pfree(so->stack);
263 169 : so->stack = stk;
264 :
265 : /* If we're out of stack entries, we're done */
266 :
267 169 : if (so->stack == NULL)
268 : {
269 17 : ReleaseBuffer(so->curbuf);
270 17 : so->curbuf = InvalidBuffer;
271 17 : return ntids;
272 : }
273 :
274 152 : so->curbuf = ReleaseAndReadBuffer(so->curbuf,
275 : scan->indexRelation,
276 : stk->block);
277 : /* XXX go up */
278 152 : break;
279 : }
280 :
281 789 : if (GistPageIsLeaf(p))
282 : {
283 : /*
284 : * We've found a matching index entry in a leaf page, so
285 : * return success. Note that we keep "curbuf" pinned so that
286 : * we can efficiently resume the index scan later.
287 : */
288 :
289 637 : ItemPointerSet(&(so->curpos),
290 : BufferGetBlockNumber(so->curbuf), n);
291 :
292 637 : if (!(ignore_killed_tuples && ItemIdIsDead(PageGetItemId(p, n))))
293 : {
294 637 : it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
295 637 : tids[ntids] = scan->xs_ctup.t_self = it->t_tid;
296 637 : ntids++;
297 :
298 637 : if (ntids == maxtids)
299 : {
300 351 : LockBuffer(so->curbuf, GIST_UNLOCK);
301 351 : return ntids;
302 : }
303 : }
304 : }
305 : else
306 : {
307 : /*
308 : * We've found an entry in an internal node whose key is
309 : * consistent with the search key, so push it to stack
310 : */
311 :
312 152 : stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
313 :
314 152 : it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
315 152 : stk->block = ItemPointerGetBlockNumber(&(it->t_tid));
316 152 : memset(&(stk->lsn), 0, sizeof(GistNSN));
317 152 : stk->parentlsn = so->stack->lsn;
318 :
319 152 : stk->next = so->stack->next;
320 152 : so->stack->next = stk;
321 :
322 : }
323 :
324 438 : if (ScanDirectionIsBackward(dir))
325 0 : n = OffsetNumberPrev(n);
326 : else
327 438 : n = OffsetNumberNext(n);
328 : }
329 : }
330 :
331 : return ntids;
332 : }
333 :
334 : /*
335 : * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
336 : *
337 : * We must decompress the key in the IndexTuple before passing it to the
338 : * sk_func (and we have previously overwritten the sk_func to use the
339 : * user-defined Consistent method, so we actually are invoking that).
340 : *
341 : * Note that this function is always invoked in a short-lived memory context,
342 : * so we don't need to worry about cleaning up allocated memory, either here
343 : * or in the implementation of any Consistent methods.
344 : */
345 : static bool
346 : gistindex_keytest(IndexTuple tuple,
347 : IndexScanDesc scan,
348 : OffsetNumber offset)
349 7513 : {
350 7513 : int keySize = scan->numberOfKeys;
351 7513 : ScanKey key = scan->keyData;
352 7513 : Relation r = scan->indexRelation;
353 : GISTScanOpaque so;
354 : Page p;
355 : GISTSTATE *giststate;
356 :
357 7513 : so = (GISTScanOpaque) scan->opaque;
358 7513 : giststate = so->giststate;
359 7513 : p = BufferGetPage(so->curbuf);
360 :
361 : IncrIndexProcessed();
362 :
363 : /*
364 : * Tuple doesn't restore after crash recovery because of incomplete insert
365 : */
366 7513 : if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
367 0 : return true;
368 :
369 8302 : while (keySize > 0)
370 : {
371 : Datum datum;
372 : bool isNull;
373 : Datum test;
374 : GISTENTRY de;
375 :
376 7513 : datum = index_getattr(tuple,
377 : key->sk_attno,
378 : giststate->tupdesc,
379 : &isNull);
380 :
381 7513 : if (key->sk_flags & SK_ISNULL)
382 : {
383 : /*
384 : * On non-leaf page we can't conclude that child hasn't NULL
385 : * values because of assumption in GiST: uinon (VAL, NULL) is VAL
386 : * But if on non-leaf page key IS NULL then all childs has NULL.
387 : */
388 :
389 : Assert(key->sk_flags & SK_SEARCHNULL);
390 :
391 3410 : if (GistPageIsLeaf(p) && !isNull)
392 3100 : return false;
393 : }
394 4103 : else if (isNull)
395 : {
396 4 : return false;
397 : }
398 : else
399 : {
400 :
401 4099 : gistdentryinit(giststate, key->sk_attno - 1, &de,
402 : datum, r, p, offset,
403 : FALSE, isNull);
404 :
405 : /*
406 : * Call the Consistent function to evaluate the test. The
407 : * arguments are the index datum (as a GISTENTRY*), the comparison
408 : * datum, and the comparison operator's strategy number and
409 : * subtype from pg_amop.
410 : *
411 : * (Presently there's no need to pass the subtype since it'll
412 : * always be zero, but might as well pass it for possible future
413 : * use.)
414 : */
415 4099 : test = FunctionCall4(&key->sk_func,
416 : PointerGetDatum(&de),
417 : key->sk_argument,
418 : Int32GetDatum(key->sk_strategy),
419 : ObjectIdGetDatum(key->sk_subtype));
420 :
421 4099 : if (!DatumGetBool(test))
422 3620 : return false;
423 : }
424 :
425 789 : keySize--;
426 789 : key++;
427 : }
428 :
429 789 : return true;
430 : }
431 :
432 : /*
433 : * Return the offset of the first index entry that is consistent with
434 : * the search key after offset 'n' in the current page. If there are
435 : * no more consistent entries, return InvalidOffsetNumber.
436 : * Page should be locked....
437 : */
438 : static OffsetNumber
439 : gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir)
440 958 : {
441 : OffsetNumber maxoff;
442 : IndexTuple it;
443 : GISTScanOpaque so;
444 : MemoryContext oldcxt;
445 : Page p;
446 :
447 958 : so = (GISTScanOpaque) scan->opaque;
448 958 : p = BufferGetPage(so->curbuf);
449 958 : maxoff = PageGetMaxOffsetNumber(p);
450 :
451 : /*
452 : * Make sure we're in a short-lived memory context when we invoke a
453 : * user-supplied GiST method in gistindex_keytest(), so we don't leak
454 : * memory
455 : */
456 1916 : oldcxt = MemoryContextSwitchTo(so->tempCxt);
457 :
458 : /*
459 : * If we modified the index during the scan, we may have a pointer to a
460 : * ghost tuple, before the scan. If this is the case, back up one.
461 : */
462 958 : if (so->flags & GS_CURBEFORE)
463 : {
464 0 : so->flags &= ~GS_CURBEFORE;
465 0 : n = OffsetNumberPrev(n);
466 : }
467 :
468 7682 : while (n >= FirstOffsetNumber && n <= maxoff)
469 : {
470 7513 : it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
471 7513 : if (gistindex_keytest(it, scan, n))
472 789 : break;
473 :
474 6724 : if (ScanDirectionIsBackward(dir))
475 0 : n = OffsetNumberPrev(n);
476 : else
477 6724 : n = OffsetNumberNext(n);
478 : }
479 :
480 : MemoryContextSwitchTo(oldcxt);
481 958 : MemoryContextReset(so->tempCxt);
482 :
483 : /*
484 : * If we found a matching entry, return its offset; otherwise return
485 : * InvalidOffsetNumber to inform the caller to go to the next page.
486 : */
487 958 : if (n >= FirstOffsetNumber && n <= maxoff)
488 789 : return n;
489 : else
490 169 : return InvalidOffsetNumber;
491 : }
|