1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginbtree.c
4 : * page utilities routines for the postgres inverted index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.10 2007/11/15 21:14:31 momjian Exp $
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 : #include "access/gin.h"
17 : #include "miscadmin.h"
18 :
19 : /*
20 : * Locks buffer by needed method for search.
21 : */
22 : static int
23 : ginTraverseLock(Buffer buffer, bool searchMode)
24 2499 : {
25 : Page page;
26 2499 : int access = GIN_SHARE;
27 :
28 2499 : LockBuffer(buffer, GIN_SHARE);
29 2499 : page = BufferGetPage(buffer);
30 2499 : if (GinPageIsLeaf(page))
31 : {
32 1513 : if (searchMode == FALSE)
33 : {
34 : /* we should relock our page */
35 1455 : LockBuffer(buffer, GIN_UNLOCK);
36 1455 : LockBuffer(buffer, GIN_EXCLUSIVE);
37 :
38 : /* But root can become non-leaf during relock */
39 1455 : if (!GinPageIsLeaf(page))
40 : {
41 : /* restore old lock type (very rare) */
42 0 : LockBuffer(buffer, GIN_UNLOCK);
43 0 : LockBuffer(buffer, GIN_SHARE);
44 : }
45 : else
46 1455 : access = GIN_EXCLUSIVE;
47 : }
48 : }
49 :
50 2499 : return access;
51 : }
52 :
53 : GinBtreeStack *
54 : ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno)
55 1513 : {
56 1513 : GinBtreeStack *stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
57 :
58 1513 : stack->blkno = blkno;
59 1513 : stack->buffer = ReadBuffer(btree->index, stack->blkno);
60 1513 : stack->parent = NULL;
61 1513 : stack->predictNumber = 1;
62 :
63 1513 : ginTraverseLock(stack->buffer, btree->searchMode);
64 :
65 1513 : return stack;
66 : }
67 :
68 : /*
69 : * Locates leaf page contained tuple
70 : */
71 : GinBtreeStack *
72 : ginFindLeafPage(GinBtree btree, GinBtreeStack *stack)
73 1513 : {
74 1513 : bool isfirst = TRUE;
75 : BlockNumber rootBlkno;
76 :
77 1513 : if (!stack)
78 1490 : stack = ginPrepareFindLeafPage(btree, GIN_ROOT_BLKNO);
79 1513 : rootBlkno = stack->blkno;
80 :
81 : for (;;)
82 : {
83 : Page page;
84 : BlockNumber child;
85 2499 : int access = GIN_SHARE;
86 :
87 2499 : stack->off = InvalidOffsetNumber;
88 :
89 2499 : page = BufferGetPage(stack->buffer);
90 :
91 2499 : if (isfirst)
92 : {
93 1513 : if (GinPageIsLeaf(page) && !btree->searchMode)
94 509 : access = GIN_EXCLUSIVE;
95 1513 : isfirst = FALSE;
96 : }
97 : else
98 986 : access = ginTraverseLock(stack->buffer, btree->searchMode);
99 :
100 : /*
101 : * ok, page is correctly locked, we should check to move right ..,
102 : * root never has a right link, so small optimization
103 : */
104 2499 : while (btree->fullScan == FALSE && stack->blkno != rootBlkno && btree->isMoveRight(btree, page))
105 : {
106 0 : BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
107 :
108 0 : if (rightlink == InvalidBlockNumber)
109 : /* rightmost page */
110 0 : break;
111 :
112 0 : stack->blkno = rightlink;
113 0 : LockBuffer(stack->buffer, GIN_UNLOCK);
114 0 : stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
115 0 : LockBuffer(stack->buffer, access);
116 0 : page = BufferGetPage(stack->buffer);
117 : }
118 :
119 2499 : if (GinPageIsLeaf(page)) /* we found, return locked page */
120 1513 : return stack;
121 :
122 : /* now we have correct buffer, try to find child */
123 986 : child = btree->findChildPage(btree, stack);
124 :
125 986 : LockBuffer(stack->buffer, GIN_UNLOCK);
126 : Assert(child != InvalidBlockNumber);
127 : Assert(stack->blkno != child);
128 :
129 986 : if (btree->searchMode)
130 : {
131 : /* in search mode we may forget path to leaf */
132 40 : stack->blkno = child;
133 40 : stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
134 : }
135 : else
136 : {
137 946 : GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
138 :
139 946 : ptr->parent = stack;
140 946 : stack = ptr;
141 946 : stack->blkno = child;
142 946 : stack->buffer = ReadBuffer(btree->index, stack->blkno);
143 946 : stack->predictNumber = 1;
144 : }
145 : }
146 :
147 : /* keep compiler happy */
148 : return NULL;
149 : }
150 :
151 : void
152 : freeGinBtreeStack(GinBtreeStack *stack)
153 1511 : {
154 3986 : while (stack)
155 : {
156 964 : GinBtreeStack *tmp = stack->parent;
157 :
158 964 : if (stack->buffer != InvalidBuffer)
159 964 : ReleaseBuffer(stack->buffer);
160 :
161 964 : pfree(stack);
162 964 : stack = tmp;
163 : }
164 1511 : }
165 :
166 : /*
167 : * Try to find parent for current stack position, returns correct
168 : * parent and child's offset in stack->parent.
169 : * Function should never release root page to prevent conflicts
170 : * with vacuum process
171 : */
172 : void
173 : findParents(GinBtree btree, GinBtreeStack *stack,
174 : BlockNumber rootBlkno)
175 0 : {
176 :
177 : Page page;
178 : Buffer buffer;
179 : BlockNumber blkno,
180 : leftmostBlkno;
181 : OffsetNumber offset;
182 0 : GinBtreeStack *root = stack->parent;
183 : GinBtreeStack *ptr;
184 :
185 0 : if (!root)
186 : {
187 : /* XLog mode... */
188 0 : root = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
189 0 : root->blkno = rootBlkno;
190 0 : root->buffer = ReadBuffer(btree->index, rootBlkno);
191 0 : LockBuffer(root->buffer, GIN_EXCLUSIVE);
192 0 : root->parent = NULL;
193 : }
194 : else
195 : {
196 : /*
197 : * find root, we should not release root page until update is
198 : * finished!!
199 : */
200 0 : while (root->parent)
201 : {
202 0 : ReleaseBuffer(root->buffer);
203 0 : root = root->parent;
204 : }
205 :
206 : Assert(root->blkno == rootBlkno);
207 : Assert(BufferGetBlockNumber(root->buffer) == rootBlkno);
208 0 : LockBuffer(root->buffer, GIN_EXCLUSIVE);
209 : }
210 0 : root->off = InvalidOffsetNumber;
211 :
212 0 : page = BufferGetPage(root->buffer);
213 : Assert(!GinPageIsLeaf(page));
214 :
215 : /* check trivial case */
216 0 : if ((root->off = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) != InvalidOffsetNumber)
217 : {
218 0 : stack->parent = root;
219 0 : return;
220 : }
221 :
222 0 : leftmostBlkno = blkno = btree->getLeftMostPage(btree, page);
223 0 : LockBuffer(root->buffer, GIN_UNLOCK);
224 : Assert(blkno != InvalidBlockNumber);
225 :
226 :
227 : for (;;)
228 : {
229 0 : buffer = ReadBuffer(btree->index, blkno);
230 0 : LockBuffer(buffer, GIN_EXCLUSIVE);
231 0 : page = BufferGetPage(buffer);
232 0 : if (GinPageIsLeaf(page))
233 0 : elog(ERROR, "Lost path");
234 :
235 0 : leftmostBlkno = btree->getLeftMostPage(btree, page);
236 :
237 0 : while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
238 : {
239 0 : blkno = GinPageGetOpaque(page)->rightlink;
240 0 : LockBuffer(buffer, GIN_UNLOCK);
241 0 : ReleaseBuffer(buffer);
242 0 : if (blkno == InvalidBlockNumber)
243 0 : break;
244 0 : buffer = ReadBuffer(btree->index, blkno);
245 0 : LockBuffer(buffer, GIN_EXCLUSIVE);
246 0 : page = BufferGetPage(buffer);
247 : }
248 :
249 0 : if (blkno != InvalidBlockNumber)
250 : {
251 0 : ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
252 0 : ptr->blkno = blkno;
253 0 : ptr->buffer = buffer;
254 0 : ptr->parent = root; /* it's may be wrong, but in next call we will
255 : * correct */
256 0 : ptr->off = offset;
257 0 : stack->parent = ptr;
258 0 : return;
259 : }
260 :
261 0 : blkno = leftmostBlkno;
262 0 : }
263 : }
264 :
265 : /*
266 : * Insert value (stored in GinBtree) to tree described by stack
267 : */
268 : void
269 : ginInsertValue(GinBtree btree, GinBtreeStack *stack)
270 1455 : {
271 1455 : GinBtreeStack *parent = stack;
272 1455 : BlockNumber rootBlkno = InvalidBuffer;
273 : Page page,
274 : rpage,
275 : lpage;
276 :
277 : /* remember root BlockNumber */
278 5311 : while (parent)
279 : {
280 2401 : rootBlkno = parent->blkno;
281 2401 : parent = parent->parent;
282 : }
283 :
284 1495 : while (stack)
285 : {
286 : XLogRecData *rdata;
287 : BlockNumber savedRightLink;
288 :
289 1495 : page = BufferGetPage(stack->buffer);
290 1495 : savedRightLink = GinPageGetOpaque(page)->rightlink;
291 :
292 1495 : if (btree->isEnoughSpace(btree, stack->buffer, stack->off))
293 : {
294 1453 : START_CRIT_SECTION();
295 1453 : btree->placeToPage(btree, stack->buffer, stack->off, &rdata);
296 :
297 1453 : MarkBufferDirty(stack->buffer);
298 :
299 1453 : if (!btree->index->rd_istemp)
300 : {
301 : XLogRecPtr recptr;
302 :
303 1453 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
304 1453 : PageSetLSN(page, recptr);
305 1453 : PageSetTLI(page, ThisTimeLineID);
306 : }
307 :
308 1453 : UnlockReleaseBuffer(stack->buffer);
309 1453 : END_CRIT_SECTION();
310 :
311 1453 : freeGinBtreeStack(stack->parent);
312 1453 : return;
313 : }
314 : else
315 : {
316 42 : Buffer rbuffer = GinNewBuffer(btree->index);
317 : Page newlpage;
318 :
319 : /*
320 : * newlpage is a pointer to memory page, it doesn't associate with
321 : * buffer, stack->buffer should be untouched
322 : */
323 42 : newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);
324 :
325 :
326 42 : ((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
327 :
328 42 : parent = stack->parent;
329 :
330 42 : if (parent == NULL)
331 : {
332 : /*
333 : * split root, so we need to allocate new left page and place
334 : * pointer on root to left and right page
335 : */
336 2 : Buffer lbuffer = GinNewBuffer(btree->index);
337 :
338 2 : ((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE;
339 2 : ((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber;
340 :
341 :
342 2 : page = BufferGetPage(stack->buffer);
343 2 : lpage = BufferGetPage(lbuffer);
344 2 : rpage = BufferGetPage(rbuffer);
345 :
346 2 : GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber;
347 2 : GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
348 2 : ((ginxlogSplit *) (rdata->data))->lblkno = BufferGetBlockNumber(lbuffer);
349 :
350 2 : START_CRIT_SECTION();
351 :
352 2 : GinInitBuffer(stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF);
353 2 : PageRestoreTempPage(newlpage, lpage);
354 2 : btree->fillRoot(btree, stack->buffer, lbuffer, rbuffer);
355 :
356 2 : MarkBufferDirty(rbuffer);
357 2 : MarkBufferDirty(lbuffer);
358 2 : MarkBufferDirty(stack->buffer);
359 :
360 2 : if (!btree->index->rd_istemp)
361 : {
362 : XLogRecPtr recptr;
363 :
364 2 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
365 2 : PageSetLSN(page, recptr);
366 2 : PageSetTLI(page, ThisTimeLineID);
367 2 : PageSetLSN(lpage, recptr);
368 2 : PageSetTLI(lpage, ThisTimeLineID);
369 2 : PageSetLSN(rpage, recptr);
370 2 : PageSetTLI(rpage, ThisTimeLineID);
371 : }
372 :
373 2 : UnlockReleaseBuffer(rbuffer);
374 2 : UnlockReleaseBuffer(lbuffer);
375 2 : UnlockReleaseBuffer(stack->buffer);
376 :
377 2 : END_CRIT_SECTION();
378 :
379 2 : return;
380 : }
381 : else
382 : {
383 : /* split non-root page */
384 40 : ((ginxlogSplit *) (rdata->data))->isRootSplit = FALSE;
385 40 : ((ginxlogSplit *) (rdata->data))->rrlink = savedRightLink;
386 :
387 40 : lpage = BufferGetPage(stack->buffer);
388 40 : rpage = BufferGetPage(rbuffer);
389 :
390 40 : GinPageGetOpaque(rpage)->rightlink = savedRightLink;
391 40 : GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
392 :
393 40 : START_CRIT_SECTION();
394 40 : PageRestoreTempPage(newlpage, lpage);
395 :
396 40 : MarkBufferDirty(rbuffer);
397 40 : MarkBufferDirty(stack->buffer);
398 :
399 40 : if (!btree->index->rd_istemp)
400 : {
401 : XLogRecPtr recptr;
402 :
403 40 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
404 40 : PageSetLSN(lpage, recptr);
405 40 : PageSetTLI(lpage, ThisTimeLineID);
406 40 : PageSetLSN(rpage, recptr);
407 40 : PageSetTLI(rpage, ThisTimeLineID);
408 : }
409 40 : UnlockReleaseBuffer(rbuffer);
410 40 : END_CRIT_SECTION();
411 : }
412 : }
413 :
414 40 : btree->isDelete = FALSE;
415 :
416 : /* search parent to lock */
417 40 : LockBuffer(parent->buffer, GIN_EXCLUSIVE);
418 :
419 : /* move right if it's needed */
420 40 : page = BufferGetPage(parent->buffer);
421 80 : while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
422 : {
423 0 : BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
424 :
425 0 : LockBuffer(parent->buffer, GIN_UNLOCK);
426 :
427 0 : if (rightlink == InvalidBlockNumber)
428 : {
429 : /*
430 : * rightmost page, but we don't find parent, we should use
431 : * plain search...
432 : */
433 0 : findParents(btree, stack, rootBlkno);
434 0 : parent = stack->parent;
435 0 : page = BufferGetPage(parent->buffer);
436 0 : break;
437 : }
438 :
439 0 : parent->blkno = rightlink;
440 0 : parent->buffer = ReleaseAndReadBuffer(parent->buffer, btree->index, parent->blkno);
441 0 : LockBuffer(parent->buffer, GIN_EXCLUSIVE);
442 0 : page = BufferGetPage(parent->buffer);
443 : }
444 :
445 40 : UnlockReleaseBuffer(stack->buffer);
446 40 : pfree(stack);
447 40 : stack = parent;
448 : }
449 : }
|