1 : /*-------------------------------------------------------------------------
2 : *
3 : * nbtutils.c
4 : * Utility code for Postgres btree implementation.
5 : *
6 : * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.87 2007/11/15 21:14:32 momjian Exp $
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include <time.h>
19 :
20 : #include "access/genam.h"
21 : #include "access/nbtree.h"
22 : #include "access/reloptions.h"
23 : #include "executor/execdebug.h"
24 : #include "miscadmin.h"
25 : #include "storage/lwlock.h"
26 : #include "storage/shmem.h"
27 : #include "utils/lsyscache.h"
28 :
29 :
/*
 * Forward declarations of the helper routines that are private (static)
 * to this file.
 */
30 : static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
31 : ScanKey leftarg, ScanKey rightarg,
32 : bool *result);
33 : static void _bt_mark_scankey_with_indoption(ScanKey skey, int16 *indoption);
34 : static void _bt_mark_scankey_required(ScanKey skey);
35 : static bool _bt_check_rowcompare(ScanKey skey,
36 : IndexTuple tuple, TupleDesc tupdesc,
37 : ScanDirection dir, bool *continuescan);
38 :
39 :
40 : /*
41 : * _bt_mkscankey
42 : * Build an insertion scan key that contains comparison data from itup
43 : * as well as comparator routines appropriate to the key datatypes.
44 : *
45 : * The result is intended for use with _bt_compare().
46 : */
47 : ScanKey
48 : _bt_mkscankey(Relation rel, IndexTuple itup)
49 70111 : {
50 : ScanKey skey;
51 : TupleDesc itupdesc;
52 : int natts;
53 : int16 *indoption;
54 : int i;
55 :
56 70111 : itupdesc = RelationGetDescr(rel);
57 70111 : natts = RelationGetNumberOfAttributes(rel);
58 70111 : indoption = rel->rd_indoption;
59 :
60 70111 : skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
61 :
62 216105 : for (i = 0; i < natts; i++)
63 : {
64 : FmgrInfo *procinfo;
65 : Datum arg;
66 : bool null;
67 : int flags;
68 :
69 : /*
70 : * We can use the cached (default) support procs since no cross-type
71 : * comparison can be needed.
72 : */
73 145994 : procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
74 145994 : arg = index_getattr(itup, i + 1, itupdesc, &null);
75 145994 : flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDOPTION_SHIFT);
76 145994 : ScanKeyEntryInitializeWithInfo(&skey[i],
77 : flags,
78 : (AttrNumber) (i + 1),
79 : InvalidStrategy,
80 : InvalidOid,
81 : procinfo,
82 : arg);
83 : }
84 :
85 70111 : return skey;
86 : }
87 :
88 : /*
89 : * _bt_mkscankey_nodata
90 : * Build an insertion scan key that contains 3-way comparator routines
91 : * appropriate to the key datatypes, but no comparison data. The
92 : * comparison data ultimately used must match the key datatypes.
93 : *
94 : * The result cannot be used with _bt_compare(), unless comparison
95 : * data is first stored into the key entries. Currently this
96 : * routine is only called by nbtsort.c and tuplesort.c, which have
97 : * their own comparison routines.
98 : */
99 : ScanKey
100 : _bt_mkscankey_nodata(Relation rel)
101 1066 : {
102 : ScanKey skey;
103 : int natts;
104 : int16 *indoption;
105 : int i;
106 :
107 1066 : natts = RelationGetNumberOfAttributes(rel);
108 1066 : indoption = rel->rd_indoption;
109 :
110 1066 : skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
111 :
112 2884 : for (i = 0; i < natts; i++)
113 : {
114 : FmgrInfo *procinfo;
115 : int flags;
116 :
117 : /*
118 : * We can use the cached (default) support procs since no cross-type
119 : * comparison can be needed.
120 : */
121 1818 : procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
122 1818 : flags = SK_ISNULL | (indoption[i] << SK_BT_INDOPTION_SHIFT);
123 1818 : ScanKeyEntryInitializeWithInfo(&skey[i],
124 : flags,
125 : (AttrNumber) (i + 1),
126 : InvalidStrategy,
127 : InvalidOid,
128 : procinfo,
129 : (Datum) 0);
130 : }
131 :
132 1066 : return skey;
133 : }
134 :
135 : /*
136 : * free a scan key made by either _bt_mkscankey or _bt_mkscankey_nodata.
137 : */
138 : void
139 : _bt_freeskey(ScanKey skey)
140 70090 : {
141 70090 : pfree(skey);
142 70090 : }
143 :
144 : /*
145 : * free a retracement stack made by _bt_search.
146 : */
147 : void
148 : _bt_freestack(BTStack stack)
149 233490 : {
150 : BTStack ostack;
151 :
152 638874 : while (stack != NULL)
153 : {
154 171894 : ostack = stack;
155 171894 : stack = stack->bts_parent;
156 171894 : pfree(ostack);
157 : }
158 233490 : }
159 :
160 :
161 : /*
162 : * _bt_preprocess_keys() -- Preprocess scan keys
163 : *
164 : * The caller-supplied search-type keys (in scan->keyData[]) are copied to
165 : * so->keyData[] with possible transformation. scan->numberOfKeys is
166 : * the number of input keys, so->numberOfKeys gets the number of output
167 : * keys (possibly less, never greater).
168 : *
169 : * The output keys are marked with additional sk_flag bits beyond the
170 : * system-standard bits supplied by the caller. The DESC and NULLS_FIRST
171 : * indoption bits for the relevant index attribute are copied into the flags.
172 : * Also, for a DESC column, we commute (flip) all the sk_strategy numbers
173 : * so that the index sorts in the desired direction.
174 : *
175 : * One key purpose of this routine is to discover how many scan keys
176 : * must be satisfied to continue the scan. It also attempts to eliminate
177 : * redundant keys and detect contradictory keys. (If the index opfamily
178 : * provides incomplete sets of cross-type operators, we may fail to detect
179 : * redundant or contradictory keys, but we can survive that.)
180 : *
181 : * The output keys must be sorted by index attribute. Presently we expect
182 : * (but verify) that the input keys are already so sorted --- this is done
183 : * by group_clauses_by_indexkey() in indxpath.c. Some reordering of the keys
184 : * within each attribute may be done as a byproduct of the processing here,
185 : * but no other code depends on that.
186 : *
187 : * The output keys are marked with flags SK_BT_REQFWD and/or SK_BT_REQBKWD
188 : * if they must be satisfied in order to continue the scan forward or backward
189 : * respectively. _bt_checkkeys uses these flags. For example, if the quals
190 : * are "x = 1 AND y < 4 AND z < 5", then _bt_checkkeys will reject a tuple
191 : * (1,2,7), but we must continue the scan in case there are tuples (1,3,z).
192 : * But once we reach tuples like (1,4,z) we can stop scanning because no
193 : * later tuples could match. This is reflected by marking the x and y keys,
194 : * but not the z key, with SK_BT_REQFWD. In general, the keys for leading
195 : * attributes with "=" keys are marked both SK_BT_REQFWD and SK_BT_REQBKWD.
196 : * For the first attribute without an "=" key, any "<" and "<=" keys are
197 : * marked SK_BT_REQFWD while any ">" and ">=" keys are marked SK_BT_REQBKWD.
198 : * This can be seen to be correct by considering the above example. Note
199 : * in particular that if there are no keys for a given attribute, the keys for
200 : * subsequent attributes can never be required; for instance "WHERE y = 4"
201 : * requires a full-index scan.
202 : *
203 : * If possible, redundant keys are eliminated: we keep only the tightest
204 : * >/>= bound and the tightest </<= bound, and if there's an = key then
205 : * that's the only one returned. (So, we return either a single = key,
206 : * or one or two boundary-condition keys for each attr.) However, if we
207 : * cannot compare two keys for lack of a suitable cross-type operator,
208 : * we cannot eliminate either. If there are two such keys of the same
209 : * operator strategy, the second one is just pushed into the output array
210 : * without further processing here. We may also emit both >/>= or both
211 : * </<= keys if we can't compare them. The logic about required keys still
212 : * works if we don't eliminate redundant keys.
213 : *
214 : * As a byproduct of this work, we can detect contradictory quals such
215 : * as "x = 1 AND x > 2". If we see that, we return so->qual_ok = FALSE,
216 : * indicating the scan need not be run at all since no tuples can match.
217 : * (In this case we do not bother completing the output key array!)
218 : * Again, missing cross-type operators might cause us to fail to prove the
219 : * quals contradictory when they really are, but the scan will work correctly.
220 : *
221 : * Row comparison keys are currently also treated without any smarts:
222 : * we just transfer them into the preprocessed array without any
223 : * editorialization. We can treat them the same as an ordinary inequality
224 : * comparison on the row's first index column, for the purposes of the logic
225 : * about required keys.
226 : *
227 : * Note: the reason we have to copy the preprocessed scan keys into private
228 : * storage is that we are modifying the array based on comparisons of the
229 : * key argument values, which could change on a rescan. Therefore we can't
230 : * overwrite the caller's data structure.
231 : */
232 : void
233 : _bt_preprocess_keys(IndexScanDesc scan)
234 163467 : {
235 163467 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
236 163467 : int numberOfKeys = scan->numberOfKeys;
237 163467 : int16 *indoption = scan->indexRelation->rd_indoption;
/* number of keys emitted into outkeys[] so far */
238 : int new_numberOfKeys;
/* number of attributes processed so far that had an "=" key */
239 : int numberOfEqualCols;
240 : ScanKey inkeys;
241 : ScanKey outkeys;
242 : ScanKey cur;
/* best key seen so far, per strategy, for the attribute being processed */
243 : ScanKey xform[BTMaxStrategyNumber];
244 : bool test_result;
245 : int i,
246 : j;
247 : AttrNumber attno;
248 :
249 : /* initialize result variables */
250 163467 : so->qual_ok = true;
251 163467 : so->numberOfKeys = 0;
252 :
253 163467 : if (numberOfKeys < 1)
254 39 : return; /* done if qual-less scan */
255 :
256 163428 : inkeys = scan->keyData;
257 163428 : outkeys = so->keyData;
258 163428 : cur = &inkeys[0];
259 : /* we check that input keys are correctly ordered */
260 163428 : if (cur->sk_attno < 1)
261 0 : elog(ERROR, "btree index keys must be ordered by attribute");
262 :
263 : /* We can short-circuit most of the work if there's just one key */
264 163428 : if (numberOfKeys == 1)
265 : {
266 : /*
267 : * We treat all btree operators as strict (even if they're not so
268 : * marked in pg_proc). This means that it is impossible for an
269 : * operator condition with a NULL comparison constant to succeed, and
270 : * we can reject it right away.
271 : *
272 : * However, we now also support "x IS NULL" clauses as search
273 : * conditions, so in that case keep going. The planner has not filled
274 : * in any particular strategy in this case, so set it to
275 : * BTEqualStrategyNumber --- we can treat IS NULL as an equality
276 : * operator for purposes of search strategy.
277 : */
278 93158 : if (cur->sk_flags & SK_ISNULL)
279 : {
280 7 : if (cur->sk_flags & SK_SEARCHNULL)
281 : {
282 4 : cur->sk_strategy = BTEqualStrategyNumber;
283 4 : cur->sk_subtype = InvalidOid;
284 : }
285 : else
286 3 : so->qual_ok = false;
287 : }
/*
 * Note: unlike the multi-key path below, there is no early return when
 * qual_ok has just been cleared; the key is still marked and copied out.
 */
288 93158 : _bt_mark_scankey_with_indoption(cur, indoption);
289 93158 : memcpy(outkeys, cur, sizeof(ScanKeyData));
290 93158 : so->numberOfKeys = 1;
291 : /* We can mark the qual as required if it's for first index col */
292 93158 : if (cur->sk_attno == 1)
293 93153 : _bt_mark_scankey_required(outkeys);
294 : return;
295 : }
296 :
297 : /*
298 : * Otherwise, do the full set of pushups.
299 : */
300 70270 : new_numberOfKeys = 0;
301 70270 : numberOfEqualCols = 0;
302 :
303 : /*
304 : * Initialize for processing of keys for attr 1.
305 : *
306 : * xform[i] points to the currently best scan key of strategy type i+1; it
307 : * is NULL if we haven't yet found such a key for this attr.
308 : */
309 70270 : attno = 1;
310 70270 : memset(xform, 0, sizeof(xform));
311 :
312 : /*
313 : * Loop iterates from 0 to numberOfKeys inclusive; we use the last pass to
314 : * handle after-last-key processing. Actual exit from the loop is at the
315 : * "break" statement below.
316 : */
317 218164 : for (i = 0;; cur++, i++)
318 : {
319 218164 : if (i < numberOfKeys)
320 : {
321 : /* See comments above about NULLs and IS NULL handling. */
322 : /* Note: we assume SK_ISNULL is never set in a row header key */
323 147894 : if (cur->sk_flags & SK_ISNULL)
324 : {
325 8 : if (cur->sk_flags & SK_SEARCHNULL)
326 : {
327 8 : cur->sk_strategy = BTEqualStrategyNumber;
328 8 : cur->sk_subtype = InvalidOid;
329 : }
330 : else
331 : {
332 0 : so->qual_ok = false;
333 0 : return;
334 : }
335 : }
336 : }
337 :
338 : /*
339 : * If we are at the end of the keys for a particular attr, finish up
340 : * processing and emit the cleaned-up keys.
341 : */
342 218164 : if (i == numberOfKeys || cur->sk_attno != attno)
343 : {
/*
 * Snapshot the count before this attr's own "=" key (if any) is added to
 * it below, so that the required-key test further down considers only
 * the attrs before this one.
 */
344 147878 : int priorNumberOfEqualCols = numberOfEqualCols;
345 :
346 : /* check input keys are correctly ordered */
347 147878 : if (i < numberOfKeys && cur->sk_attno < attno)
348 0 : elog(ERROR, "btree index keys must be ordered by attribute");
349 :
350 : /*
351 : * If = has been specified, all other keys can be eliminated as
352 : * redundant. In case of key > 2 && key == 1 we can set qual_ok
353 : * to false and abandon further processing.
354 : */
355 147878 : if (xform[BTEqualStrategyNumber - 1])
356 : {
357 137723 : ScanKey eq = xform[BTEqualStrategyNumber - 1];
358 :
359 964061 : for (j = BTMaxStrategyNumber; --j >= 0;)
360 : {
361 688615 : ScanKey chk = xform[j];
362 :
363 688615 : if (!chk || j == (BTEqualStrategyNumber - 1))
364 688613 : continue;
365 :
366 : /* IS NULL together with any other predicate must fail */
367 2 : if (eq->sk_flags & SK_SEARCHNULL)
368 : {
369 0 : so->qual_ok = false;
370 0 : return;
371 : }
372 :
/* evaluate "eq-value <chk's operator> chk-value" */
373 2 : if (_bt_compare_scankey_args(scan, chk, eq, chk,
374 : &test_result))
375 : {
376 2 : if (!test_result)
377 : {
378 : /* keys proven mutually contradictory */
379 0 : so->qual_ok = false;
380 0 : return;
381 : }
382 : /* else discard the redundant non-equality key */
383 2 : xform[j] = NULL;
384 : }
385 : /* else, cannot determine redundancy, keep both keys */
386 : }
387 : /* track number of attrs for which we have "=" keys */
388 137723 : numberOfEqualCols++;
389 : }
390 :
391 : /* try to keep only one of <, <= */
392 147878 : if (xform[BTLessStrategyNumber - 1]
393 : && xform[BTLessEqualStrategyNumber - 1])
394 : {
395 0 : ScanKey lt = xform[BTLessStrategyNumber - 1];
396 0 : ScanKey le = xform[BTLessEqualStrategyNumber - 1];
397 :
/*
 * Evaluate "lt-value <= le-value" with le's operator: if true the "<" key
 * is at least as tight, so "<=" is dropped; otherwise "<" is dropped.
 */
398 0 : if (_bt_compare_scankey_args(scan, le, lt, le,
399 : &test_result))
400 : {
401 0 : if (test_result)
402 0 : xform[BTLessEqualStrategyNumber - 1] = NULL;
403 : else
404 0 : xform[BTLessStrategyNumber - 1] = NULL;
405 : }
406 : }
407 :
408 : /* try to keep only one of >, >= */
409 147878 : if (xform[BTGreaterStrategyNumber - 1]
410 : && xform[BTGreaterEqualStrategyNumber - 1])
411 : {
412 0 : ScanKey gt = xform[BTGreaterStrategyNumber - 1];
413 0 : ScanKey ge = xform[BTGreaterEqualStrategyNumber - 1];
414 :
/*
 * Evaluate "gt-value >= ge-value" with ge's operator: if true the ">" key
 * is at least as tight, so ">=" is dropped; otherwise ">" is dropped.
 */
415 0 : if (_bt_compare_scankey_args(scan, ge, gt, ge,
416 : &test_result))
417 : {
418 0 : if (test_result)
419 0 : xform[BTGreaterEqualStrategyNumber - 1] = NULL;
420 : else
421 0 : xform[BTGreaterStrategyNumber - 1] = NULL;
422 : }
423 : }
424 :
425 : /*
426 : * Emit the cleaned-up keys into the outkeys[] array, and then
427 : * mark them if they are required. They are required (possibly
428 : * only in one direction) if all attrs before this one had "=".
429 : */
430 1035146 : for (j = BTMaxStrategyNumber; --j >= 0;)
431 : {
432 739390 : if (xform[j])
433 : {
434 147892 : ScanKey outkey = &outkeys[new_numberOfKeys++];
435 :
436 147892 : memcpy(outkey, xform[j], sizeof(ScanKeyData));
437 147892 : if (priorNumberOfEqualCols == attno - 1)
438 147844 : _bt_mark_scankey_required(outkey);
439 : }
440 : }
441 :
442 : /*
443 : * Exit loop here if done.
444 : */
445 147878 : if (i == numberOfKeys)
446 70270 : break;
447 :
448 : /* Re-initialize for new attno */
449 77608 : attno = cur->sk_attno;
450 77608 : memset(xform, 0, sizeof(xform));
451 : }
452 :
453 : /* apply indoption to scankey (might change sk_strategy!) */
454 147894 : _bt_mark_scankey_with_indoption(cur, indoption);
455 :
456 : /* check strategy this key's operator corresponds to */
/* j is the xform[] slot for that strategy (strategy numbers are 1-based) */
457 147894 : j = cur->sk_strategy - 1;
458 :
459 : /* if row comparison, push it directly to the output array */
460 147894 : if (cur->sk_flags & SK_ROW_HEADER)
461 : {
462 0 : ScanKey outkey = &outkeys[new_numberOfKeys++];
463 :
464 0 : memcpy(outkey, cur, sizeof(ScanKeyData));
465 0 : if (numberOfEqualCols == attno - 1)
466 0 : _bt_mark_scankey_required(outkey);
467 :
468 : /*
469 : * We don't support RowCompare using equality; such a qual would
470 : * mess up the numberOfEqualCols tracking.
471 : */
472 : Assert(j != (BTEqualStrategyNumber - 1));
/* row comparison keys are never entered into xform[] */
473 : continue;
474 : }
475 :
476 : /* have we seen one of these before? */
477 147894 : if (xform[j] == NULL)
478 : {
479 : /* nope, so remember this scankey */
480 147894 : xform[j] = cur;
481 : }
482 : else
483 : {
484 : /* yup, keep only the more restrictive key */
485 :
486 : /* if either arg is NULL, don't try to compare */
487 0 : if ((cur->sk_flags | xform[j]->sk_flags) & SK_ISNULL)
488 : {
489 : /* at least one of them must be an IS NULL clause */
490 : Assert(j == (BTEqualStrategyNumber - 1));
491 : Assert((cur->sk_flags | xform[j]->sk_flags) & SK_SEARCHNULL);
492 : /* if one is and one isn't, the search must fail */
493 0 : if ((cur->sk_flags ^ xform[j]->sk_flags) & SK_SEARCHNULL)
494 : {
495 0 : so->qual_ok = false;
496 0 : return;
497 : }
498 : /* we have duplicate IS NULL clauses, ignore the newer one */
499 : continue;
500 : }
501 :
/*
 * Evaluate "cur-value <cur's operator> remembered-value"; a true result
 * means the new key replaces the remembered one.
 */
502 0 : if (_bt_compare_scankey_args(scan, cur, cur, xform[j],
503 : &test_result))
504 : {
505 0 : if (test_result)
506 0 : xform[j] = cur;
507 0 : else if (j == (BTEqualStrategyNumber - 1))
508 : {
509 : /* key == a && key == b, but a != b */
510 0 : so->qual_ok = false;
511 0 : return;
512 : }
513 : /* else old key is more restrictive, keep it */
514 : }
515 : else
516 : {
517 : /*
518 : * We can't determine which key is more restrictive. Keep the
519 : * previous one in xform[j] and push this one directly to the
520 : * output array.
521 : */
522 0 : ScanKey outkey = &outkeys[new_numberOfKeys++];
523 :
524 0 : memcpy(outkey, cur, sizeof(ScanKeyData));
525 0 : if (numberOfEqualCols == attno - 1)
526 0 : _bt_mark_scankey_required(outkey);
527 : }
528 : }
529 147894 : }
530 :
531 70270 : so->numberOfKeys = new_numberOfKeys;
532 : }
533 :
534 : /*
535 : * Compare two scankey values using a specified operator. Both values
536 : * must be already known non-NULL.
537 : *
538 : * The test we want to perform is logically "leftarg op rightarg", where
539 : * leftarg and rightarg are the sk_argument values in those ScanKeys, and
540 : * the comparison operator is the one in the op ScanKey. However, in
541 : * cross-data-type situations we may need to look up the correct operator in
542 : * the index's opfamily: it is the one having amopstrategy = op->sk_strategy
543 : * and amoplefttype/amoprighttype equal to the two argument datatypes.
544 : *
545 : * If the opfamily doesn't supply a complete set of cross-type operators we
546 : * may not be able to make the comparison. If we can make the comparison
547 : * we store the operator result in *result and return TRUE. We return FALSE
548 : * if the comparison could not be made.
549 : *
550 : * Note: op always points at the same ScanKey as either leftarg or rightarg.
551 : * Since we don't scribble on the scankeys, this aliasing should cause no
552 : * trouble.
553 : *
554 : * Note: this routine needs to be insensitive to any DESC option applied
555 : * to the index column. For example, "x < 4" is a tighter constraint than
556 : * "x < 5" regardless of which way the index is sorted. We don't worry about
557 : * NULLS FIRST/LAST either, since the given values are never nulls.
558 : */
559 : static bool
560 : _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
561 : ScanKey leftarg, ScanKey rightarg,
562 : bool *result)
563 2 : {
564 2 : Relation rel = scan->indexRelation;
565 : Oid lefttype,
566 : righttype,
567 : optype,
568 : opcintype,
569 : cmp_op;
570 : StrategyNumber strat;
571 :
572 : /*
573 : * The opfamily we need to worry about is identified by the index column.
574 : */
575 : Assert(leftarg->sk_attno == rightarg->sk_attno);
576 :
577 2 : opcintype = rel->rd_opcintype[leftarg->sk_attno - 1];
578 :
579 : /*
580 : * Determine the actual datatypes of the ScanKey arguments. We have to
581 : * support the convention that sk_subtype == InvalidOid means the opclass
582 : * input type; this is a hack to simplify life for ScanKeyInit().
583 : */
584 2 : lefttype = leftarg->sk_subtype;
585 2 : if (lefttype == InvalidOid)
586 0 : lefttype = opcintype;
587 2 : righttype = rightarg->sk_subtype;
588 2 : if (righttype == InvalidOid)
589 0 : righttype = opcintype;
590 2 : optype = op->sk_subtype;
591 2 : if (optype == InvalidOid)
592 0 : optype = opcintype;
593 :
594 : /*
595 : * If leftarg and rightarg match the types expected for the "op" scankey,
596 : * we can use its already-looked-up comparison function.
597 : */
598 2 : if (lefttype == opcintype && righttype == optype)
599 : {
600 2 : *result = DatumGetBool(FunctionCall2(&op->sk_func,
601 : leftarg->sk_argument,
602 : rightarg->sk_argument));
603 2 : return true;
604 : }
605 :
606 : /*
607 : * Otherwise, we need to go to the syscache to find the appropriate
608 : * operator. (This cannot result in infinite recursion, since no
609 : * indexscan initiated by syscache lookup will use cross-data-type
610 : * operators.)
611 : *
612 : * If the sk_strategy was flipped by _bt_mark_scankey_with_indoption, we
613 : * have to un-flip it to get the correct opfamily member.
614 : */
615 0 : strat = op->sk_strategy;
616 0 : if (op->sk_flags & SK_BT_DESC)
617 0 : strat = BTCommuteStrategyNumber(strat);
618 :
619 0 : cmp_op = get_opfamily_member(rel->rd_opfamily[leftarg->sk_attno - 1],
620 : lefttype,
621 : righttype,
622 : strat);
623 0 : if (OidIsValid(cmp_op))
624 : {
625 0 : RegProcedure cmp_proc = get_opcode(cmp_op);
626 :
627 0 : if (RegProcedureIsValid(cmp_proc))
628 : {
629 0 : *result = DatumGetBool(OidFunctionCall2(cmp_proc,
630 : leftarg->sk_argument,
631 : rightarg->sk_argument));
632 0 : return true;
633 : }
634 : }
635 :
636 : /* Can't make the comparison */
637 0 : *result = false; /* suppress compiler warnings */
638 0 : return false;
639 : }
640 :
641 : /*
642 : * Mark a scankey with info from the index's indoption array.
643 : *
644 : * We copy the appropriate indoption value into the scankey sk_flags
645 : * (shifting to avoid clobbering system-defined flag bits). Also, if
646 : * the DESC option is set, commute (flip) the operator strategy number.
647 : *
648 : * This function is applied to the *input* scankey structure; therefore
649 : * on a rescan we will be looking at already-processed scankeys. Hence
650 : * we have to be careful not to re-commute the strategy if we already did it.
651 : * It's a bit ugly to modify the caller's copy of the scankey but in practice
652 : * there shouldn't be any problem, since the index's indoptions are certainly
653 : * not going to change while the scankey survives.
654 : */
655 : static void
656 : _bt_mark_scankey_with_indoption(ScanKey skey, int16 *indoption)
657 241052 : {
658 : int addflags;
659 :
660 241052 : addflags = indoption[skey->sk_attno - 1] << SK_BT_INDOPTION_SHIFT;
661 241052 : if ((addflags & SK_BT_DESC) && !(skey->sk_flags & SK_BT_DESC))
662 2 : skey->sk_strategy = BTCommuteStrategyNumber(skey->sk_strategy);
663 241052 : skey->sk_flags |= addflags;
664 :
665 241052 : if (skey->sk_flags & SK_ROW_HEADER)
666 : {
667 1 : ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
668 :
669 : for (;;)
670 : {
671 : Assert(subkey->sk_flags & SK_ROW_MEMBER);
672 2 : addflags = indoption[subkey->sk_attno - 1] << SK_BT_INDOPTION_SHIFT;
673 2 : if ((addflags & SK_BT_DESC) && !(subkey->sk_flags & SK_BT_DESC))
674 0 : subkey->sk_strategy = BTCommuteStrategyNumber(subkey->sk_strategy);
675 2 : subkey->sk_flags |= addflags;
676 2 : if (subkey->sk_flags & SK_ROW_END)
677 1 : break;
678 1 : subkey++;
679 1 : }
680 : }
681 241052 : }
682 :
683 : /*
684 : * Mark a scankey as "required to continue the scan".
685 : *
686 : * Depending on the operator type, the key may be required for both scan
687 : * directions or just one. Also, if the key is a row comparison header,
688 : * we have to mark the appropriate subsidiary ScanKeys as required. In
689 : * such cases, the first subsidiary key is required, but subsequent ones
690 : * are required only as long as they correspond to successive index columns
691 : * and match the leading column as to sort direction.
692 : * Otherwise the row comparison ordering is different from the index ordering
693 : * and so we can't stop the scan on the basis of those lower-order columns.
694 : *
695 : * Note: when we set required-key flag bits in a subsidiary scankey, we are
696 : * scribbling on a data structure belonging to the index AM's caller, not on
697 : * our private copy. This should be OK because the marking will not change
698 : * from scan to scan within a query, and so we'd just re-mark the same way
699 : * anyway on a rescan. Something to keep an eye on though.
700 : */
701 : static void
702 : _bt_mark_scankey_required(ScanKey skey)
703 240997 : {
704 : int addflags;
705 :
706 240997 : switch (skey->sk_strategy)
707 : {
708 : case BTLessStrategyNumber:
709 : case BTLessEqualStrategyNumber:
710 34 : addflags = SK_BT_REQFWD;
711 34 : break;
712 : case BTEqualStrategyNumber:
713 230785 : addflags = SK_BT_REQFWD | SK_BT_REQBKWD;
714 230785 : break;
715 : case BTGreaterEqualStrategyNumber:
716 : case BTGreaterStrategyNumber:
717 10178 : addflags = SK_BT_REQBKWD;
718 10178 : break;
719 : default:
720 0 : elog(ERROR, "unrecognized StrategyNumber: %d",
721 : (int) skey->sk_strategy);
722 0 : addflags = 0; /* keep compiler quiet */
723 : break;
724 : }
725 :
726 240997 : skey->sk_flags |= addflags;
727 :
728 240997 : if (skey->sk_flags & SK_ROW_HEADER)
729 : {
730 1 : ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
731 1 : AttrNumber attno = skey->sk_attno;
732 :
733 : /* First subkey should be same as the header says */
734 : Assert(subkey->sk_attno == attno);
735 :
736 : for (;;)
737 : {
738 : Assert(subkey->sk_flags & SK_ROW_MEMBER);
739 2 : if (subkey->sk_attno != attno)
740 0 : break; /* non-adjacent key, so not required */
741 2 : if (subkey->sk_strategy != skey->sk_strategy)
742 0 : break; /* wrong direction, so not required */
743 2 : subkey->sk_flags |= addflags;
744 2 : if (subkey->sk_flags & SK_ROW_END)
745 1 : break;
746 1 : subkey++;
747 1 : attno++;
748 1 : }
749 : }
750 240997 : }
751 :
752 : /*
753 : * Test whether an indextuple satisfies all the scankey conditions.
754 : *
755 : * If so, copy its TID into scan->xs_ctup.t_self, and return TRUE.
756 : * If not, return FALSE (xs_ctup is not changed).
757 : *
758 : * If the tuple fails to pass the qual, we also determine whether there's
759 : * any need to continue the scan beyond this tuple, and set *continuescan
760 : * accordingly. See comments for _bt_preprocess_keys(), above, about how
761 : * this is done.
762 : *
763 : * scan: index scan descriptor (containing a search-type scankey)
764 : * page: buffer page containing index tuple
765 : * offnum: offset number of index tuple (must be a valid item!)
766 : * dir: direction we are scanning in
767 : * continuescan: output parameter (will be set correctly in all cases)
768 : */
769 : bool
770 : _bt_checkkeys(IndexScanDesc scan,
771 : Page page, OffsetNumber offnum,
772 : ScanDirection dir, bool *continuescan)
773 607416 : {
774 607416 : ItemId iid = PageGetItemId(page, offnum);
775 : bool tuple_valid;
776 : IndexTuple tuple;
777 : TupleDesc tupdesc;
778 : BTScanOpaque so;
779 : int keysz;
780 : int ikey;
781 : ScanKey key;
782 :
783 607416 : *continuescan = true; /* default assumption */
784 :
785 : /*
786 : * If the scan specifies not to return killed tuples, then we treat a
787 : * killed tuple as not passing the qual. Most of the time, it's a win to
788 : * not bother examining the tuple's index keys, but just return
789 : * immediately with continuescan = true to proceed to the next tuple.
790 : * However, if this is the last tuple on the page, we should check the
791 : * index keys to prevent uselessly advancing to the next page.
792 : */
793 607534 : if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
794 : {
795 : /* return immediately if there are more tuples on the page */
796 48871 : if (ScanDirectionIsForward(dir))
797 : {
798 48722 : if (offnum < PageGetMaxOffsetNumber(page))
799 48604 : return false;
800 : }
801 : else
802 : {
803 149 : BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
804 :
805 149 : if (offnum > P_FIRSTDATAKEY(opaque))
806 149 : return false;
807 : }
808 :
809 : /*
810 : * OK, we want to check the keys, but we'll return FALSE even if the
811 : * tuple passes the key tests.
812 : */
813 118 : tuple_valid = false;
814 : }
815 : else
816 558545 : tuple_valid = true;
817 :
818 558663 : tuple = (IndexTuple) PageGetItem(page, iid);
819 :
820 : IncrIndexProcessed();
821 :
822 558663 : tupdesc = RelationGetDescr(scan->indexRelation);
823 558663 : so = (BTScanOpaque) scan->opaque;
824 558663 : keysz = so->numberOfKeys;
825 :
826 1332326 : for (key = so->keyData, ikey = 0; ikey < keysz; key++, ikey++)
827 : {
828 : Datum datum;
829 : bool isNull;
830 : Datum test;
831 :
832 : /* row-comparison keys need special processing */
833 896873 : if (key->sk_flags & SK_ROW_HEADER)
834 : {
835 25 : if (_bt_check_rowcompare(key, tuple, tupdesc, dir, continuescan))
836 25 : continue;
837 0 : return false;
838 : }
839 :
840 896848 : datum = index_getattr(tuple,
841 : key->sk_attno,
842 : tupdesc,
843 : &isNull);
844 :
845 896848 : if (key->sk_flags & SK_ISNULL)
846 : {
847 : /* Handle IS NULL tests */
848 : Assert(key->sk_flags & SK_SEARCHNULL);
849 :
850 4018 : if (isNull)
851 16 : continue; /* tuple satisfies this qual */
852 :
853 : /*
854 : * Tuple fails this qual. If it's a required qual for the current
855 : * scan direction, then we can conclude no further tuples will
856 : * pass, either.
857 : */
858 4002 : if ((key->sk_flags & SK_BT_REQFWD) &&
859 : ScanDirectionIsForward(dir))
860 2 : *continuescan = false;
861 4000 : else if ((key->sk_flags & SK_BT_REQBKWD) &&
862 : ScanDirectionIsBackward(dir))
863 0 : *continuescan = false;
864 :
865 : /*
866 : * In any case, this indextuple doesn't match the qual.
867 : */
868 4002 : return false;
869 : }
870 :
871 892830 : if (isNull)
872 : {
873 10 : if (key->sk_flags & SK_BT_NULLS_FIRST)
874 : {
875 : /*
876 : * Since NULLs are sorted before non-NULLs, we know we have
877 : * reached the lower limit of the range of values for this
878 : * index attr. On a backward scan, we can stop if this qual
879 : * is one of the "must match" subset. On a forward scan,
880 : * however, we should keep going.
881 : */
882 0 : if ((key->sk_flags & SK_BT_REQBKWD) &&
883 : ScanDirectionIsBackward(dir))
884 0 : *continuescan = false;
885 : }
886 : else
887 : {
888 : /*
889 : * Since NULLs are sorted after non-NULLs, we know we have
890 : * reached the upper limit of the range of values for this
891 : * index attr. On a forward scan, we can stop if this qual is
892 : * one of the "must match" subset. On a backward scan,
893 : * however, we should keep going.
894 : */
895 10 : if ((key->sk_flags & SK_BT_REQFWD) &&
896 : ScanDirectionIsForward(dir))
897 10 : *continuescan = false;
898 : }
899 :
900 : /*
901 : * In any case, this indextuple doesn't match the qual.
902 : */
903 10 : return false;
904 : }
905 :
906 892820 : test = FunctionCall2(&key->sk_func, datum, key->sk_argument);
907 :
908 892820 : if (!DatumGetBool(test))
909 : {
910 : /*
911 : * Tuple fails this qual. If it's a required qual for the current
912 : * scan direction, then we can conclude no further tuples will
913 : * pass, either.
914 : *
915 : * Note: because we stop the scan as soon as any required equality
916 : * qual fails, it is critical that equality quals be used for the
917 : * initial positioning in _bt_first() when they are available. See
918 : * comments in _bt_first().
919 : */
920 119198 : if ((key->sk_flags & SK_BT_REQFWD) &&
921 : ScanDirectionIsForward(dir))
922 118794 : *continuescan = false;
923 404 : else if ((key->sk_flags & SK_BT_REQBKWD) &&
924 : ScanDirectionIsBackward(dir))
925 2 : *continuescan = false;
926 :
927 : /*
928 : * In any case, this indextuple doesn't match the qual.
929 : */
930 119198 : return false;
931 : }
932 : }
933 :
934 : /* If we get here, the tuple passes all index quals. */
935 435453 : if (tuple_valid)
936 435419 : scan->xs_ctup.t_self = tuple->t_tid;
937 :
938 435453 : return tuple_valid;
939 : }
940 :
941 : /*
942 : * Test whether an indextuple satisfies a row-comparison scan condition.
943 : *
944 : * Return true if so, false if not. If not, also clear *continuescan if
945 : * it's not possible for any future tuples in the current scan direction
946 : * to pass the qual.
947 : *
948 : * This is a subroutine for _bt_checkkeys, which see for more info.
949 : */
950 : static bool
951 : _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
952 : ScanDirection dir, bool *continuescan)
953 25 : {
954 25 : ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
955 25 : int32 cmpresult = 0;
956 : bool result;
957 :
958 : /* First subkey should be same as the header says */
959 : Assert(subkey->sk_attno == skey->sk_attno);
960 :
961 : /* Loop over columns of the row condition */
962 : for (;;)
963 : {
964 : Datum datum;
965 : bool isNull;
966 :
967 : Assert(subkey->sk_flags & SK_ROW_MEMBER);
968 :
969 30 : datum = index_getattr(tuple,
970 : subkey->sk_attno,
971 : tupdesc,
972 : &isNull);
973 :
974 30 : if (isNull)
975 : {
976 0 : if (subkey->sk_flags & SK_BT_NULLS_FIRST)
977 : {
978 : /*
979 : * Since NULLs are sorted before non-NULLs, we know we have
980 : * reached the lower limit of the range of values for this
981 : * index attr. On a backward scan, we can stop if this qual is
982 : * one of the "must match" subset. On a forward scan,
983 : * however, we should keep going.
984 : */
985 0 : if ((subkey->sk_flags & SK_BT_REQBKWD) &&
986 : ScanDirectionIsBackward(dir))
987 0 : *continuescan = false;
988 : }
989 : else
990 : {
991 : /*
992 : * Since NULLs are sorted after non-NULLs, we know we have
993 : * reached the upper limit of the range of values for this
994 : * index attr. On a forward scan, we can stop if this qual is
995 : * one of the "must match" subset. On a backward scan,
996 : * however, we should keep going.
997 : */
998 0 : if ((subkey->sk_flags & SK_BT_REQFWD) &&
999 : ScanDirectionIsForward(dir))
1000 0 : *continuescan = false;
1001 : }
1002 :
1003 : /*
1004 : * In any case, this indextuple doesn't match the qual.
1005 : */
1006 0 : return false;
1007 : }
1008 :
1009 30 : if (subkey->sk_flags & SK_ISNULL)
1010 : {
1011 : /*
1012 : * Unlike the simple-scankey case, this isn't a disallowed case.
1013 : * But it can never match. If all the earlier row comparison
1014 : * columns are required for the scan direction, we can stop the
1015 : * scan, because there can't be another tuple that will succeed.
1016 : */
1017 0 : if (subkey != (ScanKey) DatumGetPointer(skey->sk_argument))
1018 0 : subkey--;
1019 0 : if ((subkey->sk_flags & SK_BT_REQFWD) &&
1020 : ScanDirectionIsForward(dir))
1021 0 : *continuescan = false;
1022 0 : else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
1023 : ScanDirectionIsBackward(dir))
1024 0 : *continuescan = false;
1025 0 : return false;
1026 : }
1027 :
1028 : /* Perform the test --- three-way comparison not bool operator */
1029 30 : cmpresult = DatumGetInt32(FunctionCall2(&subkey->sk_func,
1030 : datum,
1031 : subkey->sk_argument));
1032 :
1033 30 : if (subkey->sk_flags & SK_BT_DESC)
1034 0 : cmpresult = -cmpresult;
1035 :
1036 : /* Done comparing if unequal, else advance to next column */
1037 30 : if (cmpresult != 0)
1038 25 : break;
1039 :
1040 5 : if (subkey->sk_flags & SK_ROW_END)
1041 0 : break;
1042 5 : subkey++;
1043 5 : }
1044 :
1045 : /*
1046 : * At this point cmpresult indicates the overall result of the row
1047 : * comparison, and subkey points to the deciding column (or the last
1048 : * column if the result is "=").
1049 : */
1050 25 : switch (subkey->sk_strategy)
1051 : {
1052 : /* EQ and NE cases aren't allowed here */
1053 : case BTLessStrategyNumber:
1054 0 : result = (cmpresult < 0);
1055 0 : break;
1056 : case BTLessEqualStrategyNumber:
1057 0 : result = (cmpresult <= 0);
1058 0 : break;
1059 : case BTGreaterEqualStrategyNumber:
1060 25 : result = (cmpresult >= 0);
1061 25 : break;
1062 : case BTGreaterStrategyNumber:
1063 0 : result = (cmpresult > 0);
1064 0 : break;
1065 : default:
1066 0 : elog(ERROR, "unrecognized RowCompareType: %d",
1067 : (int) subkey->sk_strategy);
1068 0 : result = 0; /* keep compiler quiet */
1069 : break;
1070 : }
1071 :
1072 25 : if (!result)
1073 : {
1074 : /*
1075 : * Tuple fails this qual. If it's a required qual for the current
1076 : * scan direction, then we can conclude no further tuples will pass,
1077 : * either. Note we have to look at the deciding column, not
1078 : * necessarily the first or last column of the row condition.
1079 : */
1080 0 : if ((subkey->sk_flags & SK_BT_REQFWD) &&
1081 : ScanDirectionIsForward(dir))
1082 0 : *continuescan = false;
1083 0 : else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
1084 : ScanDirectionIsBackward(dir))
1085 0 : *continuescan = false;
1086 : }
1087 :
1088 25 : return result;
1089 : }
1090 :
1091 : /*
1092 : * _bt_killitems - set LP_DEAD state for items an indexscan caller has
1093 : * told us were killed
1094 : *
1095 : * scan->so contains information about the current page and killed tuples
1096 : * thereon (generally, this should only be called if so->numKilled > 0).
1097 : *
1098 : * The caller must have pin on so->currPos.buf, but may or may not have
1099 : * read-lock, as indicated by haveLock. Note that we assume read-lock
1100 : * is sufficient for setting LP_DEAD status (which is only a hint).
1101 : *
1102 : * We match items by heap TID before assuming they are the right ones to
1103 : * delete. We cope with cases where items have moved right due to insertions.
1104 : * If an item has moved off the current page due to a split, we'll fail to
1105 : * find it and do nothing (this is not an error case --- we assume the item
1106 : * will eventually get marked in a future indexscan). Note that because we
1107 : * hold pin on the target page continuously from initially reading the items
1108 : * until applying this function, VACUUM cannot have deleted any items from
1109 : * the page, and so there is no need to search left from the recorded offset.
1110 : * (This observation also guarantees that the item is still the right one
1111 : * to delete, which might otherwise be questionable since heap TIDs can get
1112 : * recycled.)
1113 : */
1114 : void
1115 : _bt_killitems(IndexScanDesc scan, bool haveLock)
1116 934 : {
1117 934 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
1118 : Page page;
1119 : BTPageOpaque opaque;
1120 : OffsetNumber minoff;
1121 : OffsetNumber maxoff;
1122 : int i;
1123 934 : bool killedsomething = false;
1124 :
1125 : Assert(BufferIsValid(so->currPos.buf));
1126 :
1127 934 : if (!haveLock)
1128 238 : LockBuffer(so->currPos.buf, BT_READ);
1129 :
1130 934 : page = BufferGetPage(so->currPos.buf);
1131 934 : opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1132 934 : minoff = P_FIRSTDATAKEY(opaque);
1133 934 : maxoff = PageGetMaxOffsetNumber(page);
1134 :
1135 2631 : for (i = 0; i < so->numKilled; i++)
1136 : {
1137 1697 : int itemIndex = so->killedItems[i];
1138 1697 : BTScanPosItem *kitem = &so->currPos.items[itemIndex];
1139 1697 : OffsetNumber offnum = kitem->indexOffset;
1140 :
1141 : Assert(itemIndex >= so->currPos.firstItem &&
1142 : itemIndex <= so->currPos.lastItem);
1143 1697 : if (offnum < minoff)
1144 0 : continue; /* pure paranoia */
1145 1697 : while (offnum <= maxoff)
1146 : {
1147 1697 : ItemId iid = PageGetItemId(page, offnum);
1148 1697 : IndexTuple ituple = (IndexTuple) PageGetItem(page, iid);
1149 :
1150 1697 : if (ItemPointerEquals(&ituple->t_tid, &kitem->heapTid))
1151 : {
1152 : /* found the item */
1153 1697 : ItemIdMarkDead(iid);
1154 1697 : killedsomething = true;
1155 1697 : break; /* out of inner search loop */
1156 : }
1157 0 : offnum = OffsetNumberNext(offnum);
1158 : }
1159 : }
1160 :
1161 : /*
1162 : * Since this can be redone later if needed, it's treated the same as a
1163 : * commit-hint-bit status update for heap tuples: we mark the buffer dirty
1164 : * but don't make a WAL log entry.
1165 : *
1166 : * Whenever we mark anything LP_DEAD, we also set the page's
1167 : * BTP_HAS_GARBAGE flag, which is likewise just a hint.
1168 : */
1169 934 : if (killedsomething)
1170 : {
1171 934 : opaque->btpo_flags |= BTP_HAS_GARBAGE;
1172 934 : SetBufferCommitInfoNeedsSave(so->currPos.buf);
1173 : }
1174 :
1175 934 : if (!haveLock)
1176 238 : LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
1177 :
1178 : /*
1179 : * Always reset the scan state, so we don't look for same items on other
1180 : * pages.
1181 : */
1182 934 : so->numKilled = 0;
1183 934 : }
1184 :
1185 :
1186 : /*
1187 : * The following routines manage a shared-memory area in which we track
1188 : * assignment of "vacuum cycle IDs" to currently-active btree vacuuming
1189 : * operations. There is a single counter which increments each time we
1190 : * start a vacuum to assign it a cycle ID. Since multiple vacuums could
1191 : * be active concurrently, we have to track the cycle ID for each active
1192 : * vacuum; this requires at most MaxBackends entries (usually far fewer).
1193 : * We assume at most one vacuum can be active for a given index.
1194 : *
1195 : * Access to the shared memory area is controlled by BtreeVacuumLock.
1196 : * In principle we could use a separate lmgr locktag for each index,
1197 : * but a single LWLock is much cheaper, and given the short time that
1198 : * the lock is ever held, the concurrency hit should be minimal.
1199 : */
1200 :
/*
 * One slot of the active-vacuum table: ties an index's identity to the
 * cycle ID assigned to the VACUUM currently running on it.  The lookup
 * routines below match slots on relid.relId and relid.dbId.
 */
1201 : typedef struct BTOneVacInfo
1202 : {
1203 : LockRelId relid; /* global identifier of an index */
1204 : BTCycleId cycleid; /* cycle ID for its active VACUUM */
1205 : } BTOneVacInfo;
1206 :
/*
 * Header of the shared-memory area: the cycle-ID counter plus the table of
 * currently active vacuums.  Only the first num_vacuums entries of
 * vacuums[] are in use.
 */
1207 : typedef struct BTVacInfo
1208 : {
1209 : BTCycleId cycle_ctr; /* cycle ID most recently assigned */
1210 : int num_vacuums; /* number of currently active VACUUMs */
1211 : int max_vacuums; /* allocated length of vacuums[] array */
1212 : BTOneVacInfo vacuums[1]; /* VARIABLE LENGTH ARRAY; BTreeShmemSize() reserves MaxBackends entries */
1213 : } BTVacInfo;
1214 :
/* Pointer to the shared area; assigned in BTreeShmemInit(). */
1215 : static BTVacInfo *btvacinfo;
1216 :
1217 :
1218 : /*
1219 : * _bt_vacuum_cycleid --- get the active vacuum cycle ID for an index,
1220 : * or zero if there is no active VACUUM
1221 : *
1222 : * Note: for correct interlocking, the caller must already hold pin and
1223 : * exclusive lock on each buffer it will store the cycle ID into. This
1224 : * ensures that even if a VACUUM starts immediately afterwards, it cannot
1225 : * process those pages until the page split is complete.
1226 : */
1227 : BTCycleId
1228 : _bt_vacuum_cycleid(Relation rel)
1229 383 : {
1230 383 : BTCycleId result = 0;
1231 : int i;
1232 :
1233 : /* Share lock is enough since this is a read-only operation */
1234 383 : LWLockAcquire(BtreeVacuumLock, LW_SHARED);
1235 :
1236 383 : for (i = 0; i < btvacinfo->num_vacuums; i++)
1237 : {
1238 0 : BTOneVacInfo *vac = &btvacinfo->vacuums[i];
1239 :
1240 0 : if (vac->relid.relId == rel->rd_lockInfo.lockRelId.relId &&
1241 : vac->relid.dbId == rel->rd_lockInfo.lockRelId.dbId)
1242 : {
1243 0 : result = vac->cycleid;
1244 0 : break;
1245 : }
1246 : }
1247 :
1248 383 : LWLockRelease(BtreeVacuumLock);
1249 383 : return result;
1250 : }
1251 :
1252 : /*
1253 : * _bt_start_vacuum --- assign a cycle ID to a just-starting VACUUM operation
1254 : *
1255 : * Note: the caller must guarantee (via PG_TRY) that it will eventually call
1256 : * _bt_end_vacuum, else we'll permanently leak an array slot.
1257 : */
1258 : BTCycleId
1259 : _bt_start_vacuum(Relation rel)
1260 70 : {
1261 : BTCycleId result;
1262 : int i;
1263 : BTOneVacInfo *vac;
1264 :
1265 70 : LWLockAcquire(BtreeVacuumLock, LW_EXCLUSIVE);
1266 :
1267 : /*
1268 : * Assign the next cycle ID, being careful to avoid zero as well as the
1269 : * reserved high values.
1270 : */
1271 70 : result = ++(btvacinfo->cycle_ctr);
1272 70 : if (result == 0 || result > MAX_BT_CYCLE_ID)
1273 0 : result = btvacinfo->cycle_ctr = 1;
1274 :
1275 : /* Let's just make sure there's no entry already for this index */
1276 70 : for (i = 0; i < btvacinfo->num_vacuums; i++)
1277 : {
1278 0 : vac = &btvacinfo->vacuums[i];
1279 0 : if (vac->relid.relId == rel->rd_lockInfo.lockRelId.relId &&
1280 : vac->relid.dbId == rel->rd_lockInfo.lockRelId.dbId)
1281 : {
1282 : /*
1283 : * Unlike most places in the backend, we have to explicitly
1284 : * release our LWLock before throwing an error. This is because
1285 : * we expect _bt_end_vacuum() to be called before transaction
1286 : * abort cleanup can run to release LWLocks.
1287 : */
1288 0 : LWLockRelease(BtreeVacuumLock);
1289 0 : elog(ERROR, "multiple active vacuums for index \"%s\"",
1290 : RelationGetRelationName(rel));
1291 : }
1292 : }
1293 :
1294 : /* OK, add an entry */
1295 70 : if (btvacinfo->num_vacuums >= btvacinfo->max_vacuums)
1296 : {
1297 0 : LWLockRelease(BtreeVacuumLock);
1298 0 : elog(ERROR, "out of btvacinfo slots");
1299 : }
1300 70 : vac = &btvacinfo->vacuums[btvacinfo->num_vacuums];
1301 70 : vac->relid = rel->rd_lockInfo.lockRelId;
1302 70 : vac->cycleid = result;
1303 70 : btvacinfo->num_vacuums++;
1304 :
1305 70 : LWLockRelease(BtreeVacuumLock);
1306 70 : return result;
1307 : }
1308 :
1309 : /*
1310 : * _bt_end_vacuum --- mark a btree VACUUM operation as done
1311 : *
1312 : * Note: this is deliberately coded not to complain if no entry is found;
1313 : * this allows the caller to put PG_TRY around the start_vacuum operation.
1314 : */
1315 : void
1316 : _bt_end_vacuum(Relation rel)
1317 70 : {
1318 : int i;
1319 :
1320 70 : LWLockAcquire(BtreeVacuumLock, LW_EXCLUSIVE);
1321 :
1322 : /* Find the array entry */
1323 70 : for (i = 0; i < btvacinfo->num_vacuums; i++)
1324 : {
1325 70 : BTOneVacInfo *vac = &btvacinfo->vacuums[i];
1326 :
1327 70 : if (vac->relid.relId == rel->rd_lockInfo.lockRelId.relId &&
1328 : vac->relid.dbId == rel->rd_lockInfo.lockRelId.dbId)
1329 : {
1330 : /* Remove it by shifting down the last entry */
1331 70 : *vac = btvacinfo->vacuums[btvacinfo->num_vacuums - 1];
1332 70 : btvacinfo->num_vacuums--;
1333 70 : break;
1334 : }
1335 : }
1336 :
1337 70 : LWLockRelease(BtreeVacuumLock);
1338 70 : }
1339 :
1340 : /*
1341 : * BTreeShmemSize --- report amount of shared memory space needed
1342 : */
1343 : Size
1344 : BTreeShmemSize(void)
1345 34 : {
1346 : Size size;
1347 :
1348 34 : size = offsetof(BTVacInfo, vacuums[0]);
1349 34 : size = add_size(size, mul_size(MaxBackends, sizeof(BTOneVacInfo)));
1350 34 : return size;
1351 : }
1352 :
1353 : /*
1354 : * BTreeShmemInit --- initialize this module's shared memory
1355 : */
1356 : void
1357 : BTreeShmemInit(void)
1358 16 : {
1359 : bool found;
1360 :
1361 16 : btvacinfo = (BTVacInfo *) ShmemInitStruct("BTree Vacuum State",
1362 : BTreeShmemSize(),
1363 : &found);
1364 :
1365 16 : if (!IsUnderPostmaster)
1366 : {
1367 : /* Initialize shared memory area */
1368 : Assert(!found);
1369 :
1370 : /*
1371 : * It doesn't really matter what the cycle counter starts at, but
1372 : * having it always start the same doesn't seem good. Seed with
1373 : * low-order bits of time() instead.
1374 : */
1375 16 : btvacinfo->cycle_ctr = (BTCycleId) time(NULL);
1376 :
1377 16 : btvacinfo->num_vacuums = 0;
1378 16 : btvacinfo->max_vacuums = MaxBackends;
1379 : }
1380 : else
1381 : Assert(found);
1382 16 : }
1383 :
1384 : Datum
1385 : btoptions(PG_FUNCTION_ARGS)
1386 0 : {
1387 0 : Datum reloptions = PG_GETARG_DATUM(0);
1388 0 : bool validate = PG_GETARG_BOOL(1);
1389 : bytea *result;
1390 :
1391 0 : result = default_reloptions(reloptions, validate,
1392 : BTREE_MIN_FILLFACTOR,
1393 : BTREE_DEFAULT_FILLFACTOR);
1394 0 : if (result)
1395 0 : PG_RETURN_BYTEA_P(result);
1396 0 : PG_RETURN_NULL();
1397 : }
|