1 : /*-------------------------------------------------------------------------
2 : *
3 : * hashsearch.c
4 : * search code for postgres hash tables
5 : *
6 : * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.50 2007/05/27 03:50:38 tgl Exp $
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/hash.h"
18 : #include "pgstat.h"
19 :
20 :
21 : /*
22 : * _hash_next() -- Get the next item in a scan.
23 : *
24 : * On entry, we have a valid hashso_curpos in the scan, and a
25 : * pin and read lock on the page that contains that item.
26 : * We find the next item in the scan, if any.
27 : * On success exit, we have the page containing the next item
28 : * pinned and locked.
29 : */
30 : bool
31 : _hash_next(IndexScanDesc scan, ScanDirection dir)
32 20 : {
33 20 : Relation rel = scan->indexRelation;
34 20 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
35 : Buffer buf;
36 : Page page;
37 : OffsetNumber offnum;
38 : ItemPointer current;
39 : IndexTuple itup;
40 :
41 : /* we still have the buffer pinned and read-locked */
42 20 : buf = so->hashso_curbuf;
43 : Assert(BufferIsValid(buf));
44 :
45 : /*
46 : * step to next valid tuple.
47 : */
48 20 : if (!_hash_step(scan, &buf, dir))
49 17 : return false;
50 :
51 : /* if we're here, _hash_step found a valid tuple */
52 3 : current = &(so->hashso_curpos);
53 3 : offnum = ItemPointerGetOffsetNumber(current);
54 3 : _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
55 3 : page = BufferGetPage(buf);
56 3 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
57 3 : scan->xs_ctup.t_self = itup->t_tid;
58 :
59 3 : return true;
60 : }
61 :
62 : /*
63 : * Advance to next page in a bucket, if any.
64 : */
65 : static void
66 : _hash_readnext(Relation rel,
67 : Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
68 21 : {
69 : BlockNumber blkno;
70 :
71 21 : blkno = (*opaquep)->hasho_nextblkno;
72 21 : _hash_relbuf(rel, *bufp);
73 21 : *bufp = InvalidBuffer;
74 21 : if (BlockNumberIsValid(blkno))
75 : {
76 0 : *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
77 0 : *pagep = BufferGetPage(*bufp);
78 0 : *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
79 : }
80 21 : }
81 :
82 : /*
83 : * Advance to previous page in a bucket, if any.
84 : */
85 : static void
86 : _hash_readprev(Relation rel,
87 : Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
88 0 : {
89 : BlockNumber blkno;
90 :
91 0 : blkno = (*opaquep)->hasho_prevblkno;
92 0 : _hash_relbuf(rel, *bufp);
93 0 : *bufp = InvalidBuffer;
94 0 : if (BlockNumberIsValid(blkno))
95 : {
96 0 : *bufp = _hash_getbuf(rel, blkno, HASH_READ,
97 : LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
98 0 : *pagep = BufferGetPage(*bufp);
99 0 : *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
100 : }
101 0 : }
102 :
103 : /*
104 : * _hash_first() -- Find the first item in a scan.
105 : *
106 : * Find the first item in the index that
107 : * satisfies the qualification associated with the scan descriptor. On
108 : * success, the page containing the current index tuple is read locked
109 : * and pinned, and the scan's opaque data entry is updated to
110 : * include the buffer.
111 : */
112 : bool
113 : _hash_first(IndexScanDesc scan, ScanDirection dir)
114 21 : {
115 21 : Relation rel = scan->indexRelation;
116 21 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
117 : ScanKey cur;
118 : uint32 hashkey;
119 : Bucket bucket;
120 : BlockNumber blkno;
121 : Buffer buf;
122 : Buffer metabuf;
123 : Page page;
124 : HashPageOpaque opaque;
125 : HashMetaPage metap;
126 : IndexTuple itup;
127 : ItemPointer current;
128 : OffsetNumber offnum;
129 :
130 21 : pgstat_count_index_scan(rel);
131 :
132 21 : current = &(so->hashso_curpos);
133 21 : ItemPointerSetInvalid(current);
134 :
135 : /*
136 : * We do not support hash scans with no index qualification, because we
137 : * would have to read the whole index rather than just one bucket. That
138 : * creates a whole raft of problems, since we haven't got a practical way
139 : * to lock all the buckets against splits or compactions.
140 : */
141 21 : if (scan->numberOfKeys < 1)
142 0 : ereport(ERROR,
143 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
144 : errmsg("hash indexes do not support whole-index scans")));
145 :
146 : /* There may be more than one index qual, but we hash only the first */
147 21 : cur = &scan->keyData[0];
148 :
149 : /* We support only single-column hash indexes */
150 : Assert(cur->sk_attno == 1);
151 : /* And there's only one operator strategy, too */
152 : Assert(cur->sk_strategy == HTEqualStrategyNumber);
153 :
154 : /*
155 : * If the constant in the index qual is NULL, assume it cannot match any
156 : * items in the index.
157 : */
158 21 : if (cur->sk_flags & SK_ISNULL)
159 0 : return false;
160 :
161 : /*
162 : * Okay to compute the hash key. We want to do this before acquiring any
163 : * locks, in case a user-defined hash function happens to be slow.
164 : *
165 : * If scankey operator is not a cross-type comparison, we can use the
166 : * cached hash function; otherwise gotta look it up in the catalogs.
167 : *
168 : * We support the convention that sk_subtype == InvalidOid means the
169 : * opclass input type; this is a hack to simplify life for ScanKeyInit().
170 : */
171 42 : if (cur->sk_subtype == rel->rd_opcintype[0] ||
172 : cur->sk_subtype == InvalidOid)
173 21 : hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
174 : else
175 0 : hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
176 : cur->sk_subtype);
177 :
178 : /*
179 : * Acquire shared split lock so we can compute the target bucket safely
180 : * (see README).
181 : */
182 21 : _hash_getlock(rel, 0, HASH_SHARE);
183 :
184 : /* Read the metapage */
185 21 : metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
186 21 : metap = (HashMetaPage) BufferGetPage(metabuf);
187 :
188 : /*
189 : * Compute the target bucket number, and convert to block number.
190 : */
191 21 : bucket = _hash_hashkey2bucket(hashkey,
192 : metap->hashm_maxbucket,
193 : metap->hashm_highmask,
194 : metap->hashm_lowmask);
195 :
196 21 : blkno = BUCKET_TO_BLKNO(metap, bucket);
197 :
198 : /* done with the metapage */
199 21 : _hash_relbuf(rel, metabuf);
200 :
201 : /*
202 : * Acquire share lock on target bucket; then we can release split lock.
203 : */
204 21 : _hash_getlock(rel, blkno, HASH_SHARE);
205 :
206 21 : _hash_droplock(rel, 0, HASH_SHARE);
207 :
208 : /* Update scan opaque state to show we have lock on the bucket */
209 21 : so->hashso_bucket = bucket;
210 21 : so->hashso_bucket_valid = true;
211 21 : so->hashso_bucket_blkno = blkno;
212 :
213 : /* Fetch the primary bucket page for the bucket */
214 21 : buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
215 21 : page = BufferGetPage(buf);
216 21 : opaque = (HashPageOpaque) PageGetSpecialPointer(page);
217 : Assert(opaque->hasho_bucket == bucket);
218 :
219 : /* If a backwards scan is requested, move to the end of the chain */
220 21 : if (ScanDirectionIsBackward(dir))
221 : {
222 0 : while (BlockNumberIsValid(opaque->hasho_nextblkno))
223 0 : _hash_readnext(rel, &buf, &page, &opaque);
224 : }
225 :
226 : /* Now find the first tuple satisfying the qualification */
227 21 : if (!_hash_step(scan, &buf, dir))
228 4 : return false;
229 :
230 : /* if we're here, _hash_step found a valid tuple */
231 17 : offnum = ItemPointerGetOffsetNumber(current);
232 17 : _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
233 17 : page = BufferGetPage(buf);
234 17 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
235 17 : scan->xs_ctup.t_self = itup->t_tid;
236 :
237 17 : return true;
238 : }
239 :
240 : /*
241 : * _hash_step() -- step to the next valid item in a scan in the bucket.
242 : *
243 : * If no valid record exists in the requested direction, return
244 : * false. Else, return true and set the hashso_curpos for the
245 : * scan to the right thing.
246 : *
247 : * 'bufP' points to the current buffer, which is pinned and read-locked.
248 : * On success exit, we have pin and read-lock on whichever page
249 : * contains the right item; on failure, we have released all buffers.
250 : */
251 : bool
252 : _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
253 41 : {
254 41 : Relation rel = scan->indexRelation;
255 41 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
256 : ItemPointer current;
257 : Buffer buf;
258 : Page page;
259 : HashPageOpaque opaque;
260 : OffsetNumber maxoff;
261 : OffsetNumber offnum;
262 : BlockNumber blkno;
263 : IndexTuple itup;
264 :
265 41 : current = &(so->hashso_curpos);
266 :
267 41 : buf = *bufP;
268 41 : _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
269 41 : page = BufferGetPage(buf);
270 41 : opaque = (HashPageOpaque) PageGetSpecialPointer(page);
271 :
272 : /*
273 : * If _hash_step is called from _hash_first, current will not be valid, so
274 : * we can't dereference it. However, in that case, we presumably want to
275 : * start at the beginning/end of the page...
276 : */
277 41 : maxoff = PageGetMaxOffsetNumber(page);
278 41 : if (ItemPointerIsValid(current))
279 20 : offnum = ItemPointerGetOffsetNumber(current);
280 : else
281 21 : offnum = InvalidOffsetNumber;
282 :
283 : /*
284 : * 'offnum' now points to the last tuple we have seen (if any).
285 : *
286 : * continue to step through tuples until: 1) we get to the end of the
287 : * bucket chain or 2) we find a valid tuple.
288 : */
289 : do
290 : {
291 4194 : switch (dir)
292 : {
293 : case ForwardScanDirection:
294 4194 : if (offnum != InvalidOffsetNumber)
295 4173 : offnum = OffsetNumberNext(offnum); /* move forward */
296 : else
297 21 : offnum = FirstOffsetNumber; /* new page */
298 :
299 4194 : while (offnum > maxoff)
300 : {
301 : /*
302 : * either this page is empty (maxoff ==
303 : * InvalidOffsetNumber) or we ran off the end.
304 : */
305 21 : _hash_readnext(rel, &buf, &page, &opaque);
306 21 : if (BufferIsValid(buf))
307 : {
308 0 : maxoff = PageGetMaxOffsetNumber(page);
309 0 : offnum = FirstOffsetNumber;
310 : }
311 : else
312 : {
313 : /* end of bucket */
314 21 : maxoff = offnum = InvalidOffsetNumber;
315 21 : break; /* exit while */
316 : }
317 : }
318 : break;
319 :
320 : case BackwardScanDirection:
321 0 : if (offnum != InvalidOffsetNumber)
322 0 : offnum = OffsetNumberPrev(offnum); /* move back */
323 : else
324 0 : offnum = maxoff; /* new page */
325 :
326 0 : while (offnum < FirstOffsetNumber)
327 : {
328 : /*
329 : * either this page is empty (offnum ==
330 : * InvalidOffsetNumber) or we ran off the end.
331 : */
332 0 : _hash_readprev(rel, &buf, &page, &opaque);
333 0 : if (BufferIsValid(buf))
334 0 : maxoff = offnum = PageGetMaxOffsetNumber(page);
335 : else
336 : {
337 : /* end of bucket */
338 0 : maxoff = offnum = InvalidOffsetNumber;
339 0 : break; /* exit while */
340 : }
341 : }
342 : break;
343 :
344 : default:
345 : /* NoMovementScanDirection */
346 : /* this should not be reached */
347 : break;
348 : }
349 :
350 : /* we ran off the end of the world without finding a match */
351 4194 : if (offnum == InvalidOffsetNumber)
352 : {
353 21 : *bufP = so->hashso_curbuf = InvalidBuffer;
354 21 : ItemPointerSetInvalid(current);
355 21 : return false;
356 : }
357 :
358 : /* get ready to check this tuple */
359 4173 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
360 4173 : } while (!_hash_checkqual(scan, itup));
361 :
362 : /* if we made it to here, we've found a valid tuple */
363 20 : blkno = BufferGetBlockNumber(buf);
364 20 : *bufP = so->hashso_curbuf = buf;
365 20 : ItemPointerSet(current, blkno, offnum);
366 20 : return true;
367 : }
|