LTP GCOV extension - code coverage report
Current view: access/heap/syncscan.c
Test: unnamed
Date: 2008-07-03
Instrumented lines: 51
Executed lines: 16
Code covered: 31.4 %
Legend: executed lines carry a hit count in the middle column; instrumented lines that were never executed show a count of 0; lines with no count were not instrumented.

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * syncscan.c
       4                 :  *        heap scan synchronization support
       5                 :  *
       6                 :  * When multiple backends run a sequential scan on the same table, we try
       7                 :  * to keep them synchronized to reduce the overall I/O needed.  The goal is
       8                 :  * to read each page into shared buffer cache only once, and let all backends
       9                 :  * that take part in the shared scan process the page before it falls out of
      10                 :  * the cache.
      11                 :  *
      12                 :  * Since the "leader" in a pack of backends doing a seqscan will have to wait
      13                 :  * for I/O, while the "followers" don't, there is a strong self-synchronizing
      14                 :  * effect once we can get the backends examining approximately the same part
      15                 :  * of the table at the same time.  Hence all that is really needed is to get
      16                 :  * a new backend beginning a seqscan to begin it close to where other backends
      17                 :  * are reading.  We can scan the table circularly, from block X up to the
      18                 :  * end and then from block 0 to X-1, to ensure we visit all rows while still
      19                 :  * participating in the common scan.
      20                 :  *
      21                 :  * To accomplish that, we keep track of the scan position of each table, and
      22                 :  * start new scans close to where the previous scan(s) are.  We don't try to
      23                 :  * do any extra synchronization to keep the scans together afterwards; some
      24                 :  * scans might progress much more slowly than others, for example if the
      25                 :  * results need to be transferred to the client over a slow network, and we
      26                 :  * don't want such queries to slow down others.
      27                 :  *
      28                 :  * There can realistically only be a few large sequential scans on different
      29                 :  * tables in progress at any time.      Therefore we just keep the scan positions
      30                 :  * in a small LRU list which we scan every time we need to look up or update a
      31                 :  * scan position.  The whole mechanism is only applied for tables exceeding
      32                 :  * a threshold size (but that is not the concern of this module).
      33                 :  *
      34                 :  * INTERFACE ROUTINES
      35                 :  *              ss_get_location         - return current scan location of a relation
      36                 :  *              ss_report_location      - update current scan location
      37                 :  *
      38                 :  *
      39                 :  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
      40                 :  * Portions Copyright (c) 1994, Regents of the University of California
      41                 :  *
      42                 :  * IDENTIFICATION
      43                 :  *        $PostgreSQL: pgsql/src/backend/access/heap/syncscan.c,v 1.3 2007/11/15 22:25:15 momjian Exp $
      44                 :  *
      45                 :  *-------------------------------------------------------------------------
      46                 :  */
      47                 : #include "postgres.h"
      48                 : 
      49                 : #include "access/heapam.h"
      50                 : #include "miscadmin.h"
      51                 : 
      52                 : 
      53                 : /* GUC variables */
      54                 : #ifdef TRACE_SYNCSCAN
      55                 : bool            trace_syncscan = false;
      56                 : #endif
      57                 : 
      58                 : 
      59                 : /*
      60                 :  * Size of the LRU list.
      61                 :  *
      62                 :  * Note: the code assumes that SYNC_SCAN_NELEM > 1.
      63                 :  *
      64                 :  * XXX: What's a good value? It should be large enough to hold the
      65                 :  * maximum number of large tables scanned simultaneously.  But a larger value
      66                 :  * means more traversing of the LRU list when starting a new scan.
      67                 :  */
      68                 : #define SYNC_SCAN_NELEM 20
      69                 : 
      70                 : /*
      71                 :  * Interval between reports of the location of the current scan, in pages.
      72                 :  *
      73                 :  * Note: This should be smaller than the ring size (see buffer/freelist.c)
      74                 :  * we use for bulk reads.  Otherwise a scan joining other scans might start
      75                 :  * from a page that's no longer in the buffer cache.  This is a bit fuzzy;
      76                 :  * there's no guarantee that the new scan will read the page before it leaves
      77                 :  * the buffer cache anyway, and on the other hand the page is most likely
      78                 :  * still in the OS cache.
      79                 :  */
      80                 : #define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ)
      81                 : 
      82                 : 
      83                 : /*
      84                 :  * The scan locations structure is essentially a doubly-linked LRU with head
      85                 :  * and tail pointer, but designed to hold a fixed maximum number of elements in
      86                 :  * fixed-size shared memory.
      87                 :  */
      88                 : typedef struct ss_scan_location_t
      89                 : {
      90                 :         RelFileNode relfilenode;        /* identity of a relation */
      91                 :         BlockNumber location;           /* last-reported location in the relation */
      92                 : } ss_scan_location_t;
      93                 : 
      94                 : typedef struct ss_lru_item_t
      95                 : {
      96                 :         struct ss_lru_item_t *prev;
      97                 :         struct ss_lru_item_t *next;
      98                 :         ss_scan_location_t location;
      99                 : } ss_lru_item_t;
     100                 : 
     101                 : typedef struct ss_scan_locations_t
     102                 : {
     103                 :         ss_lru_item_t *head;
     104                 :         ss_lru_item_t *tail;
     105                 :         ss_lru_item_t items[1];         /* SYNC_SCAN_NELEM items */
     106                 : } ss_scan_locations_t;
     107                 : 
     108                 : #define SizeOfScanLocations(N) offsetof(ss_scan_locations_t, items[N])
     109                 : 
     110                 : /* Pointer to struct in shared memory */
     111                 : static ss_scan_locations_t *scan_locations;
     112                 : 
     113                 : /* prototypes for internal functions */
     114                 : static BlockNumber ss_search(RelFileNode relfilenode,
     115                 :                   BlockNumber location, bool set);
     116                 : 
     117                 : 
     118                 : /*
     119                 :  * SyncScanShmemSize --- report amount of shared memory space needed
     120                 :  */
     121                 : Size
     122                 : SyncScanShmemSize(void)
     123              18 : {
     124              18 :         return SizeOfScanLocations(SYNC_SCAN_NELEM);
     125                 : }
     126                 : 
     127                 : /*
     128                 :  * SyncScanShmemInit --- initialize this module's shared memory
     129                 :  */
     130                 : void
     131                 : SyncScanShmemInit(void)
     132              16 : {
     133                 :         int                     i;
     134                 :         bool            found;
     135                 : 
     136              16 :         scan_locations = (ss_scan_locations_t *)
     137                 :                 ShmemInitStruct("Sync Scan Locations List",
     138                 :                                                 SizeOfScanLocations(SYNC_SCAN_NELEM),
     139                 :                                                 &found);
     140                 : 
     141              16 :         if (!IsUnderPostmaster)
     142                 :         {
     143                 :                 /* Initialize shared memory area */
     144                 :                 Assert(!found);
     145                 : 
     146              16 :                 scan_locations->head = &scan_locations->items[0];
     147              16 :                 scan_locations->tail = &scan_locations->items[SYNC_SCAN_NELEM - 1];
     148                 : 
     149             336 :                 for (i = 0; i < SYNC_SCAN_NELEM; i++)
     150                 :                 {
     151             320 :                         ss_lru_item_t *item = &scan_locations->items[i];
     152                 : 
     153                 :                         /*
     154                 :                          * Initialize all slots with invalid values. As scans are started,
     155                 :                          * these invalid entries will fall off the LRU list and get
     156                 :                          * replaced with real entries.
     157                 :                          */
     158             320 :                         item->location.relfilenode.spcNode = InvalidOid;
     159             320 :                         item->location.relfilenode.dbNode = InvalidOid;
     160             320 :                         item->location.relfilenode.relNode = InvalidOid;
     161             320 :                         item->location.location = InvalidBlockNumber;
     162                 : 
     163             320 :                         item->prev = (i > 0) ?
     164                 :                                 (&scan_locations->items[i - 1]) : NULL;
     165             320 :                         item->next = (i < SYNC_SCAN_NELEM - 1) ?
     166                 :                                 (&scan_locations->items[i + 1]) : NULL;
     167                 :                 }
     168                 :         }
     169                 :         else
     170                 :                 Assert(found);
     171              16 : }
     172                 : 
     173                 : /*
     174                 :  * ss_search --- search the scan_locations structure for an entry with the
     175                 :  *              given relfilenode.
     176                 :  *
     177                 :  * If "set" is true, the location is updated to the given location.  If no
     178                 :  * entry for the given relfilenode is found, it will be created at the head
     179                 :  * of the list with the given location, even if "set" is false.
     180                 :  *
     181                 :  * In any case, the location after possible update is returned.
     182                 :  *
     183                 :  * Caller is responsible for having acquired suitable lock on the shared
     184                 :  * data structure.
     185                 :  */
     186                 : static BlockNumber
     187                 : ss_search(RelFileNode relfilenode, BlockNumber location, bool set)
     188               0 : {
     189                 :         ss_lru_item_t *item;
     190                 : 
     191               0 :         item = scan_locations->head;
     192                 :         for (;;)
     193                 :         {
     194                 :                 bool            match;
     195                 : 
     196               0 :                 match = RelFileNodeEquals(item->location.relfilenode, relfilenode);
     197                 : 
     198               0 :                 if (match || item->next == NULL)
     199                 :                 {
     200                 :                         /*
     201                 :                          * If we reached the end of list and no match was found, take over
     202                 :                          * the last entry
     203                 :                          */
     204               0 :                         if (!match)
     205                 :                         {
     206               0 :                                 item->location.relfilenode = relfilenode;
     207               0 :                                 item->location.location = location;
     208                 :                         }
     209               0 :                         else if (set)
     210               0 :                                 item->location.location = location;
     211                 : 
     212                 :                         /* Move the entry to the front of the LRU list */
     213               0 :                         if (item != scan_locations->head)
     214                 :                         {
     215                 :                                 /* unlink */
     216               0 :                                 if (item == scan_locations->tail)
     217               0 :                                         scan_locations->tail = item->prev;
     218               0 :                                 item->prev->next = item->next;
     219               0 :                                 if (item->next)
     220               0 :                                         item->next->prev = item->prev;
     221                 : 
     222                 :                                 /* link */
     223               0 :                                 item->prev = NULL;
     224               0 :                                 item->next = scan_locations->head;
     225               0 :                                 scan_locations->head->prev = item;
     226               0 :                                 scan_locations->head = item;
     227                 :                         }
     228                 : 
     229               0 :                         return item->location.location;
     230                 :                 }
     231                 : 
     232               0 :                 item = item->next;
     233               0 :         }
     234                 : 
     235                 :         /* not reached */
     236                 : }
     237                 : 
     238                 : /*
     239                 :  * ss_get_location --- get the optimal starting location for scan
     240                 :  *
     241                 :  * Returns the last-reported location of a sequential scan on the
     242                 :  * relation, or 0 if no valid location is found.
     243                 :  *
     244                 :  * We expect the caller has just done RelationGetNumberOfBlocks(), and
     245                 :  * so that number is passed in rather than computing it again.  The result
     246                 :  * is guaranteed less than relnblocks (assuming that's > 0).
     247                 :  */
     248                 : BlockNumber
     249                 : ss_get_location(Relation rel, BlockNumber relnblocks)
     250               0 : {
     251                 :         BlockNumber startloc;
     252                 : 
     253               0 :         LWLockAcquire(SyncScanLock, LW_EXCLUSIVE);
     254               0 :         startloc = ss_search(rel->rd_node, 0, false);
     255               0 :         LWLockRelease(SyncScanLock);
     256                 : 
     257                 :         /*
     258                 :          * If the location is not a valid block number for this scan, start at 0.
     259                 :          *
     260                 :          * This can happen if for instance a VACUUM truncated the table since the
     261                 :          * location was saved.
     262                 :          */
     263               0 :         if (startloc >= relnblocks)
     264               0 :                 startloc = 0;
     265                 : 
     266                 : #ifdef TRACE_SYNCSCAN
     267                 :         if (trace_syncscan)
     268                 :                 elog(LOG,
     269                 :                          "SYNC_SCAN: start \"%s\" (size %u) at %u",
     270                 :                          RelationGetRelationName(rel), relnblocks, startloc);
     271                 : #endif
     272                 : 
     273               0 :         return startloc;
     274                 : }
     275                 : 
     276                 : /*
     277                 :  * ss_report_location --- update the current scan location
     278                 :  *
     279                 :  * Writes an entry into the shared Sync Scan state of the form
     280                 :  * (relfilenode, blocknumber), overwriting any existing entry for the
     281                 :  * same relfilenode.
     282                 :  */
     283                 : void
     284                 : ss_report_location(Relation rel, BlockNumber location)
     285               0 : {
     286                 : #ifdef TRACE_SYNCSCAN
     287                 :         if (trace_syncscan)
     288                 :         {
     289                 :                 if ((location % 1024) == 0)
     290                 :                         elog(LOG,
     291                 :                                  "SYNC_SCAN: scanning \"%s\" at %u",
     292                 :                                  RelationGetRelationName(rel), location);
     293                 :         }
     294                 : #endif
     295                 : 
     296                 :         /*
     297                 :          * To reduce lock contention, only report scan progress every N pages. For
     298                 :          * the same reason, don't block if the lock isn't immediately available.
     299                 :          * Missing a few updates isn't critical, it just means that a new scan
     300                 :          * that wants to join the pack will start a little bit behind the head of
     301                 :          * the scan.  Hopefully the pages are still in OS cache and the scan
     302                 :          * catches up quickly.
     303                 :          */
     304               0 :         if ((location % SYNC_SCAN_REPORT_INTERVAL) == 0)
     305                 :         {
     306               0 :                 if (LWLockConditionalAcquire(SyncScanLock, LW_EXCLUSIVE))
     307                 :                 {
     308               0 :                         (void) ss_search(rel->rd_node, location, true);
     309               0 :                         LWLockRelease(SyncScanLock);
     310                 :                 }
     311                 : #ifdef TRACE_SYNCSCAN
     312                 :                 else if (trace_syncscan)
     313                 :                         elog(LOG,
     314                 :                                  "SYNC_SCAN: missed update for \"%s\" at %u",
     315                 :                                  RelationGetRelationName(rel), location);
     316                 : #endif
     317                 :         }
     318               0 : }
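
The two interface routines named in the file header, ss_get_location() and
ss_report_location(), show an execution count of 0 in this run; their callers
live in the heap-scan code (heapam.c), outside this file. As a minimal
illustrative sketch only, not the actual caller, the following shows how those
two routines could be combined with the circular-scan rule described in the
header comment. It assumes the usual backend headers, and scan_one_page() is a
hypothetical placeholder for the per-page work.

/*
 * Illustrative sketch, not part of syncscan.c or of the code under test.
 * Drives a synchronized sequential scan using the two interface routines
 * declared above.  scan_one_page() is a hypothetical helper.
 */
static void scan_one_page(Relation rel, BlockNumber blkno);     /* hypothetical */

static void
sketch_synchronized_seqscan(Relation rel)
{
    BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
    BlockNumber start;
    BlockNumber blkno;

    if (nblocks == 0)
        return;

    /* Join the pack close to where other scans are currently reading. */
    start = ss_get_location(rel, nblocks);

    /* Scan circularly: start .. nblocks-1, then 0 .. start-1. */
    blkno = start;
    do
    {
        scan_one_page(rel, blkno);

        /*
         * Report our position so that later scans can start near us.
         * ss_report_location() throttles itself to one shared-memory
         * update every SYNC_SCAN_REPORT_INTERVAL pages.
         */
        ss_report_location(rel, blkno);

        if (++blkno >= nblocks)
            blkno = 0;          /* wrap around to the start of the table */
    } while (blkno != start);
}

In the run reported above, only the shared-memory setup was exercised:
SyncScanShmemSize() and SyncScanShmemInit() account for all 16 executed lines,
while ss_search(), ss_get_location() and ss_report_location() were never
reached. That is consistent with the test workload not running a sequential
scan on a table large enough to cross the size threshold mentioned in the
header comment. For reference, with the default 8 kB BLCKSZ,
SYNC_SCAN_REPORT_INTERVAL evaluates to 128 * 1024 / 8192 = 16 pages.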

Generated by: LTP GCOV extension version 1.5