LTP GCOV extension - code coverage report
Current view: directory - access/transam - slru.c
Test: unnamed
Date: 2008-07-03 Instrumented lines: 327
Code covered: 65.1 % Executed lines: 213
Legend: not executed executed

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * slru.c
       4                 :  *              Simple LRU buffering for transaction status logfiles
       5                 :  *
       6                 :  * We use a simple least-recently-used scheme to manage a pool of page
       7                 :  * buffers.  Under ordinary circumstances we expect that write
       8                 :  * traffic will occur mostly to the latest page (and to the just-prior
       9                 :  * page, soon after a page transition).  Read traffic will probably touch
      10                 :  * a larger span of pages, but in any case a fairly small number of page
      11                 :  * buffers should be sufficient.  So, we just search the buffers using plain
      12                 :  * linear search; there's no need for a hashtable or anything fancy.
      13                 :  * The management algorithm is straight LRU except that we will never swap
      14                 :  * out the latest page (since we know it's going to be hit again eventually).
      15                 :  *
      16                 :  * We use a control LWLock to protect the shared data structures, plus
      17                 :  * per-buffer LWLocks that synchronize I/O for each buffer.  The control lock
      18                 :  * must be held to examine or modify any shared state.  A process that is
      19                 :  * reading in or writing out a page buffer does not hold the control lock,
      20                 :  * only the per-buffer lock for the buffer it is working on.
      21                 :  *
      22                 :  * "Holding the control lock" means exclusive lock in all cases except for
      23                 :  * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
      24                 :  * the implications of that.
      25                 :  *
      26                 :  * When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
      27                 :  * before releasing the control lock.  The per-buffer lock is released after
      28                 :  * completing the I/O, re-acquiring the control lock, and updating the shared
      29                 :  * state.  (Deadlock is not possible here, because we never try to initiate
      30                 :  * I/O when someone else is already doing I/O on the same buffer.)
      31                 :  * To wait for I/O to complete, release the control lock, acquire the
      32                 :  * per-buffer lock in shared mode, immediately release the per-buffer lock,
      33                 :  * reacquire the control lock, and then recheck state (since arbitrary things
      34                 :  * could have happened while we didn't have the lock).
      35                 :  *
      36                 :  * As with the regular buffer manager, it is possible for another process
      37                 :  * to re-dirty a page that is currently being written out.  This is handled
      38                 :  * by re-setting the page's page_dirty flag.
      39                 :  *
      40                 :  *
      41                 :  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
      42                 :  * Portions Copyright (c) 1994, Regents of the University of California
      43                 :  *
      44                 :  * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.43 2007/11/16 01:51:22 momjian Exp $
      45                 :  *
      46                 :  *-------------------------------------------------------------------------
      47                 :  */
      48                 : #include "postgres.h"
      49                 : 
      50                 : #include <fcntl.h>
      51                 : #include <sys/stat.h>
      52                 : #include <unistd.h>
      53                 : 
      54                 : #include "access/slru.h"
      55                 : #include "access/transam.h"
      56                 : #include "access/xlog.h"
      57                 : #include "storage/fd.h"
      58                 : #include "storage/shmem.h"
      59                 : #include "miscadmin.h"
      60                 : 
      61                 : 
      62                 : /*
      63                 :  * Define segment size.  A page is the same BLCKSZ as is used everywhere
      64                 :  * else in Postgres.  The segment size can be chosen somewhat arbitrarily;
      65                 :  * we make it 32 pages by default, i.e. 256kB with 8kB pages, which covers
      66                 :  * 1M transactions for CLOG or 64K transactions for SUBTRANS.
      67                 :  *
      68                 :  * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
      69                 :  * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where
      70                 :  * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at
      71                 :  * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  We need
      72                 :  * take no explicit notice of that fact in this module, except when comparing
      73                 :  * segment and page numbers in SimpleLruTruncate (see PagePrecedes()).
      74                 :  *
      75                 :  * Note: this file currently assumes that segment file names will be four
      76                 :  * hex digits, i.e. at most 64K segments.  This sets a lower bound on the
      77                 :  * segment size (64K transactions for 32-bit TransactionIds).
      78                 :  */
      79                 : #define SLRU_PAGES_PER_SEGMENT  32
      80                 : 
      81                 : #define SlruFileName(ctl, path, seg) \
      82                 :         snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
      83                 : 
      84                 : /*
      85                 :  * During SimpleLruFlush(), we will usually not need to write/fsync more
      86                 :  * than one or two physical files, but we may need to write several pages
      87                 :  * per file.  We can consolidate the I/O requests by leaving files open
      88                 :  * until control returns to SimpleLruFlush().  This data structure remembers
      89                 :  * which files are open.
      90                 :  */
      91                 : #define MAX_FLUSH_BUFFERS       16
      92                 : 
      93                 : typedef struct SlruFlushData
      94                 : {
      95                 :         int                     num_files;              /* # files actually open */
      96                 :         int                     fd[MAX_FLUSH_BUFFERS];  /* their FD's */
      97                 :         int                     segno[MAX_FLUSH_BUFFERS];               /* their log seg#s */
      98                 : } SlruFlushData;
      99                 : 
     100                 : /*
     101                 :  * Macro to mark a buffer slot "most recently used".  Note multiple evaluation
     102                 :  * of arguments!
     103                 :  *
     104                 :  * The reason for the if-test is that there are often many consecutive
     105                 :  * accesses to the same page (particularly the latest page).  By suppressing
     106                 :  * useless increments of cur_lru_count, we reduce the probability that old
     107                 :  * pages' counts will "wrap around" and make them appear recently used.
     108                 :  *
     109                 :  * We allow this code to be executed concurrently by multiple processes within
     110                 :  * SimpleLruReadPage_ReadOnly().  As long as int reads and writes are atomic,
     111                 :  * this should not cause any completely-bogus values to enter the computation.
     112                 :  * However, it is possible for either cur_lru_count or individual
     113                 :  * page_lru_count entries to be "reset" to lower values than they should have,
     114                 :  * in case a process is delayed while it executes this macro.  With care in
     115                 :  * SlruSelectLRUPage(), this does little harm, and in any case the absolute
     116                 :  * worst possible consequence is a nonoptimal choice of page to evict.  The
     117                 :  * gain from allowing concurrent reads of SLRU pages seems worth it.
     118                 :  */
     119                 : #define SlruRecentlyUsed(shared, slotno)        \
     120                 :         do { \
     121                 :                 int             new_lru_count = (shared)->cur_lru_count; \
     122                 :                 if (new_lru_count != (shared)->page_lru_count[slotno]) { \
     123                 :                         (shared)->cur_lru_count = ++new_lru_count; \
     124                 :                         (shared)->page_lru_count[slotno] = new_lru_count; \
     125                 :                 } \
     126                 :         } while (0)
     127                 : 
     128                 : /* Saved info for SlruReportIOError */
     129                 : typedef enum
     130                 : {
     131                 :         SLRU_OPEN_FAILED,
     132                 :         SLRU_SEEK_FAILED,
     133                 :         SLRU_READ_FAILED,
     134                 :         SLRU_WRITE_FAILED,
     135                 :         SLRU_FSYNC_FAILED,
     136                 :         SLRU_CLOSE_FAILED
     137                 : } SlruErrorCause;
     138                 : 
     139                 : static SlruErrorCause slru_errcause;
     140                 : static int      slru_errno;
     141                 : 
     142                 : 
     143                 : static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
     144                 : static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
     145                 : static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno);
     146                 : static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
     147                 :                                           SlruFlush fdata);
     148                 : static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
     149                 : static int      SlruSelectLRUPage(SlruCtl ctl, int pageno);
     150                 : 
     151                 : 
     152                 : /*
     153                 :  * Initialization of shared memory
     154                 :  */
     155                 : 
     156                 : Size
     157                 : SimpleLruShmemSize(int nslots, int nlsns)
     158             136 : {
     159                 :         Size            sz;
     160                 : 
     161                 :         /* we assume nslots isn't so large as to risk overflow */
     162             136 :         sz = MAXALIGN(sizeof(SlruSharedData));
     163             136 :         sz += MAXALIGN(nslots * sizeof(char *));        /* page_buffer[] */
     164             136 :         sz += MAXALIGN(nslots * sizeof(SlruPageStatus));        /* page_status[] */
     165             136 :         sz += MAXALIGN(nslots * sizeof(bool));          /* page_dirty[] */
     166             136 :         sz += MAXALIGN(nslots * sizeof(int));           /* page_number[] */
     167             136 :         sz += MAXALIGN(nslots * sizeof(int));           /* page_lru_count[] */
     168             136 :         sz += MAXALIGN(nslots * sizeof(LWLockId));      /* buffer_locks[] */
     169                 : 
     170             136 :         if (nlsns > 0)
     171              34 :                 sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));    /* group_lsn[] */
     172                 : 
     173             136 :         return BUFFERALIGN(sz) + BLCKSZ * nslots;
     174                 : }
     175                 : 
     176                 : void
     177                 : SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
     178                 :                           LWLockId ctllock, const char *subdir)
     179              64 : {
     180                 :         SlruShared      shared;
     181                 :         bool            found;
     182                 : 
     183              64 :         shared = (SlruShared) ShmemInitStruct(name,
     184                 :                                                                                   SimpleLruShmemSize(nslots, nlsns),
     185                 :                                                                                   &found);
     186                 : 
     187              64 :         if (!IsUnderPostmaster)
     188                 :         {
     189                 :                 /* Initialize locks and shared memory area */
     190                 :                 char       *ptr;
     191                 :                 Size            offset;
     192                 :                 int                     slotno;
     193                 : 
     194                 :                 Assert(!found);
     195                 : 
     196              64 :                 memset(shared, 0, sizeof(SlruSharedData));
     197                 : 
     198              64 :                 shared->ControlLock = ctllock;
     199                 : 
     200              64 :                 shared->num_slots = nslots;
     201              64 :                 shared->lsn_groups_per_page = nlsns;
     202                 : 
     203              64 :                 shared->cur_lru_count = 0;
     204                 : 
     205                 :                 /* shared->latest_page_number will be set later */
     206                 : 
     207              64 :                 ptr = (char *) shared;
     208              64 :                 offset = MAXALIGN(sizeof(SlruSharedData));
     209              64 :                 shared->page_buffer = (char **) (ptr + offset);
     210              64 :                 offset += MAXALIGN(nslots * sizeof(char *));
     211              64 :                 shared->page_status = (SlruPageStatus *) (ptr + offset);
     212              64 :                 offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
     213              64 :                 shared->page_dirty = (bool *) (ptr + offset);
     214              64 :                 offset += MAXALIGN(nslots * sizeof(bool));
     215              64 :                 shared->page_number = (int *) (ptr + offset);
     216              64 :                 offset += MAXALIGN(nslots * sizeof(int));
     217              64 :                 shared->page_lru_count = (int *) (ptr + offset);
     218              64 :                 offset += MAXALIGN(nslots * sizeof(int));
     219              64 :                 shared->buffer_locks = (LWLockId *) (ptr + offset);
     220              64 :                 offset += MAXALIGN(nslots * sizeof(LWLockId));
     221                 : 
     222              64 :                 if (nlsns > 0)
     223                 :                 {
     224              16 :                         shared->group_lsn = (XLogRecPtr *) (ptr + offset);
     225              16 :                         offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
     226                 :                 }
     227                 : 
     228              64 :                 ptr += BUFFERALIGN(offset);
     229            1088 :                 for (slotno = 0; slotno < nslots; slotno++)
     230                 :                 {
     231            1024 :                         shared->page_buffer[slotno] = ptr;
     232            1024 :                         shared->page_status[slotno] = SLRU_PAGE_EMPTY;
     233            1024 :                         shared->page_dirty[slotno] = false;
     234            1024 :                         shared->page_lru_count[slotno] = 0;
     235            1024 :                         shared->buffer_locks[slotno] = LWLockAssign();
     236            1024 :                         ptr += BLCKSZ;
     237                 :                 }
     238                 :         }
     239                 :         else
     240                 :                 Assert(found);
     241                 : 
     242                 :         /*
     243                 :          * Initialize the unshared control struct, including directory path. We
     244                 :          * assume caller set PagePrecedes.
     245                 :          */
     246              64 :         ctl->shared = shared;
     247              64 :         ctl->do_fsync = true;                /* default behavior */
     248              64 :         StrNCpy(ctl->Dir, subdir, sizeof(ctl->Dir));
     249              64 : }
     250                 : 
     251                 : /*
     252                 :  * Initialize (or reinitialize) a page to zeroes.
     253                 :  *
     254                 :  * The page is not actually written, just set up in shared memory.
     255                 :  * The slot number of the new page is returned.
     256                 :  *
     257                 :  * Control lock must be held at entry, and will be held at exit.
     258                 :  */
     259                 : int
     260                 : SimpleLruZeroPage(SlruCtl ctl, int pageno)
     261              22 : {
     262              22 :         SlruShared      shared = ctl->shared;
     263                 :         int                     slotno;
     264                 : 
     265                 :         /* Find a suitable buffer slot for the page */
     266              22 :         slotno = SlruSelectLRUPage(ctl, pageno);
     267                 :         Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
     268                 :                    (shared->page_status[slotno] == SLRU_PAGE_VALID &&
     269                 :                         !shared->page_dirty[slotno]) ||
     270                 :                    shared->page_number[slotno] == pageno);
     271                 : 
     272                 :         /* Mark the slot as containing this page */
     273              22 :         shared->page_number[slotno] = pageno;
     274              22 :         shared->page_status[slotno] = SLRU_PAGE_VALID;
     275              22 :         shared->page_dirty[slotno] = true;
     276              22 :         SlruRecentlyUsed(shared, slotno);
     277                 : 
     278                 :         /* Set the buffer to zeroes */
     279              22 :         MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
     280                 : 
     281                 :         /* Set the LSNs for this new page to zero */
     282              22 :         SimpleLruZeroLSNs(ctl, slotno);
     283                 : 
     284                 :         /* Assume this page is now the latest active page */
     285              22 :         shared->latest_page_number = pageno;
     286                 : 
     287              22 :         return slotno;
     288                 : }
     289                 : 
     290                 : /*
     291                 :  * Zero all the LSNs we store for this slru page.
     292                 :  *
     293                 :  * This should be called each time we create a new page, and each time we read
     294                 :  * in a page from disk into an existing buffer.  (Such an old page cannot
     295                 :  * have any interesting LSNs, since we'd have flushed them before writing
     296                 :  * the page in the first place.)
     297                 :  */
     298                 : static void
     299                 : SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
     300              48 : {
     301              48 :         SlruShared      shared = ctl->shared;
     302                 : 
     303              48 :         if (shared->lsn_groups_per_page > 0)
     304              15 :                 MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
     305                 :                            shared->lsn_groups_per_page * sizeof(XLogRecPtr));
     306              48 : }
     307                 : 
     308                 : /*
     309                 :  * Wait for any active I/O on a page slot to finish.  (This does not
     310                 :  * guarantee that new I/O hasn't been started before we return, though.
     311                 :  * In fact the slot might not even contain the same page anymore.)
     312                 :  *
     313                 :  * Control lock must be held at entry, and will be held at exit.
     314                 :  */
     315                 : static void
     316                 : SimpleLruWaitIO(SlruCtl ctl, int slotno)
     317               0 : {
     318               0 :         SlruShared      shared = ctl->shared;
     319                 : 
     320                 :         /* See notes at top of file */
     321               0 :         LWLockRelease(shared->ControlLock);
     322               0 :         LWLockAcquire(shared->buffer_locks[slotno], LW_SHARED);
     323               0 :         LWLockRelease(shared->buffer_locks[slotno]);
     324               0 :         LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
     325                 : 
     326                 :         /*
     327                 :          * If the slot is still in an io-in-progress state, then either someone
     328                 :          * already started a new I/O on the slot, or a previous I/O failed and
     329                 :          * neglected to reset the page state.  That shouldn't happen, really, but
     330                 :          * it seems worth a few extra cycles to check and recover from it. We can
     331                 :          * cheaply test for failure by seeing if the buffer lock is still held (we
     332                 :          * assume that transaction abort would release the lock).
     333                 :          */
     334               0 :         if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
     335                 :                 shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
     336                 :         {
     337               0 :                 if (LWLockConditionalAcquire(shared->buffer_locks[slotno], LW_SHARED))
     338                 :                 {
     339                 :                         /* indeed, the I/O must have failed */
     340               0 :                         if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
     341               0 :                                 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
     342                 :                         else    /* write_in_progress */
     343                 :                         {
     344               0 :                                 shared->page_status[slotno] = SLRU_PAGE_VALID;
     345               0 :                                 shared->page_dirty[slotno] = true;
     346                 :                         }
     347               0 :                         LWLockRelease(shared->buffer_locks[slotno]);
     348                 :                 }
     349                 :         }
     350               0 : }
     351                 : 
     352                 : /*
     353                 :  * Find a page in a shared buffer, reading it in if necessary.
     354                 :  * The page number must correspond to an already-initialized page.
     355                 :  *
     356                 :  * If write_ok is true then it is OK to return a page that is in
     357                 :  * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
     358                 :  * that modification of the page is safe.  If write_ok is false then we
     359                 :  * will not return the page until it is not undergoing active I/O.
     360                 :  *
     361                 :  * The passed-in xid is used only for error reporting, and may be
     362                 :  * InvalidTransactionId if no specific xid is associated with the action.
     363                 :  *
     364                 :  * Return value is the shared-buffer slot number now holding the page.
     365                 :  * The buffer's LRU access info is updated.
     366                 :  *
     367                 :  * Control lock must be held at entry, and will be held at exit.
     368                 :  */
     369                 : int
     370                 : SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
     371                 :                                   TransactionId xid)
     372            8766 : {
     373            8766 :         SlruShared      shared = ctl->shared;
     374                 : 
     375                 :         /* Outer loop handles restart if we must wait for someone else's I/O */
     376                 :         for (;;)
     377                 :         {
     378                 :                 int                     slotno;
     379                 :                 bool            ok;
     380                 : 
     381                 :                 /* See if page already is in memory; if not, pick victim slot */
     382            8766 :                 slotno = SlruSelectLRUPage(ctl, pageno);
     383                 : 
     384                 :                 /* Did we find the page in memory? */
     385            8766 :                 if (shared->page_number[slotno] == pageno &&
     386                 :                         shared->page_status[slotno] != SLRU_PAGE_EMPTY)
     387                 :                 {
     388                 :                         /*
     389                 :                          * If page is still being read in, we must wait for I/O.  Likewise
     390                 :                          * if the page is being written and the caller said that's not OK.
     391                 :                          */
     392            8740 :                         if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
     393                 :                                 (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
     394                 :                                  !write_ok))
     395                 :                         {
     396               0 :                                 SimpleLruWaitIO(ctl, slotno);
     397                 :                                 /* Now we must recheck state from the top */
     398               0 :                                 continue;
     399                 :                         }
     400                 :                         /* Otherwise, it's ready to use */
     401            8740 :                         SlruRecentlyUsed(shared, slotno);
     402            8740 :                         return slotno;
     403                 :                 }
     404                 : 
     405                 :                 /* We found no match; assert we selected a freeable slot */
     406                 :                 Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
     407                 :                            (shared->page_status[slotno] == SLRU_PAGE_VALID &&
     408                 :                                 !shared->page_dirty[slotno]));
     409                 : 
     410                 :                 /* Mark the slot read-busy */
     411              26 :                 shared->page_number[slotno] = pageno;
     412              26 :                 shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
     413              26 :                 shared->page_dirty[slotno] = false;
     414                 : 
     415                 :                 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
     416              26 :                 LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE);
     417                 : 
     418                 :                 /*
     419                 :                  * Temporarily mark page as recently-used to discourage
     420                 :                  * SlruSelectLRUPage from selecting it again for someone else.
     421                 :                  */
     422              26 :                 SlruRecentlyUsed(shared, slotno);
     423                 : 
     424                 :                 /* Release control lock while doing I/O */
     425              26 :                 LWLockRelease(shared->ControlLock);
     426                 : 
     427                 :                 /* Do the read */
     428              26 :                 ok = SlruPhysicalReadPage(ctl, pageno, slotno);
     429                 : 
     430                 :                 /* Set the LSNs for this newly read-in page to zero */
     431              26 :                 SimpleLruZeroLSNs(ctl, slotno);
     432                 : 
     433                 :                 /* Re-acquire control lock and update page state */
     434              26 :                 LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
     435                 : 
     436                 :                 Assert(shared->page_number[slotno] == pageno &&
     437                 :                            shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
     438                 :                            !shared->page_dirty[slotno]);
     439                 : 
     440              26 :                 shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
     441                 : 
     442              26 :                 LWLockRelease(shared->buffer_locks[slotno]);
     443                 : 
     444                 :                 /* Now it's okay to ereport if we failed */
     445              26 :                 if (!ok)
     446               0 :                         SlruReportIOError(ctl, pageno, xid);
     447                 : 
     448              26 :                 SlruRecentlyUsed(shared, slotno);
     449              26 :                 return slotno;
     450                 :         }
     451                 : }
     452                 : 
     453                 : /*
     454                 :  * Find a page in a shared buffer, reading it in if necessary.
     455                 :  * The page number must correspond to an already-initialized page.
     456                 :  * The caller must intend only read-only access to the page.
     457                 :  *
     458                 :  * The passed-in xid is used only for error reporting, and may be
     459                 :  * InvalidTransactionId if no specific xid is associated with the action.
     460                 :  *
     461                 :  * Return value is the shared-buffer slot number now holding the page.
     462                 :  * The buffer's LRU access info is updated.
     463                 :  *
     464                 :  * Control lock must NOT be held at entry, but will be held at exit.
     465                 :  * It is unspecified whether the lock will be shared or exclusive.
                         :  *
                         :  * Fast path: under a shared control lock, scan all slots for one that
                         :  * holds pageno and is neither EMPTY nor READ_IN_PROGRESS; if found,
                         :  * return it with the shared lock still held.  Slow path: drop the
                         :  * shared lock, take the control lock exclusively, and delegate to
                         :  * SimpleLruReadPage().  The lock is not held between the release and
                         :  * the re-acquire, and nothing from the fast-path scan is passed on.
     466                 :  */
     467                 : int
     468                 : SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
     469           10306 : {
     470           10306 :         SlruShared      shared = ctl->shared;
     471                 :         int                     slotno;
     472                 : 
     473                 :         /* Try to find the page while holding only shared lock */
     474           10306 :         LWLockAcquire(shared->ControlLock, LW_SHARED);
     475                 : 
     476                 :         /* See if page is already in a buffer (not EMPTY, not being read in) */
     477           10306 :         for (slotno = 0; slotno < shared->num_slots; slotno++)
     478                 :         {
     479           10306 :                 if (shared->page_number[slotno] == pageno &&
     480                 :                         shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
     481                 :                         shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
     482                 :                 {
     483                 :                         /* See comments for SlruRecentlyUsed macro */
     484           10306 :                         SlruRecentlyUsed(shared, slotno);
     485           10306 :                         return slotno;  /* shared lock is still held here */
     486                 :                 }
     487                 :         }
     488                 : 
     489                 :         /* No luck, so switch to normal exclusive lock and do regular read */
     490               0 :         LWLockRelease(shared->ControlLock);
     491               0 :         LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
     492                 : 
     493               0 :         return SimpleLruReadPage(ctl, pageno, true, xid);       /* NOTE(review): meaning of the "true" argument is defined by SimpleLruReadPage, not visible here */
     494                 : }
     495                 : 
     496                 : /*
     497                 :  * Write a page from a shared buffer, if necessary.
     498                 :  * Does nothing if the specified slot is not dirty.
     499                 :  *
     500                 :  * NOTE: only one write attempt is made here.  Hence, it is possible that
     501                 :  * the page is still dirty at exit (if someone else re-dirtied it during
     502                 :  * the write).  However, we *do* attempt a fresh write even if the page
     503                 :  * is already being written; this is for checkpoints.
     504                 :  *
     505                 :  * Control lock must be held at entry, and will be held at exit.
                         :  *
                         :  * ctl    - SLRU control structure; ctl->shared holds the slot arrays.
                         :  * slotno - buffer slot to write; the page number it holds at entry
                         :  *          identifies the target page.
                         :  * fdata  - passed through to SlruPhysicalWritePage; when non-NULL and
                         :  *          the write fails, every fd recorded in it is closed here.
                         :  *
                         :  * On write failure the page is re-marked dirty, the slot goes back to
                         :  * VALID, the buffer lock is released, and only then is the error raised
                         :  * through SlruReportIOError (with InvalidTransactionId as the xid).
     506                 :  */
     507                 : void
     508                 : SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
     509            2052 : {
     510            2052 :         SlruShared      shared = ctl->shared;
     511            2052 :         int                     pageno = shared->page_number[slotno];   /* remembered so later checks can detect the slot changing pages */
     512                 :         bool            ok;
     513                 : 
     514                 :         /* If a write is in progress, wait for it to finish */
     515            4104 :         while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
     516                 :                    shared->page_number[slotno] == pageno)
     517                 :         {
     518               0 :                 SimpleLruWaitIO(ctl, slotno);
     519                 :         }
     520                 : 
     521                 :         /*
     522                 :          * Do nothing if page is not dirty, or if buffer no longer contains the
     523                 :          * same page we were called for.
     524                 :          */
     525            2052 :         if (!shared->page_dirty[slotno] ||
     526                 :                 shared->page_status[slotno] != SLRU_PAGE_VALID ||
     527                 :                 shared->page_number[slotno] != pageno)
     528                 :                 return;
     529                 : 
     530                 :         /*
     531                 :          * Mark the slot write-busy, and clear the dirtybit.  After this point, a
     532                 :          * transaction status update on this page will mark it dirty again.
     533                 :          */
     534              54 :         shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
     535              54 :         shared->page_dirty[slotno] = false;
     536                 : 
     537                 :         /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
     538              54 :         LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE);
     539                 : 
     540                 :         /* Release control lock while doing I/O */
     541              54 :         LWLockRelease(shared->ControlLock);
     542                 : 
     543                 :         /* Do the write */
     544              54 :         ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
     545                 : 
     546                 :         /* If we failed, and we're in a flush, better close the files */
     547              54 :         if (!ok && fdata)
     548                 :         {
     549                 :                 int                     i;
     550                 : 
     551               0 :                 for (i = 0; i < fdata->num_files; i++)
     552               0 :                         close(fdata->fd[i]);    /* NOTE(review): close() result is ignored and fdata->num_files is not reset */
     553                 :         }
     554                 : 
     555                 :         /* Re-acquire control lock and update page state */
     556              54 :         LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
     557                 : 
     558                 :         Assert(shared->page_number[slotno] == pageno &&
     559                 :                    shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
     560                 : 
     561                 :         /* If we failed to write, mark the page dirty again */
     562              54 :         if (!ok)
     563               0 :                 shared->page_dirty[slotno] = true;
     564                 : 
     565              54 :         shared->page_status[slotno] = SLRU_PAGE_VALID;
     566                 : 
     567              54 :         LWLockRelease(shared->buffer_locks[slotno]);
     568                 : 
     569                 :         /* Now it's okay to ereport if we failed (shared state is consistent again) */
     570              54 :         if (!ok)
     571               0 :                 SlruReportIOError(ctl, pageno, InvalidTransactionId);
     572                 : }
     573                 : 
     574                 : /*
     575                 :  * Physical read of a (previously existing) page into a buffer slot
     576                 :  *
     577                 :  * On failure, we cannot just ereport(ERROR) since caller has put state in
     578                 :  * shared memory that must be undone.  So, we return FALSE and save enough
     579                 :  * info in static variables to let SlruReportIOError make the report.
     580                 :  *
     581                 :  * For now, assume it's not worth keeping a file pointer open across
     582                 :  * read/write operations.  We could cache one virtual file pointer ...
                         :  *
                         :  * pageno selects the segment file (pageno / SLRU_PAGES_PER_SEGMENT) and
                         :  * the byte offset in it ((pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ);
                         :  * slotno selects the destination buffer, shared->page_buffer[slotno].
                         :  *
                         :  * Returns true on success, including the InRecovery case where the file
                         :  * does not exist and the buffer is zero-filled instead.  Returns false
                         :  * after setting slru_errcause and slru_errno when the open, seek, read
                         :  * or close step fails; the fd is closed on the seek and read failures.
     583                 :  */
     584                 : static bool
     585                 : SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
     586              26 : {
     587              26 :         SlruShared      shared = ctl->shared;
     588              26 :         int                     segno = pageno / SLRU_PAGES_PER_SEGMENT;
     589              26 :         int                     rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
     590              26 :         int                     offset = rpageno * BLCKSZ;
     591                 :         char            path[MAXPGPATH];
     592                 :         int                     fd;
     593                 : 
     594              26 :         SlruFileName(ctl, path, segno);
     595                 : 
     596                 :         /*
     597                 :          * In a crash-and-restart situation, it's possible for us to receive
     598                 :          * commands to set the commit status of transactions whose bits are in
     599                 :          * already-truncated segments of the commit log (see notes in
     600                 :          * SlruPhysicalWritePage).  Hence, if we are InRecovery, allow the case
     601                 :          * where the file doesn't exist, and return zeroes instead.
     602                 :          */
     603              26 :         fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
     604              26 :         if (fd < 0)
     605                 :         {
     606               0 :                 if (errno != ENOENT || !InRecovery)
     607                 :                 {
     608               0 :                         slru_errcause = SLRU_OPEN_FAILED;
     609               0 :                         slru_errno = errno;
     610               0 :                         return false;
     611                 :                 }
     612                 : 
     613               0 :                 ereport(LOG,
     614                 :                                 (errmsg("file \"%s\" doesn't exist, reading as zeroes",
     615                 :                                                 path)));
     616               0 :                 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
     617               0 :                 return true;
     618                 :         }
     619                 : 
     620              26 :         if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
     621                 :         {
     622               0 :                 slru_errcause = SLRU_SEEK_FAILED;
     623               0 :                 slru_errno = errno;
     624               0 :                 close(fd);
     625               0 :                 return false;
     626                 :         }
     627                 : 
     628              26 :         errno = 0;      /* NOTE(review): a short read that sets no errno is therefore saved as slru_errno == 0 */
     629              26 :         if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
     630                 :         {
     631               0 :                 slru_errcause = SLRU_READ_FAILED;
     632               0 :                 slru_errno = errno;
     633               0 :                 close(fd);
     634               0 :                 return false;
     635                 :         }
     636                 : 
     637              26 :         if (close(fd))
     638                 :         {
     639               0 :                 slru_errcause = SLRU_CLOSE_FAILED;
     640               0 :                 slru_errno = errno;
     641               0 :                 return false;
     642                 :         }
     643                 : 
     644              26 :         return true;
     645                 : }
     646                 : 
     647                 : /*
     648                 :  * Physical write of a page from a buffer slot
     649                 :  *
     650                 :  * On failure, we cannot just ereport(ERROR) since caller has put state in
     651                 :  * shared memory that must be undone.  So, we return FALSE and save enough
     652                 :  * info in static variables to let SlruReportIOError make the report.
     653                 :  *
     654                 :  * For now, assume it's not worth keeping a file pointer open across
     655                 :  * independent read/write operations.  We do batch operations during
     656                 :  * SimpleLruFlush, though.
     657                 :  *
     658                 :  * fdata is NULL for a standalone write, pointer to open-file info during
     659                 :  * SimpleLruFlush.
                         :  *
                         :  * Steps: (1) if shared->group_lsn is set, flush WAL up to the largest
                         :  * LSN recorded for this slot; (2) reuse an fd from fdata for the target
                         :  * segment, or open (creating if needed) the segment file; (3) seek and
                         :  * write BLCKSZ bytes from shared->page_buffer[slotno]; (4) for a
                         :  * standalone write, fsync (only if ctl->do_fsync) and close the file.
                         :  *
                         :  * Returns true on success; on failure sets slru_errcause and slru_errno
                         :  * and returns false.  An fd that belongs to fdata is left open on
                         :  * failure; one opened for a standalone write is closed before returning.
     660                 :  */
     661                 : static bool
     662                 : SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
     663              54 : {
     664              54 :         SlruShared      shared = ctl->shared;
     665              54 :         int                     segno = pageno / SLRU_PAGES_PER_SEGMENT;
     666              54 :         int                     rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
     667              54 :         int                     offset = rpageno * BLCKSZ;
     668                 :         char            path[MAXPGPATH];
     669              54 :         int                     fd = -1;        /* -1 means "no reusable fd found yet" */
     670                 : 
     671                 :         /*
     672                 :          * Honor the write-WAL-before-data rule, if appropriate, so that we do not
     673                 :          * write out data before associated WAL records.  This is the same action
     674                 :          * performed during FlushBuffer() in the main buffer manager.
     675                 :          */
     676              54 :         if (shared->group_lsn != NULL)
     677                 :         {
     678                 :                 /*
     679                 :                  * We must determine the largest async-commit LSN for the page. This
     680                 :                  * is a bit tedious, but since this entire function is a slow path
     681                 :                  * anyway, it seems better to do this here than to maintain a per-page
     682                 :                  * LSN variable (which'd need an extra comparison in the
     683                 :                  * transaction-commit path).
     684                 :                  */
     685                 :                 XLogRecPtr      max_lsn;
     686                 :                 int                     lsnindex,
     687                 :                                         lsnoff;
     688                 : 
     689              20 :                 lsnindex = slotno * shared->lsn_groups_per_page;        /* index of this slot's first LSN group */
     690              20 :                 max_lsn = shared->group_lsn[lsnindex++];
     691           20480 :                 for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
     692                 :                 {
     693           20460 :                         XLogRecPtr      this_lsn = shared->group_lsn[lsnindex++];
     694                 : 
     695           20460 :                         if (XLByteLT(max_lsn, this_lsn))
     696               0 :                                 max_lsn = this_lsn;
     697                 :                 }
     698                 : 
     699              20 :                 if (!XLogRecPtrIsInvalid(max_lsn))
     700                 :                 {
     701                 :                         /*
     702                 :                          * As noted above, elog(ERROR) is not acceptable here, so if
     703                 :                          * XLogFlush were to fail, we must PANIC.  This isn't much of a
     704                 :                          * restriction because XLogFlush is just about all critical
     705                 :                          * section anyway, but let's make sure.
     706                 :                          */
     707               0 :                         START_CRIT_SECTION();
     708               0 :                         XLogFlush(max_lsn);
     709               0 :                         END_CRIT_SECTION();
     710                 :                 }
     711                 :         }
     712                 : 
     713                 :         /*
     714                 :          * During a Flush, we may already have the desired file open.
     715                 :          */
     716              54 :         if (fdata)
     717                 :         {
     718                 :                 int                     i;
     719                 : 
     720              50 :                 for (i = 0; i < fdata->num_files; i++)
     721                 :                 {
     722               2 :                         if (fdata->segno[i] == segno)
     723                 :                         {
     724               2 :                                 fd = fdata->fd[i];
     725               2 :                                 break;
     726                 :                         }
     727                 :                 }
     728                 :         }
     729                 : 
     730              54 :         if (fd < 0)
     731                 :         {
     732                 :                 /*
     733                 :                  * If the file doesn't already exist, we should create it.  It is
     734                 :                  * possible for this to need to happen when writing a page that's not
     735                 :                  * first in its segment; we assume the OS can cope with that. (Note:
     736                 :                  * it might seem that it'd be okay to create files only when
     737                 :                  * SimpleLruZeroPage is called for the first page of a segment.
     738                 :                  * However, if after a crash and restart the REDO logic elects to
     739                 :                  * replay the log from a checkpoint before the latest one, then it's
     740                 :                  * possible that we will get commands to set transaction status of
     741                 :                  * transactions that have already been truncated from the commit log.
     742                 :                  * Easiest way to deal with that is to accept references to
     743                 :                  * nonexistent files here and in SlruPhysicalReadPage.)
     744                 :                  *
     745                 :                  * Note: it is possible for more than one backend to be executing this
     746                 :                  * code simultaneously for different pages of the same file. Hence,
     747                 :                  * don't use O_EXCL or O_TRUNC or anything like that.
     748                 :                  */
     749              52 :                 SlruFileName(ctl, path, segno);
     750              52 :                 fd = BasicOpenFile(path, O_RDWR | O_CREAT | PG_BINARY,
     751                 :                                                    S_IRUSR | S_IWUSR);
     752              52 :                 if (fd < 0)
     753                 :                 {
     754               0 :                         slru_errcause = SLRU_OPEN_FAILED;
     755               0 :                         slru_errno = errno;
     756               0 :                         return false;
     757                 :                 }
     758                 : 
     759              52 :                 if (fdata)
     760                 :                 {
     761              48 :                         if (fdata->num_files < MAX_FLUSH_BUFFERS)
     762                 :                         {
     763              48 :                                 fdata->fd[fdata->num_files] = fd;
     764              48 :                                 fdata->segno[fdata->num_files] = segno;
     765              48 :                                 fdata->num_files++;
     766                 :                         }
     767                 :                         else
     768                 :                         {
     769                 :                                 /*
     770                 :                                  * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
     771                 :                                  * fall back to treating it as a standalone write.
     772                 :                                  */
     773               0 :                                 fdata = NULL;   /* clears only this function's copy; the !fdata paths below then own and close fd */
     774                 :                         }
     775                 :                 }
     776                 :         }
     777                 : 
     778              54 :         if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
     779                 :         {
     780               0 :                 slru_errcause = SLRU_SEEK_FAILED;
     781               0 :                 slru_errno = errno;
     782               0 :                 if (!fdata)
     783               0 :                         close(fd);
     784               0 :                 return false;
     785                 :         }
     786                 : 
     787              54 :         errno = 0;
     788              54 :         if (write(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
     789                 :         {
     790                 :                 /* if write didn't set errno, assume problem is no disk space */
     791               0 :                 if (errno == 0)
     792               0 :                         errno = ENOSPC;
     793               0 :                 slru_errcause = SLRU_WRITE_FAILED;
     794               0 :                 slru_errno = errno;
     795               0 :                 if (!fdata)
     796               0 :                         close(fd);
     797               0 :                 return false;
     798                 :         }
     799                 : 
     800                 :         /*
     801                 :          * If not part of Flush, need to fsync now.  We assume this happens
     802                 :          * infrequently enough that it's not a performance issue.
     803                 :          */
     804              54 :         if (!fdata)
     805                 :         {
     806               4 :                 if (ctl->do_fsync && pg_fsync(fd))
     807                 :                 {
     808               0 :                         slru_errcause = SLRU_FSYNC_FAILED;
     809               0 :                         slru_errno = errno;
     810               0 :                         close(fd);
     811               0 :                         return false;
     812                 :                 }
     813                 : 
     814               4 :                 if (close(fd))
     815                 :                 {
     816               0 :                         slru_errcause = SLRU_CLOSE_FAILED;
     817               0 :                         slru_errno = errno;
     818               0 :                         return false;
     819                 :                 }
     820                 :         }
     821                 : 
     822              54 :         return true;
     823                 : }
     824                 : 
     825                 : /*
     826                 :  * Issue the error message after failure of SlruPhysicalReadPage or
     827                 :  * SlruPhysicalWritePage.  Call this after cleaning up shared-memory state.
                         :  *
                         :  * pageno is used to rebuild the segment path and byte offset shown in
                         :  * the message; xid appears only in the message text.  The failure cause
                         :  * and errno are taken from slru_errcause and slru_errno.  Every
                         :  * recognized cause, and the default case, is reported at ERROR level.
     828                 :  */
     829                 : static void
     830                 : SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
     831               0 : {
     832               0 :         int                     segno = pageno / SLRU_PAGES_PER_SEGMENT;
     833               0 :         int                     rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
     834               0 :         int                     offset = rpageno * BLCKSZ;      /* NOTE(review): declared int but printed with %u below */
     835                 :         char            path[MAXPGPATH];
     836                 : 
     837               0 :         SlruFileName(ctl, path, segno);
     838               0 :         errno = slru_errno;     /* restore the errno saved at failure time; presumably what %m below expands */
     839               0 :         switch (slru_errcause)
     840                 :         {
     841                 :                 case SLRU_OPEN_FAILED:
     842               0 :                         ereport(ERROR,
     843                 :                                         (errcode_for_file_access(),
     844                 :                                          errmsg("could not access status of transaction %u", xid),
     845                 :                                          errdetail("Could not open file \"%s\": %m.", path)));
     846                 :                         break;
     847                 :                 case SLRU_SEEK_FAILED:
     848               0 :                         ereport(ERROR,
     849                 :                                         (errcode_for_file_access(),
     850                 :                                          errmsg("could not access status of transaction %u", xid),
     851                 :                                  errdetail("Could not seek in file \"%s\" to offset %u: %m.",
     852                 :                                                    path, offset)));
     853                 :                         break;
     854                 :                 case SLRU_READ_FAILED:
     855               0 :                         ereport(ERROR,
     856                 :                                         (errcode_for_file_access(),
     857                 :                                          errmsg("could not access status of transaction %u", xid),
     858                 :                            errdetail("Could not read from file \"%s\" at offset %u: %m.",
     859                 :                                                  path, offset)));
     860                 :                         break;
     861                 :                 case SLRU_WRITE_FAILED:
     862               0 :                         ereport(ERROR,
     863                 :                                         (errcode_for_file_access(),
     864                 :                                          errmsg("could not access status of transaction %u", xid),
     865                 :                                 errdetail("Could not write to file \"%s\" at offset %u: %m.",
     866                 :                                                   path, offset)));
     867                 :                         break;
     868                 :                 case SLRU_FSYNC_FAILED:
     869               0 :                         ereport(ERROR,
     870                 :                                         (errcode_for_file_access(),
     871                 :                                          errmsg("could not access status of transaction %u", xid),
     872                 :                                          errdetail("Could not fsync file \"%s\": %m.",
     873                 :                                                            path)));
     874                 :                         break;
     875                 :                 case SLRU_CLOSE_FAILED:
     876               0 :                         ereport(ERROR,
     877                 :                                         (errcode_for_file_access(),
     878                 :                                          errmsg("could not access status of transaction %u", xid),
     879                 :                                          errdetail("Could not close file \"%s\": %m.",
     880                 :                                                            path)));
     881                 :                         break;
     882                 :                 default:
     883                 :                         /* can't get here, we trust */
     884               0 :                         elog(ERROR, "unrecognized SimpleLru error cause: %d",
     885                 :                                  (int) slru_errcause);
     886                 :                         break;
     887                 :         }
     888               0 : }
     889                 : 
     890                 : /*
     891                 :  * Select the slot to re-use when we need a free slot.
     892                 :  *
     893                 :  * The target page number is passed because we need to consider the
     894                 :  * possibility that some other process reads in the target page while
     895                 :  * we are doing I/O to free a slot.  Hence, check or recheck to see if
     896                 :  * any slot already holds the target page, and return that slot if so.
     897                 :  * Thus, the returned slot is *either* a slot already holding the pageno
     898                 :  * (could be any state except EMPTY), *or* a freeable slot (state EMPTY
     899                 :  * or CLEAN).
     900                 :  *
     901                 :  * Control lock must be held at entry, and will be held at exit.
     902                 :  */
     903                 : static int
     904                 : SlruSelectLRUPage(SlruCtl ctl, int pageno)
     905            8788 : {
     906            8788 :         SlruShared      shared = ctl->shared;
     907                 : 
     908                 :         /* Outer loop handles restart after I/O */
     909                 :         for (;;)
     910                 :         {
     911                 :                 int                     slotno;
     912                 :                 int                     cur_count;
     913                 :                 int                     bestslot;
     914                 :                 int                     best_delta;
     915                 :                 int                     best_page_number;
     916                 : 
     917                 :                 /* See if page already has a buffer assigned */
     918            9555 :                 for (slotno = 0; slotno < shared->num_slots; slotno++)
     919                 :                 {
     920            9510 :                         if (shared->page_number[slotno] == pageno &&
     921                 :                                 shared->page_status[slotno] != SLRU_PAGE_EMPTY)
     922            8743 :                                 return slotno;
     923                 :                 }
     924                 : 
     925                 :                 /*
     926                 :                  * If we find any EMPTY slot, just select that one. Else locate the
     927                 :                  * least-recently-used slot to replace.
     928                 :                  *
     929                 :                  * Normally the page_lru_count values will all be different and so
     930                 :                  * there will be a well-defined LRU page.  But since we allow
     931                 :                  * concurrent execution of SlruRecentlyUsed() within
     932                 :                  * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
     933                 :                  * acquire the same lru_count values.  In that case we break ties by
     934                 :                  * choosing the furthest-back page.
     935                 :                  *
     936                 :                  * In no case will we select the slot containing latest_page_number
     937                 :                  * for replacement, even if it appears least recently used.
     938                 :                  *
     939                 :                  * Notice that this next line forcibly advances cur_lru_count to a
     940                 :                  * value that is certainly beyond any value that will be in the
     941                 :                  * page_lru_count array after the loop finishes.  This ensures that
     942                 :                  * the next execution of SlruRecentlyUsed will mark the page newly
     943                 :                  * used, even if it's for a page that has the current counter value.
     944                 :                  * That gets us back on the path to having good data when there are
     945                 :                  * multiple pages with the same lru_count.
     946                 :                  */
     947              45 :                 cur_count = (shared->cur_lru_count)++;
     948              45 :                 best_delta = -1;
     949              45 :                 bestslot = 0;                   /* no-op, just keeps compiler quiet */
     950              45 :                 best_page_number = 0;   /* ditto */
     951              48 :                 for (slotno = 0; slotno < shared->num_slots; slotno++)
     952                 :                 {
     953                 :                         int                     this_delta;
     954                 :                         int                     this_page_number;
     955                 : 
     956              48 :                         if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
     957              45 :                                 return slotno;
     958               3 :                         this_delta = cur_count - shared->page_lru_count[slotno];
     959               3 :                         if (this_delta < 0)
     960                 :                         {
     961                 :                                 /*
     962                 :                                  * Clean up in case shared updates have caused cur_count
     963                 :                                  * increments to get "lost".  We back off the page counts,
     964                 :                                  * rather than trying to increase cur_count, to avoid any
     965                 :                                  * question of infinite loops or failure in the presence of
     966                 :                                  * wrapped-around counts.
     967                 :                                  */
     968               0 :                                 shared->page_lru_count[slotno] = cur_count;
     969               0 :                                 this_delta = 0;
     970                 :                         }
     971               3 :                         this_page_number = shared->page_number[slotno];
     972               3 :                         if ((this_delta > best_delta ||
     973                 :                                  (this_delta == best_delta &&
     974                 :                                   ctl->PagePrecedes(this_page_number, best_page_number))) &&
     975                 :                                 this_page_number != shared->latest_page_number)
     976                 :                         {
     977               1 :                                 bestslot = slotno;
     978               1 :                                 best_delta = this_delta;
     979               1 :                                 best_page_number = this_page_number;
     980                 :                         }
     981                 :                 }
     982                 : 
     983                 :                 /*
     984                 :                  * If the selected page is clean, we're set.
     985                 :                  */
     986               0 :                 if (shared->page_status[bestslot] == SLRU_PAGE_VALID &&
     987                 :                         !shared->page_dirty[bestslot])
     988               0 :                         return bestslot;
     989                 : 
     990                 :                 /*
     991                 :                  * We need to wait for I/O.  Normal case is that it's dirty and we
     992                 :                  * must initiate a write, but it's possible that the page is already
     993                 :                  * write-busy, or in the worst case still read-busy.  In those cases
     994                 :                  * we wait for the existing I/O to complete.
     995                 :                  */
     996               0 :                 if (shared->page_status[bestslot] == SLRU_PAGE_VALID)
     997               0 :                         SimpleLruWritePage(ctl, bestslot, NULL);
     998                 :                 else
     999               0 :                         SimpleLruWaitIO(ctl, bestslot);
    1000                 : 
    1001                 :                 /*
    1002                 :                  * Now loop back and try again.  This is the easiest way of dealing
    1003                 :                  * with corner cases such as the victim page being re-dirtied while we
    1004                 :                  * wrote it.
    1005                 :                  */
    1006                 :         }
    1007                 : }
    1008                 : 
    1009                 : /*
    1010                 :  * Flush dirty pages to disk during checkpoint or database shutdown
                         :  *
                         :  * ctl: control structure of the SLRU area to flush.
                         :  * checkpoint: true when called during a checkpoint.  Within this function
                         :  *     it only relaxes the post-write assertion, because a slot may be
                         :  *     re-dirtied by another process in that case.
                         :  *
                         :  * Every buffer slot is handed to SimpleLruWritePage while the control lock
                         :  * is held exclusively.  After the lock is released, the files collected in
                         :  * fdata are fsynced (only if ctl->do_fsync is set) and closed.  An fsync
                         :  * or close failure does not stop the loop; it is reported through
                         :  * SlruReportIOError once all files have been processed.
    1011                 :  */
    1012                 : void
    1013                 : SimpleLruFlush(SlruCtl ctl, bool checkpoint)
    1014             128 : {
    1015             128 :         SlruShared      shared = ctl->shared;
    1016                 :         SlruFlushData fdata;
    1017                 :         int                     slotno;
    1018             128 :         int                     pageno = 0;
    1019                 :         int                     i;
    1020                 :         bool            ok;
    1021                 : 
    1022                 :         /*
    1023                 :          * Find and write dirty pages
                         :          *
                         :          * SimpleLruWritePage is invoked for every slot, not only dirty ones
                         :          * (presumably it returns quickly for clean or empty slots; confirm
                         :          * in SimpleLruWritePage).  fdata starts with zero files and is
                         :          * passed by address so that the files to fsync and close below can
                         :          * be recorded in it.
    1024                 :          */
    1025             128 :         fdata.num_files = 0;
    1026                 : 
    1027             128 :         LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
    1028                 : 
    1029            2176 :         for (slotno = 0; slotno < shared->num_slots; slotno++)
    1030                 :         {
    1031            2048 :                 SimpleLruWritePage(ctl, slotno, &fdata);
    1032                 : 
    1033                 :                 /*
    1034                 :                  * When called during a checkpoint, we cannot assert that the slot is
    1035                 :                  * clean now, since another process might have re-dirtied it already.
    1036                 :                  * That's okay.
    1037                 :                  */
    1038                 :                 Assert(checkpoint ||
    1039                 :                            shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
    1040                 :                            (shared->page_status[slotno] == SLRU_PAGE_VALID &&
    1041                 :                                 !shared->page_dirty[slotno]));
    1042                 :         }
    1043                 : 
    1044             128 :         LWLockRelease(shared->ControlLock);
    1045                 : 
    1046                 :         /*
    1047                 :          * Now fsync and close any files that were open
                         :          *
                         :          * Each file is closed even if its fsync failed.  On failure the error
                         :          * cause, errno and the first page of the affected segment are saved;
                         :          * a later failure overwrites an earlier one, so only the last failure
                         :          * is passed to SlruReportIOError.
    1048                 :          */
    1049             128 :         ok = true;
    1050             176 :         for (i = 0; i < fdata.num_files; i++)
    1051                 :         {
    1052              48 :                 if (ctl->do_fsync && pg_fsync(fdata.fd[i]))
    1053                 :                 {
    1054               0 :                         slru_errcause = SLRU_FSYNC_FAILED;
    1055               0 :                         slru_errno = errno;
    1056               0 :                         pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
    1057               0 :                         ok = false;
    1058                 :                 }
    1059                 : 
    1060              48 :                 if (close(fdata.fd[i]))
    1061                 :                 {
    1062               0 :                         slru_errcause = SLRU_CLOSE_FAILED;
    1063               0 :                         slru_errno = errno;
    1064               0 :                         pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
    1065               0 :                         ok = false;
    1066                 :                 }
    1067                 :         }
    1068             128 :         if (!ok)
    1069               0 :                 SlruReportIOError(ctl, pageno, InvalidTransactionId);
    1070             128 : }
    1071                 : 
    1072                 : /*
    1073                 :  * Remove all segments before the one holding the passed page number
                         :  *
                         :  * ctl: control structure of the SLRU area to truncate.
                         :  * cutoffPage: page number inside the oldest segment to keep; it is rounded
                         :  *     down to the first page of its segment before use.
                         :  *
                         :  * Under the exclusive control lock, every non-empty buffer whose page
                         :  * precedes the cutoff (per ctl->PagePrecedes) is set to EMPTY, after first
                         :  * writing it out if dirty or waiting for I/O in progress on it.  The lock
                         :  * is then released and SlruScanDirectory is called with doDeletions = true
                         :  * to remove the segment files.  If the latest page precedes the cutoff, a
                         :  * LOG message is emitted and nothing is removed.
    1074                 :  */
    1075                 : void
    1076                 : SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
    1077              47 : {
    1078              47 :         SlruShared      shared = ctl->shared;
    1079                 :         int                     slotno;
    1080                 : 
    1081                 :         /*
    1082                 :          * The cutoff point is the start of the segment containing cutoffPage.
    1083                 :          */
    1084              47 :         cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
    1085                 : 
    1086                 :         /*
    1087                 :          * Scan shared memory and remove any pages preceding the cutoff page, to
    1088                 :          * ensure we won't rewrite them later.  (Since this is normally called in
    1089                 :          * or just after a checkpoint, any dirty pages should have been flushed
    1090                 :          * already ... we're just being extra careful here.)
    1091                 :          */
    1092              47 :         LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
    1093                 : 
    1094              47 : restart:;
    1095                 : 
    1096                 :         /*
    1097                 :          * While we are holding the lock, make an important safety check: the
    1098                 :          * planned cutoff point must be <= the current endpoint page. Otherwise we
    1099                 :          * have already wrapped around, and proceeding with the truncation would
    1100                 :          * risk removing the current segment.
                         :          *
                         :          * This check sits after the restart label, so it is repeated every
                         :          * time the slot scan below has to start over.
    1101                 :          */
    1102              47 :         if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
    1103                 :         {
    1104               0 :                 LWLockRelease(shared->ControlLock);
    1105               0 :                 ereport(LOG,
    1106                 :                   (errmsg("could not truncate directory \"%s\": apparent wraparound",
    1107                 :                                   ctl->Dir)));
    1108                 :                 return;
    1109                 :         }
    1110                 : 
    1111             991 :         for (slotno = 0; slotno < shared->num_slots; slotno++)
    1112                 :         {
    1113             944 :                 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
    1114             904 :                         continue;
    1115              40 :                 if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
    1116              40 :                         continue;
    1117                 : 
    1118                 :                 /*
    1119                 :                  * If page is clean, just change state to EMPTY (expected case).
    1120                 :                  */
    1121               0 :                 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
    1122                 :                         !shared->page_dirty[slotno])
    1123                 :                 {
    1124               0 :                         shared->page_status[slotno] = SLRU_PAGE_EMPTY;
    1125               0 :                         continue;
    1126                 :                 }
    1127                 : 
    1128                 :                 /*
    1129                 :                  * Hmm, we have (or may have) I/O operations acting on the page, so
    1130                 :                  * we've got to wait for them to finish and then start again. This is
    1131                 :                  * the same logic as in SlruSelectLRUPage.      (XXX if page is dirty,
    1132                 :                  * wouldn't it be OK to just discard it without writing it?  For now,
    1133                 :                  * keep the logic the same as it was.)
                         :                  *
                         :                  * A VALID page reaching this point is dirty and gets written; any
                         :                  * other status is waited on.  Either way control jumps back to
                         :                  * restart, so the safety check and the full slot scan are redone.
    1134                 :                  */
    1135               0 :                 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
    1136               0 :                         SimpleLruWritePage(ctl, slotno, NULL);
    1137                 :                 else
    1138               0 :                         SimpleLruWaitIO(ctl, slotno);
    1139                 :                 goto restart;
    1140                 :         }
    1141                 : 
    1142              47 :         LWLockRelease(shared->ControlLock);
    1143                 : 
    1144                 :         /* Now we can remove the old segment(s) */
    1145              47 :         (void) SlruScanDirectory(ctl, cutoffPage, true);
    1146                 : }
    1147                 : 
    1148                 : /*
    1149                 :  * SimpleLruTruncate subroutine: scan directory for removable segments.
    1150                 :  * Actually remove them iff doDeletions is true.  Return TRUE iff any
    1151                 :  * removable segments were found.  Note: no locking is needed.
    1152                 :  *
    1153                 :  * This can be called directly from clog.c, for reasons explained there.
                         :  *
                         :  * ctl: control structure; ctl->Dir names the directory that is scanned.
                         :  * cutoffPage: page number inside the oldest segment to keep.
                         :  * doDeletions: when true, each removable segment file is unlinked.
                         :  *
                         :  * A directory entry is treated as a segment file only if its name is
                         :  * exactly four characters drawn from 0-9 and uppercase A-F; the name is
                         :  * parsed as the hexadecimal segment number.  A segment is removable when
                         :  * its first page precedes the segment-aligned cutoff page according to
                         :  * ctl->PagePrecedes.  The result of unlink() is not checked, so a failed
                         :  * removal goes unreported by this function.
                         :  *
                         :  * NOTE(review): the AllocateDir result is not tested here; presumably
                         :  * ReadDir reports a failed open - confirm against ReadDir.
    1154                 :  */
    1155                 : bool
    1156                 : SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions)
    1157              49 : {
    1158              49 :         bool            found = false;
    1159                 :         DIR                *cldir;
    1160                 :         struct dirent *clde;
    1161                 :         int                     segno;
    1162                 :         int                     segpage;
    1163                 :         char            path[MAXPGPATH];
    1164                 : 
    1165                 :         /*
    1166                 :          * The cutoff point is the start of the segment containing cutoffPage.
    1167                 :          * (This is redundant when called from SimpleLruTruncate, but not when
    1168                 :          * called directly from clog.c.)
    1169                 :          */
    1170              49 :         cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
    1171                 : 
    1172              49 :         cldir = AllocateDir(ctl->Dir);
    1173             245 :         while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
    1174                 :         {
    1175             196 :                 if (strlen(clde->d_name) == 4 &&
    1176              49 :                         strspn(clde->d_name, "0123456789ABCDEF") == 4)
    1177                 :                 {
    1178              49 :                         segno = (int) strtol(clde->d_name, NULL, 16);
    1179              49 :                         segpage = segno * SLRU_PAGES_PER_SEGMENT;
    1180              49 :                         if (ctl->PagePrecedes(segpage, cutoffPage))
    1181                 :                         {
    1182               0 :                                 found = true;
    1183               0 :                                 if (doDeletions)
    1184                 :                                 {
    1185               0 :                                         snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, clde->d_name);
    1186               0 :                                         ereport(DEBUG2,
    1187                 :                                                         (errmsg("removing file \"%s\"", path)));
    1188               0 :                                         unlink(path);
    1189                 :                                 }
    1190                 :                         }
    1191                 :                 }
    1192                 :         }
    1193              49 :         FreeDir(cldir);
    1194                 : 
    1195              49 :         return found;
    1196                 : }

Generated by: LTP GCOV extension version 1.5