PostgreSQL pg_current_xlog_insert_location() & pg_current_xlog_location()
背景
PostgreSQL pg_current_xlog_insert_location() & pg_current_xlog_location() 是两个获取XLOG位置的函数,他们有什么不同呢?
答案是pg_current_xlog_insert_location指写入wal buffer的位置.
pg_current_xlog_location返回已经write到wal文件的位置.
使用异步提交可以看到这个差异.
synchronous_commit = off
wal_writer_delay = 10000ms
然后开启一个窗口执行一个大批量的写入操作.
在另外的窗口查看这两个函数的结果, fsync明显要小于insert.
digoal=# select pg_current_xlog_insert_location(),pg_current_xlog_location();
pg_current_xlog_insert_location | pg_current_xlog_location
---------------------------------+--------------------------
4/FCD58288 | 4/FCD50048
(1 row)
源码
digoal=# select prosrc, proname from pg_proc where proname ~ 'pg_current_xlog';
prosrc | proname
---------------------------------+---------------------------------
pg_current_xlog_location | pg_current_xlog_location
pg_current_xlog_insert_location | pg_current_xlog_insert_location
(2 rows)
backend/access/transam/xlogfuncs.c
/*
* Report the current WAL write location (same format as pg_start_backup etc)
*
* This is useful for determining how much of WAL is visible to an external
* archiving process. Note that the data before this point is written out
* to the kernel, but is not necessarily synced to disk.
*/
Datum
pg_current_xlog_location(PG_FUNCTION_ARGS)
{
XLogRecPtr current_recptr;
char location[MAXFNAMELEN];
if (RecoveryInProgress())
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("recovery is in progress"),
errhint("WAL control functions cannot be executed during recovery.")));
current_recptr = GetXLogWriteRecPtr();
snprintf(location, sizeof(location), "%X/%X",
(uint32) (current_recptr >> 32), (uint32) current_recptr);
PG_RETURN_TEXT_P(cstring_to_text(location));
}
/*
* Report the current WAL insert location (same format as pg_start_backup etc)
*
* This function is mostly for debugging purposes.
*/
Datum
pg_current_xlog_insert_location(PG_FUNCTION_ARGS)
{
XLogRecPtr current_recptr;
char location[MAXFNAMELEN];
if (RecoveryInProgress())
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("recovery is in progress"),
errhint("WAL control functions cannot be executed during recovery.")));
current_recptr = GetXLogInsertRecPtr();
snprintf(location, sizeof(location), "%X/%X",
(uint32) (current_recptr >> 32), (uint32) current_recptr);
PG_RETURN_TEXT_P(cstring_to_text(location));
}
src/backend/access/transam/xlog.c
/*
* Get latest WAL insert pointer
*/
XLogRecPtr
GetXLogInsertRecPtr(void)
{
XLogCtlInsert *Insert = &XLogCtl->Insert;
XLogRecPtr current_recptr;
LWLockAcquire(WALInsertLock, LW_SHARED);
INSERT_RECPTR(current_recptr, Insert, Insert->curridx);
LWLockRelease(WALInsertLock);
return current_recptr;
}
/*
* Get latest WAL write pointer
*/
XLogRecPtr
GetXLogWriteRecPtr(void)
{
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
SpinLockAcquire(&xlogctl->info_lck);
LogwrtResult = xlogctl->LogwrtResult;
SpinLockRelease(&xlogctl->info_lck);
}
return LogwrtResult.Write;
}
/*----------
* Shared-memory data structures for XLOG control
*
* LogwrtRqst indicates a byte position that we need to write and/or fsync
* the log up to (all records before that point must be written or fsynced).
* LogwrtResult indicates the byte positions we have already written/fsynced.
* These structs are identical but are declared separately to indicate their
* slightly different functions.
*
* To read XLogCtl->LogwrtResult, you must hold either info_lck or
* WALWriteLock. To update it, you need to hold both locks. The point of
* this arrangement is that the value can be examined by code that already
* holds WALWriteLock without needing to grab info_lck as well. In addition
* to the shared variable, each backend has a private copy of LogwrtResult,
* which is updated when convenient.
*
* The request bookkeeping is simpler: there is a shared XLogCtl->LogwrtRqst
* (protected by info_lck), but we don't need to cache any copies of it.
*
* info_lck is only held long enough to read/update the protected variables,
* so it's a plain spinlock. The other locks are held longer (potentially
* over I/O operations), so we use LWLocks for them. These locks are:
*
* WALInsertLock: must be held to insert a record into the WAL buffers.
*
* WALWriteLock: must be held to write WAL buffers to disk (XLogWrite or
* XLogFlush).
*
* ControlFileLock: must be held to read/update control file or create
* new log file.
*
* CheckpointLock: must be held to do a checkpoint or restartpoint (ensures
* only one checkpointer at a time; currently, with all checkpoints done by
* the checkpointer, this is just pro forma).
*
*----------
*/
/*
* Shared state data for XLogInsert.
*/
typedef struct XLogCtlInsert
{
XLogRecPtr PrevRecord; /* start of previously-inserted record */
int curridx; /* current block index in cache */
XLogPageHeader currpage; /* points to header of block in cache */
char *currpos; /* current insertion point in cache */
XLogRecPtr RedoRecPtr; /* current redo point for insertions */
bool forcePageWrites; /* forcing full-page writes for PITR? */
/*
* fullPageWrites is the master copy used by all backends to determine
* whether to write full-page to WAL, instead of using process-local one.
* This is required because, when full_page_writes is changed by SIGHUP,
* we must WAL-log it before it actually affects WAL-logging by backends.
* Checkpointer sets at startup or after SIGHUP.
*/
bool fullPageWrites;
/*
* exclusiveBackup is true if a backup started with pg_start_backup() is
* in progress, and nonExclusiveBackups is a counter indicating the number
* of streaming base backups currently in progress. forcePageWrites is set
* to true when either of these is non-zero. lastBackupStart is the latest
* checkpoint redo location used as a starting point for an online backup.
*/
bool exclusiveBackup;
int nonExclusiveBackups;
XLogRecPtr lastBackupStart;
} XLogCtlInsert;