PostgreSQL 9.3 Change the WAL record format to allow splitting the record header across pages

4 minute read

背景

Change the WAL record format to allow splitting the record header across pages (Heikki Linnakangas)  
  
The new format is slightly more compact, and is more efficient to write.  
  
Allow WAL record header to be split across pages.  
  
This saves a few bytes of WAL space, but the real motivation is to make it  
  
predictable how much WAL space a record requires, as it no longer depends  
  
on whether we need to waste the last few bytes at end of WAL page because  
  
the header doesn't fit.  
  
The total length field of WAL record, xl_tot_len, is moved to the beginning  
  
of the WAL record header, so that it is still always found on the first page  
  
where a WAL record begins.  
  
Bump WAL version number again as this is an incompatible change.  

变更前的xlogrecord结构 :

  41 typedef struct XLogRecord  
  42 {  
  43     pg_crc32    xl_crc;         /* CRC for this record */  
  44     XLogRecPtr  xl_prev;        /* ptr to previous record in log */  
  45     TransactionId xl_xid;       /* xact id */  
  46     uint32      xl_tot_len;     /* total len of entire record */  
  47     uint32      xl_len;         /* total len of rmgr data */  
  48     uint8       xl_info;        /* flag bits, see below */  
  49     RmgrId      xl_rmid;        /* resource manager for this record */  
  50   
  51     /* Depending on MAXALIGN, there are either 2 or 6 wasted bytes here */  
  52   
  53     /* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */  
  54   
  55 } XLogRecord;  

变更后的xlogrecord结构 :

  41 typedef struct XLogRecord  
  42 {  
  43     uint32      xl_tot_len;     /* total len of entire record */  
  44     TransactionId xl_xid;       /* xact id */  
  45     uint32      xl_len;         /* total len of rmgr data */  
  46     uint8       xl_info;        /* flag bits, see below */  
  47     RmgrId      xl_rmid;        /* resource manager for this record */  
  48     /* 2 bytes of padding here, initialize to zero */  
  49     XLogRecPtr  xl_prev;        /* ptr to previous record in log */  
  50     pg_crc32    xl_crc;         /* CRC for this record */  
  51   
  52     /* If MAXALIGN==8, there are 4 wasted bytes here */  
  53   
  54     /* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */  
  55   
  56 } XLogRecord;  

xlog文件分析可参考 :

http://blog.163.com/digoal@126/blog/static/163877040201312311244246/

测试

PostgreSQL 9.2 :

digoal=# create table t(id int);  
CREATE TABLE  
digoal=# checkpoint;  
CHECKPOINT  
digoal=# select pg_current_xlog_insert_location();  
 pg_current_xlog_insert_location   
---------------------------------  
 1/A2019E50  
(1 row)  
digoal=# select pg_current_xlog_location();  
 pg_current_xlog_location   
--------------------------  
 1/A2019E50  
(1 row)  
digoal=# insert into t select generate_series(1,1000000);  
INSERT 0 1000000  
digoal=# select pg_current_xlog_insert_location();  
 pg_current_xlog_insert_location   
---------------------------------  
 1/A5D417C0  
(1 row)  
digoal=# select pg_current_xlog_location();  
 pg_current_xlog_location   
--------------------------  
 1/A5D417C0  
(1 row)  
digoal=# select pg_xlog_location_diff('1/A5D417C0','1/A2019E50');  
 pg_xlog_location_diff   
-----------------------  
              64125296  
(1 row)  

PostgreSQL 9.3 :

digoal=# create table t(id int);  
CREATE TABLE  
digoal=# checkpoint;  
CHECKPOINT  
digoal=# select pg_current_xlog_insert_location();  
 pg_current_xlog_insert_location   
---------------------------------  
 2/59A70730  
(1 row)  
digoal=# select pg_current_xlog_location();  
 pg_current_xlog_location   
--------------------------  
 2/59A70730  
(1 row)  
digoal=# insert into t select generate_series(1,1000000);  
INSERT 0 1000000  
digoal=# select pg_current_xlog_location();  
 pg_current_xlog_location   
--------------------------  
 2/5D790660  
(1 row)  
digoal=# select pg_current_xlog_insert_location();  
 pg_current_xlog_insert_location   
---------------------------------  
 2/5D790660  
(1 row)  
digoal=# select pg_xlog_location_diff('2/5D790660','2/59A70730');  
 pg_xlog_location_diff   
-----------------------  
              64094000  
(1 row)  

pg_xlogdump :

pg93@db-172-16-3-33-> pg_xlogdump -b -n 10 -p $PGDATA/pg_xlog 00000003000000020000005D 00000003000000020000005D -x 12159136   
rmgr: Heap        len (rec/tot):     31/    63, tx:   12159136, lsn: 2/5D794B90, prev 2/5D794B28, bkp: 0000, desc: insert(init): rel 1663/12815/17899; tid 0/1  
rmgr: Heap        len (rec/tot):     31/    63, tx:   12159136, lsn: 2/5D794BD0, prev 2/5D794B90, bkp: 0000, desc: insert: rel 1663/12815/17899; tid 0/2  
rmgr: Heap        len (rec/tot):     31/    63, tx:   12159136, lsn: 2/5D794C10, prev 2/5D794BD0, bkp: 0000, desc: insert: rel 1663/12815/17899; tid 0/3  
rmgr: Heap        len (rec/tot):     31/    63, tx:   12159136, lsn: 2/5D794C50, prev 2/5D794C10, bkp: 0000, desc: insert: rel 1663/12815/17899; tid 0/4  
rmgr: Heap        len (rec/tot):     31/    63, tx:   12159136, lsn: 2/5D794C90, prev 2/5D794C50, bkp: 0000, desc: insert: rel 1663/12815/17899; tid 0/5  
rmgr: Heap        len (rec/tot):     31/    63, tx:   12159136, lsn: 2/5D794CD0, prev 2/5D794C90, bkp: 0000, desc: insert: rel 1663/12815/17899; tid 0/6  
rmgr: Heap        len (rec/tot):     31/    63, tx:   12159136, lsn: 2/5D794D10, prev 2/5D794CD0, bkp: 0000, desc: insert: rel 1663/12815/17899; tid 0/7  
rmgr: Heap        len (rec/tot):     31/    63, tx:   12159136, lsn: 2/5D794D50, prev 2/5D794D10, bkp: 0000, desc: insert: rel 1663/12815/17899; tid 0/8  
rmgr: Heap        len (rec/tot):     31/    63, tx:   12159136, lsn: 2/5D794D90, prev 2/5D794D50, bkp: 0000, desc: insert: rel 1663/12815/17899; tid 0/9  
rmgr: Heap        len (rec/tot):     31/    63, tx:   12159136, lsn: 2/5D794DD0, prev 2/5D794D90, bkp: 0000, desc: insert: rel 1663/12815/17899; tid 0/10  

参考

1. http://www.postgresql.org/message-id/flat/4FDA5136.6080206@enterprisedb.com#4FDA5136.6080206@enterprisedb.com

2. http://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=061e7efb1b4c5b8a5d02122b7780531b8d5bf23d

3. http://blog.163.com/digoal@126/blog/static/163877040201312311244246/

4.

2012-06-24	Heikki Linnakangas	Use UINT64CONST for 64-bit integer constants.	commit | commitdiff | tree | snapshot  
2012-06-24	Heikki Linnakangas	Oops. Remove stray paren.	commit | commitdiff | tree | snapshot  
2012-06-24	Heikki Linnakangas	Use LL suffix for 64-bit constants.	commit | commitdiff | tree | snapshot  
2012-06-24	Heikki Linnakangas	Replace XLogRecPtr struct with a 64-bit integer.	commit | commitdiff | tree | snapshot  
2012-06-24	Heikki Linnakangas	Allow WAL record header to be split across pages.	commit | commitdiff | tree | snapshot  
2012-06-24	Heikki Linnakangas	Move WAL continuation record information to WAL page...	commit | commitdiff | tree | snapshot  
2012-06-24	Heikki Linnakangas	Don't waste the last segment of each 4GB logical log...	commit | commitdiff | tree | snapshot  

Flag Counter

digoal’s 大量PostgreSQL文章入口