pg_basebackup 不备份哪些文件

2 minute read

背景

pg_basebackup是PostgreSQL提供的基础备份工具,用户可以使用这个工具来实现对数据库的基础备份,当然你也可以使用pg_start_backup()+copy+pg_stop_backup()的方式进行备份。

那么pg_basebackup会帮你过滤哪些文件,或者不备份哪些文件呢?

1. pgsql_tmp开头的临时文件和临时目录中的文件

2. postgresql.auto.conf.tmp

3. backup_label(即源码中的BACKUP_LABEL_FILE)

4. postmaster.pid 和 postmaster.opts

5. pg_stat_tmp目录中的文件

6. pg_replslot目录中的文件

7. pg_xlog中的文件(遍历目录时总是跳过,只保留空目录;如果加了-x参数,则由另一处代码单独备份所需的WAL文件)

源码如下:

src/include/storage/fd.h:#define PG_TEMP_FILE_PREFIX "pgsql_tmp"  
  
src/include/utils/guc.h:#define PG_AUTOCONF_FILENAME            "postgresql.auto.conf"  
  
contrib/pg_stat_statements/pg_stat_statements.c:#define PGSS_TEXT_FILE  PG_STAT_TMP_DIR "/pgss_query_texts.stat"  
  
src/include/pgstat.h:#define PG_STAT_TMP_DIR            "pg_stat_tmp"  

src/backend/replication/basebackup.c

/*  
 * Include all files from the given directory in the output tar stream. If  
 * 'sizeonly' is true, we just calculate a total length and return it, without  
 * actually sending anything.  
 *  
 * Omit any directory in the tablespaces list, to avoid backing up  
 * tablespaces twice when they were created inside PGDATA.  
 */  
static int64  
sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)  
{  
......  
        dir = AllocateDir(path);  
        while ((de = ReadDir(dir, path)) != NULL)  
        {  
                /* Skip special stuff */  
                if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)  
                        continue;  
跳过临时文件,即pgsql_tmp开头的文件  
                /* Skip temporary files */  
                if (strncmp(de->d_name,  
                                        PG_TEMP_FILE_PREFIX,  
                                        strlen(PG_TEMP_FILE_PREFIX)) == 0)  
                        continue;  
跳过postgresql.auto.conf的临时文件postgresql.auto.conf.tmp  
                /* skip auto conf temporary file */  
                if (strncmp(de->d_name,  
                                        PG_AUTOCONF_FILENAME ".tmp",  
                                        sizeof(PG_AUTOCONF_FILENAME) + 4) == 0)  
                        continue;  
跳过使用pg_start_backup()+copy+pg_stop_backup()的方式进行备份的标签文件,BACKUP_LABEL_FILE  
                /*  
                 * If there's a backup_label file, it belongs to a backup started by  
                 * the user with pg_start_backup(). It is *not* correct for this  
                 * backup, our backup_label is injected into the tar separately.  
                 */  
                if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)  
                        continue;  
......  
跳过pid文件(postmaster.pid)以及postmaster.opts文件  
                /* Skip postmaster.pid and postmaster.opts in the data directory */  
                if (strcmp(pathbuf, "./postmaster.pid") == 0 ||  
                        strcmp(pathbuf, "./postmaster.opts") == 0)  
                        continue;  
跳过控制文件,不是不备份,是放在最后备份,因为使用pg_basebackup不产生标签文件来记录备份开始的XLOG OFFSET,所以需要将pg_control文件放在最后备份。  
                /* Skip pg_control here to back up it last */  
                if (strcmp(pathbuf, "./global/pg_control") == 0)  
                        continue;  
......  
跳过统计信息的临时文件(包括pg_stat_statements插件的,因为这个插件的临时统计信息也放在pg_stat_tmp目录下),但仍保留空目录。  
                /*  
                 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped  
                 * even when stats_temp_directory is set because PGSS_TEXT_FILE is  
                 * always created there.  
                 */  
                if ((statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0) ||  
                  strncmp(de->d_name, PG_STAT_TMP_DIR, strlen(PG_STAT_TMP_DIR)) == 0)  
                {  
                        if (!sizeonly)  
                                _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);  
                        size += 512;  
                        continue;  
                }  
跳过pg_replslot目录中的文件  
                /*  
                 * Skip pg_replslot, not useful to copy. But include it as an empty  
                 * directory anyway, so we get permissions right.  
                 */  
                if (strcmp(de->d_name, "pg_replslot") == 0)  
                {  
                        if (!sizeonly)  
                                _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);  
                        size += 512;            /* Size of the header just added */  
                        continue;  
                }  
......  
跳过pg_xlog中的文件(仅保留pg_xlog和pg_xlog/archive_status这两个空目录)。  
当打开了-x参数时,在另一处代码备份xlog  
  -x, --xlog             include required WAL files in backup (fetch mode)  
  
                /*  
                 * We can skip pg_xlog, the WAL segments need to be fetched from the  
                 * WAL archive anyway. But include it as an empty directory anyway, so  
                 * we get permissions right.  
                 */  
                if (strcmp(pathbuf, "./pg_xlog") == 0)  
                {  
                        if (!sizeonly)  
                        {  
                                /* If pg_xlog is a symlink, write it as a directory anyway */  
#ifndef WIN32  
                                if (S_ISLNK(statbuf.st_mode))  
#else  
                                if (pgwin32_is_junction(pathbuf))  
#endif  
                                        statbuf.st_mode = S_IFDIR | S_IRWXU;  
                                _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);  
                        }  
                        size += 512;            /* Size of the header just added */  
  
                        /*  
                         * Also send archive_status directory (by hackishly reusing  
                         * statbuf from above ...).  
                         */  
                        if (!sizeonly)  
                                _tarWriteHeader("./pg_xlog/archive_status", NULL, &statbuf);  
                        size += 512;            /* Size of the header just added */  
  
                        continue;                       /* don't recurse into pg_xlog */  
                }  

当打开了-x参数时,在此处代码备份xlog

src/backend/replication/basebackup.c

/*  
 * Actually do a base backup for the specified tablespaces.  
 *  
 * This is split out mainly to avoid complaints about "variable might be  
 * clobbered by longjmp" from stupider versions of gcc.  
 */  
static void  
perform_base_backup(basebackup_options *opt, DIR *tblspcdir)  
{  
......  
        if (opt->includewal)  
        {  
                /*  
......  

最后,我们注意到pg_basebackup备份了hash index以及unlogged table的内容。实际上它们的备份都是无用功,因为它们不产生XLOG(hash index在PostgreSQL 10之前的版本中不记录XLOG):在还原时,unlogged table会被自动清空,而hash index则可能存在不一致的数据。

这是不合理的地方。

参考

1. src/backend/replication/basebackup.c

Flag Counter

digoal’s 大量PostgreSQL文章入口