PostgreSQL Dynamic Tracing using systemtap env prepare

5 minute read

背景

本文介绍一下PostgreSQL动态跟踪环境的搭建, 方便对PG内核动态跟踪感兴趣的童鞋参阅.  
环境准备 :   
一准备systemtap环境 :   
首先要确定内核版本  
[root@db-172-16-3-150 ~]# uname -r  
2.6.32-358.el6.x86_64  
  
安装systemtap包  
yum install -y systemtap-devel systemtap-client systemtap-runtime systemtap-sdt-devel systemtap  
其中systemtap-sdt-devel 是PostgreSQL --enable-dtrace需要的包.  
  
安装内核debuginfo包, 不是必须的, 但是最好安装一下, 方便将内核调试和PostgreSQL动态探针结合起来使用.  
开启debug源, enabled=1  
[root@db-172-16-3-150 ~]# vi /etc/yum.repos.d/CentOS-Debuginfo.repo   
[debug]  
name=CentOS-6 - Debuginfo  
baseurl=http://debuginfo.centos.org/6/$basearch/  
gpgcheck=1  
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Debug-6  
enabled=1  
安装与内核版本对应的debuginfo包  
[root@db-172-16-3-150 ~]# yum install -y kernel-debuginfo-2.6.32-358.el6.x86_64  
  
测试stap是否可以正常使用 :   
[root@db-172-16-3-150 ~]# stap -e 'probe syscall.read { println("Hello"); exit() }'  
Hello  
  
接下来准备PostgreSQL环境 :   
useradd pg93  
su - pg93  
vi ~/.bash_profile  
export PS1="$USER@`/bin/hostname -s`-> "  
export PGPORT=1999  
export PGDATA=/pgdata/digoal/1921/data03/pg93/pg_root  
export LANG=en_US.utf8  
export PGHOME=/home/pg93/pgsql  
export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH  
export DATE=`date +"%Y%m%d%H%M"`  
export PATH=$PGHOME/bin:$PATH:.  
export MANPATH=$PGHOME/share/man:$MANPATH  
export PGUSER=postgres  
export PGHOST=$PGDATA  
alias rm='rm -i'  
alias ll='ls -lh'  
export PGDATABASE=digoal  
  
su - root  
wget http://ftp.postgresql.org/pub/source/v9.3.1/postgresql-9.3.1.tar.bz2  
tar -jxvf postgresql-9.3.1.tar.bz2  
cd postgresql-9.3.1  
编译时必须带上--enable-dtrace选项  
./configure --prefix=/home/pg93/pgsql9.3.1 --with-pgport=1999 --with-perl --with-tcl --with-python --with-openssl --with-pam --without-ldap --with-libxml --with-libxslt --enable-thread-safety --with-wal-blocksize=16 --enable-dtrace --enable-debug && gmake world && gmake install-world  
ln -s /home/pg93/pgsql9.3.1 /home/pg93/pgsql  
mkdir -p /pgdata/digoal/1921/data03/pg93/pg_root  
chown -R pg93:pg93 /pgdata/digoal/1921/data03/pg93/pg_root  
初始化数据库  
su - pg93  
initdb -D $PGDATA -E UTF8 --locale=C -W -U postgres  
配置数据库  
cd $PGDATA  
vi pg_hba.conf  
host all all 0.0.0.0/0 md5  
  
vi postgresql.conf  
listen_addresses = '0.0.0.0'            # what IP address(es) to listen on;  
port = 1999                             # (change requires restart)  
max_connections = 500                   # (change requires restart)  
unix_socket_directories = '.'   # comma-separated list of directories  
unix_socket_permissions = 0700          # begin with 0 to use octal notation  
password_encryption = on  
tcp_keepalives_idle = 60                # TCP_KEEPIDLE, in seconds;  
tcp_keepalives_interval = 10            # TCP_KEEPINTVL, in seconds;  
tcp_keepalives_count = 10               # TCP_KEEPCNT;  
shared_buffers = 2048MB                 # min 128kB  
maintenance_work_mem = 512MB            # min 1MB  
max_stack_depth = 8MB                   # min 100kB  
shared_preload_libraries = 'pg_stat_statements'         # (change requires restart)  
vacuum_cost_delay = 10                  # 0-100 milliseconds  
vacuum_cost_limit = 10000               # 1-10000 credits  
bgwriter_delay = 10ms                   # 10-10000ms between rounds  
wal_level = hot_standby                 # minimal, archive, or hot_standby  
synchronous_commit = off                # synchronization level;  
wal_sync_method = fdatasync             # the default is the first option  
wal_buffers = 16384kB                   # min 32kB, -1 sets based on shared_buffers  
wal_writer_delay = 10ms         # 1-10000 milliseconds  
checkpoint_segments = 256               # in logfile segments, min 1, 16MB each  
archive_mode = on               # allows archiving to be done  
archive_command = '/bin/date'           # command to use to archive a logfile segment  
max_wal_senders = 32            # max number of walsender processes  
wal_keep_segments = 1024                # in logfile segments, 16MB each; 0 disables  
hot_standby = on                        # "on" allows queries during recovery  
max_standby_archive_delay = 300s        # max delay before canceling queries  
max_standby_streaming_delay = 300s      # max delay before canceling queries  
wal_receiver_status_interval = 1s       # send replies at least this often  
hot_standby_feedback = on               # send info from standby to prevent  
random_page_cost = 1.5                  # same scale as above  
effective_cache_size = 8192MB  
log_destination = 'csvlog'              # Valid values are combinations of  
logging_collector = on          # Enable capturing of stderr and csvlog  
log_directory = 'pg_log'                # directory where log files are written,  
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log' # log file name pattern,  
log_file_mode = 0600                    # creation mode for log files,  
log_truncate_on_rotation = on           # If on, an existing log file with the  
log_rotation_age = 1d                   # Automatic rotation of logfiles will  
log_rotation_size = 10MB                # Automatic rotation of logfiles will  
log_checkpoints = on  
log_connections = on  
log_disconnections = on  
log_error_verbosity = verbose           # terse, default, or verbose messages  
log_lock_waits = on                     # log lock waits >= deadlock_timeout  
log_statement = 'ddl'                   # none, ddl, mod, all  
log_timezone = 'PRC'  
track_activity_query_size = 4096        # (change requires restart)  
autovacuum = on                 # Enable autovacuum subprocess?  'on'  
log_autovacuum_min_duration = 0 # -1 disables, 0 logs all actions and  
autovacuum_freeze_max_age = 1500000000  # maximum XID age before forced vacuum  
vacuum_freeze_min_age = 50000000  
vacuum_freeze_table_age = 1200000000  
datestyle = 'iso, mdy'  
timezone = 'PRC'  
lc_messages = 'C'                       # locale for system error message  
lc_monetary = 'C'                       # locale for monetary formatting  
lc_numeric = 'C'                        # locale for number formatting  
lc_time = 'C'                           # locale for time formatting  
default_text_search_config = 'pg_catalog.english'  
deadlock_timeout = 1s  
pg_stat_statements.max = 1000  
pg_stat_statements.track = all  
  
[root@db-172-16-3-150 pg93]# su - pg93  
pg93@db-172-16-3-150-> pg_ctl start  
server starting  
pg93@db-172-16-3-150-> LOG:  00000: loaded library "pg_stat_statements"  
LOCATION:  load_libraries, miscinit.c:1296  
LOG:  00000: redirecting log output to logging collector process  
HINT:  Future log output will appear in directory "pg_log".  
LOCATION:  SysLogger_Start, syslogger.c:649  
创建测试库以及测试表, 测试函数  
pg93@db-172-16-3-150-> psql postgres postgres  
psql (9.3.1)  
Type "help" for help.  
postgres=# create database digoal;  
CREATE DATABASE  
postgres=# create extension pg_stat_statements;  
CREATE EXTENSION  
digoal=# create table test(id int primary key, info text, crt_time timestamp);  
CREATE TABLE  
digoal=# create or replace function f_test(i_id int) returns void as $$  
declare  
begin  
  update test set info=md5(random()::text), crt_time=clock_timestamp() where id=i_id;  
  if not found then  
    insert into test(id,info,crt_time) values(i_id,md5(random()::text),clock_timestamp());  
  end if;  
  return;  
  exception when others then  
    return;  
end;  
$$ language plpgsql strict;  
  
测试PostgreSQL探针是否可以正常使用 :  
查看有多少个探针 :   
[root@db-172-16-3-150 utils]# stap -l 'process("/home/pg93/pgsql9.3.1/bin/postgres").mark("**")'  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__checkpoint__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__checkpoint__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__checkpoint__sync__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__flush__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__flush__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__read__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__read__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__sync__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__sync__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__sync__written")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__write__dirty__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__write__dirty__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("checkpoint__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("checkpoint__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("clog__checkpoint__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("clog__checkpoint__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("deadlock__found")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lock__wait__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lock__wait__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__acquire")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__condacquire")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__condacquire__fail")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__release")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__wait__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__wait__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__wait__until__free")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__wait__until__free__fail")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("multixact__checkpoint__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("multixact__checkpoint__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__execute__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__execute__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__parse__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__parse__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__plan__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__plan__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__rewrite__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__rewrite__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("smgr__md__read__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("smgr__md__read__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("smgr__md__write__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("smgr__md__write__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("sort__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("sort__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("statement__status")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("subtrans__checkpoint__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("subtrans__checkpoint__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("transaction__abort")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("transaction__commit")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("transaction__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("twophase__checkpoint__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("twophase__checkpoint__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("wal__buffer__write__dirty__done")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("wal__buffer__write__dirty__start")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("xlog__insert")  
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("xlog__switch")  
  
探针使用测试 :   
每隔3秒输出qps.  
pg93@db-172-16-3-150-> vi test.sql  
\setrandom id 1 50000000  
select f_test(:id);  
  
[root@db-172-16-3-150 ~]# stap -e '  
global var  
probe process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__execute__done") {  
  var++  
}  
probe timer.s($1) {  
  printf("qps:%d\n", var/$1)  
  var=0  
}' 3  
  
pg93@db-172-16-3-150-> pgbench -M prepared -n -r -f ./test.sql -c 16 -j 4 -T 30  
  
qps:31842  
qps:35328  
qps:35392  
qps:35504  
qps:35425  
qps:35520  
qps:35503  
qps:35467  
qps:35436  
qps:3034  
  
transaction type: Custom query  
scaling factor: 1  
query mode: prepared  
number of clients: 16  
number of threads: 4  
duration: 30 s  
number of transactions actually processed: 1037188  
tps = 34572.359432 (including connections establishing)  
tps = 34586.329195 (excluding connections establishing)  
statement latencies in milliseconds:  
        0.003126        \setrandom id 1 50000000  
        0.457397        select f_test(:id);  
  
PostgreSQL中与动态跟踪相关的文件 :   
postgresql-9.3.1/src/backend/utils/probes.d  
postgresql-9.3.1/src/backend/utils/probes.h  
/usr/include/sys/sdt.h  

http://www.postgresql.org/docs/9.3/static/dynamic-trace.html

https://sourceware.org/systemtap/tapsets/

PostgreSQL动态跟踪的详细使用参考 :

Systemtap EXP: PostgreSQL IN-BUILD mark Class 1 - transaction

http://blog.163.com/digoal@126/blog/static/163877040201391684012713/

Systemtap EXP: PostgreSQL IN-BUILD mark Class 2 - query

http://blog.163.com/digoal@126/blog/static/1638770402013916101117367/

Systemtap EXP: PostgreSQL IN-BUILD mark Class 3 - checkpoint

http://blog.163.com/digoal@126/blog/static/163877040201391622459221/

Systemtap EXP: PostgreSQL IN-BUILD mark Class 4 - buffer

http://blog.163.com/digoal@126/blog/static/1638770402013916488761/

Systemtap EXP: PostgreSQL IN-BUILD mark Class 5 - read write relation

http://blog.163.com/digoal@126/blog/static/163877040201391653616103/

Systemtap EXP: PostgreSQL IN-BUILD mark Class 6 - lock

http://blog.163.com/digoal@126/blog/static/163877040201391674922879/

Systemtap EXP: PostgreSQL IN-BUILD mark Class 7 - others(statement,xlog,sort)

http://blog.163.com/digoal@126/blog/static/1638770402013916221518/

下面截取一个probe定义进行讲解 :

src/backend/utils/probes.h  
#include <sys/sdt.h>  
/* TRACE_POSTGRESQL_TRANSACTION_START ( unsigned int) */  
#if defined STAP_SDT_V1  
#define TRACE_POSTGRESQL_TRANSACTION_START_ENABLED() __builtin_expect (transaction__start_semaphore, 0)  
#define postgresql_transaction__start_semaphore transaction__start_semaphore  
#else  
#define TRACE_POSTGRESQL_TRANSACTION_START_ENABLED() __builtin_expect (postgresql_transaction__start_semaphore, 0)  
#endif  
__extension__ extern unsigned short postgresql_transaction__start_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes")));  
#define TRACE_POSTGRESQL_TRANSACTION_START(arg1) \  
DTRACE_PROBE1(postgresql,transaction__start,arg1)  
  
probe定义 :   
#define TRACE_POSTGRESQL_TRANSACTION_START(arg1) \  
DTRACE_PROBE1(postgresql,transaction__start,arg1)  
对应  
/usr/include/sys/sdt.h  
/* DTrace compatible macro names.  */  
#define DTRACE_PROBE1(provider,probe,parm1)     \  
  STAP_PROBE1(provider,probe,parm1)  
所以这个定义的probe = transaction__start  
参数为arg1.  
参数含义在代码中可以找到, 如下 :   
TRACE_POSTGRESQL_TRANSACTION_START 宏在代码中使用的位置  
src/backend/access/transam/xact.c  
/* ----------------------------------------------------------------  
 *                                              interface routines  
 * ----------------------------------------------------------------  
 */  
  
/*  
 *      StartTransaction  
 */  
static void  
StartTransaction(void)  
{  
        TransactionState s;  
        VirtualTransactionId vxid;  
...  
略  
        /*  
         * Advertise it in the proc array.      We assume assignment of  
         * LocalTransactionID is atomic, and the backendId should be set already.  
         */  
        Assert(MyProc->backendId == vxid.backendId);  
        MyProc->lxid = vxid.localTransactionId;  
  
        TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId);  
...  
略  

自定义PostgreSQL探针参考 :

http://blog.163.com/digoal@126/blog/static/163877040201391123645546/

Flag Counter

digoal’s 大量PostgreSQL文章入口