PostgreSQL Dynamic Tracing using systemtap env prepare
背景
本文介绍一下PostgreSQL动态跟踪环境的搭建, 方便对PG内核动态跟踪感兴趣的童鞋参阅.
环境准备 :
一准备systemtap环境 :
首先要确定内核版本
[root@db-172-16-3-150 ~]# uname -r
2.6.32-358.el6.x86_64
安装systemtap包
yum install -y systemtap-devel systemtap-client systemtap-runtime systemtap-sdt-devel systemtap
其中systemtap-sdt-devel 是PostgreSQL --enable-dtrace需要的包.
安装内核debuginfo包, 不是必须的, 但是最好安装一下, 方便将内核调试和PostgreSQL动态探针结合起来使用.
开启debug源, enabled=1
[root@db-172-16-3-150 ~]# vi /etc/yum.repos.d/CentOS-Debuginfo.repo
[debug]
name=CentOS-6 - Debuginfo
baseurl=http://debuginfo.centos.org/6/$basearch/
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Debug-6
enabled=1
安装与内核版本对应的debuginfo包
[root@db-172-16-3-150 ~]# yum install -y kernel-debuginfo-2.6.32-358.el6.x86_64
测试stap是否可以正常使用 :
[root@db-172-16-3-150 ~]# stap -e 'probe syscall.read { println("Hello"); exit() }'
Hello
接下来准备PostgreSQL环境 :
useradd pg93
su - pg93
vi ~/.bash_profile
export PS1="$USER@`/bin/hostname -s`-> "
export PGPORT=1999
export PGDATA=/pgdata/digoal/1921/data03/pg93/pg_root
export LANG=en_US.utf8
export PGHOME=/home/pg93/pgsql
export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH
export DATE=`date +"%Y%m%d%H%M"`
export PATH=$PGHOME/bin:$PATH:.
export MANPATH=$PGHOME/share/man:$MANPATH
export PGUSER=postgres
export PGHOST=$PGDATA
alias rm='rm -i'
alias ll='ls -lh'
export PGDATABASE=digoal
su - root
wget http://ftp.postgresql.org/pub/source/v9.3.1/postgresql-9.3.1.tar.bz2
tar -jxvf postgresql-9.3.1.tar.bz2
cd postgresql-9.3.1
编译时必须带上--enable-dtrace选项
./configure --prefix=/home/pg93/pgsql9.3.1 --with-pgport=1999 --with-perl --with-tcl --with-python --with-openssl --with-pam --without-ldap --with-libxml --with-libxslt --enable-thread-safety --with-wal-blocksize=16 --enable-dtrace --enable-debug && gmake world && gmake install-world
ln -s /home/pg93/pgsql9.3.1 /home/pg93/pgsql
mkdir -p /pgdata/digoal/1921/data03/pg93/pg_root
chown -R pg93:pg93 /pgdata/digoal/1921/data03/pg93/pg_root
初始化数据库
su - pg93
initdb -D $PGDATA -E UTF8 --locale=C -W -U postgres
配置数据库
cd $PGDATA
vi pg_hba.conf
host all all 0.0.0.0/0 md5
vi postgresql.conf
listen_addresses = '0.0.0.0' # what IP address(es) to listen on;
port = 1999 # (change requires restart)
max_connections = 500 # (change requires restart)
unix_socket_directories = '.' # comma-separated list of directories
unix_socket_permissions = 0700 # begin with 0 to use octal notation
password_encryption = on
tcp_keepalives_idle = 60 # TCP_KEEPIDLE, in seconds;
tcp_keepalives_interval = 10 # TCP_KEEPINTVL, in seconds;
tcp_keepalives_count = 10 # TCP_KEEPCNT;
shared_buffers = 2048MB # min 128kB
maintenance_work_mem = 512MB # min 1MB
max_stack_depth = 8MB # min 100kB
shared_preload_libraries = 'pg_stat_statements' # (change requires restart)
vacuum_cost_delay = 10 # 0-100 milliseconds
vacuum_cost_limit = 10000 # 1-10000 credits
bgwriter_delay = 10ms # 10-10000ms between rounds
wal_level = hot_standby # minimal, archive, or hot_standby
synchronous_commit = off # synchronization level;
wal_sync_method = fdatasync # the default is the first option
wal_buffers = 16384kB # min 32kB, -1 sets based on shared_buffers
wal_writer_delay = 10ms # 1-10000 milliseconds
checkpoint_segments = 256 # in logfile segments, min 1, 16MB each
archive_mode = on # allows archiving to be done
archive_command = '/bin/date' # command to use to archive a logfile segment
max_wal_senders = 32 # max number of walsender processes
wal_keep_segments = 1024 # in logfile segments, 16MB each; 0 disables
hot_standby = on # "on" allows queries during recovery
max_standby_archive_delay = 300s # max delay before canceling queries
max_standby_streaming_delay = 300s # max delay before canceling queries
wal_receiver_status_interval = 1s # send replies at least this often
hot_standby_feedback = on # send info from standby to prevent
random_page_cost = 1.5 # same scale as above
effective_cache_size = 8192MB
log_destination = 'csvlog' # Valid values are combinations of
logging_collector = on # Enable capturing of stderr and csvlog
log_directory = 'pg_log' # directory where log files are written,
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log' # log file name pattern,
log_file_mode = 0600 # creation mode for log files,
log_truncate_on_rotation = on # If on, an existing log file with the
log_rotation_age = 1d # Automatic rotation of logfiles will
log_rotation_size = 10MB # Automatic rotation of logfiles will
log_checkpoints = on
log_connections = on
log_disconnections = on
log_error_verbosity = verbose # terse, default, or verbose messages
log_lock_waits = on # log lock waits >= deadlock_timeout
log_statement = 'ddl' # none, ddl, mod, all
log_timezone = 'PRC'
track_activity_query_size = 4096 # (change requires restart)
autovacuum = on # Enable autovacuum subprocess? 'on'
log_autovacuum_min_duration = 0 # -1 disables, 0 logs all actions and
autovacuum_freeze_max_age = 1500000000 # maximum XID age before forced vacuum
vacuum_freeze_min_age = 50000000
vacuum_freeze_table_age = 1200000000
datestyle = 'iso, mdy'
timezone = 'PRC'
lc_messages = 'C' # locale for system error message
lc_monetary = 'C' # locale for monetary formatting
lc_numeric = 'C' # locale for number formatting
lc_time = 'C' # locale for time formatting
default_text_search_config = 'pg_catalog.english'
deadlock_timeout = 1s
pg_stat_statements.max = 1000
pg_stat_statements.track = all
[root@db-172-16-3-150 pg93]# su - pg93
pg93@db-172-16-3-150-> pg_ctl start
server starting
pg93@db-172-16-3-150-> LOG: 00000: loaded library "pg_stat_statements"
LOCATION: load_libraries, miscinit.c:1296
LOG: 00000: redirecting log output to logging collector process
HINT: Future log output will appear in directory "pg_log".
LOCATION: SysLogger_Start, syslogger.c:649
创建测试库以及测试表, 测试函数
pg93@db-172-16-3-150-> psql postgres postgres
psql (9.3.1)
Type "help" for help.
postgres=# create database digoal;
CREATE DATABASE
postgres=# create extension pg_stat_statements;
CREATE EXTENSION
digoal=# create table test(id int primary key, info text, crt_time timestamp);
CREATE TABLE
digoal=# create or replace function f_test(i_id int) returns void as $$
declare
begin
update test set info=md5(random()::text), crt_time=clock_timestamp() where id=i_id;
if not found then
insert into test(id,info,crt_time) values(i_id,md5(random()::text),clock_timestamp());
end if;
return;
exception when others then
return;
end;
$$ language plpgsql strict;
测试PostgreSQL探针是否可以正常使用 :
查看有多少个探针 :
[root@db-172-16-3-150 utils]# stap -l 'process("/home/pg93/pgsql9.3.1/bin/postgres").mark("**")'
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__checkpoint__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__checkpoint__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__checkpoint__sync__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__flush__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__flush__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__read__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__read__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__sync__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__sync__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__sync__written")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__write__dirty__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("buffer__write__dirty__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("checkpoint__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("checkpoint__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("clog__checkpoint__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("clog__checkpoint__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("deadlock__found")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lock__wait__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lock__wait__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__acquire")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__condacquire")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__condacquire__fail")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__release")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__wait__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__wait__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__wait__until__free")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("lwlock__wait__until__free__fail")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("multixact__checkpoint__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("multixact__checkpoint__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__execute__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__execute__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__parse__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__parse__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__plan__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__plan__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__rewrite__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__rewrite__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("smgr__md__read__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("smgr__md__read__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("smgr__md__write__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("smgr__md__write__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("sort__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("sort__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("statement__status")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("subtrans__checkpoint__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("subtrans__checkpoint__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("transaction__abort")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("transaction__commit")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("transaction__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("twophase__checkpoint__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("twophase__checkpoint__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("wal__buffer__write__dirty__done")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("wal__buffer__write__dirty__start")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("xlog__insert")
process("/home/pg93/pgsql9.3.1/bin/postgres").mark("xlog__switch")
探针使用测试 :
每隔3秒输出qps.
pg93@db-172-16-3-150-> vi test.sql
\setrandom id 1 50000000
select f_test(:id);
[root@db-172-16-3-150 ~]# stap -e '
global var
probe process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__execute__done") {
var++
}
probe timer.s($1) {
printf("qps:%d\n", var/$1)
var=0
}' 3
pg93@db-172-16-3-150-> pgbench -M prepared -n -r -f ./test.sql -c 16 -j 4 -T 30
qps:31842
qps:35328
qps:35392
qps:35504
qps:35425
qps:35520
qps:35503
qps:35467
qps:35436
qps:3034
transaction type: Custom query
scaling factor: 1
query mode: prepared
number of clients: 16
number of threads: 4
duration: 30 s
number of transactions actually processed: 1037188
tps = 34572.359432 (including connections establishing)
tps = 34586.329195 (excluding connections establishing)
statement latencies in milliseconds:
0.003126 \setrandom id 1 50000000
0.457397 select f_test(:id);
PostgreSQL中与动态跟踪相关的文件 :
postgresql-9.3.1/src/backend/utils/probes.d
postgresql-9.3.1/src/backend/utils/probes.h
/usr/include/sys/sdt.h
http://www.postgresql.org/docs/9.3/static/dynamic-trace.html
https://sourceware.org/systemtap/tapsets/
PostgreSQL动态跟踪的详细使用参考 :
Systemtap EXP: PostgreSQL IN-BUILD mark Class 1 - transaction
http://blog.163.com/digoal@126/blog/static/163877040201391684012713/
Systemtap EXP: PostgreSQL IN-BUILD mark Class 2 - query
http://blog.163.com/digoal@126/blog/static/1638770402013916101117367/
Systemtap EXP: PostgreSQL IN-BUILD mark Class 3 - checkpoint
http://blog.163.com/digoal@126/blog/static/163877040201391622459221/
Systemtap EXP: PostgreSQL IN-BUILD mark Class 4 - buffer
http://blog.163.com/digoal@126/blog/static/1638770402013916488761/
Systemtap EXP: PostgreSQL IN-BUILD mark Class 5 - read | write relation |
http://blog.163.com/digoal@126/blog/static/163877040201391653616103/
Systemtap EXP: PostgreSQL IN-BUILD mark Class 6 - lock
http://blog.163.com/digoal@126/blog/static/163877040201391674922879/
Systemtap EXP: PostgreSQL IN-BUILD mark Class 7 - others(statement,xlog,sort)
http://blog.163.com/digoal@126/blog/static/1638770402013916221518/
下面截取一个probe定义进行讲解 :
src/backend/utils/probes.h
#include <sys/sdt.h>
/* TRACE_POSTGRESQL_TRANSACTION_START ( unsigned int) */
#if defined STAP_SDT_V1
#define TRACE_POSTGRESQL_TRANSACTION_START_ENABLED() __builtin_expect (transaction__start_semaphore, 0)
#define postgresql_transaction__start_semaphore transaction__start_semaphore
#else
#define TRACE_POSTGRESQL_TRANSACTION_START_ENABLED() __builtin_expect (postgresql_transaction__start_semaphore, 0)
#endif
__extension__ extern unsigned short postgresql_transaction__start_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes")));
#define TRACE_POSTGRESQL_TRANSACTION_START(arg1) \
DTRACE_PROBE1(postgresql,transaction__start,arg1)
probe定义 :
#define TRACE_POSTGRESQL_TRANSACTION_START(arg1) \
DTRACE_PROBE1(postgresql,transaction__start,arg1)
对应
/usr/include/sys/sdt.h
/* DTrace compatible macro names. */
#define DTRACE_PROBE1(provider,probe,parm1) \
STAP_PROBE1(provider,probe,parm1)
所以这个定义的probe = transaction__start
参数为arg1.
参数含义在代码中可以找到, 如下 :
TRACE_POSTGRESQL_TRANSACTION_START 宏在代码中使用的位置
src/backend/access/transam/xact.c
/* ----------------------------------------------------------------
* interface routines
* ----------------------------------------------------------------
*/
/*
* StartTransaction
*/
static void
StartTransaction(void)
{
TransactionState s;
VirtualTransactionId vxid;
...
略
/*
* Advertise it in the proc array. We assume assignment of
* LocalTransactionID is atomic, and the backendId should be set already.
*/
Assert(MyProc->backendId == vxid.backendId);
MyProc->lxid = vxid.localTransactionId;
TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId);
...
略
自定义PostgreSQL探针参考 :
http://blog.163.com/digoal@126/blog/static/163877040201391123645546/