Systemtap EXP: Trace PostgreSQL iostat per SQL statement 2
背景
本例为上一篇blog的扩展, 可输出到块设备, 以及区分os cache的io请求操作.
相当精细.
stp文件如下 :
global io_stat1%[120000] // 非cache读写字节数(单SQL)
global io_stat11%[120000] // 非cache读写耗费时间(单SQL)
global io_stat2%[120000] // cache读写字节数(单SQL)
global io_stat22%[120000] // cache读写耗费时间(单SQL)
global io_stat3%[120000] // 非cache读取字节数(总,只关心设备号)
global io_stat33%[120000] // 非cache读写耗费时间(总,只关心设备号)
global io_stat4%[120000] // cache读写字节数(总,只关心设备号)
global io_stat44%[120000] // cache读写耗费时间(总,只关心设备号)
global del%[120000] // 因为foreach中不允许修改本数组, 所以需要使用另一个数组来存储索引, 方便删除
probe vfs.read.return {
try {
if ($return>0) {
v_us=gettimeofday_us() - @entry(gettimeofday_us())
if (devname!="N/A" && execname()=="postgres") { /*skip read from cache, filter postgres otherwise*/
io_stat1[pid(),execname(),"R",devname] <<< $return // 非cache读字节数(单SQL)
io_stat11[pid(),execname(),"R",devname] <<< v_us // 非cache读耗费时间(单SQL)
io_stat3["R",devname] <<< $return // 非cache读字节数(总,只关心设备号)
io_stat33["R",devname] <<< v_us // 非cache读耗费时间(总,只关心设备号)
}
if (devname=="N/A" && execname()=="postgres") {
io_stat2[pid(),execname(),"R",devname] <<< $return // cache读字节数(单SQL)
io_stat22[pid(),execname(),"R",devname] <<< v_us // cache读耗费时间(单SQL)
io_stat4["R",devname] <<< $return // cache读字节数(总,只关心设备号)
io_stat44["R",devname] <<< v_us // cache读耗费时间(总,只关心设备号)
}
}
}
catch(msg) {
printdln("---", pn(), msg)
}
}
probe vfs.write.return {
try {
if ($return>0) {
v_us=gettimeofday_us() - @entry(gettimeofday_us())
if (devname!="N/A" && execname()=="postgres") { /*skip read from cache, filter postgres otherwise*/
io_stat1[pid(),execname(),"W",devname] <<< $return
io_stat11[pid(),execname(),"W",devname] <<< v_us
io_stat3["W",devname] <<< $return
io_stat33["W",devname] <<< v_us
}
if (devname=="N/A" && execname()=="postgres") {
io_stat2[pid(),execname(),"W",devname] <<< $return
io_stat22[pid(),execname(),"W",devname] <<< v_us
io_stat4["W",devname] <<< $return
io_stat44["W",devname] <<< v_us
}
}
}
catch(msg) {
printdln("---", pn(), msg)
}
}
probe process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__start") {
try {
// SQL开始前, 先清除之前SQL的统计变量存储的信息.
// 清除非CACHE读写统计变量的信息.
// 因为foreach中不允许修改本数组, 所以需要使用另一个数组来存储索引, 方便删除, 这里就是del的用意.
foreach([a,b,c,d] in io_stat1) {
if (a==pid() && b==execname()) {
del[a,b,c,d]=1 // 将a,b,c,d作为索引存储任意一个值到del数组. a,b,c,d就是一会需要清除的io_stat1,io_stat11的索引值.
}
}
foreach([a,b,c,d] in del) {
delete io_stat1[a,b,c,d]
delete io_stat11[a,b,c,d]
}
delete del // 用完del后, 记得清除del的值.
// 清除CACHE读写统计变量的信息.
foreach([a,b,c,d] in io_stat2) {
if (a==pid() && b==execname()) {
del[a,b,c,d]=1
}
}
foreach([a,b,c,d] in del) {
delete io_stat2[a,b,c,d]
delete io_stat22[a,b,c,d]
}
delete del
}
catch(msg) {
printdln("---", pn(), msg)
}
}
probe process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__done") {
try {
// 输出SQL语句
printf("query: %s\n", user_string($arg1))
// 非cache统计
println("非cache输出")
foreach([a,b,c,d] in io_stat1 @sum -) {
if (c == "R" && a==pid() && b==execname()) {
var1 = @count(io_stat1[a,b,c,d]) // 请求次数
var2 = @sum(io_stat1[a,b,c,d]) / 1024 // 请求K字节数
var3 = @sum(io_stat11[a,b,c,d]) // 请求时间, us
spvar1 = ((var3!=0) ? ((1000000*var1)/var3) : 0) // 请求次数每秒
spvar2 = ((var3!=0) ? ((1000000*var2)/var3) : 0) // 请求K字节数每秒
printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var1, var2, spvar1, spvar2)
}
if (c == "W" && a==pid() && b==execname()) {
var4 = @count(io_stat1[a,b,c,d]) // 请求次数
var5 = @sum(io_stat1[a,b,c,d]) / 1024 // 请求K字节数
var6 = @sum(io_stat11[a,b,c,d]) // 请求时间
spvar4 = ((var6!=0) ? ((1000000*var4)/var6) : 0) // 请求次数每秒
spvar5 = ((var6!=0) ? ((1000000*var5)/var6) : 0) // 请求K字节数每秒
printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var4, var5, spvar4, spvar5)
}
del[a,b,c,d]=1 // 使用a,b,c,d填充del数组, 用于清除io_stat1, io_stat11.
}
foreach([a,b,c,d] in del) {
delete io_stat1[a,b,c,d]
delete io_stat11[a,b,c,d]
}
delete del // 用完清除del
// 清除非cache统计用过的本地变量, 后面的cache统计需要继续使用.
delete var1
delete var2
delete var3
delete var4
delete var5
delete var6
delete spvar1
delete spvar2
delete spvar4
delete spvar5
// cache统计
println("cache输出")
foreach([a,b,c,d] in io_stat2 @sum -) {
if (c == "R" && a==pid() && b==execname()) {
var1 = @count(io_stat2[a,b,c,d]) // 请求次数
var2 = @sum(io_stat2[a,b,c,d]) / 1024 // 请求K字节数
var3 = @sum(io_stat22[a,b,c,d]) // 请求时间
spvar1 = ((var3!=0) ? ((1000000*var1)/var3) : 0) // 请求次数每秒
spvar2 = ((var3!=0) ? ((1000000*var2)/var3) : 0) // 请求K字节数每秒
printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var1, var2, spvar1, spvar2)
}
if (c == "W" && a==pid() && b==execname()) {
var4 = @count(io_stat2[a,b,c,d]) // 请求次数
var5 = @sum(io_stat2[a,b,c,d]) / 1024 // 请求K字节数
var6 = @sum(io_stat22[a,b,c,d]) // 请求时间
spvar4 = ((var6!=0) ? ((1000000*var4)/var6) : 0) // 请求次数每秒
spvar5 = ((var6!=0) ? ((1000000*var5)/var6) : 0) // 请求K字节数每秒
printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var4, var5, spvar4, spvar5)
}
del[a,b,c,d]=1
}
foreach([a,b,c,d] in del) {
delete io_stat1[a,b,c,d]
delete io_stat11[a,b,c,d]
}
delete del
}
catch(msg) {
printdln("---", pn(), msg)
}
}
probe end{
try {
println("----------END----------")
// 非cache, 按设备的读写统计输出.
println("非cache输出")
foreach([c,d] in io_stat3 @sum -) {
if (c == "R") {
var1 = @count(io_stat3[c,d]) // 请求次数
var2 = @sum(io_stat3[c,d]) / 1024 // 请求K字节数
var3 = @sum(io_stat33[c,d]) // 请求时间
spvar1 = ((var3!=0) ? ((1000000*var1)/var3) : 0) // 请求次数每秒
spvar2 = ((var3!=0) ? ((1000000*var2)/var3) : 0) // 请求K字节数每秒
printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var1, var2, spvar1, spvar2)
}
if (c == "W") {
var4 = @count(io_stat3[c,d]) // 请求次数
var5 = @sum(io_stat3[c,d]) / 1024 // 请求K字节数
var6 = @sum(io_stat33[c,d]) // 请求时间
spvar4 = ((var6!=0) ? ((1000000*var4)/var6) : 0) // 请求次数每秒
spvar5 = ((var6!=0) ? ((1000000*var5)/var6) : 0) // 请求K字节数每秒
printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var4, var5, spvar4, spvar5)
}
}
delete var1
delete var2
delete var3
delete var4
delete var5
delete var6
delete spvar1
delete spvar2
delete spvar4
delete spvar5
// cache, 按设备的读写统计输出.
println("cache输出")
foreach([c,d] in io_stat4 @sum -) {
if (c == "R") {
var1 = @count(io_stat4[c,d]) // 请求次数
var2 = @sum(io_stat4[c,d]) / 1024 // 请求K字节数
var3 = @sum(io_stat44[c,d]) // 请求时间
spvar1 = ((var3!=0) ? ((1000000*var1)/var3) : 0) // 请求次数每秒
spvar2 = ((var3!=0) ? ((1000000*var2)/var3) : 0) // 请求K字节数每秒
printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var1, var2, spvar1, spvar2)
}
if (c == "W") {
var4 = @count(io_stat4[c,d]) // 请求次数
var5 = @sum(io_stat4[c,d]) / 1024 // 请求K字节数
var6 = @sum(io_stat44[c,d]) // 请求时间
spvar4 = ((var6!=0) ? ((1000000*var4)/var6) : 0) // 请求次数每秒
spvar5 = ((var6!=0) ? ((1000000*var5)/var6) : 0) // 请求K字节数每秒
printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var4, var5, spvar4, spvar5)
}
}
// 结束后清除所有的全局变量的值.
delete io_stat1
delete io_stat11
delete io_stat2
delete io_stat22
delete io_stat3
delete io_stat33
delete io_stat4
delete io_stat44
delete del
}
catch(msg) {
printdln("---", pn(), msg)
}
}
跟踪举例 :
[root@db-172-16-3-150 ~]# /opt/systemtap/bin/stap test.stp
SQL :
digoal=# drop table t;
DROP TABLE
digoal=# create table t(id int, info text, crt_time timestamp);
CREATE TABLE
digoal=# insert into t select generate_series(1,1000000), md5(random()::text), clock_timestamp();
INSERT 0 1000000
digoal=# create index idx_t_1 on t(id);
CREATE INDEX
digoal=# explain (analyze,verbose,costs,buffers,timing) select count(*) from t;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
Aggregate (cost=21846.00..21846.01 rows=1 width=0) (actual time=467.932..467.932 rows=1 loops=1)
Output: count(*)
Buffers: shared hit=9346
-> Seq Scan on public.t (cost=0.00..19346.00 rows=1000000 width=0) (actual time=0.011..253.487 rows=1000000 loops=1)
Output: id, info, crt_time
Buffers: shared hit=9346
Total runtime: 467.978 ms
(7 rows)
digoal=# explain (analyze,verbose,costs,buffers,timing) select count(*) from t where id<10000;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------
-----
Aggregate (cost=16928.08..16928.09 rows=1 width=0) (actual time=8.509..8.509 rows=1 loops=1)
Output: count(*)
Buffers: shared hit=94 read=30
-> Index Only Scan using idx_t_1 on public.t (cost=0.42..16094.75 rows=333333 width=0) (actual time=0.146..6.010 rows=9999 loop
s=1)
Output: id
Index Cond: (t.id < 10000)
Heap Fetches: 9999
Buffers: shared hit=94 read=30
Total runtime: 8.558 ms
(9 rows)
digoal=# explain (analyze,verbose,costs,buffers,timing) select count(*) from generate_series(1,1000000);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------
---------
Aggregate (cost=12.50..12.51 rows=1 width=0) (actual time=918.138..918.138 rows=1 loops=1)
Output: count(*)
Buffers: temp read=1710 written=1709
-> Function Scan on pg_catalog.generate_series (cost=0.00..10.00 rows=1000 width=0) (actual time=320.753..684.423 rows=1000000
loops=1)
Output: generate_series
Function Call: generate_series(1, 1000000)
Buffers: temp read=1710 written=1709
Total runtime: 925.695 ms
(8 rows)
输出, 包含IO请求的设备名, 如果数据在shared buffer中命中的话, 就没有os cache的io请求操作也没有块设备额的io请求操作 :
query: drop table t;
非cache输出
cache输出
query: create table t(id int, info text, crt_time timestamp);
非cache输出
cache输出
query: insert into t select generate_series(1,1000000), md5(random()::text), clock_timestamp();
非cache输出
-R-devname:sdc1, reqs:4, reqKbytes:32, reqs/s:76923, reqKbytes/s:615384
-W-devname:sdc1, reqs:4, reqKbytes:32, reqs/s:37735, reqKbytes/s:301886
cache输出
query: create index idx_t_1 on t(id);
非cache输出
-R-devname:sdc1, reqs:2, reqKbytes:16, reqs/s:105263, reqKbytes/s:842105
-W-devname:sdc1, reqs:2, reqKbytes:16, reqs/s:51282, reqKbytes/s:410256
cache输出
query: explain (analyze,verbose,costs,buffers,timing) select count(*) from t;
非cache输出
-R-devname:sdc1, reqs:3, reqKbytes:24, reqs/s:90909, reqKbytes/s:727272
-W-devname:sdc1, reqs:3, reqKbytes:24, reqs/s:46153, reqKbytes/s:369230
cache输出
query: explain (analyze,verbose,costs,buffers,timing) select count(*) from t where id<10000;
非cache输出
-R-devname:sdc1, reqs:32, reqKbytes:256, reqs/s:101265, reqKbytes/s:810126
-W-devname:sdc1, reqs:32, reqKbytes:256, reqs/s:64128, reqKbytes/s:513026
cache输出
query: explain (analyze,verbose,costs,buffers,timing) select count(*) from generate_series(1,1000000);
非cache输出
-R-devname:sdc1, reqs:1711, reqKbytes:13687, reqs/s:104969, reqKbytes/s:839693
-W-devname:sdc1, reqs:1711, reqKbytes:13687, reqs/s:69103, reqKbytes/s:552786
cache输出
^C----------END----------
非cache输出
-R-devname:sdc1, reqs:1836, reqKbytes:14238, reqs/s:106477, reqKbytes/s:825726
-W-devname:sdc1, reqs:1836, reqKbytes:14238, reqs/s:69445, reqKbytes/s:538543
cache输出
-W-devname:N/A, reqs:8, reqKbytes:0, reqs/s:5242, reqKbytes/s:0
-R-devname:N/A, reqs:7, reqKbytes:0, reqs/s:20771, reqKbytes/s:0
参考
1. http://blog.163.com/digoal@126/blog/static/1638770402013915115254543/