Systemtap EXP: Trace PostgreSQL iostat per SQL statement 2

6 minute read

背景

本例为上一篇blog的扩展, 可输出到块设备, 以及区分os cache的io请求操作.  
相当精细.  
stp文件如下 :   
global io_stat1%[120000] // 非cache读写字节数(单SQL)  
global io_stat11%[120000] // 非cache读写耗费时间(单SQL)  
global io_stat2%[120000] // cache读写字节数(单SQL)  
global io_stat22%[120000] // cache读写耗费时间(单SQL)  
global io_stat3%[120000] // 非cache读取字节数(总,只关心设备号)  
global io_stat33%[120000] // 非cache读写耗费时间(总,只关心设备号)  
global io_stat4%[120000] // cache读写字节数(总,只关心设备号)  
global io_stat44%[120000] // cache读写耗费时间(总,只关心设备号)  
global del%[120000] // 因为foreach中不允许修改本数组, 所以需要使用另一个数组来存储索引, 方便删除  
  
probe vfs.read.return {  
try {  
  if ($return>0) {  
    v_us=gettimeofday_us() - @entry(gettimeofday_us())  
    if (devname!="N/A" && execname()=="postgres") { /*skip read from cache, filter postgres otherwise*/  
      io_stat1[pid(),execname(),"R",devname] <<< $return   // 非cache读字节数(单SQL)  
      io_stat11[pid(),execname(),"R",devname] <<< v_us     // 非cache读耗费时间(单SQL)  
      io_stat3["R",devname] <<< $return   // 非cache读字节数(总,只关心设备号)  
      io_stat33["R",devname] <<< v_us     // 非cache读耗费时间(总,只关心设备号)  
    }  
    if (devname=="N/A" && execname()=="postgres") {  
      io_stat2[pid(),execname(),"R",devname] <<< $return   // cache读字节数(单SQL)  
      io_stat22[pid(),execname(),"R",devname] <<< v_us     // cache读耗费时间(单SQL)  
      io_stat4["R",devname] <<< $return   // cache读字节数(总,只关心设备号)  
      io_stat44["R",devname] <<< v_us     // cache读耗费时间(总,只关心设备号)  
    }  
  }  
}  
catch(msg) {  
  printdln("---", pn(), msg)  
}  
}  
  
probe vfs.write.return {  
try {  
  if ($return>0) {  
    v_us=gettimeofday_us() - @entry(gettimeofday_us())  
    if (devname!="N/A" && execname()=="postgres") { /*skip read from cache, filter postgres otherwise*/  
      io_stat1[pid(),execname(),"W",devname] <<< $return  
      io_stat11[pid(),execname(),"W",devname] <<< v_us  
      io_stat3["W",devname] <<< $return  
      io_stat33["W",devname] <<< v_us  
    }  
    if (devname=="N/A" && execname()=="postgres") {  
      io_stat2[pid(),execname(),"W",devname] <<< $return  
      io_stat22[pid(),execname(),"W",devname] <<< v_us  
      io_stat4["W",devname] <<< $return  
      io_stat44["W",devname] <<< v_us  
    }  
  }  
}  
catch(msg) {  
  printdln("---", pn(), msg)  
}  
}  
  
probe process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__start") {  
try {  
  // SQL开始前, 先清除之前SQL的统计变量存储的信息.  
  // 清除非CACHE读写统计变量的信息.  
  // 因为foreach中不允许修改本数组, 所以需要使用另一个数组来存储索引, 方便删除, 这里就是del的用意.  
  foreach([a,b,c,d] in io_stat1) {  
    if (a==pid() && b==execname()) {  
      del[a,b,c,d]=1   //  将a,b,c,d作为索引存储任意一个值到del数组. a,b,c,d就是一会需要清除的io_stat1,io_stat11的索引值.  
    }  
  }  
  foreach([a,b,c,d] in del) {  
    delete io_stat1[a,b,c,d]  
    delete io_stat11[a,b,c,d]  
  }  
  delete del  // 用完del后, 记得清除del的值.  
  
  // 清除CACHE读写统计变量的信息.  
  foreach([a,b,c,d] in io_stat2) {  
    if (a==pid() && b==execname()) {  
      del[a,b,c,d]=1  
    }  
  }  
  foreach([a,b,c,d] in del) {  
    delete io_stat2[a,b,c,d]  
    delete io_stat22[a,b,c,d]  
  }  
  delete del  
}  
catch(msg) {  
  printdln("---", pn(), msg)  
}  
}  
  
probe process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__done") {  
try {  
  // 输出SQL语句  
  printf("query: %s\n", user_string($arg1))  
  
  // 非cache统计  
  println("非cache输出")  
  foreach([a,b,c,d] in io_stat1 @sum -) {  
    if (c == "R" && a==pid() && b==execname()) {  
      var1 = @count(io_stat1[a,b,c,d])  // 请求次数  
      var2 = @sum(io_stat1[a,b,c,d]) / 1024  // 请求K字节数  
      var3 = @sum(io_stat11[a,b,c,d])  // 请求时间, us  
      spvar1 = ((var3!=0) ? ((1000000*var1)/var3) : 0)  // 请求次数每秒  
      spvar2 = ((var3!=0) ? ((1000000*var2)/var3) : 0)  // 请求K字节数每秒  
      printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var1, var2, spvar1, spvar2)  
    }  
    if (c == "W" && a==pid() && b==execname()) {  
      var4 = @count(io_stat1[a,b,c,d])  // 请求次数  
      var5 = @sum(io_stat1[a,b,c,d]) / 1024  // 请求K字节数  
      var6 = @sum(io_stat11[a,b,c,d])  // 请求时间  
      spvar4 = ((var6!=0) ? ((1000000*var4)/var6) : 0)  // 请求次数每秒  
      spvar5 = ((var6!=0) ? ((1000000*var5)/var6) : 0)  // 请求K字节数每秒  
      printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var4, var5, spvar4, spvar5)  
    }  
    del[a,b,c,d]=1    // 使用a,b,c,d填充del数组, 用于清除io_stat1, io_stat11.  
  }  
  foreach([a,b,c,d] in del) {  
    delete io_stat1[a,b,c,d]  
    delete io_stat11[a,b,c,d]  
  }  
  delete del   // 用完清除del  
  
// 清除非cache统计用过的本地变量, 后面的cache统计需要继续使用.  
  delete var1  
  delete var2  
  delete var3  
  delete var4  
  delete var5  
  delete var6  
  delete spvar1  
  delete spvar2  
  delete spvar4  
  delete spvar5  
  
  // cache统计  
  println("cache输出")  
  foreach([a,b,c,d] in io_stat2 @sum -) {  
    if (c == "R" && a==pid() && b==execname()) {  
      var1 = @count(io_stat2[a,b,c,d])  // 请求次数  
      var2 = @sum(io_stat2[a,b,c,d]) / 1024  // 请求K字节数  
      var3 = @sum(io_stat22[a,b,c,d])  // 请求时间  
      spvar1 = ((var3!=0) ? ((1000000*var1)/var3) : 0)  // 请求次数每秒  
      spvar2 = ((var3!=0) ? ((1000000*var2)/var3) : 0)  // 请求K字节数每秒  
      printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var1, var2, spvar1, spvar2)  
    }  
    if (c == "W" && a==pid() && b==execname()) {  
      var4 = @count(io_stat2[a,b,c,d])  // 请求次数  
      var5 = @sum(io_stat2[a,b,c,d]) / 1024  // 请求K字节数  
      var6 = @sum(io_stat22[a,b,c,d])  // 请求时间  
      spvar4 = ((var6!=0) ? ((1000000*var4)/var6) : 0)  // 请求次数每秒  
      spvar5 = ((var6!=0) ? ((1000000*var5)/var6) : 0)  // 请求K字节数每秒  
      printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var4, var5, spvar4, spvar5)  
    }  
    del[a,b,c,d]=1  
  }  
  foreach([a,b,c,d] in del) {  
    delete io_stat1[a,b,c,d]  
    delete io_stat11[a,b,c,d]  
  }  
  delete del  
}  
catch(msg) {  
  printdln("---", pn(), msg)  
}  
}  
  
probe end{  
try {  
  println("----------END----------")  
  // 非cache, 按设备的读写统计输出.  
  println("非cache输出")  
  foreach([c,d] in io_stat3 @sum -) {  
    if (c == "R") {  
      var1 = @count(io_stat3[c,d])  // 请求次数  
      var2 = @sum(io_stat3[c,d]) / 1024  // 请求K字节数  
      var3 = @sum(io_stat33[c,d])  // 请求时间  
      spvar1 = ((var3!=0) ? ((1000000*var1)/var3) : 0)  // 请求次数每秒  
      spvar2 = ((var3!=0) ? ((1000000*var2)/var3) : 0)  // 请求K字节数每秒  
      printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var1, var2, spvar1, spvar2)  
    }  
    if (c == "W") {  
      var4 = @count(io_stat3[c,d])  // 请求次数  
      var5 = @sum(io_stat3[c,d]) / 1024  // 请求K字节数  
      var6 = @sum(io_stat33[c,d])  // 请求时间  
      spvar4 = ((var6!=0) ? ((1000000*var4)/var6) : 0)  // 请求次数每秒  
      spvar5 = ((var6!=0) ? ((1000000*var5)/var6) : 0)  // 请求K字节数每秒  
      printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var4, var5, spvar4, spvar5)  
    }  
  }  
  
  delete var1  
  delete var2  
  delete var3  
  delete var4  
  delete var5  
  delete var6  
  delete spvar1  
  delete spvar2  
  delete spvar4  
  delete spvar5  
  
  // cache, 按设备的读写统计输出.  
  println("cache输出")  
  foreach([c,d] in io_stat4 @sum -) {  
    if (c == "R") {  
      var1 = @count(io_stat4[c,d])  // 请求次数  
      var2 = @sum(io_stat4[c,d]) / 1024  // 请求K字节数  
      var3 = @sum(io_stat44[c,d])  // 请求时间  
      spvar1 = ((var3!=0) ? ((1000000*var1)/var3) : 0)  // 请求次数每秒  
      spvar2 = ((var3!=0) ? ((1000000*var2)/var3) : 0)  // 请求K字节数每秒  
      printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var1, var2, spvar1, spvar2)  
    }  
    if (c == "W") {  
      var4 = @count(io_stat4[c,d])  // 请求次数  
      var5 = @sum(io_stat4[c,d]) / 1024  // 请求K字节数  
      var6 = @sum(io_stat44[c,d])  // 请求时间  
      spvar4 = ((var6!=0) ? ((1000000*var4)/var6) : 0)  // 请求次数每秒  
      spvar5 = ((var6!=0) ? ((1000000*var5)/var6) : 0)  // 请求K字节数每秒  
      printf("-%s-devname:%s, reqs:%d, reqKbytes:%d, reqs/s:%d, reqKbytes/s:%d\n", c, d, var4, var5, spvar4, spvar5)  
    }  
  }  
  
// 结束后清除所有的全局变量的值.  
  delete io_stat1  
  delete io_stat11  
  delete io_stat2  
  delete io_stat22  
  delete io_stat3  
  delete io_stat33  
  delete io_stat4  
  delete io_stat44  
  delete del  
}  
catch(msg) {  
  printdln("---", pn(), msg)  
}  
}  
  
跟踪举例 :   
[root@db-172-16-3-150 ~]# /opt/systemtap/bin/stap test.stp   
SQL :   
digoal=# drop table t;  
DROP TABLE  
digoal=# create table t(id int, info text, crt_time timestamp);  
CREATE TABLE  
digoal=# insert into t select generate_series(1,1000000), md5(random()::text), clock_timestamp();  
INSERT 0 1000000  
digoal=# create index idx_t_1 on t(id);  
CREATE INDEX  
digoal=# explain (analyze,verbose,costs,buffers,timing) select count(*) from t;  
                                                        QUERY PLAN                                                          
--------------------------------------------------------------------------------------------------------------------------  
 Aggregate  (cost=21846.00..21846.01 rows=1 width=0) (actual time=467.932..467.932 rows=1 loops=1)  
   Output: count(*)  
   Buffers: shared hit=9346  
   ->  Seq Scan on public.t  (cost=0.00..19346.00 rows=1000000 width=0) (actual time=0.011..253.487 rows=1000000 loops=1)  
         Output: id, info, crt_time  
         Buffers: shared hit=9346  
 Total runtime: 467.978 ms  
(7 rows)  
  
digoal=# explain (analyze,verbose,costs,buffers,timing) select count(*) from t where id<10000;  
                                                               QUERY PLAN                                                             
       
------------------------------------------------------------------------------------------------------------------------------------  
-----  
 Aggregate  (cost=16928.08..16928.09 rows=1 width=0) (actual time=8.509..8.509 rows=1 loops=1)  
   Output: count(*)  
   Buffers: shared hit=94 read=30  
   ->  Index Only Scan using idx_t_1 on public.t  (cost=0.42..16094.75 rows=333333 width=0) (actual time=0.146..6.010 rows=9999 loop  
s=1)  
         Output: id  
         Index Cond: (t.id < 10000)  
         Heap Fetches: 9999  
         Buffers: shared hit=94 read=30  
 Total runtime: 8.558 ms  
(9 rows)  
  
digoal=# explain (analyze,verbose,costs,buffers,timing) select count(*) from generate_series(1,1000000);  
                                                                 QUERY PLAN                                                           
           
------------------------------------------------------------------------------------------------------------------------------------  
---------  
 Aggregate  (cost=12.50..12.51 rows=1 width=0) (actual time=918.138..918.138 rows=1 loops=1)  
   Output: count(*)  
   Buffers: temp read=1710 written=1709  
   ->  Function Scan on pg_catalog.generate_series  (cost=0.00..10.00 rows=1000 width=0) (actual time=320.753..684.423 rows=1000000   
loops=1)  
         Output: generate_series  
         Function Call: generate_series(1, 1000000)  
         Buffers: temp read=1710 written=1709  
 Total runtime: 925.695 ms  
(8 rows)  
  
输出, 包含IO请求的设备名, 如果数据在shared buffer中命中的话, 就没有os cache的io请求操作也没有块设备额的io请求操作 :   
query: drop table t;  
非cache输出  
cache输出  
query: create table t(id int, info text, crt_time timestamp);  
非cache输出  
cache输出  
query: insert into t select generate_series(1,1000000), md5(random()::text), clock_timestamp();  
非cache输出  
-R-devname:sdc1, reqs:4, reqKbytes:32, reqs/s:76923, reqKbytes/s:615384  
-W-devname:sdc1, reqs:4, reqKbytes:32, reqs/s:37735, reqKbytes/s:301886  
cache输出  
query: create index idx_t_1 on t(id);  
非cache输出  
-R-devname:sdc1, reqs:2, reqKbytes:16, reqs/s:105263, reqKbytes/s:842105  
-W-devname:sdc1, reqs:2, reqKbytes:16, reqs/s:51282, reqKbytes/s:410256  
cache输出  
query: explain (analyze,verbose,costs,buffers,timing) select count(*) from t;  
非cache输出  
-R-devname:sdc1, reqs:3, reqKbytes:24, reqs/s:90909, reqKbytes/s:727272  
-W-devname:sdc1, reqs:3, reqKbytes:24, reqs/s:46153, reqKbytes/s:369230  
cache输出  
query: explain (analyze,verbose,costs,buffers,timing) select count(*) from t where id<10000;  
非cache输出  
-R-devname:sdc1, reqs:32, reqKbytes:256, reqs/s:101265, reqKbytes/s:810126  
-W-devname:sdc1, reqs:32, reqKbytes:256, reqs/s:64128, reqKbytes/s:513026  
cache输出  
query: explain (analyze,verbose,costs,buffers,timing) select count(*) from generate_series(1,1000000);  
非cache输出  
-R-devname:sdc1, reqs:1711, reqKbytes:13687, reqs/s:104969, reqKbytes/s:839693  
-W-devname:sdc1, reqs:1711, reqKbytes:13687, reqs/s:69103, reqKbytes/s:552786  
cache输出  
^C----------END----------  
非cache输出  
-R-devname:sdc1, reqs:1836, reqKbytes:14238, reqs/s:106477, reqKbytes/s:825726  
-W-devname:sdc1, reqs:1836, reqKbytes:14238, reqs/s:69445, reqKbytes/s:538543  
cache输出  
-W-devname:N/A, reqs:8, reqKbytes:0, reqs/s:5242, reqKbytes/s:0  
-R-devname:N/A, reqs:7, reqKbytes:0, reqs/s:20771, reqKbytes/s:0  

参考

1. http://blog.163.com/digoal@126/blog/static/1638770402013915115254543/

Flag Counter

digoal’s 大量PostgreSQL文章入口