Systemtap EXP: trace PostgreSQL netflow per session or per sql

4 minute read

背景

前面几篇blog介绍了使用systemtap统计PostgreSQL单挑SQL以及整个会话的io操作.  
本文将要介绍一下对数据库单条SQL的网络传输包, 传输字节数的统计. 以及整个会话的统计.  
需要用到的探针如下 :   
tapset/tcp.stp  
/**  
 * probe tcp.recvmsg.return - Receiving TCP message complete  
 * @name: Name of this probe  
 * @size: Number of bytes received or error code if an error occurred.  
 * @family: IP address family  
 * @saddr: A string representing the source IP address  
 * @daddr: A string representing the destination IP address  
 * @sport: TCP source port   
 * @dport: TCP destination port  
 *  
 * Context:  
 *  The process which receives a tcp message  
 */  
probe tcp.recvmsg.return = kernel.function("tcp_recvmsg").return {  
        name = "tcp.recvmsg"  
        size = $return  
        family  = __ip_sock_family($sk)  
        saddr   = format_ipaddr(__ip_sock_saddr($sk), __ip_sock_family($sk))  
        daddr   = format_ipaddr(__ip_sock_daddr($sk), __ip_sock_family($sk))  
        sport = __tcp_sock_sport($sk)  
        dport = __tcp_sock_dport($sk)  
}  
  
/**  
 * probe tcp.sendmsg.return -  Sending TCP message is done  
 * @name: Name of this probe  
 * @size: Number of bytes sent or error code if an error occurred.   
 *  
 * Context:  
 *  The process which sends a tcp message  
 */  
probe tcp.sendmsg.return = kernel.function("tcp_sendmsg").return {  
        name = "tcp.sendmsg"  
        size = $return   
}  
以上探针对应的linux 源码 :   
[root@db-172-16-3-150 tapset]# stap -l 'kernel.function("tcp_sendmsg")'  
kernel.function("tcp_sendmsg@net/ipv4/tcp.c:902")  
[root@db-172-16-3-150 tapset]# less /usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/net/ipv4/tcp.c   
int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,  
                size_t size)  
{  
        struct sock *sk = sock->sk;  
        struct iovec *iov;  
        struct tcp_sock *tp = tcp_sk(sk);  
        struct sk_buff *skb;  
        int iovlen, flags;  
        int mss_now, size_goal;  
        int err, copied;  
        long timeo;  
接下来写stp脚本, tcp.sendmsg.return没有源和目的ip信息, 所以需要自己写 :   
[root@db-172-16-3-150 ~]# cat t.stp  
global var1%[60000]  // 记录每SQL网络传输流量统计信息  
global var11%[60000]  // 记录每SQL网络传输时间统计信息  
global var2%[60000]  // 记录所有会话网络传输流量统计信息  
global var22%[60000]  // 记录所有会话网络传输时间统计信息  
global del%[120000] // 因为foreach中不允许修改本数组, 所以需要使用另一个数组来存储索引, 方便删除  
  
probe tcp.sendmsg.return {  
  if ($return>0 && execname()=="postgres") {  
    v_us = gettimeofday_us() - @entry(gettimeofday_us())  
    saddr = format_ipaddr(__ip_sock_saddr($sock->sk), __ip_sock_family($sock->sk))  
    daddr = format_ipaddr(__ip_sock_daddr($sock->sk), __ip_sock_family($sock->sk))  
    sport = __tcp_sock_sport($sock->sk)  
    dport = __tcp_sock_dport($sock->sk)  
    var1[pid(),execname(),saddr,sport,daddr,dport,"S"] <<< $return  
    var11[pid(),execname(),saddr,sport,daddr,dport,"S"] <<< v_us  
    var2[saddr,sport,daddr,dport,"S"] <<< $return  
    var22[saddr,sport,daddr,dport,"S"] <<< v_us  
  }  
}  
  
probe tcp.recvmsg.return {  
  if ($return>0 && execname()=="postgres") {  
    v_us = gettimeofday_us() - @entry(gettimeofday_us())  
    var1[pid(),execname(),saddr,sport,daddr,dport,"R"] <<< $return  
    var11[pid(),execname(),saddr,sport,daddr,dport,"R"] <<< v_us  
    var2[saddr,sport,daddr,dport,"R"] <<< $return  
    var22[saddr,sport,daddr,dport,"R"] <<< v_us  
  }  
}  
  
probe process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__start") {  
  foreach([a,b,c,d,e,f,g] in var1) {  
    if (a==pid() && b==execname()) {  
      del[a,b,c,d,e,f,g]=1  
    }  
  }  
  foreach([a,b,c,d,e,f,g] in del) {  
    delete var1[a,b,c,d,e,f,g]  
    delete var11[a,b,c,d,e,f,g]  
  }  
  delete del  
}  
  
probe process("/home/pg93/pgsql9.3.1/bin/postgres").mark("query__done") {  
  // 输出SQL语句  
  printf("query: %s\n", user_string($arg1))  
  
  foreach([a,b,c,d,e,f,g] in var1 @sum -) {  
    if (g=="S" && a==pid() && b==execname()) {  
      v1 = @count(var1[a,b,c,d,e,f,g])  // 发送包数  
      v2 = @sum(var1[a,b,c,d,e,f,g]) / 1024  // 发送K字节数  
      v3 = @sum(var11[a,b,c,d,e,f,g])  // 发送时间  
      spv1 = ((v3!=0) ? ((1000000*v1)/v3) : 0)  // 发送包每秒  
      spv2 = ((v3!=0) ? ((1000000*v2)/v3) : 0)  // 发送K字节数每秒  
      printf("-%s-from:%s:%d-to:%s:%d, pkgs:%d, Kbytes:%d, pkgs/s:%d, Kbytes/s:%d\n", g,c,d,e,f, v1, v2, spv1, spv2)  
    }  
    if (g=="R" && a==pid() && b==execname()) {  
      v4 = @count(var1[a,b,c,d,e,f,g])  // 接收包数  
      v5 = @sum(var1[a,b,c,d,e,f,g]) / 1024  // 接收K字节数  
      v6 = @sum(var11[a,b,c,d,e,f,g])  // 接收时间  
      spv3 = ((v6!=0) ? ((1000000*v4)/v6) : 0)  // 接收包每秒  
      spv4 = ((v6!=0) ? ((1000000*v5)/v6) : 0)  // 接收K字节数每秒  
      printf("-%s-from:%s:%d-to:%s:%d, pkgs:%d, Kbytes:%d, pkgs/s:%d, Kbytes/s:%d\n", g,e,f,c,d, v4, v5, spv3, spv4)  
    }  
    del[a,b,c,d,e,f,g]=1  
  }  
  
  foreach([a,b,c,d,e,f,g] in del) {  
    delete var1[a,b,c,d,e,f,g]  
    delete var11[a,b,c,d,e,f,g]  
  }  
  delete del  
}  
  
probe end {  
  println("----------END----------")  
  foreach([a,b,c,d,e] in var2 @sum -) {  
    if (e == "S") {  
      v1 = @count(var2[a,b,c,d,e])  // 发送包数  
      v2 = @sum(var2[a,b,c,d,e]) / 1024  // 发送K字节数  
      v3 = @sum(var22[a,b,c,d,e])  // 发送时间  
      spv1 = ((v3!=0) ? ((1000000*v1)/v3) : 0)  // 发送包每秒  
      spv2 = ((v3!=0) ? ((1000000*v2)/v3) : 0)  // 发送K字节数每秒  
      printf("-%s-from:%s:%d-to:%s:%d, pkgs:%d, Kbytes:%d, pkgs/s:%d, Kbytes/s:%d\n", e,a,b,c,d, v1, v2, spv1, spv2)  
    }  
    if (e == "R") {  
      v4 = @count(var2[a,b,c,d,e])  // 接收包数  
      v5 = @sum(var2[a,b,c,d,e]) / 1024  // 接收K字节数  
      v6 = @sum(var22[a,b,c,d,e])  // 接收时间  
      spv3 = ((v6!=0) ? ((1000000*v4)/v6) : 0)  // 接收包每秒  
      spv4 = ((v6!=0) ? ((1000000*v5)/v6) : 0)  // 接收K字节数每秒  
      printf("-%s-from:%s:%d-to:%s:%d, pkgs:%d, Kbytes:%d, pkgs/s:%d, Kbytes/s:%d\n", e,c,d,a,b, v4, v5, spv3, spv4)  
    }  
  }  
  delete var1  
  delete var11  
  delete var2  
  delete var22  
  delete del  
}  
测试 :   
[root@db-172-16-3-150 ~]# /opt/systemtap/bin/stap -u t.stp  
执行以下SQL :   
digoal=# drop table t;  
DROP TABLE  
digoal=# create table t(id int, info text, crt_time timestamp);  
CREATE TABLE  
digoal=# insert into t select generate_series(1,1000000), md5(random()::text), clock_timestamp();  
INSERT 0 1000000  
digoal=# \dt+  
                   List of relations  
 Schema | Name | Type  |  Owner   | Size  | Description   
--------+------+-------+----------+-------+-------------  
 public | t    | table | postgres | 73 MB |   
(1 row)  
digoal=# \q  
pg93@db-172-16-3-39-> psql -h 172.16.3.150 -p 1921 -c "copy t to stdout"|psql -h 172.16.3.150 -p 1921 -c "copy t from stdin"  
pg93@db-172-16-3-39-> psql -h 172.16.3.150 -p 1921 -c "copy t to stdout"|psql -h 172.16.3.150 -p 1921 -c "copy t from stdin"  
stap输出结果如下  
query: drop table t;  
-R-from:172.16.3.39:37919-to:172.16.3.150:1921, pkgs:2, Kbytes:0, pkgs/s:5141, Kbytes/s:0  
-S-from:172.16.3.150:1921-to:172.16.3.39:37919, pkgs:2, Kbytes:0, pkgs/s:60606, Kbytes/s:0  
query: create table t(id int, info text, crt_time timestamp);  
-R-from:172.16.3.39:37919-to:172.16.3.150:1921, pkgs:2, Kbytes:0, pkgs/s:5141, Kbytes/s:0  
-S-from:172.16.3.150:1921-to:172.16.3.39:37919, pkgs:2, Kbytes:0, pkgs/s:60606, Kbytes/s:0  
query: insert into t select generate_series(1,1000000), md5(random()::text), clock_timestamp();  
-R-from:172.16.3.39:37919-to:172.16.3.150:1921, pkgs:2, Kbytes:0, pkgs/s:5141, Kbytes/s:0  
-S-from:172.16.3.150:1921-to:172.16.3.39:37919, pkgs:3, Kbytes:0, pkgs/s:53571, Kbytes/s:0  
query: SELECT n.nspname as "Schema",  
  c.relname as "Name",  
  CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'm' THEN 'materialized view' WHEN 'i' THEN 'index' WHEN 'S' THEN 'sequence' WHEN 's' THEN 'special' WHEN 'f' THEN 'foreign table' END as "Type",  
  pg_catalog.pg_get_userbyid(c.relowner) as "Owner",  
  pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as "Size",  
  pg_catalog.obj_description(c.oid, 'pg_class') as "Description"  
FROM pg_catalog.pg_class c  
     LEFT JOIN pg_catalog.pg_na  
-R-from:172.16.3.39:37920-to:172.16.3.150:1921, pkgs:1, Kbytes:0, pkgs/s:83333, Kbytes/s:0  
-S-from:172.16.3.150:1921-to:172.16.3.39:37920, pkgs:1, Kbytes:0, pkgs/s:83333, Kbytes/s:0  
query: copy t to stdout  
-S-from:172.16.3.150:1921-to:172.16.3.39:37921, pkgs:8761, Kbytes:70088, pkgs/s:7313, Kbytes/s:58505  
query: copy t from stdin  
-R-from:172.16.3.39:37922-to:172.16.3.150:1921, pkgs:8765, Kbytes:70094, pkgs/s:72554, Kbytes/s:580224  
-S-from:172.16.3.150:1921-to:172.16.3.39:37922, pkgs:2, Kbytes:0, pkgs/s:7662, Kbytes/s:0  
query: copy t to stdout  
-S-from:172.16.3.150:1921-to:172.16.3.39:37933, pkgs:17523, Kbytes:140184, pkgs/s:4478, Kbytes/s:35829  
query: copy t from stdin  
-R-from:172.16.3.39:37932-to:172.16.3.150:1921, pkgs:8636, Kbytes:69078, pkgs/s:64990, Kbytes/s:519848  
-S-from:172.16.3.150:1921-to:172.16.3.39:37932, pkgs:1, Kbytes:0, pkgs/s:38461, Kbytes/s:0  
^C----------END----------  
-S-from:172.16.3.150:1921-to:172.16.3.39:37933, pkgs:17527, Kbytes:140190, pkgs/s:4479, Kbytes/s:35831  
-R-from:172.16.3.39:37932-to:172.16.3.150:1921, pkgs:17530, Kbytes:140190, pkgs/s:55968, Kbytes/s:447589  
-S-from:172.16.3.150:1921-to:172.16.3.39:37921, pkgs:8765, Kbytes:70095, pkgs/s:7316, Kbytes/s:58509  
-R-from:172.16.3.39:37922-to:172.16.3.150:1921, pkgs:8769, Kbytes:70095, pkgs/s:71634, Kbytes/s:572610  
-R-from:172.16.3.39:37919-to:172.16.3.150:1921, pkgs:9, Kbytes:0, pkgs/s:0, Kbytes/s:0  
-R-from:172.16.3.39:37920-to:172.16.3.150:1921, pkgs:5, Kbytes:0, pkgs/s:0, Kbytes/s:0  
-S-from:172.16.3.150:1921-to:172.16.3.39:37919, pkgs:9, Kbytes:0, pkgs/s:56603, Kbytes/s:0  
-S-from:172.16.3.150:1921-to:172.16.3.39:37920, pkgs:4, Kbytes:0, pkgs/s:70175, Kbytes/s:0  
-S-from:172.16.3.150:1921-to:172.16.3.39:37922, pkgs:5, Kbytes:0, pkgs/s:16722, Kbytes/s:0  
-S-from:172.16.3.150:1921-to:172.16.3.39:37932, pkgs:5, Kbytes:0, pkgs/s:16501, Kbytes/s:0  
-R-from:172.16.3.39:37921-to:172.16.3.150:1921, pkgs:5, Kbytes:0, pkgs/s:87, Kbytes/s:0  
-R-from:172.16.3.39:37933-to:172.16.3.150:1921, pkgs:5, Kbytes:0, pkgs/s:61, Kbytes/s:0  
从PostgreSQL发送包到客户端的速度明显慢于从客户端发送到服务端的速度.  
这可能和测试环境有关, 如果直接输出到/dev/null是快的, 例如 :   
psql -h 172.16.3.150 -p 1921 -c "copy t to stdout" >/dev/null 2>&1  
输出的速率提高到了433MB/s  
query: copy t to stdout  
-S-from:172.16.3.150:1921-to:172.16.3.39:37961, pkgs:70096, Kbytes:560752, pkgs/s:54195, Kbytes/s:433552  
-R-from:172.16.3.39:37961-to:172.16.3.150:1921, pkgs:2, Kbytes:0, pkgs/s:4889, Kbytes/s:0  

参考

1. https://sourceware.org/systemtap/SystemTap_Beginners_Guide/useful-systemtap-scripts.html

2. http://blog.163.com/digoal@126/blog/static/16387704020139152191581/

Flag Counter

digoal’s 大量PostgreSQL文章入口