Systemtap examples, Network - 1 Network Profiling
背景
systemtap-testsuite包收集了大量的stp脚本, 可直接用于跟踪以下分类的内容 :
= AUDIT =
= AUTOFS =
= BACKTRACE =
= CALLGRAPH =
= CPU =
= DEVICE =
= DISK =
= FILE =
= FILESYSTEM =
= FORMAT =
= FUNCTION =
= FUTEX =
= INTERRUPT =
= IO =
= KVM =
= LIMITS =
= LOCKING =
= MEMORY =
= MONITORING =
= NANOSLEEP =
= NETWORK =
= NFS =
= NUMA =
= PROCESS =
= PROFILING =
= QEMU =
= SCHEDULER =
= SCSI =
= SECURITY =
= SIGNALS =
= SIMPLE =
= SLAB =
= SOCKET =
= STATISTICS =
= SYSCALL =
= TCP =
= THREAD =
= TIME =
= TRACE =
= TRACEPOINT =
= TRAFFIC =
= TTY =
= UTILIZATION =
= VIRTUALIZATION =
= WATCHDOG =
= WATCHPOINT =
以上分类取自/usr/share/systemtap/testsuite/systemtap.examples/keyword-index.txt
对应这些例子的讲解, 除了直接去/usr/share/systemtap/testsuite/systemtap.examples目录取之外, 也可以去以下地址查看, 都很方便.
https://sourceware.org/systemtap/examples/
接下来我挑选一些例子来讲解. (取材自SystemTap_Beginners_Guide)
第一个例子来自nettop.stp脚本, 统计各网络接口上网络收发情况, 按照收发包个数之和(注意不是收发字节)倒序排序, 输出pid, uid, dev, 发送包数, 接收包数, 发送KB数, 接收KB数, command 信息.
nettop.stp 脚本内容以及注解
[root@db-172-16-3-150 network]# cd /usr/share/systemtap/testsuite/systemtap.examples/network
[root@db-172-16-3-150 network]# cat nettop.stp
#!/usr/bin/stap
// 指定脚本解释器命令, 如果安装位置不在/usr/bin/stap, 可以修改这行, 或者使用stap nettop.stp执行.
// Statistics aggregates keyed by [pid, device name, command name, uid];
// the netdev.transmit / netdev.receive handlers feed packet lengths into
// them with the <<< operator.
global ifxmit, ifrecv
// Integer array with the same key; print_activity() fills it with the
// per-key packet count (received + transmitted) and uses it as the sort key.
global ifmerged
// 定义3个全局变量, 分别用于存储传输,接收,以及合并数组;
// 传输和接收数组是统计(aggregate)类型, 通过<<<操作符累计每个包的长度(length), 之后用@count取包个数, 用@sum取总字节数;
// 合并数组存储网络接口上按照pid(), dev_name, execname(), uid()维度累加的传输和接收包个数.
// Handler for the netdev.transmit probe point: records one sample per
// transmitted buffer, keyed by [pid, device name, command name, uid].
probe netdev.transmit
{
// dev_name and length are variables provided by the netdev.transmit alias;
// <<< adds length as a new sample to the aggregate for this key.
ifxmit[pid(), dev_name, execname(), uid()] <<< length
}
// netdev.transmit 探针, 网络设备传输buffer时触发.
// Handler for the netdev.receive probe point: records one sample per
// received buffer, keyed by [pid, device name, command name, uid].
probe netdev.receive
{
// dev_name and length are variables provided by the netdev.receive alias;
// <<< adds length as a new sample to the aggregate for this key.
ifrecv[pid(), dev_name, execname(), uid()] <<< length
}
// netdev.receive 探针, 从网络设备接收数据时触发.
// print_activity: print one report table of network activity per
// [pid, device, command, uid] key, then clear all collected data.
// Takes no arguments and returns no value. Reads the global aggregates
// ifxmit and ifrecv, uses the global array ifmerged as scratch space for the
// sort key, and deletes all three before returning, so each call reports
// only what was recorded since the previous call.
function print_activity()
{
// Header row; column widths match the data-row format string used below.
printf("%5s %5s %-7s %7s %7s %7s %7s %-15s\n",
"PID", "UID", "DEV", "XMIT_PK", "RECV_PK",
"XMIT_KB", "RECV_KB", "COMMAND")
// Build the sort key: ifmerged[key] = received sample count ...
foreach ([pid, dev, exec, uid] in ifrecv) {
ifmerged[pid, dev, exec, uid] += @count(ifrecv[pid,dev,exec,uid]);
}
// ... plus transmitted sample count for the same key.
foreach ([pid, dev, exec, uid] in ifxmit) {
ifmerged[pid, dev, exec, uid] += @count(ifxmit[pid,dev,exec,uid]);
}
// The trailing '-' iterates ifmerged in descending order of its values,
// i.e. the keys with the most packets (received + transmitted) come first.
foreach ([pid, dev, exec, uid] in ifmerged-) {
n_xmit = @count(ifxmit[pid, dev, exec, uid])
n_recv = @count(ifrecv[pid, dev, exec, uid])
// The ?: guards only evaluate @sum when at least one sample exists for
// the key; otherwise 0 is printed. The sum of lengths is divided by 1024
// (integer division) to produce the *_KB columns.
printf("%5d %5d %-7s %7d %7d %7d %7d %-15s\n",
pid, uid, dev, n_xmit, n_recv,
n_xmit ? @sum(ifxmit[pid, dev, exec, uid])/1024 : 0,
n_recv ? @sum(ifrecv[pid, dev, exec, uid])/1024 : 0,
exec)
}
// Columns printed: pid, uid, network interface, packets transmitted,
// packets received, KB transmitted, KB received, command.
print("\n")
// Reset all state so the next report starts from zero.
delete ifxmit
delete ifrecv
delete ifmerged
}
// print_activity 函数, 按照pid,dev,exec,uid维度, 根据接收包数与传输包数之和倒序输出.
// n_xmit ? @sum(ifxmit[pid, dev, exec, uid])/1024 : 0,
// n_recv ? @sum(ifrecv[pid, dev, exec, uid])/1024 : 0,
// 表示传输和接收的KB数.
// print_activity 函数的末尾清除三个全局变量的值. 下次调用时重新输出上一次输出以来的统计信息.
// Print a report every 5000 ms, and also when the script ends normally
// (end) or terminates because of an error (error), so that data collected
// since the last timer tick is still printed.
probe timer.ms(5000), end, error
{
print_activity()
}
// 每5秒调用一次print_activity; 另外在脚本正常结束(end)或因错误退出(error)时也会调用一次, 输出最后一个周期内的统计信息.
执行输出举例
[root@db-172-16-3-150 network]# stap ./nettop.stp
PID UID DEV XMIT_PK RECV_PK XMIT_KB RECV_KB COMMAND
0 0 em1 5 24 0 1 swapper
PID UID DEV XMIT_PK RECV_PK XMIT_KB RECV_KB COMMAND
0 0 em1 6 26 0 2 swapper
25178 0 em1 1 0 0 0 sshd
PID UID DEV XMIT_PK RECV_PK XMIT_KB RECV_KB COMMAND
0 0 em1 5 32 0 2 swapper
25525 500 lo 5 0 1 0 postgres
25178 0 em1 1 0 0 0 sshd
7015 500 lo 1 0 0 0 postgres
PID UID DEV XMIT_PK RECV_PK XMIT_KB RECV_KB COMMAND
0 0 em1 6 30 0 1 swapper
25178 0 em1 1 0 0 0 sshd
本文用到的2个probe alias原型.
/**
* probe netdev.transmit - Network device transmitting buffer
* @dev_name: The name of the device. e.g: eth0, ath1.
* @length: The length of the transmit buffer.
* @protocol: The protocol of this packet(defined in include/linux/if_ether.h).
* @truesize: The size of the data to be transmitted.
*
*/
// Queue a buffer for transmission to a network device
// Probe alias: netdev.transmit is defined as a probe on the kernel function
// dev_queue_xmit. The block below runs before any handler attached to the
// alias and sets the variables those handlers can use.
probe netdev.transmit
= kernel.function("dev_queue_xmit")
{
// $skb is a target variable of the probed function; the device name is
// read through skb->dev->name and converted to a script string.
dev_name = kernel_string($skb->dev->name)
// The remaining variables are copied directly from fields of $skb.
length = $skb->len
protocol = $skb->protocol
truesize = $skb->truesize
}
/**
* probe netdev.receive - Data received from network device.
* @dev_name: The name of the device. e.g: eth0, ath1.
* @length: The length of the receiving buffer.
* @protocol: Protocol of received packet.
*
*/
/// <varlistentry><term>protocol</term>
/// <listitem><para>The possible values of protocol could be:
/// <table frame='all'><title>Protocol Values</title>
/// <tgroup cols='2' align='left' colsep='1' rowsep='1'>
/// <colspec colname='Value'/>
/// <colspec colname='Protocol'/>
/// <thead>
/// <row><entry>Value(Hex)</entry><entry>Protocol</entry></row>
/// </thead>
/// <tbody>
/// <row><entry>0001</entry><entry>802.3</entry></row>
/// <row><entry>0002</entry><entry>AX.25</entry></row>
/// <row><entry>0004</entry><entry>802.2</entry></row>
/// <row><entry>0005</entry><entry>SNAP</entry></row>
/// <row><entry>0009</entry><entry>Localtalk</entry></row>
/// <row><entry>0800</entry><entry>IP</entry></row>
/// <row><entry>0805</entry><entry>X.25</entry></row>
/// <row><entry>0806</entry><entry>ARP</entry></row>
/// <row><entry>8035</entry><entry>RARP</entry></row>
/// <row><entry>8100</entry><entry>802.1Q VLAN</entry></row>
/// <row><entry>8137</entry><entry>IPX</entry></row>
/// <row><entry>86DD</entry><entry>IPv6</entry></row>
/// </tbody>
/// </tgroup>
/// </table>
/// </para></listitem>
/// </varlistentry>
///
/// <varlistentry><term>truesize</term>
/// <listitem><para>
/// The size of the received data.
/// </para></listitem>
/// </varlistentry>
///
/// </variablelist>
///</para>
// Main device receive routine, be called when packet arrives on network device
// Probe alias: netdev.receive is defined as a probe on the kernel function
// netif_receive_skb. The block below runs before any handler attached to the
// alias and sets the variables those handlers can use.
probe netdev.receive
= kernel.function("netif_receive_skb")
{
// $skb is a target variable of the probed function; the device name is
// read through skb->dev->name and converted to a script string.
dev_name = kernel_string($skb->dev->name)
// The remaining variables are copied directly from fields of $skb.
length = $skb->len
protocol = $skb->protocol
truesize = $skb->truesize
}
参考
1. /usr/share/systemtap/testsuite/systemtap.examples
2. https://sourceware.org/systemtap/SystemTap_Beginners_Guide/useful-systemtap-scripts.html
3. systemtap-testsuite
4. https://sourceware.org/systemtap/examples/
5. /usr/share/systemtap/testsuite/systemtap.examples/index.txt
6. /usr/share/systemtap/testsuite/systemtap.examples/keyword-index.txt
7. /usr/share/systemtap/tapset