systemtap probe aliases (Prologue-style = & Epilogue-style +=) and suffixes

8 minute read

背景

前面我们多次用到的tapset, 其实就是一堆function和probe alias的定义.

例如 : /usr/share/systemtap/tapset/syscalls2.stp中的probe syscall.read, 就是一个alias.

本文要讲的也是probe alias.

probe alias分2种, 1种是把alias中定义的handler加到底层probe point handler的前面, 另一种是把handler加到末尾.

分别为 Prologue-style = & Epilogue-style += ;

For a prologue style alias, the statement block that follows an alias definition is implicitly added as a prologue to any probe that refers to the alias.

For an epilogue style alias, the statement block that follows an alias definition is implicitly added as an epilogue to any probe that refers to the alias. It is not useful to define new variables there (since no subsequent code will see them), but rather the code can take action based upon variables set by the prologue or by the user code.

1. 例如, 下面这个是prologue-style, 用等号定义. :

probe syscall.read = kernel.function("sys_read").call  
{  
        name = "read"  
        fd = $fd  
        buf_uaddr = $buf  
        count = $count  
        argstr = sprintf("%d, %p, %d", $fd, $buf, $count)  
}  

这个alias的handler中定义了一些本地变量, 赋予值. 在使用这个alias时, 默认会将这些定义放到handler前面.

例如 :

[root@db-172-16-3-39 ~]# cat test.stp   
probe syscall.read {  
  for (i=0; i<10; i++) {  
    printf("line %d: %s, %d, %s, %d, %s\n", i, name, fd, kernel_string(buf_uaddr), count, argstr)  
  }  
  exit()  
}  
  
[root@db-172-16-3-39 ~]# stap test.stp   
line 0: read, 4, , 8196, 4, 0x7fff5b2b7010, 8196  
line 1: read, 4, , 8196, 4, 0x7fff5b2b7010, 8196  
line 2: read, 4, , 8196, 4, 0x7fff5b2b7010, 8196  
line 3: read, 4, , 8196, 4, 0x7fff5b2b7010, 8196  
line 4: read, 4, , 8196, 4, 0x7fff5b2b7010, 8196  
line 5: read, 4, , 8196, 4, 0x7fff5b2b7010, 8196  
line 6: read, 4, , 8196, 4, 0x7fff5b2b7010, 8196  
line 7: read, 4, , 8196, 4, 0x7fff5b2b7010, 8196  
line 8: read, 4, , 8196, 4, 0x7fff5b2b7010, 8196  
line 9: read, 4, , 8196, 4, 0x7fff5b2b7010, 8196  

上面使用alias的脚本, 与下面不使用alias、直接把alias中的赋值语句写在handler开头的脚本效果相同 :

[root@db-172-16-3-39 ~]# cat test.stp  
probe kernel.function("sys_read").call {  
        name = "read"  
        fd = $fd  
        buf_uaddr = $buf  
        count = $count  
        argstr = sprintf("%d, %p, %d", $fd, $buf, $count)  
  for (i=0; i<10; i++) {  
    printf("line %d: %s, %d, %s, %d, %s\n", i, name, fd, kernel_string(buf_uaddr), count, argstr)  
  }  
  exit()  
}  
  
[root@db-172-16-3-39 ~]# stap test.stp   
line 0: read, 4, , 8196, 4, 0x7fffcb11bf90, 8196  
line 1: read, 4, , 8196, 4, 0x7fffcb11bf90, 8196  
line 2: read, 4, , 8196, 4, 0x7fffcb11bf90, 8196  
line 3: read, 4, , 8196, 4, 0x7fffcb11bf90, 8196  
line 4: read, 4, , 8196, 4, 0x7fffcb11bf90, 8196  
line 5: read, 4, , 8196, 4, 0x7fffcb11bf90, 8196  
line 6: read, 4, , 8196, 4, 0x7fffcb11bf90, 8196  
line 7: read, 4, , 8196, 4, 0x7fffcb11bf90, 8196  
line 8: read, 4, , 8196, 4, 0x7fffcb11bf90, 8196  
line 9: read, 4, , 8196, 4, 0x7fffcb11bf90, 8196  

2. 例如, 下面这个是Epilogue-style, 用+=定义. :

[root@db-172-16-3-39 ~]# cat test.stp  
probe myprobe += kernel.function("sys_read").call {  
        name = "read"  
        fd = $fd  
        buf_uaddr = $buf  
        count = $count  
        argstr = sprintf("%d, %p, %d", $fd, $buf, $count)  
}  
probe myprobe {  
  for (i=0; i<10; i++) {  
    printf("line %d: %s, %d, %d, %d, %s\n", i, name, fd, buf_uaddr, count, argstr)  
  }  
  exit()  
}  
  
[root@db-172-16-3-39 ~]# stap --vp 5 test.stp   
Parsed kernel "/lib/modules/2.6.18-348.12.1.el5/build/.config", containing 1977 tuples  
Parsed kernel /lib/modules/2.6.18-348.12.1.el5/build/Module.symvers, which contained 3546 vmlinux exports  
Searched: " /usr/share/systemtap/tapset/x86_64/*.stp ", found: 4, processed: 4  
Searched: " /usr/share/systemtap/tapset/*.stp ", found: 81, processed: 81  
Pass 1: parsed user script and 85 library script(s) using 146868virt/23720res/3008shr/21464data kb, in 170usr/0sys/171real ms.  
line 0: , 0, 0, 0,   
line 1: , 0, 0, 0,   
line 2: , 0, 0, 0,   
line 3: , 0, 0, 0,   
line 4: , 0, 0, 0,   
line 5: , 0, 0, 0,   
line 6: , 0, 0, 0,   
line 7: , 0, 0, 0,   
line 8: , 0, 0, 0,   
line 9: , 0, 0, 0,   

这些本地变量的值都是初始值, 就好象没有被赋值一样. 为什么呢?

因为+=定义的probe alias中handler是加到末尾的. 所以以上的stp和以下用法一致.

[root@db-172-16-3-39 ~]# cat test.stp  
probe kernel.function("sys_read").call {  
  for (i=0; i<10; i++) {  
    printf("line %d: %s, %d, %d, %d, %s\n", i, name, fd, buf_uaddr, count, argstr)  
  }  
  exit()  
        name = "read"  
        fd = $fd  
        buf_uaddr = $buf  
        count = $count  
        argstr = sprintf("%d, %p, %d", $fd, $buf, $count)  
}  
  
[root@db-172-16-3-39 ~]# stap --vp 5 test.stp   
Parsed kernel "/lib/modules/2.6.18-348.12.1.el5/build/.config", containing 1977 tuples  
Parsed kernel /lib/modules/2.6.18-348.12.1.el5/build/Module.symvers, which contained 3546 vmlinux exports  
Searched: " /usr/share/systemtap/tapset/x86_64/*.stp ", found: 4, processed: 4  
Searched: " /usr/share/systemtap/tapset/*.stp ", found: 81, processed: 81  
Pass 1: parsed user script and 85 library script(s) using 146808virt/23704res/3008shr/21404data kb, in 170usr/0sys/173real ms.  
line 0: , 0, 0, 0,   
line 1: , 0, 0, 0,   
line 2: , 0, 0, 0,   
line 3: , 0, 0, 0,   
line 4: , 0, 0, 0,   
line 5: , 0, 0, 0,   
line 6: , 0, 0, 0,   
line 7: , 0, 0, 0,   
line 8: , 0, 0, 0,   
line 9: , 0, 0, 0,   

更直观的例子 :

[root@db-172-16-3-39 ~]# cat test.stp  
probe myprobe += kernel.function("sys_read").call {  
        name = "read"  
        fd = $fd  
        buf_uaddr = $buf  
        count = $count  
        argstr = sprintf("%d, %p, %d", $fd, $buf, $count)  
  printf("this is epilogue style probe alias\n")  
}  
probe myprobe {  
  for (i=0; i<10; i++) {  
    printf("line %d: %s, %d, %d, %d, %s\n", i, name, fd, buf_uaddr, count, argstr)  
  }  
  exit()  
}  
  
[root@db-172-16-3-39 ~]# stap --vp 5 test.stp   
Parsed kernel "/lib/modules/2.6.18-348.12.1.el5/build/.config", containing 1977 tuples  
Parsed kernel /lib/modules/2.6.18-348.12.1.el5/build/Module.symvers, which contained 3546 vmlinux exports  
Searched: " /usr/share/systemtap/tapset/x86_64/*.stp ", found: 4, processed: 4  
Searched: " /usr/share/systemtap/tapset/*.stp ", found: 81, processed: 81  
Pass 1: parsed user script and 85 library script(s) using 146812virt/23716res/3008shr/21408data kb, in 170usr/0sys/173real ms.  
line 0: , 0, 0, 0,   
line 1: , 0, 0, 0,   
line 2: , 0, 0, 0,   
line 3: , 0, 0, 0,   
line 4: , 0, 0, 0,   
line 5: , 0, 0, 0,   
line 6: , 0, 0, 0,   
line 7: , 0, 0, 0,   
line 8: , 0, 0, 0,   
line 9: , 0, 0, 0,   
this is epilogue style probe alias  

根据epilogue style alias的解释, 以上脚本与以下脚本功能一致.

[root@db-172-16-3-39 ~]# cat test.stp  
probe kernel.function("sys_read").call {  
  for (i=0; i<10; i++) {  
    printf("line %d: %s, %d, %d, %d, %s\n", i, name, fd, buf_uaddr, count, argstr)  
  }  
  exit()  
        name = "read"  
        fd = $fd  
        buf_uaddr = $buf  
        count = $count  
        argstr = sprintf("%d, %p, %d", $fd, $buf, $count)  
  printf("this is epilogue style probe alias\n")  
}  
[root@db-172-16-3-39 ~]# stap --vp 5 test.stp   
Parsed kernel "/lib/modules/2.6.18-348.12.1.el5/build/.config", containing 1977 tuples  
Parsed kernel /lib/modules/2.6.18-348.12.1.el5/build/Module.symvers, which contained 3546 vmlinux exports  
Searched: " /usr/share/systemtap/tapset/x86_64/*.stp ", found: 4, processed: 4  
Searched: " /usr/share/systemtap/tapset/*.stp ", found: 81, processed: 81  
Pass 1: parsed user script and 85 library script(s) using 146800virt/23704res/3008shr/21396data kb, in 170usr/10sys/172real ms.  
line 0: , 0, 0, 0,   
line 1: , 0, 0, 0,   
line 2: , 0, 0, 0,   
line 3: , 0, 0, 0,   
line 4: , 0, 0, 0,   
line 5: , 0, 0, 0,   
line 6: , 0, 0, 0,   
line 7: , 0, 0, 0,   
line 8: , 0, 0, 0,   
line 9: , 0, 0, 0,   
this is epilogue style probe alias  

注意这里exit()执行了为什么还会有最后的printf执行, 原因见 :

man function::exit  
DESCRIPTION  
  
This only enqueues a request to start shutting down the script. New probes will not fire (except "end" probes), but all currently running ones may complete their work.  

当前正在处理的handler会全部执行完然后退出. 所以exit()后的打印可以被输出.

本文的第二部分要讲一下alias的suffix, 我们知道probe point其实有点类似DNS的管理风格, 方便event的分组表示.

在使用alias时, 可以自定义suffix, 也可以继承上一级point的suffix.

这个很好理解, 所以直接截取手册原文.

It is possible to include a suffix with a probe alias invocation. If only the initial part of a probe point matches an alias, the remainder is treated as a suffix and attached to the underlying probe point(s) when the alias is expanded. For example:

/* Define an alias: */  
probe sendrecv = tcp.sendmsg, tcp.recvmsg { ... }  
  
/* Use the alias in its basic form: */  
probe sendrecv { ... }  
  
/* Use the alias with an additional suffix: */  
probe sendrecv.return { ... }  
Here, the second use of the probe alias is equivalent to writing probe tcp.sendmsg.return, tcp.recvmsg.return.  
  
As another example, the probe points tcp.sendmsg.return and tcp.recvmsg.return are actually defined as aliases in the tapset tcp.stp. They expand to a probe point of the form kernel.function("...").return, so they can also be suffixed:  
  
probe tcp.sendmsg.return.maxactive(10) {  
    printf("returning from sending %d bytes\n", size)  
}  
  
Here, the probe point expands to kernel.function("tcp_sendmsg").return.maxactive(10).  

下面是suffix的通配符介绍 :

When expanding wildcards, SystemTap generally avoids considering alias suffixes in the expansion. The exception is when a wildcard element is encountered that does not have any ordinary expansions. Consider the following example:

probe some_unrelated_probe = ... { ... }  
  
probe myprobe = syscall.read { ... }  
  
probe myprobe.test = some_unrelated_probe { ... }  
  
probe myprobe.* { ... }  
  
probe myprobe.ret* { ... }  
  
Here, return would be a valid suffix for myprobe. The wildcard myprobe.* matches the ordinary alias myprobe.test, and hence the suffix expansion myprobe.return is not included. Conversely, myprobe.ret* does not match any ordinary aliases, so the suffix myprobe.return is included as an expansion.  

这里要说一下, 当仅仅使用*时, 如果alias没有定义suffix, 会报错.

只有当用到了前缀+*时, 并且alias中没有匹配的suffix, 才会去上一级probe point中匹配对应的suffix.

例如 :

[root@db-172-16-3-39 ~]# stap -l 'syscall.read.**'  
syscall.read.return  

当alias没有定义suffix, 即使上一级的probe point有return这个suffix, 使用*通配符会报错. 如下 :

[root@db-172-16-3-39 ~]# cat test.stp  
probe myprobe = syscall.read {   
  var = "myprobe"  
}  
  
probe myprobe.* {  
  printf("%s\n", var)  
  exit()  
}  
  
[root@db-172-16-3-39 ~]# stap test.stp   
semantic error: while resolving probe point: identifier 'myprobe' at test.stp:5:7  
        source: probe myprobe.* {  
                      ^  
  
semantic error: probe point mismatch at position 1  didn't find any wildcard matches: identifier '*' at :5:15  
        source: probe myprobe.* {  
                              ^  
Pass 2: analysis failed.  Try again with another '--vp 01' option.  

只有定义了alias的suffix, 直接使用*通配符才不会报错.

[root@db-172-16-3-39 ~]# cat test.stp  
probe myprobe.test = syscall.read {   
  var = "myprobe"  
}  
  
probe myprobe.* {  
  printf("%s\n", var)  
  exit()  
}  
  
[root@db-172-16-3-39 ~]# stap test.stp   
myprobe  

最后, 手册中提到的probe myprobe.ret*在我的环境中未测试成功, 可能和systemtap版本有关系. 作为对照, 先看myprobe.*, 它可以正常匹配到已定义的myprobe.test :

[root@db-172-16-3-39 ~]# cat test.stp  
probe myprobe = syscall.read {   
  var = "myprobe"  
}  
  
probe myprobe.test = syscall.read.return {   
  var = "myprobe"  
}  
  
probe myprobe.* {  
  printf("%s\n", var)  
  exit()  
}  
[root@db-172-16-3-39 ~]# stap test.stp   
myprobe  

而使用myprobe.ret*时失败 :

[root@db-172-16-3-39 ~]# cat test.stp  
probe myprobe = syscall.read {   
  var = "myprobe"  
}  
  
probe myprobe.test = syscall.read.return {   
  var = "myprobe"  
}  
  
probe myprobe.ret* {  
  printf("%s\n", var)  
  exit()  
}  
  
[root@db-172-16-3-39 ~]# stap test.stp   
semantic error: while resolving probe point: identifier 'myprobe' at test.stp:9:7  
        source: probe myprobe.ret* {  
                      ^  
  
semantic error: probe point mismatch at position 1  (alternatives:  test) didn't find any wildcard matches: identifier 'ret*' at :9:15  
        source: probe myprobe.ret* {  
                              ^  
Pass 2: analysis failed.  Try again with another '--vp 01' option.  

stap版本

[root@db-172-16-3-39 ~]# stap -V  
Systemtap translator/driver (version 1.8/0.152 non-git sources)  
Copyright (C) 2005-2012 Red Hat, Inc. and others  
This is free software; see the source for copying conditions.  
enabled features: AVAHI LIBRPM LIBSQLITE3 NSS BOOST_SHARED_PTR TR1_UNORDERED_MAP NLS  

参考

1. https://sourceware.org/systemtap/langref/Components_SystemTap_script.html

2. /usr/share/systemtap/tapset/syscalls2.stp

# read _______________________________________________________  
# ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)  
probe syscall.read = kernel.function("sys_read").call  
{  
        name = "read"  
        fd = $fd  
        buf_uaddr = $buf  
        count = $count  
        argstr = sprintf("%d, %p, %d", $fd, $buf, $count)  
}  
probe syscall.read.return = kernel.function("sys_read").return  
{  
        name = "read"  
        retstr = return_str(1, $return)  
}  

3. https://sourceware.org/systemtap/man/function::exit.3stap.html

Flag Counter

digoal’s 大量PostgreSQL文章入口