stap trace blockdev’s iops
less than 1 minute read
背景
群里有人提了一个问题: 用dd测试块设备的写性能时, 结果可能会被OS cache误导, 所以建议同时使用stap来跟踪一下.
当然, stap本身会带来一定的开销, 因此测得的结果会低于实际的IO水平.
[root@db-172-16-3-150 io]# grep -r vfs.write.return /opt/systemtap/share/systemtap/tapset/*
/opt/systemtap/share/systemtap/tapset/linux/vfs.stp:probe vfs.write.return = kernel.function("vfs_write").return
# Probe alias: vfs.write.return is defined as the return probe of the kernel
# function vfs_write. The assignments below expose convenience variables to
# any script that uses this alias.
probe vfs.write.return = kernel.function("vfs_write").return
{
# Fixed probe name and the return value formatted as a decimal string.
name = "vfs.write"
retstr = sprintf("%d", $return)
# Copies of the vfs_write arguments.
file = $file
pos = $pos
buf = $buf
bytes_to_write = $count
# Values derived from $file through tapset helpers; presumably the device
# number, the block device name and the inode number (helper bodies are not
# shown here, confirm against the tapset source).
dev = __file_dev($file)
devname = __find_bdevname(dev, __file_bdev($file))
ino = __file_ino($file)
# Raw return value, then split by sign: a positive value is kept as
# bytes_written, a negative value is kept as error, otherwise each is 0.
ret = $return
bytes_written = $return > 0 ? $return : 0
error = $return < 0 ? $return : 0
# Error name from errno_str() when error is non-zero, else an empty string.
error_str = error ? errno_str(error) : ""
}
[root@db-172-16-3-150 io]# stap -L 'kernel.function("vfs_write")'
kernel.function("vfs_write@fs/read_write.c:349") $file:struct file* $buf:char const* $count:size_t $pos:loff_t*
使用如下脚本 :
# Print, every 3 seconds and per device name, the number of successful
# vfs_write calls per second and the write throughput in KB/s.
# Wreqs/s is averaged over the 3-second wall-clock interval; WKbytes/s is
# bytes divided by the summed time spent inside vfs_write for that device.
stap -e 'global total_time, total_bytes, total_reqs

probe vfs.write.return {
  /* Duration of this vfs_write call in nanoseconds. */
  ts = gettimeofday_ns() - @entry(gettimeofday_ns())
  /* Count only writes that returned a positive byte count and whose
     devname is not "N/A" (skip update cache). */
  if ($return > 0 && devname != "N/A") {
    total_time[devname] += ts
    total_bytes[devname] += $return
    total_reqs[devname] += 1
  }
}

probe timer.s(3) {
  foreach (dev in total_time) {
    elapsed_ns = total_time[dev]
    /*
     * KB/s = bytes * 1000000000 / (ns * 1024).
     * 1000000000/1024 equals 1953125/2 exactly, so the reduced form gives
     * the same integer result while keeping the 64-bit product from
     * overflowing once an interval exceeds roughly 9 GB written.
     * A zero elapsed time would abort the script with a division by zero,
     * so 0 is reported for a non-positive elapsed time.
     */
    kbytes_per_sec = elapsed_ns > 0 ? (total_bytes[dev] * 1953125) / (elapsed_ns * 2) : 0
    printf("dev:%s Wreqs/s:%d WKbytes/s:%d\n", dev, total_reqs[dev] / 3, kbytes_per_sec)
  }
  /* Reset the counters so each report covers one interval only. */
  delete total_time
  delete total_bytes
  delete total_reqs
}'
输出举例:
dev:sdb1 Wreqs/s:108606 WKbytes/s:152986
dev:sdb1 Wreqs/s:108150 WKbytes/s:152320
dev:sdb1 Wreqs/s:108173 WKbytes/s:152246
dd 一个文件, 输出 :
[root@db-172-16-3-150 ssd4]# dd if=/dev/zero of=./test.img bs=1k count=10240000
10240000+0 records in
10240000+0 records out
10485760000 bytes (10 GB) copied, 96.6477 s, 108 MB/s