Systemtap kernel.trace(*) events source code
背景
我们知道内核trace event可以使用stap -l或-L查看, 而trace的源代码则可以在/usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/include/trace/events中查找到.
(请注意你的内核版本, 不同版本对应的目录路径可能不同)
使用stap -l或者stap -L可以列出系统支持的所有kernel.trace, 如下.
[root@db-172-16-3-150 events]# stap -l 'kernel.trace("**")'
kernel.trace("__extent_writepage")
kernel.trace("block_bio_backmerge")
kernel.trace("block_bio_bounce")
kernel.trace("block_bio_complete")
kernel.trace("block_bio_frontmerge")
kernel.trace("block_bio_queue")
kernel.trace("block_getrq")
kernel.trace("block_plug")
kernel.trace("block_remap")
kernel.trace("block_rq_abort")
kernel.trace("block_rq_complete")
kernel.trace("block_rq_insert")
kernel.trace("block_rq_issue")
kernel.trace("block_rq_remap")
kernel.trace("block_rq_requeue")
kernel.trace("block_sleeprq")
kernel.trace("block_split")
kernel.trace("block_unplug_io")
kernel.trace("block_unplug_timer")
kernel.trace("btrfs_chunk_alloc")
kernel.trace("btrfs_chunk_free")
kernel.trace("btrfs_cow_block")
kernel.trace("btrfs_delayed_data_ref")
kernel.trace("btrfs_delayed_ref_head")
kernel.trace("btrfs_delayed_tree_ref")
kernel.trace("btrfs_failed_cluster_setup")
kernel.trace("btrfs_find_cluster")
kernel.trace("btrfs_get_extent")
kernel.trace("btrfs_inode_evict")
kernel.trace("btrfs_inode_new")
kernel.trace("btrfs_inode_request")
kernel.trace("btrfs_ordered_extent_add")
kernel.trace("btrfs_ordered_extent_put")
kernel.trace("btrfs_ordered_extent_remove")
kernel.trace("btrfs_ordered_extent_start")
kernel.trace("btrfs_reserve_extent")
kernel.trace("btrfs_reserve_extent_cluster")
kernel.trace("btrfs_reserved_extent_alloc")
kernel.trace("btrfs_reserved_extent_free")
kernel.trace("btrfs_setup_cluster")
kernel.trace("btrfs_space_reservation")
kernel.trace("btrfs_sync_file")
kernel.trace("btrfs_sync_fs")
kernel.trace("btrfs_transaction_commit")
kernel.trace("btrfs_writepage_end_io_hook")
kernel.trace("consume_skb")
kernel.trace("ext3_alloc_new_reservation")
kernel.trace("ext3_allocate_blocks")
kernel.trace("ext3_allocate_inode")
kernel.trace("ext3_delete_inode")
kernel.trace("ext3_direct_IO_enter")
kernel.trace("ext3_direct_IO_exit")
kernel.trace("ext3_discard_blocks")
kernel.trace("ext3_discard_reservation")
kernel.trace("ext3_forget")
kernel.trace("ext3_free_blocks")
kernel.trace("ext3_free_inode")
kernel.trace("ext3_get_blocks_enter")
kernel.trace("ext3_get_blocks_exit")
kernel.trace("ext3_invalidatepage")
kernel.trace("ext3_journalled_write_end")
kernel.trace("ext3_journalled_writepage")
kernel.trace("ext3_load_inode")
kernel.trace("ext3_mark_inode_dirty")
kernel.trace("ext3_ordered_write_end")
kernel.trace("ext3_ordered_writepage")
kernel.trace("ext3_read_block_bitmap")
kernel.trace("ext3_readpage")
kernel.trace("ext3_releasepage")
kernel.trace("ext3_request_blocks")
kernel.trace("ext3_request_inode")
kernel.trace("ext3_reserved")
kernel.trace("ext3_rsv_window_add")
kernel.trace("ext3_sync_file_enter")
kernel.trace("ext3_sync_file_exit")
kernel.trace("ext3_sync_fs")
kernel.trace("ext3_truncate_enter")
kernel.trace("ext3_truncate_exit")
kernel.trace("ext3_unlink_enter")
kernel.trace("ext3_unlink_exit")
kernel.trace("ext3_write_begin")
kernel.trace("ext3_writeback_write_end")
kernel.trace("ext3_writeback_writepage")
kernel.trace("ext4_alloc_da_blocks")
kernel.trace("ext4_allocate_blocks")
kernel.trace("ext4_allocate_inode")
kernel.trace("ext4_da_write_begin")
kernel.trace("ext4_da_write_end")
kernel.trace("ext4_da_write_pages")
kernel.trace("ext4_da_writepages")
kernel.trace("ext4_da_writepages_result")
kernel.trace("ext4_discard_blocks")
kernel.trace("ext4_discard_preallocations")
kernel.trace("ext4_free_blocks")
kernel.trace("ext4_free_inode")
kernel.trace("ext4_journalled_write_end")
kernel.trace("ext4_mb_discard_preallocations")
kernel.trace("ext4_mb_new_group_pa")
kernel.trace("ext4_mb_new_inode_pa")
kernel.trace("ext4_mb_release_group_pa")
kernel.trace("ext4_mb_release_inode_pa")
kernel.trace("ext4_mballoc_alloc")
kernel.trace("ext4_mballoc_discard")
kernel.trace("ext4_mballoc_free")
kernel.trace("ext4_mballoc_prealloc")
kernel.trace("ext4_ordered_write_end")
kernel.trace("ext4_request_blocks")
kernel.trace("ext4_request_inode")
kernel.trace("ext4_sync_file")
kernel.trace("ext4_sync_fs")
kernel.trace("ext4_trim_all_free")
kernel.trace("ext4_trim_extent")
kernel.trace("ext4_write_begin")
kernel.trace("ext4_writeback_write_end")
kernel.trace("ext4_writepage")
... 略
kernel.trace("xfs_lookup")
kernel.trace("xfs_map_blocks_alloc")
kernel.trace("xfs_map_blocks_found")
kernel.trace("xfs_pagecache_inval")
kernel.trace("xfs_perag_clear_reclaim")
kernel.trace("xfs_perag_get")
kernel.trace("xfs_perag_get_tag")
kernel.trace("xfs_perag_put")
kernel.trace("xfs_perag_set_reclaim")
kernel.trace("xfs_readdir")
kernel.trace("xfs_readlink")
kernel.trace("xfs_releasepage")
kernel.trace("xfs_remove")
kernel.trace("xfs_rename")
kernel.trace("xfs_reset_dqcounts")
kernel.trace("xfs_setattr")
kernel.trace("xfs_swap_extent_after")
kernel.trace("xfs_swap_extent_before")
kernel.trace("xfs_symlink")
kernel.trace("xfs_trans_bhold")
kernel.trace("xfs_trans_bhold_release")
kernel.trace("xfs_trans_binval")
kernel.trace("xfs_trans_bjoin")
kernel.trace("xfs_trans_brelse")
kernel.trace("xfs_trans_commit_lsn")
kernel.trace("xfs_trans_get_buf")
kernel.trace("xfs_trans_get_buf_recur")
kernel.trace("xfs_trans_getsb")
kernel.trace("xfs_trans_getsb_recur")
kernel.trace("xfs_trans_log_buf")
kernel.trace("xfs_trans_read_buf")
kernel.trace("xfs_trans_read_buf_io")
kernel.trace("xfs_trans_read_buf_recur")
kernel.trace("xfs_trans_read_buf_shut")
kernel.trace("xfs_unwritten_convert")
kernel.trace("xfs_vm_bmap")
kernel.trace("xfs_writepage")
stap -L的输出除了trace名称以外, 还包含上下文变量信息 :
[root@db-172-16-3-150 events]# stap -L 'kernel.trace("**")'|less
kernel.trace("__extent_writepage") $page:struct page* $inode:struct inode* $wbc:struct writeback_control*
kernel.trace("block_bio_backmerge") $q:struct request_queue* $bio:struct bio*
kernel.trace("block_bio_bounce") $q:struct request_queue* $bio:struct bio*
kernel.trace("block_bio_complete") $q:struct request_queue* $bio:struct bio*
kernel.trace("block_bio_frontmerge") $q:struct request_queue* $bio:struct bio*
kernel.trace("block_bio_queue") $q:struct request_queue* $bio:struct bio*
kernel.trace("block_getrq") $q:struct request_queue* $bio:struct bio* $rw:int
这些trace的source在/usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/include/trace/events中可找到,
例如block_bio_backmerge trace :
[root@db-172-16-3-150 events]# grep -rn block_bio_backmerge /usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/include/trace/events
/usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/include/trace/events/block.h:203:DEFINE_EVENT(block_bio, block_bio_backmerge,
对应的source code.
/*
 * Defines the block_bio_backmerge trace event.
 * The first argument (block_bio) is presumably the event class this event
 * is derived from -- confirm against the class declaration in block.h.
 * TP_PROTO gives the tracepoint's prototype (a request_queue pointer and a
 * bio pointer) and TP_ARGS names the arguments passed through (q, bio).
 */
DEFINE_EVENT(block_bio, block_bio_backmerge,
TP_PROTO(struct request_queue *q, struct bio *bio),
TP_ARGS(q, bio)
);
通过trace name, 追踪这个trace在哪些函数中被调用了.
[root@db-172-16-3-150 events]# grep -rn block_bio_backmerge /usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/
/usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/include/trace/events/block.h:203:DEFINE_EVENT(block_bio, block_bio_backmerge,
/usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/block/blk-core.c:1421: trace_block_bio_backmerge(q, bio);
/usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/kernel/trace/blktrace.c:946: ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
/usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/kernel/trace/blktrace.c:982: unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
例如以上输出中的block/blk-core.c文件调用了这个trace, 打开这个文件可以看到, 它是在函数blk_queue_bio中被调用的:
[root@db-172-16-3-150 events]# less /usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/block/blk-core.c
/*
 * blk_queue_bio - queue a bio on a request queue.
 * @q:   request queue the bio is submitted to
 * @bio: bio to queue
 *
 * First tries to merge @bio into a request returned by elv_merge() (back
 * merge or front merge).  If no merge takes place, a request is obtained
 * with get_request_wait(), initialised from @bio and added to the elevator.
 *
 * Every return statement in this function returns 0.  The two failure paths
 * complete the bio through bio_endio() instead: -EOPNOTSUPP for a bio with
 * BIO_RW_BARRIER set, -ENODEV when no request could be obtained.
 */
int blk_queue_bio(struct request_queue *q, struct bio *bio)
{
struct request *req;
int el_ret;
/* Values read from the bio once, before any merge or request setup. */
unsigned int bytes = bio->bi_size;
const unsigned short prio = bio_prio(bio);
const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
int where = ELEVATOR_INSERT_SORT;
int rw_flags;
/* BIO_RW_BARRIER is deprecated */
if (WARN_ONCE(bio_rw_flagged(bio, BIO_RW_BARRIER),
"block: BARRIER is deprecated, use FLUSH/FUA instead\n")) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
/*
 * low level driver can indicate that it wants pages above a
 * certain limit bounced to low memory (ie for highmem, or even
 * ISA dma in theory)
 */
blk_queue_bounce(q, &bio);
/* The queue lock is taken here and held across the merge attempt below. */
spin_lock_irq(q->queue_lock);
/* FLUSH/FUA bios skip the merge attempt and use ELEVATOR_INSERT_FLUSH. */
if (bio->bi_rw & (BIO_FLUSH | BIO_FUA)) {
where = ELEVATOR_INSERT_FLUSH;
goto get_rq;
}
/* Skip the merge attempt when the elevator reports an empty queue. */
if (elv_queue_empty(q))
goto get_rq;
el_ret = elv_merge(q, &req, bio);
switch (el_ret) {
case ELEVATOR_BACK_MERGE:
BUG_ON(!rq_mergeable(req));
/* Merge refused: leave the switch and continue at get_rq. */
if (!ll_back_merge_fn(q, req, bio))
break;
// This is the call site of the block_bio_backmerge tracepoint.
trace_block_bio_backmerge(q, bio);
/* Request and bio differ in their failfast bits. */
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
blk_rq_set_mixed_merge(req);
/* Append the bio at the tail of the request's bio chain. */
req->biotail->bi_next = bio;
req->biotail = bio;
req->__data_len += bytes;
req->ioprio = ioprio_best(req->ioprio, prio);
if (!blk_rq_cpu_valid(req))
req->cpu = bio->bi_comp_cpu;
drive_stat_acct(req, 0);
elv_bio_merged(q, req, bio);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out;
case ELEVATOR_FRONT_MERGE:
BUG_ON(!rq_mergeable(req));
/* Merge refused: leave the switch and continue at get_rq. */
if (!ll_front_merge_fn(q, req, bio))
break;
trace_block_bio_frontmerge(q, bio);
/*
 * Request and bio differ in their failfast bits: besides marking the
 * mixed merge, the request takes over the bio's failfast bits.
 */
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
blk_rq_set_mixed_merge(req);
req->cmd_flags &= ~REQ_FAILFAST_MASK;
req->cmd_flags |= ff;
}
/* Link the bio in front of the request's bio chain. */
bio->bi_next = req->bio;
req->bio = bio;
/*
 * may not be valid. if the low level driver said
 * it didn't need a bounce buffer then it better
 * not touch req->buffer either...
 */
req->buffer = bio_data(bio);
/*
 * The merge may happen across partitions
 * We must update in_flight value accordingly
 */
blk_account_io_front_merge(req, bio->bi_sector);
/* The request now starts at the bio's sector. */
req->__sector = bio->bi_sector;
req->__data_len += bytes;
req->ioprio = ioprio_best(req->ioprio, prio);
if (!blk_rq_cpu_valid(req))
req->cpu = bio->bi_comp_cpu;
drive_stat_acct(req, 0);
elv_bio_merged(q, req, bio);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out;
/* ELV_NO_MERGE: elevator says don't/can't merge. */
default:
;
}
get_rq:
/*
 * This sync check and mask will be re-done in init_request_from_bio(),
 * but we need to set it earlier to expose the sync flag to the
 * rq allocator and io schedulers.
 */
rw_flags = bio_data_dir(bio);
if (sync)
rw_flags |= REQ_SYNC;
/*
 * Grab a free request. This might sleep but can not fail.
 * Returns with the queue unlocked.
 */
req = get_request_wait(q, rw_flags, bio);
if (unlikely(!req)) {
bio_endio(bio, -ENODEV); /* @q is dead */
goto out_unlock;
}
/*
 * After dropping the lock and possibly sleeping here, our request
 * may now be mergeable after it had proven unmergeable (above).
 * We don't worry about that case for efficiency. It won't happen
 * often, and the elevators are able to handle it.
 */
init_request_from_bio(req, bio);
/* Take the queue lock again before the request is added to the elevator. */
spin_lock_irq(q->queue_lock);
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
bio_flagged(bio, BIO_CPU_AFFINE))
req->cpu = raw_smp_processor_id();
if (queue_should_plug(q) && elv_queue_empty(q))
blk_plug_device(q);
/* insert the request into the elevator */
drive_stat_acct(req, 1);
__elv_add_request(q, req, where, 0);
out:
/* Unplug when the bio had BIO_RW_UNPLUG set or the queue should not plug. */
if (unplug || !queue_should_plug(q))
__generic_unplug_device(q);
out_unlock:
spin_unlock_irq(q->queue_lock);
return 0;
}
EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */
参考
1. /usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/include/trace/events