Linux kvm_stat VCPU Statistics and perf_event Integration
2026/6/15 20:34:46 网站建设 项目流程

Linux kvm_stat VCPU Statistics and perf_event Integration

kvm_stat是KVM的性能统计基础设施,用于暴露VCPU级别的虚拟化事件计数。它通过tracepoint、debugfs和perf_event三种路径收集数据。内核侧的实现集中在virt/kvm/kvm_main.c和arch/x86/kvm/的统计计数器更新点,用户侧通过tools/kvm/kvm_stat/kvm_stat脚本解析并展示。

内核侧,VCPU级别的统计定义在struct kvm_vcpu_stat中,VM级别的统计定义在struct kvm_vm_stat中:

```c
/*
 * Per-VCPU event counters.
 *
 * Each member is a 64-bit running total that is bumped at the matching
 * event site (e.g. irq_injections in kvm_inject_irq(), pf_fixed/pf_guest
 * in kvm_mmu_page_fault(), the *_exits members in the VM-exit dispatch).
 */
struct kvm_vcpu_stat {
	u64 halt_successful_poll;
	u64 halt_attempted_poll;
	u64 halt_poll_invalid;
	u64 halt_wakeup;
	u64 hypercalls;
	u64 req_event;
	u64 irq_injections;
	u64 irq_window_exits;
	u64 nmi_window_exits;
	u64 signal_exits;
	u64 pf_fixed;
	u64 pf_guest;
	u64 tlb_flush;
	u64 invlpg;
	u64 io_exits;
	u64 mmio_exits;
	u64 cpuid_exits;
	u64 halt_exits;
	u64 irq_exits;
	u64 host_state_reload;
	u64 fpu_reload;
	u64 insn_emulation;
	u64 insn_emulation_fail;
	u64 nmi_injections;
	/*
	 * MSR read/write exits. The VM-exit dispatch increments this member,
	 * so it must be declared here; it is appended last so the offsets of
	 * the existing members do not move.
	 */
	u64 msr_exits;
};

/*
 * Per-VM event counters.
 *
 * All members are 64-bit totals. Judging by their names they track MMU
 * and remote-TLB activity for the whole VM -- the update sites are not
 * shown alongside this definition, so confirm against the callers.
 */
struct kvm_vm_stat {
	u64 mmu_shadow_zapped;
	u64 mmu_pte_write;
	u64 mmu_pde_zapped;
	u64 mmu_flooded;
	u64 mmu_recycled;
	u64 mmu_cache_miss;
	u64 mmu_unsync;
	u64 remote_tlb_flush;
	u64 lpages;
	u64 max_mmu_page_hash_collisions;
};
```

统计计数在代码各关键路径上更新,下面的示例中直接以++vcpu->stat.xxx的方式对当前VCPU的计数器自增:

```c
/*
 * Inject an interrupt on the KVM_RUN path and account for it.
 *
 * The irq_injections counter is bumped first, then the interrupt is
 * delivered through kvm_x86_ops.set_irq(). A perf callchain sample is
 * taken only when the VM has KVM_PERF_CAP_IRQ_INJECT set in perf_caps.
 */
static void kvm_inject_irq(struct kvm_vcpu *vcpu)
{
	vcpu->stat.irq_injections++;

	/* Perform the actual injection into the virtual IRQ chip. */
	kvm_x86_ops.set_irq(vcpu);

	/* Reporting through perf_event is opt-in; nothing more to do otherwise. */
	if (!(vcpu->kvm->arch.perf_caps & KVM_PERF_CAP_IRQ_INJECT))
		return;

	perf_event_do_callchain(vcpu);
}

/*
 * Page-fault path: account for the fault, then hand it to the MMU.
 *
 * Faults whose error code has PFERR_GUEST_MASK set are counted in
 * pf_guest, all others in pf_fixed. The counter is updated before the
 * handler runs, so it is bumped regardless of the handler's outcome.
 *
 * Returns whatever the MMU's page_fault callback returns, negative
 * values included.
 */
static int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
{
	if (!(error_code & PFERR_GUEST_MASK))
		vcpu->stat.pf_fixed++;
	else
		vcpu->stat.pf_guest++;

	/* Success and failure codes alike are passed straight to the caller. */
	return vcpu->arch.mmu->page_fault(vcpu, gpa, error_code);
}

/*
 * Per-exit-reason statistics in the VM-exit dispatch.
 *
 * This is an excerpt: the parts marked with an ellipsis comment are elided,
 * so the declaration of exit_reason and the function's return statement are
 * not visible here.
 */
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
/* ... */

/* Bump the counter that matches the reason the guest exited. */
switch (exit_reason) {
case EXIT_REASON_IO_INSTRUCTION:
++vcpu->stat.io_exits;
break;
/* MSR reads and writes share one counter. */
case EXIT_REASON_MSR_READ:
case EXIT_REASON_MSR_WRITE:
/* NOTE(review): confirm struct kvm_vcpu_stat declares msr_exits. */
++vcpu->stat.msr_exits;
break;
case EXIT_REASON_CPUID:
++vcpu->stat.cpuid_exits;
break;
case EXIT_REASON_HLT:
++vcpu->stat.halt_exits;
break;
/*
 * NOTE(review): every EPT violation is counted as an MMIO exit here --
 * confirm that this is intended and not only a subset of them is MMIO.
 */
case EXIT_REASON_EPT_VIOLATION:
++vcpu->stat.mmio_exits;
break;
/* ... */
}
}
```

perf_event集成通过KVM的perf子系统实现,KVM注册perf callbacks使得perf工具能够采样VCPU执行时的PMU事件:

```c
/*
 * Register perf events for a VM and for each of its VCPUs.
 *
 * Sets the VM's perf capability bits, creates one kernel counter bound to
 * the calling task for the VM, then one counter per VCPU whose handle is
 * stored in vcpu->arch.perf_event.
 *
 * Returns 0 on success. On failure returns the negative errno carried by
 * the failing perf_event_create_kernel_counter() call; every counter
 * created by this call up to that point is released again.
 */
int kvm_perf_register(struct kvm *kvm)
{
	struct perf_event *vm_event, *event;
	struct kvm_vcpu *vcpu, *undo;
	unsigned long i, j;
	int ret;

	kvm->arch.perf_caps = KVM_PERF_CAP_PMU | KVM_PERF_CAP_TRACE;

	/*
	 * perf_event_create_kernel_counter() takes a task pointer (not a pid)
	 * and reports failure through an ERR_PTR-encoded pointer, so the
	 * result must be tested with IS_ERR() rather than compared with 0.
	 */
	vm_event = perf_event_create_kernel_counter(&kvm->arch.perf_attr,
						    -1, current,
						    kvm_perf_event_handler,
						    kvm);
	if (IS_ERR(vm_event))
		return PTR_ERR(vm_event);

	/*
	 * NOTE(review): vm_event is not stored in any field visible in this
	 * snippet, so it cannot be released after a successful return --
	 * confirm where its owner is supposed to keep it.
	 */

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * NOTE(review): the third argument must be a
		 * struct task_struct *; confirm the type of vcpu->pid and
		 * convert it if it is a struct pid *.
		 */
		event = perf_event_create_kernel_counter(&vcpu->arch.perf_attr,
							 -1, vcpu->pid,
							 kvm_vcpu_perf_event_handler,
							 vcpu);
		if (IS_ERR(event)) {
			ret = PTR_ERR(event);
			goto err_release;
		}
		vcpu->arch.perf_event = event;
	}

	return 0;

err_release:
	/* Only VCPUs with an index below the failing one own a counter. */
	kvm_for_each_vcpu(j, undo, kvm) {
		if (j >= i)
			break;
		perf_event_release_kernel(undo->arch.perf_event);
		undo->arch.perf_event = NULL;
	}
	perf_event_release_kernel(vm_event);
	return ret;
}

/*
 * Sampling callback for a per-VCPU perf event: records the guest IP.
 *
 * @event: the perf event that fired; its private pointer is the VCPU.
 * @data:  sample record, annotated here with guest IP and guest mode.
 * @regs:  register state passed by perf; not used, the sample is emitted
 *         with the guest's registers instead.
 *
 * NOTE(review): the callback context handed to
 * perf_event_create_kernel_counter() is read back via event->private --
 * confirm that this is the field the perf core stores it in.
 */
static void kvm_vcpu_perf_event_handler(struct perf_event *event,
					struct perf_sample_data *data,
					struct pt_regs *regs)
{
	struct kvm_vcpu *vcpu = event->private;
	struct pt_regs guest_regs;

	/* Snapshot the guest register state (RIP, CS, ...) for this sample. */
	kvm_get_guest_regs(vcpu, &guest_regs);

	/* Mark the sample as taken in guest mode. */
	data->sample_flags |= PERF_SAMPLE_GUEST;
	data->guest_ip = guest_regs.ip;

	/*
	 * The low two bits of CS hold the current privilege level: 0 is
	 * kernel, anything else is user. With CR0.PE clear (real mode)
	 * there are no privilege levels, so that case counts as kernel too.
	 */
	data->guest_kernel = !(vcpu->arch.cr0 & X86_CR0_PE) ||
			     !(guest_regs.cs & 3);

	perf_sample_output(event, data, &guest_regs);
}
```

kvm_stat用户态工具通过以下三种数据源聚合统计信息:debugfs读取/sys/kernel/debug/kvm/*/stat/下的各统计文件;tracepoint通过perf_event_open挂载kvm:kvm_entry/kvm:kvm_exit等tracepoint事件;perf_event直接读取PMU计数器。kvm_stat命令默认使用debugfs模式,每隔指定间隔刷新显示:

```
kvm_stat -l
Event Total% Cur%
pf_fixed 12.34 15.21
io_exits 8.76 9.45
irq_injections 7.89 8.12
mmio_exits 6.54 7.01
halt_exits 5.43 5.67
cpuid_exits 4.21 4.89
signal_exits 3.12 2.98
```

VCPU统计在KVM内部通过struct kvm_stat_data封装,关联到对应的stat字段和perf_event文件描述符。统计值通过kvm_stat_data_read导出到debugfs,用户态读文件即触发stat_fops.read回调,返回当前累加值。perf_event集成使得kvm_stat可以同时展示Host和Guest的PMU计数,用于分析VT-x/AMD-V的exit频率分布、EPT miss比率、中断注入延迟等关键性能指标。

需要专业的网站建设服务?

联系我们获取免费的网站建设咨询和方案报价,让我们帮助您实现业务目标

立即咨询