# Cache references/misses, instructions, cycles, CPU migrations and context
# switches for a single process, counted while `sleep 10` runs (10-second window).
# PID must hold the target process id; ${PID:?} stops with a message when it is
# unset or empty instead of handing perf a bogus argument.
perf stat \
  -e cache-references,cache-misses \
  -e instructions,cycles \
  -e cpu-migrations,context-switches \
  -p "${PID:?set PID to the target process id}" \
  sleep 10
# Broader per-process profile: branch, bus-cycle, cache, L1 data/instruction
# cache and TLB miss counters for the process in PID, counted while `sleep 10`
# runs (10-second window).
# -a (system-wide) is deliberately not combined with -p: the two targets
# conflict and perf lets the pid target take precedence.
# PID must hold the target process id; ${PID:?} stops with a message otherwise.
perf stat \
  -e branch-misses,bus-cycles \
  -e cache-misses,cache-references \
  -e cpu-cycles,instructions \
  -e L1-dcache-load-misses,L1-dcache-loads \
  -e L1-dcache-store-misses,L1-dcache-stores \
  -e L1-icache-load-misses,L1-icache-loads \
  -e branch-load-misses,branch-loads \
  -e dTLB-load-misses,iTLB-load-misses \
  -p "${PID:?set PID to the target process id}" \
  sleep 10
# Memory access pattern: generic cache counters plus L1 data-cache and
# last-level-cache (LLC) load/store counters and their misses.
# Counted on all CPUs (-a) while `sleep 10` runs.
perf stat \
  -e cache-references,cache-misses \
  -e L1-dcache-loads,L1-dcache-load-misses \
  -e L1-dcache-stores,L1-dcache-store-misses \
  -e LLC-loads,LLC-load-misses \
  -e LLC-stores,LLC-store-misses \
  -a sleep 10
# TLB activity: data-TLB loads/stores and instruction-TLB loads, each paired
# with its miss counter. Counted on all CPUs (-a) while `sleep 10` runs.
perf stat \
  -e dTLB-loads,dTLB-load-misses \
  -e dTLB-stores,dTLB-store-misses \
  -e iTLB-loads,iTLB-load-misses \
  -a sleep 10
# Memory bandwidth: read and write CAS counts from the uncore_imc PMU.
# Counted on all CPUs (-a) while `sleep 10` runs.
# NOTE(review): uncore_imc event availability depends on the CPU and kernel;
# confirm with `perf list` on the target machine.
perf stat \
  -e uncore_imc/cas_count_read/ \
  -e uncore_imc/cas_count_write/ \
  -a sleep 10
# NUMA memory access: node-level loads and stores, each paired with its miss
# counter. Counted on all CPUs (-a) while `sleep 10` runs.
perf stat \
  -e node-loads,node-load-misses \
  -e node-stores,node-store-misses \
  -a sleep 10
# Page faults: total, minor and major fault counts.
# Counted on all CPUs (-a) while `sleep 10` runs.
perf stat \
  -e page-faults \
  -e minor-faults \
  -e major-faults \
  -a sleep 10
# Detailed per-level cache analysis: L1 data/instruction cache loads and misses
# plus L2 demand data-read requests and misses.
# Counted on all CPUs (-a) while `sleep 10` runs.
# NOTE(review): the original heading also mentions L3, but no L3/LLC events are
# listed here; add e.g. LLC-loads,LLC-load-misses if L3 coverage is wanted.
# NOTE(review): the l2_rqsts.* names look CPU-vendor specific; confirm they
# exist on the target machine with `perf list`.
perf stat \
  -e L1-dcache-loads,L1-dcache-load-misses \
  -e L1-icache-loads,L1-icache-load-misses \
  -e l2_rqsts.all_demand_data_rd,l2_rqsts.demand_data_rd_miss \
  -a sleep 10