Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TMA 4.8 Release #181

Merged
merged 14 commits into from
May 21, 2024
6,193 changes: 6,193 additions & 0 deletions BDW/metrics/broadwell_metrics.json

Large diffs are not rendered by default.

968 changes: 968 additions & 0 deletions BDW/metrics/perf/broadwell_metrics_perf.json

Large diffs are not rendered by default.

2,817 changes: 2,361 additions & 456 deletions BDX/metrics/broadwellx_metrics.json

Large diffs are not rendered by default.

827 changes: 472 additions & 355 deletions BDX/metrics/perf/broadwellx_metrics_perf.json

Large diffs are not rendered by default.

4,793 changes: 3,858 additions & 935 deletions CLX/metrics/cascadelakex_metrics.json

Large diffs are not rendered by default.

1,234 changes: 709 additions & 525 deletions CLX/metrics/perf/cascadelakex_metrics_perf.json

Large diffs are not rendered by default.

453 changes: 283 additions & 170 deletions EMR/metrics/emeraldrapids_metrics.json

Large diffs are not rendered by default.

94 changes: 82 additions & 12 deletions EMR/metrics/perf/emeraldrapids_metrics_perf.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@
"MetricName": "llc_demand_data_read_miss_latency_for_remote_requests",
"ScaleUnit": "1ns"
},
{
"BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to Intel(R) Optane(TM) Persistent Memory(PMEM) in nano seconds",
"MetricExpr": "( 1000000000 * ( UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM ) / ( UNC_CHA_CLOCKTICKS / ( source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM) * #num_packages ) ) ) * duration_time",
"MetricGroup": "",
"MetricName": "llc_demand_data_read_miss_to_pmem_latency",
"ScaleUnit": "1ns"
},
{
"BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to DRAM in nano seconds",
"MetricExpr": "( 1000000000 * ( UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_DDR ) / ( UNC_CHA_CLOCKTICKS / ( source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR) * #num_packages ) ) ) * duration_time",
Expand Down Expand Up @@ -216,6 +223,27 @@
"MetricName": "memory_extra_write_bw_due_to_directory_updates",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Intel(R) Optane(TM) Persistent Memory(PMEM) memory read bandwidth (MB/sec)",
"MetricExpr": "( UNC_M_PMM_RPQ_INSERTS * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "pmem_memory_bandwidth_read",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Intel(R) Optane(TM) Persistent Memory(PMEM) memory write bandwidth (MB/sec)",
"MetricExpr": "( UNC_M_PMM_WPQ_INSERTS * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "pmem_memory_bandwidth_write",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Intel(R) Optane(TM) Persistent Memory(PMEM) memory bandwidth (MB/sec)",
"MetricExpr": "(( UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS ) * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "pmem_memory_bandwidth_total",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU.",
"MetricExpr": "( UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1000000) / duration_time",
Expand All @@ -230,6 +258,27 @@
"MetricName": "io_bandwidth_write",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Percentage of inbound reads initiated by end device controllers that miss the L3 cache.",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
"MetricGroup": "",
"MetricName": "io_percent_of_inbound_reads_that_miss_l3",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Percentage of inbound partial cacheline writes initiated by end device controllers that miss the L3 cache.",
"MetricExpr": "(( UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO ) / ( UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO ) )",
"MetricGroup": "",
"MetricName": "io_percent_of_inbound_partial_writes_that_miss_l3",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Percentage of inbound full cacheline writes initiated by end device controllers that miss the L3 cache.",
"MetricExpr": "( UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM )",
"MetricGroup": "",
"MetricName": "io_percent_of_inbound_full_writes_that_miss_l3",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Uops delivered from decoded instruction cache (decoded stream buffer or DSB) as a percent of total uops delivered to Instruction Decode Queue",
"MetricExpr": "( IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS ) )",
Expand All @@ -252,24 +301,31 @@
"ScaleUnit": "100%"
},
{
"BriefDescription": "The estimated bandwidth of core initiated memory read requests to local socket in MB/second. Includes demand, L1 and L2 prefetches for data reads, code reads and read for ownership (RFO) that hit in local cluster and subnuma cluster memory (DRAM or persistent memory), includes the estimates of L3 hardware prefetches that went to Local cluster and subnuma Memory (DRAM or persistent memory).",
"MetricExpr": "(( OCR.READS_TO_CORE.L3_MISS_LOCAL_SOCKET + ( OCR.HWPF_L3.L3_MISS_LOCAL * OCR.READS_TO_CORE.L3_MISS_LOCAL_SOCKET / ( OCR.READS_TO_CORE.L3_MISS_LOCAL_SOCKET + OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_FWD ) ) ) * 64 / 1000000) / duration_time",
"BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket.",
"MetricExpr": "( UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "io_bandwidth_read_local",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from a remote CPU socket.",
"MetricExpr": "( UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_REMOTE * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "core_initiated_local_socket_memory_read_bandwidth",
"MetricName": "io_bandwidth_read_remote",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "The estimated bandwidth of core initiated memory read requests to remote socket in MB/second. Includes demand, L1, L2 prefetches for all data read, code read and read for ownership (RFO) that hit remote DRAM and L3 Prefetches that might have gone to Remote DRAM, Remote NON_DRAM, or Remote cache.",
"MetricExpr": "(( OCR.READS_TO_CORE.REMOTE_MEMORY + ( OCR.HWPF_L3.REMOTE * OCR.READS_TO_CORE.REMOTE_MEMORY / ( OCR.READS_TO_CORE.REMOTE_MEMORY + OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_FWD ) ) ) * 64 / 1000000) / duration_time",
"BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket.",
"MetricExpr": "(( UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL ) * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "core_initiated_remote_socket_memory_read_bandwidth",
"MetricName": "io_bandwidth_write_local",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "The bandwidth of core initiated writes in MB/second. Does not include writebacks due to read for ownerships (RFO).",
"MetricExpr": "(( OCR.MODIFIED_WRITE.ANY_RESPONSE - OCR.RFO_TO_CORE.L3_HIT_M ) * 64 / 1000000) / duration_time",
"BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to a remote CPU socket.",
"MetricExpr": "(( UNC_CHA_TOR_INSERTS.IO_ITOM_REMOTE + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_REMOTE ) * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "core_initiated_write_bandwidth",
"MetricName": "io_bandwidth_write_remote",
"ScaleUnit": "1MB/s"
},
{
Expand All @@ -293,6 +349,13 @@
"MetricName": "llc_miss_remote_memory_bandwidth_read",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to remote memory.",
"MetricExpr": "( UNC_CHA_REQUESTS.WRITES_REMOTE * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "llc_miss_remote_memory_bandwidth_write",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)",
"MetricExpr": "( UNC_UPI_RxL_FLITS.ALL_DATA * (64 / 9.0) / 1000000) / duration_time",
Expand All @@ -301,10 +364,17 @@
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to remote memory.",
"MetricExpr": "( UNC_CHA_REQUESTS.WRITES_REMOTE * 64 / 1000000) / duration_time",
"BriefDescription": "Bandwidth observed by the integrated I/O traffic controller (IIO) of IO reads that are initiated by end device controllers that are requesting memory from the CPU.",
"MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.ALL_PARTS * 4 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "llc_miss_remote_memory_bandwidth_write",
"MetricName": "iio_bandwidth_read",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Bandwidth observed by the integrated I/O traffic controller (IIO) of IO writes that are initiated by end device controllers that are writing memory to the CPU.",
"MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.ALL_PARTS * 4 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "iio_bandwidth_write",
"ScaleUnit": "1MB/s"
}
]
Loading
Loading