intel · calebbiggers · May 21, 2024 · May 10, 2024 · May 10, 2024 · May 10, 2024
@@ -118,6 +118,13 @@
         "MetricName": "llc_demand_data_read_miss_latency_for_remote_requests",
         "ScaleUnit": "1ns"
     },
+    {
+        "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to Intel(R) Optane(TM) Persistent Memory (PMEM) in nanoseconds",
+        "MetricExpr": "( 1000000000 * ( UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM ) / ( UNC_CHA_CLOCKTICKS / ( source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM) * #num_packages ) ) ) * duration_time",
+        "MetricGroup": "",
+        "MetricName": "llc_demand_data_read_miss_to_pmem_latency",
+        "ScaleUnit": "1ns"
+    },
     {
         "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to DRAM in nano seconds",
         "MetricExpr": "( 1000000000 * ( UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_DDR ) / ( UNC_CHA_CLOCKTICKS / ( source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR) * #num_packages ) ) ) * duration_time",
@@ -216,6 +223,27 @@
         "MetricName": "memory_extra_write_bw_due_to_directory_updates",
         "ScaleUnit": "1MB/s"
     },
+    {
+        "BriefDescription": "Intel(R) Optane(TM) Persistent Memory (PMEM) memory read bandwidth (MB/sec)",
+        "MetricExpr": "( UNC_M_PMM_RPQ_INSERTS * 64 / 1000000) / duration_time",
+        "MetricGroup": "",
+        "MetricName": "pmem_memory_bandwidth_read",
+        "ScaleUnit": "1MB/s"
+    },
+    {
+        "BriefDescription": "Intel(R) Optane(TM) Persistent Memory (PMEM) memory write bandwidth (MB/sec)",
+        "MetricExpr": "( UNC_M_PMM_WPQ_INSERTS * 64 / 1000000) / duration_time",
+        "MetricGroup": "",
+        "MetricName": "pmem_memory_bandwidth_write",
+        "ScaleUnit": "1MB/s"
+    },
+    {
+        "BriefDescription": "Intel(R) Optane(TM) Persistent Memory (PMEM) memory bandwidth (MB/sec)",
+        "MetricExpr": "(( UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS ) * 64 / 1000000) / duration_time",
+        "MetricGroup": "",
+        "MetricName": "pmem_memory_bandwidth_total",
+        "ScaleUnit": "1MB/s"
+    },
     {
         "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU.",
         "MetricExpr": "( UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1000000) / duration_time",
@@ -230,6 +258,27 @@
         "MetricName": "io_bandwidth_write",
         "ScaleUnit": "1MB/s"
     },
+    {
+        "BriefDescription": "Percentage of inbound reads initiated by end device controllers that miss the L3 cache.",
+        "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
+        "MetricGroup": "",
+        "MetricName": "io_percent_of_inbound_reads_that_miss_l3",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Percentage of inbound partial cacheline writes initiated by end device controllers that miss the L3 cache.",
+        "MetricExpr": "(( UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO ) / ( UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO ) )",
+        "MetricGroup": "",
+        "MetricName": "io_percent_of_inbound_partial_writes_that_miss_l3",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Percentage of inbound full cacheline writes initiated by end device controllers that miss the L3 cache.",
+        "MetricExpr": "( UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM )",
+        "MetricGroup": "",
+        "MetricName": "io_percent_of_inbound_full_writes_that_miss_l3",
+        "ScaleUnit": "100%"
+    },
     {
         "BriefDescription": "Uops delivered from decoded instruction cache (decoded stream buffer or DSB) as a percent of total uops delivered to Instruction Decode Queue",
         "MetricExpr": "( IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS ) )",
@@ -252,24 +301,31 @@
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "The estimated bandwidth of core initiated memory read requests to local socket in MB/second. Includes demand, L1 and L2 prefetches for data reads, code reads and read for ownership (RFO) that hit in local cluster and subnuma cluster memory (DRAM or  persistent memory), includes the estimates of  L3 hardware prefetches that went to Local cluster and subnuma Memory (DRAM or persistent memory).",
-        "MetricExpr": "(( OCR.READS_TO_CORE.L3_MISS_LOCAL_SOCKET + ( OCR.HWPF_L3.L3_MISS_LOCAL * OCR.READS_TO_CORE.L3_MISS_LOCAL_SOCKET / ( OCR.READS_TO_CORE.L3_MISS_LOCAL_SOCKET + OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_FWD ) ) ) * 64 / 1000000) / duration_time",
+        "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket.",
+        "MetricExpr": "( UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1000000) / duration_time",
+        "MetricGroup": "",
+        "MetricName": "io_bandwidth_read_local",
+        "ScaleUnit": "1MB/s"
+    },
+    {
+        "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from a remote CPU socket.",
+        "MetricExpr": "( UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_REMOTE * 64 / 1000000) / duration_time",
         "MetricGroup": "",
-        "MetricName": "core_initiated_local_socket_memory_read_bandwidth",
+        "MetricName": "io_bandwidth_read_remote",
         "ScaleUnit": "1MB/s"
     },
     {
-        "BriefDescription": "The estimated bandwidth of core initiated memory read requests to remote socket in MB/second. Includes demand, L1, L2 prefetches for all data read, code read and read for ownership (RFO) that hit remote DRAM and L3 Prefetches that might have gone to Remote DRAM, Remote NON_DRAM, or Remote cache.",
-        "MetricExpr": "(( OCR.READS_TO_CORE.REMOTE_MEMORY + ( OCR.HWPF_L3.REMOTE * OCR.READS_TO_CORE.REMOTE_MEMORY / ( OCR.READS_TO_CORE.REMOTE_MEMORY + OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_FWD ) ) ) * 64 / 1000000) / duration_time",
+        "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket.",
+        "MetricExpr": "(( UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL ) * 64 / 1000000) / duration_time",
         "MetricGroup": "",
-        "MetricName": "core_initiated_remote_socket_memory_read_bandwidth",
+        "MetricName": "io_bandwidth_write_local",
         "ScaleUnit": "1MB/s"
     },
     {
-        "BriefDescription": "The bandwidth of core initiated writes in MB/second. Does not include writebacks due to read for ownerships (RFO).",
-        "MetricExpr": "(( OCR.MODIFIED_WRITE.ANY_RESPONSE - OCR.RFO_TO_CORE.L3_HIT_M ) * 64 / 1000000) / duration_time",
+        "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to a remote CPU socket.",
+        "MetricExpr": "(( UNC_CHA_TOR_INSERTS.IO_ITOM_REMOTE + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_REMOTE ) * 64 / 1000000) / duration_time",
         "MetricGroup": "",
-        "MetricName": "core_initiated_write_bandwidth",
+        "MetricName": "io_bandwidth_write_remote",
         "ScaleUnit": "1MB/s"
     },
     {
@@ -293,6 +349,13 @@
         "MetricName": "llc_miss_remote_memory_bandwidth_read",
         "ScaleUnit": "1MB/s"
     },
+    {
+        "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to remote memory.",
+        "MetricExpr": "( UNC_CHA_REQUESTS.WRITES_REMOTE * 64 / 1000000) / duration_time",
+        "MetricGroup": "",
+        "MetricName": "llc_miss_remote_memory_bandwidth_write",
+        "ScaleUnit": "1MB/s"
+    },
     {
         "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)",
         "MetricExpr": "( UNC_UPI_RxL_FLITS.ALL_DATA * (64 / 9.0) / 1000000) / duration_time",
@@ -301,10 +364,17 @@
         "ScaleUnit": "1MB/s"
     },
     {
-        "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to remote memory.",
-        "MetricExpr": "( UNC_CHA_REQUESTS.WRITES_REMOTE * 64 / 1000000) / duration_time",
+        "BriefDescription": "Bandwidth observed by the integrated I/O traffic controller (IIO) of IO reads that are initiated by end device controllers that are requesting memory from the CPU.",
+        "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.ALL_PARTS * 4 / 1000000) / duration_time",
         "MetricGroup": "",
-        "MetricName": "llc_miss_remote_memory_bandwidth_write",
+        "MetricName": "iio_bandwidth_read",
+        "ScaleUnit": "1MB/s"
+    },
+    {
+        "BriefDescription": "Bandwidth observed by the integrated I/O traffic controller (IIO) of IO writes that are initiated by end device controllers that are writing memory to the CPU.",
+        "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.ALL_PARTS * 4 / 1000000) / duration_time",
+        "MetricGroup": "",
+        "MetricName": "iio_bandwidth_write",
         "ScaleUnit": "1MB/s"
     }
 ]