From bb1aeb903aab6e2717b0844c67203e8416e43ebf Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Tue, 7 Nov 2023 13:45:21 -0600 Subject: [PATCH] NVIDIA: SAUCE: perf: arm_cspmu: nvidia: add T264 support Adds PMU support for the following IPs in NVIDIA Tegra T264 SOC: - Unified Coherency Fabric (UCF) - Vision - Display - High-speed IO - UCF GPU Bug 5524939 Change-Id: I595dc746e3b45b9f40c5f4343212c37f42f0faa1 Signed-off-by: Besar Wicaksono Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3459617 Tested-by: Ryan Bissell GVS: buildbot_gerritrpt Reviewed-by: Jon Hunter --- Documentation/admin-guide/perf/nvidia-pmu.rst | 158 ++++++++++++++++ drivers/perf/arm_cspmu/nvidia_cspmu.c | 174 ++++++++++++++++++ 2 files changed, 332 insertions(+) diff --git a/Documentation/admin-guide/perf/nvidia-pmu.rst b/Documentation/admin-guide/perf/nvidia-pmu.rst index 2e0d47cfe7ea..5befe5628a15 100644 --- a/Documentation/admin-guide/perf/nvidia-pmu.rst +++ b/Documentation/admin-guide/perf/nvidia-pmu.rst @@ -10,6 +10,11 @@ metrics like memory bandwidth, latency, and utilization: * NVLink-C2C1 * CNVLink * PCIE +* Unified Coherency Fabric (UCF) +* Vision +* Display +* High-speed IO +* UCF-GPU PMU Driver ---------- @@ -183,6 +188,159 @@ Example usage: perf stat -a -e nvidia_pcie_pmu_1/event=0x0,root_port=0x3/ +UCF PMU +------- + +The UCF PMU monitors system level cache events and DRAM traffic that flows +through UCF. + +The events and configuration options of this PMU device are described in sysfs, +see /sys/bus/event_source/devices/nvidia_ucf_pmu_<socket-id>. + +User can configure the PMU to capture events from specific source and destination. +The source/destination filter is described in +/sys/bus/event_source/devices/nvidia_ucf_pmu_<socket-id>/format/. By default +traffic from all sources and destinations will be captured if no source/destination +is specified. 
+ +Example usage: + +* Count event id 0x0 from any source/destination of socket 0:: + + perf stat -a -e nvidia_ucf_pmu_0/event=0x0/ + +* Count event id 0x1 from socket 0's CPUs to socket 0's DRAM:: + + perf stat -a -e nvidia_ucf_pmu_0/event=0x1,src_loc_cpu=0x1,dst_loc=0x1/ + +* Count event id 0x1 from remote source of socket 0 to local and remote DRAM:: + + perf stat -a -e nvidia_ucf_pmu_0/event=0x1,src_rem=0x1,dst_loc=0x1,dst_rem=0x1/ + +* Count event id 0x2 from any source/destination of socket 1:: + + perf stat -a -e nvidia_ucf_pmu_1/event=0x2/ + +* Count event id 0x3 from socket 1's CPUs to socket 1's DRAM:: + + perf stat -a -e nvidia_ucf_pmu_1/event=0x3,src_loc_cpu=0x1,dst_loc=0x1/ + + +Vision PMU +------------ + +The vision PMU monitors memory traffic from the multimedia IPs in the SOC. + +The events and configuration options of this PMU device are described in sysfs, +see /sys/bus/event_source/devices/nvidia_vision_pmu_<socket-id>. + +User can configure the PMU to capture events from specific IPs. +/sys/bus/event_source/devices/nvidia_vision_pmu_<socket-id>/format/ contains +the filter attribute name of each multimedia IP. This filter attribute is a +bitmask to select the AXI/hub interface of the IP to monitor. By default traffic +from all interfaces of all IPs will be captured if no IPs are specified. 
+ +Example usage: + +* Count event id 0x0 from all multimedia IPs in socket 0:: + + perf stat -a -e nvidia_vision_pmu_0/event=0x0/ + +* Count event id 0x1 from AXI/hub interface 0 in VI-0 of socket 0:: + + perf stat -a -e nvidia_vision_pmu_0/event=0x1,vi_0=0x1/ + +* Count event id 0x1 from AXI/hub interface 0 and 1 in VI-0 of socket 0:: + + perf stat -a -e nvidia_vision_pmu_0/event=0x1,vi_0=0x3/ + +* Count event id 0x2 from all multimedia IPs in socket 1:: + + perf stat -a -e nvidia_vision_pmu_1/event=0x2/ + +* Count event id 0x3 from AXI/hub interface 0 in VI-0 and PVA of socket 1:: + + perf stat -a -e nvidia_vision_pmu_1/event=0x3,vi_0=0x1,pva=0x1/ + + +Display PMU +------------ + +The display PMU monitors memory traffic from the display IP in the SOC. + +The events and configuration options of this PMU device are described in sysfs, +see /sys/bus/event_source/devices/nvidia_display_pmu_<socket-id>. + +Example usage: + +* Count event id 0x0 in socket 0:: + + perf stat -a -e nvidia_display_pmu_0/event=0x0/ + +* Count event id 0x0 in socket 1:: + + perf stat -a -e nvidia_display_pmu_1/event=0x0/ + + +High-speed I/O PMU +------------------- + +The high-speed I/O PMU monitors memory traffic from the high speed I/O devices +in the SOC. + +The events and configuration options of this PMU device are described in sysfs, +see /sys/bus/event_source/devices/nvidia_uphy_pmu_<socket-id>. + +User can configure the PMU to capture events from specific I/Os. +/sys/bus/event_source/devices/nvidia_uphy_pmu_<socket-id>/format/ contains +the filter attribute name of each I/O. This filter attribute is a +bitmask to select the AXI/hub interface of the I/O to monitor. By default +traffic from all interfaces of all I/Os will be captured if no I/Os are +specified. 
+ +Example usage: + +* Count event id 0x0 from all I/Os in socket 0:: + + perf stat -a -e nvidia_uphy_pmu_0/event=0x0/ + +* Count event id 0x1 from PCIE Root Port 1 of socket 0:: + + perf stat -a -e nvidia_uphy_pmu_0/event=0x1,pcie_rp_1=0x1/ + +* Count event id 0x1 from PCIE Root Port 1 and Root Port 2 of socket 0:: + + perf stat -a -e nvidia_uphy_pmu_0/event=0x1,pcie_rp_1=0x1,pcie_rp_2=0x1/ + +* Count event id 0x2 from all I/Os in socket 1:: + + perf stat -a -e nvidia_uphy_pmu_1/event=0x2/ + +* Count event id 0x3 from PCIE Root Port 3 and UFS of socket 1:: + + perf stat -a -e nvidia_uphy_pmu_1/event=0x3,pcie_rp_3=0x1,ufs=0x1/ + + +UCF-GPU PMU +------------ + +The UCF-GPU PMU monitors integrated GPU physical address traffic flowing through +UCF. + +The events and configuration options of this PMU device are described in sysfs, +see /sys/bus/event_source/devices/nvidia_ucf_gpu_pmu_<socket-id>. + +Example usage: + +* Count event id 0x0 in socket 0:: + + perf stat -a -e nvidia_ucf_gpu_pmu_0/event=0x0/ + +* Count event id 0x0 in socket 1:: + + perf stat -a -e nvidia_ucf_gpu_pmu_1/event=0x0/ + + .. 
_NVIDIA_Uncore_PMU_Traffic_Coverage_Section: Traffic Coverage diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index 71803564c72f..da8d8b2c1739 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -20,6 +20,16 @@ #define NV_CNVL_PORT_COUNT 4ULL #define NV_CNVL_FILTER_ID_MASK GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0) +#define NV_UCF_FILTER_ID_MASK GENMASK_ULL(4, 0) /* bits 0-4: same range as the UCF src/dst format fields (config1:0 .. config1:4) */ + +#define NV_UPHY_FILTER_ID_MASK GENMASK_ULL(16, 0) /* bits 0-16: same range as the UPHY format fields (config1:0 .. config1:16) */ + +#define NV_VISION_FILTER_ID_MASK GENMASK_ULL(19, 0) /* bits 0-19: same range as the vision format fields (config1:0 .. config1:19) */ + +#define NV_DISPLAY_FILTER_ID_MASK BIT(0) /* used as filter_default_val of the display PMU match entry */ + +#define NV_UCF_GPU_FILTER_ID_MASK BIT(0) /* used as filter_default_val of the UCF-GPU PMU match entry */ + #define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0) #define NV_PRODID_MASK (ARM_CSPMU_PMIIDR_PRODUCTID | \ @@ -178,6 +188,72 @@ static struct attribute *mcf_pmu_event_attrs[] = { NULL, }; +static struct attribute *ucf_pmu_event_attrs[] = { /* Named event IDs for the UCF PMU; NULL-terminated */ + ARM_CSPMU_EVENT_ATTR(slc_allocate, 0xf0), + ARM_CSPMU_EVENT_ATTR(slc_refill, 0xf1), + ARM_CSPMU_EVENT_ATTR(slc_access, 0xf2), + ARM_CSPMU_EVENT_ATTR(slc_wb, 0xf3), + ARM_CSPMU_EVENT_ATTR(slc_hit, 0x118), + ARM_CSPMU_EVENT_ATTR(slc_access_wr, 0x112), + ARM_CSPMU_EVENT_ATTR(slc_access_rd, 0x111), + ARM_CSPMU_EVENT_ATTR(slc_refill_wr, 0x10a), + ARM_CSPMU_EVENT_ATTR(slc_refill_rd, 0x109), + ARM_CSPMU_EVENT_ATTR(slc_hit_wr, 0x11a), + ARM_CSPMU_EVENT_ATTR(slc_hit_rd, 0x119), + ARM_CSPMU_EVENT_ATTR(slc_access_dataless, 0x183), + ARM_CSPMU_EVENT_ATTR(slc_access_atomic, 0x184), + ARM_CSPMU_EVENT_ATTR(local_snoop, 0x180), + ARM_CSPMU_EVENT_ATTR(ext_snp_access, 0x181), + ARM_CSPMU_EVENT_ATTR(ext_snp_evict, 0x182), + + ARM_CSPMU_EVENT_ATTR(ucf_bus_cycles, 0x1d), + + ARM_CSPMU_EVENT_ATTR(any_access_wr, 0x112), /* NOTE(review): same ID as slc_access_wr above - confirm the alias is intended */ + ARM_CSPMU_EVENT_ATTR(any_access_rd, 0x111), /* NOTE(review): same ID as slc_access_rd above - confirm the alias is intended */ + ARM_CSPMU_EVENT_ATTR(any_byte_wr, 0x114), + ARM_CSPMU_EVENT_ATTR(any_byte_rd, 0x113), + ARM_CSPMU_EVENT_ATTR(any_outstanding_rd, 0x115), + + ARM_CSPMU_EVENT_ATTR(local_dram_access_wr, 0x122), + ARM_CSPMU_EVENT_ATTR(local_dram_access_rd, 0x121), + 
ARM_CSPMU_EVENT_ATTR(local_dram_byte_wr, 0x124), + ARM_CSPMU_EVENT_ATTR(local_dram_byte_rd, 0x123), + + ARM_CSPMU_EVENT_ATTR(mmio_access_wr, 0x132), + ARM_CSPMU_EVENT_ATTR(mmio_access_rd, 0x131), + ARM_CSPMU_EVENT_ATTR(mmio_byte_wr, 0x134), + ARM_CSPMU_EVENT_ATTR(mmio_byte_rd, 0x133), + ARM_CSPMU_EVENT_ATTR(mmio_outstanding_rd, 0x135), + + ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), + + NULL, +}; + +static struct attribute *display_pmu_event_attrs[] = { /* Named event IDs for the display PMU; NULL-terminated */ + ARM_CSPMU_EVENT_ATTR(rd_bytes_loc, 0x0), + ARM_CSPMU_EVENT_ATTR(rd_req_loc, 0x6), + ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc, 0xc), + + ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), + + NULL, +}; + +static struct attribute *ucf_gpu_pmu_event_attrs[] = { /* Named event IDs for the UCF-GPU PMU; NULL-terminated */ + ARM_CSPMU_EVENT_ATTR(rd_bytes_loc_rem, 0x0), + ARM_CSPMU_EVENT_ATTR(wr_bytes_loc, 0x2), + ARM_CSPMU_EVENT_ATTR(wr_bytes_rem, 0x3), + ARM_CSPMU_EVENT_ATTR(rd_req_loc_rem, 0x6), + ARM_CSPMU_EVENT_ATTR(wr_req_loc, 0x8), + ARM_CSPMU_EVENT_ATTR(wr_req_rem, 0x9), + ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc_rem, 0xc), + + ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), + + NULL, +}; + static struct attribute *generic_pmu_event_attrs[] = { ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), NULL, @@ -205,6 +281,54 @@ static struct attribute *cnvlink_pmu_format_attrs[] = { NULL, }; +static struct attribute *ucf_pmu_format_attrs[] = { /* Event selector plus one config1 bit per UCF source/destination filter */ + ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_ATTR(src_loc_noncpu, "config1:0"), + ARM_CSPMU_FORMAT_ATTR(src_loc_cpu, "config1:1"), + ARM_CSPMU_FORMAT_ATTR(src_rem, "config1:2"), + ARM_CSPMU_FORMAT_ATTR(dst_loc, "config1:3"), + ARM_CSPMU_FORMAT_ATTR(dst_rem, "config1:4"), + NULL, +}; + +static struct attribute *display_pmu_format_attrs[] = { /* Event selector only; no filter fields are listed */ + ARM_CSPMU_FORMAT_EVENT_ATTR, + NULL, +}; + +static struct attribute *ucf_gpu_pmu_format_attrs[] = { /* Event selector only; no filter fields are listed */ + ARM_CSPMU_FORMAT_EVENT_ATTR, + NULL, +}; + +static struct attribute *uphy_pmu_format_attrs[] = { /* Event selector plus one config1 filter field per high-speed I/O block */ + ARM_CSPMU_FORMAT_EVENT_ATTR, + 
ARM_CSPMU_FORMAT_ATTR(pcie_rp_1, "config1:0"), + ARM_CSPMU_FORMAT_ATTR(pcie_rp_2, "config1:1"), + ARM_CSPMU_FORMAT_ATTR(pcie_rp_3, "config1:2"), + ARM_CSPMU_FORMAT_ATTR(pcie_rp_4, "config1:3"), + ARM_CSPMU_FORMAT_ATTR(pcie_rp_5, "config1:4"), + ARM_CSPMU_FORMAT_ATTR(xusb, "config1:5-10"), + ARM_CSPMU_FORMAT_ATTR(mgbe_0, "config1:11"), + ARM_CSPMU_FORMAT_ATTR(mgbe_1, "config1:12"), + ARM_CSPMU_FORMAT_ATTR(mgbe_2, "config1:13"), + ARM_CSPMU_FORMAT_ATTR(mgbe_3, "config1:14"), + ARM_CSPMU_FORMAT_ATTR(eqos, "config1:15"), + ARM_CSPMU_FORMAT_ATTR(ufs, "config1:16"), + NULL, +}; + +static struct attribute *vision_pmu_format_attrs[] = { /* Event selector plus one multi-bit config1 filter field per multimedia IP */ + ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_ATTR(vi_0, "config1:0-1"), + ARM_CSPMU_FORMAT_ATTR(vi_1, "config1:2-3"), + ARM_CSPMU_FORMAT_ATTR(isp_0, "config1:4-7"), + ARM_CSPMU_FORMAT_ATTR(isp_1, "config1:8-11"), + ARM_CSPMU_FORMAT_ATTR(vic, "config1:12-13"), + ARM_CSPMU_FORMAT_ATTR(pva, "config1:14-19"), + NULL, +}; + static struct attribute *generic_pmu_format_attrs[] = { ARM_CSPMU_FORMAT_EVENT_ATTR, ARM_CSPMU_FORMAT_FILTER_ATTR, @@ -313,6 +437,56 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { .event_attr = scf_pmu_event_attrs, .format_attr = scf_pmu_format_attrs }, + { /* UCF PMU: default filter value equals the full filter mask */ + .prodid = 0x2CF10000, + .prodid_mask = NV_PRODID_MASK, + .filter_mask = NV_UCF_FILTER_ID_MASK, + .filter_default_val = NV_UCF_FILTER_ID_MASK, + .name_pattern = "nvidia_ucf_pmu_%u", + .name_fmt = NAME_FMT_SOCKET, + .event_attr = ucf_pmu_event_attrs, + .format_attr = ucf_pmu_format_attrs + }, + { /* High-speed I/O (UPHY) PMU: uses mcf_pmu_event_attrs as its event list */ + .prodid = 0x10800000, + .prodid_mask = NV_PRODID_MASK, + .filter_mask = NV_UPHY_FILTER_ID_MASK, + .filter_default_val = NV_UPHY_FILTER_ID_MASK, + .name_pattern = "nvidia_uphy_pmu_%u", + .name_fmt = NAME_FMT_SOCKET, + .event_attr = mcf_pmu_event_attrs, + .format_attr = uphy_pmu_format_attrs + }, + { /* UCF-GPU PMU: filter_mask is 0 with a non-zero default; NOTE(review): presumably the default is always applied - confirm against the cspmu core */ + .prodid = 0x10a00000, + .prodid_mask = NV_PRODID_MASK, + .filter_mask = 0, + .filter_default_val = NV_UCF_GPU_FILTER_ID_MASK, + .name_pattern = 
"nvidia_ucf_gpu_pmu_%u", + .name_fmt = NAME_FMT_SOCKET, + .event_attr = ucf_gpu_pmu_event_attrs, + .format_attr = ucf_gpu_pmu_format_attrs + }, + { /* Display PMU: filter_mask is 0 with a non-zero default; NOTE(review): presumably the default is always applied - confirm against the cspmu core */ + .prodid = 0x10d00000, + .prodid_mask = NV_PRODID_MASK, + .filter_mask = 0, + .filter_default_val = NV_DISPLAY_FILTER_ID_MASK, + .name_pattern = "nvidia_display_pmu_%u", + .name_fmt = NAME_FMT_SOCKET, + .event_attr = display_pmu_event_attrs, + .format_attr = display_pmu_format_attrs + }, + { /* Vision PMU: uses mcf_pmu_event_attrs as its event list */ + .prodid = 0x10e00000, + .prodid_mask = NV_PRODID_MASK, + .filter_mask = NV_VISION_FILTER_ID_MASK, + .filter_default_val = NV_VISION_FILTER_ID_MASK, + .name_pattern = "nvidia_vision_pmu_%u", + .name_fmt = NAME_FMT_SOCKET, + .event_attr = mcf_pmu_event_attrs, + .format_attr = vision_pmu_format_attrs + }, { .prodid = 0, .prodid_mask = 0,