NVIDIA: SAUCE: perf: arm_cspmu: nvidia: add T264 support
Adds PMU support for the following IPs in NVIDIA Tegra T264 SOC: - Unified Coherency Fabric (UCF) - Vision - Display - High-speed IO - UCF GPU Bug 5524939 Change-Id: I595dc746e3b45b9f40c5f4343212c37f42f0faa1 Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3459617 Tested-by: Ryan Bissell <rbissell@nvidia.com> GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com> Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
b837fb9d0d
commit
bb1aeb903a
@@ -10,6 +10,11 @@ metrics like memory bandwidth, latency, and utilization:
|
||||
* NVLink-C2C1
|
||||
* CNVLink
|
||||
* PCIE
|
||||
* Unified Coherency Fabric (UCF)
|
||||
* Vision
|
||||
* Display
|
||||
* High-speed I/O
|
||||
* UCF-GPU
|
||||
|
||||
PMU Driver
|
||||
----------
|
||||
@@ -183,6 +188,159 @@ Example usage:
|
||||
|
||||
perf stat -a -e nvidia_pcie_pmu_1/event=0x0,root_port=0x3/
|
||||
|
||||
UCF PMU
|
||||
-------
|
||||
|
||||
The UCF PMU monitors system level cache events and DRAM traffic that flows
|
||||
through UCF.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_source/devices/nvidia_ucf_pmu_<socket-id>.
|
||||
|
||||
The user can configure the PMU to capture events from a specific source and destination.
|
||||
The source/destination filter is described in
|
||||
/sys/bus/event_source/devices/nvidia_ucf_pmu_<socket-id>/format/. By default
|
||||
traffic from all sources and destinations will be captured if no source/destination
|
||||
is specified.
|
||||
|
||||
Example usage:
|
||||
|
||||
* Count event id 0x0 from any source/destination of socket 0::
|
||||
|
||||
perf stat -a -e nvidia_ucf_pmu_0/event=0x0/
|
||||
|
||||
* Count event id 0x1 from socket 0's CPUs to socket 0's DRAM::
|
||||
|
||||
perf stat -a -e nvidia_ucf_pmu_0/event=0x1,src_loc_cpu=0x1,dst_loc=0x1/
|
||||
|
||||
* Count event id 0x1 from remote source of socket 0 to local and remote DRAM::
|
||||
|
||||
perf stat -a -e nvidia_ucf_pmu_0/event=0x1,src_rem=0x1,dst_loc=0x1,dst_rem=0x1/
|
||||
|
||||
* Count event id 0x2 from any source/destination of socket 1::
|
||||
|
||||
perf stat -a -e nvidia_ucf_pmu_1/event=0x2/
|
||||
|
||||
* Count event id 0x3 from socket 1's CPUs to socket 1's DRAM::
|
||||
|
||||
perf stat -a -e nvidia_ucf_pmu_1/event=0x3,src_loc_cpu=0x1,dst_loc=0x1/
|
||||
|
||||
|
||||
Vision PMU
|
||||
------------
|
||||
|
||||
The vision PMU monitors memory traffic from the multimedia IPs in the SOC.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_source/devices/nvidia_vision_pmu_<socket-id>.
|
||||
|
||||
The user can configure the PMU to capture events from specific IPs.
|
||||
/sys/bus/event_source/devices/nvidia_vision_pmu_<socket-id>/format/ contains
|
||||
the filter attribute name of each multimedia IP. This filter attribute is a
|
||||
bitmask to select the AXI/hub interface of the IP to monitor. By default traffic
|
||||
from all interfaces of all IPs will be captured if no IPs are specified.
|
||||
|
||||
Example usage:
|
||||
|
||||
* Count event id 0x0 from all multimedia IPs in socket 0::
|
||||
|
||||
perf stat -a -e nvidia_vision_pmu_0/event=0x0/
|
||||
|
||||
* Count event id 0x1 from AXI/hub interface 0 in VI-0 of socket 0::
|
||||
|
||||
perf stat -a -e nvidia_vision_pmu_0/event=0x1,vi_0=0x1/
|
||||
|
||||
* Count event id 0x1 from AXI/hub interface 0 and 1 in VI-0 of socket 0::
|
||||
|
||||
perf stat -a -e nvidia_vision_pmu_0/event=0x1,vi_0=0x3/
|
||||
|
||||
* Count event id 0x2 from all multimedia IPs in socket 1::
|
||||
|
||||
perf stat -a -e nvidia_vision_pmu_1/event=0x2/
|
||||
|
||||
* Count event id 0x3 from AXI/hub interface 0 in VI-0 and PVA of socket 1::
|
||||
|
||||
perf stat -a -e nvidia_vision_pmu_1/event=0x3,vi_0=0x1,pva=0x1/
|
||||
|
||||
|
||||
Display PMU
|
||||
------------
|
||||
|
||||
The display PMU monitors memory traffic from the display IP in the SOC.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_source/devices/nvidia_display_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
* Count event id 0x0 in socket 0::
|
||||
|
||||
perf stat -a -e nvidia_display_pmu_0/event=0x0/
|
||||
|
||||
* Count event id 0x0 in socket 1::
|
||||
|
||||
perf stat -a -e nvidia_display_pmu_1/event=0x0/
|
||||
|
||||
|
||||
High-speed I/O PMU
|
||||
-------------------
|
||||
|
||||
The high-speed I/O PMU monitors memory traffic from the high speed I/O devices
|
||||
in the SOC.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_source/devices/nvidia_uphy_pmu_<socket-id>.
|
||||
|
||||
The user can configure the PMU to capture events from specific I/Os.
|
||||
/sys/bus/event_source/devices/nvidia_uphy_pmu_<socket-id>/format/ contains
|
||||
the filter attribute name of each I/O. This filter attribute is a
|
||||
bitmask to select the AXI/hub interface of the I/O to monitor. By default
|
||||
traffic from all interfaces of all I/Os will be captured if no I/Os are
|
||||
specified.
|
||||
|
||||
Example usage:
|
||||
|
||||
* Count event id 0x0 from all I/Os in socket 0::
|
||||
|
||||
perf stat -a -e nvidia_uphy_pmu_0/event=0x0/
|
||||
|
||||
* Count event id 0x1 from PCIE Root Port 1 of socket 0::
|
||||
|
||||
perf stat -a -e nvidia_uphy_pmu_0/event=0x1,pcie_rp_1=0x1/
|
||||
|
||||
* Count event id 0x1 from PCIE Root Port 1 and Root Port 2 of socket 0::
|
||||
|
||||
perf stat -a -e nvidia_uphy_pmu_0/event=0x1,pcie_rp_1=0x1,pcie_rp_2=0x1/
|
||||
|
||||
* Count event id 0x2 from all I/Os in socket 1::
|
||||
|
||||
perf stat -a -e nvidia_uphy_pmu_1/event=0x2/
|
||||
|
||||
* Count event id 0x3 from PCIE Root Port 3 and UFS of socket 1::
|
||||
|
||||
perf stat -a -e nvidia_uphy_pmu_1/event=0x3,pcie_rp_3=0x1,ufs=0x1/
|
||||
|
||||
|
||||
UCF-GPU PMU
|
||||
------------
|
||||
|
||||
The UCF-GPU PMU monitors integrated GPU physical address traffic flowing through
|
||||
UCF.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_source/devices/nvidia_ucf_gpu_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
* Count event id 0x0 in socket 0::
|
||||
|
||||
perf stat -a -e nvidia_ucf_gpu_pmu_0/event=0x0/
|
||||
|
||||
* Count event id 0x0 in socket 1::
|
||||
|
||||
perf stat -a -e nvidia_ucf_gpu_pmu_1/event=0x0/
|
||||
|
||||
|
||||
.. _NVIDIA_Uncore_PMU_Traffic_Coverage_Section:
|
||||
|
||||
Traffic Coverage
|
||||
|
||||
@@ -20,6 +20,16 @@
|
||||
#define NV_CNVL_PORT_COUNT 4ULL
|
||||
#define NV_CNVL_FILTER_ID_MASK GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)
|
||||
|
||||
/*
 * UCF filter: bits 4:0, matching the five single-bit source/destination
 * selectors (config1:0 .. config1:4) listed in ucf_pmu_format_attrs.
 */
#define NV_UCF_FILTER_ID_MASK GENMASK_ULL(4, 0)
|
||||
|
||||
/*
 * High-speed I/O (UPHY) filter: bits 16:0, matching the config1:0 ..
 * config1:16 fields listed in uphy_pmu_format_attrs.
 */
#define NV_UPHY_FILTER_ID_MASK GENMASK_ULL(16, 0)
|
||||
|
||||
/*
 * Vision filter: bits 19:0, matching the config1:0 .. config1:19 fields
 * listed in vision_pmu_format_attrs.
 */
#define NV_VISION_FILTER_ID_MASK GENMASK_ULL(19, 0)
|
||||
|
||||
/*
 * Display filter: bit 0 only. display_pmu_format_attrs exposes no filter
 * field; the match entry uses this value as filter_default_val with a
 * filter_mask of 0.
 */
#define NV_DISPLAY_FILTER_ID_MASK BIT(0)
|
||||
|
||||
/*
 * UCF-GPU filter: bit 0 only. ucf_gpu_pmu_format_attrs exposes no filter
 * field; the match entry uses this value as filter_default_val with a
 * filter_mask of 0.
 */
#define NV_UCF_GPU_FILTER_ID_MASK BIT(0)
|
||||
|
||||
#define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0)
|
||||
|
||||
#define NV_PRODID_MASK (ARM_CSPMU_PMIIDR_PRODUCTID | \
|
||||
@@ -178,6 +188,72 @@ static struct attribute *mcf_pmu_event_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * Named events for the UCF PMU (used by the "nvidia_ucf_pmu_%u" match entry).
 * Each ARM_CSPMU_EVENT_ATTR(name, id) entry pairs an event name with an
 * event id; the list is NULL-terminated.
 */
static struct attribute *ucf_pmu_event_attrs[] = {
|
||||
/* slc_* and snoop events. */
ARM_CSPMU_EVENT_ATTR(slc_allocate, 0xf0),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_refill, 0xf1),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_access, 0xf2),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_wb, 0xf3),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_hit, 0x118),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_access_wr, 0x112),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_access_rd, 0x111),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_refill_wr, 0x10a),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_refill_rd, 0x109),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_hit_wr, 0x11a),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_hit_rd, 0x119),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_access_dataless, 0x183),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_access_atomic, 0x184),
|
||||
ARM_CSPMU_EVENT_ATTR(local_snoop, 0x180),
|
||||
ARM_CSPMU_EVENT_ATTR(ext_snp_access, 0x181),
|
||||
ARM_CSPMU_EVENT_ATTR(ext_snp_evict, 0x182),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(ucf_bus_cycles, 0x1d),
|
||||
|
||||
/*
 * NOTE(review): any_access_wr (0x112) and any_access_rd (0x111) carry the
 * same event ids as slc_access_wr and slc_access_rd above. Confirm that
 * the aliasing is intentional and not a copy/paste slip.
 */
ARM_CSPMU_EVENT_ATTR(any_access_wr, 0x112),
|
||||
ARM_CSPMU_EVENT_ATTR(any_access_rd, 0x111),
|
||||
ARM_CSPMU_EVENT_ATTR(any_byte_wr, 0x114),
|
||||
ARM_CSPMU_EVENT_ATTR(any_byte_rd, 0x113),
|
||||
ARM_CSPMU_EVENT_ATTR(any_outstanding_rd, 0x115),
|
||||
|
||||
/* local_dram_* events. */
ARM_CSPMU_EVENT_ATTR(local_dram_access_wr, 0x122),
|
||||
ARM_CSPMU_EVENT_ATTR(local_dram_access_rd, 0x121),
|
||||
ARM_CSPMU_EVENT_ATTR(local_dram_byte_wr, 0x124),
|
||||
ARM_CSPMU_EVENT_ATTR(local_dram_byte_rd, 0x123),
|
||||
|
||||
/* mmio_* events. */
ARM_CSPMU_EVENT_ATTR(mmio_access_wr, 0x132),
|
||||
ARM_CSPMU_EVENT_ATTR(mmio_access_rd, 0x131),
|
||||
ARM_CSPMU_EVENT_ATTR(mmio_byte_wr, 0x134),
|
||||
ARM_CSPMU_EVENT_ATTR(mmio_byte_rd, 0x133),
|
||||
ARM_CSPMU_EVENT_ATTR(mmio_outstanding_rd, 0x135),
|
||||
|
||||
/* Event id supplied by ARM_CSPMU_EVT_CYCLES_DEFAULT (defined outside this view). */
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * Named events for the display PMU (used by the "nvidia_display_pmu_%u"
 * match entry). Each entry pairs an event name with an event id; the list
 * is NULL-terminated.
 */
static struct attribute *display_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(rd_bytes_loc, 0x0),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_req_loc, 0x6),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc, 0xc),
|
||||
|
||||
/* Event id supplied by ARM_CSPMU_EVT_CYCLES_DEFAULT (defined outside this view). */
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * Named events for the UCF-GPU PMU (used by the "nvidia_ucf_gpu_pmu_%u"
 * match entry). Each entry pairs an event name with an event id; the list
 * is NULL-terminated.
 */
static struct attribute *ucf_gpu_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(rd_bytes_loc_rem, 0x0),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_bytes_loc, 0x2),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_bytes_rem, 0x3),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_req_loc_rem, 0x6),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_req_loc, 0x8),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_req_rem, 0x9),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc_rem, 0xc),
|
||||
|
||||
/* Event id supplied by ARM_CSPMU_EVT_CYCLES_DEFAULT (defined outside this view). */
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *generic_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
NULL,
|
||||
@@ -205,6 +281,54 @@ static struct attribute *cnvlink_pmu_format_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * perf format fields for the UCF PMU (used by the "nvidia_ucf_pmu_%u" match
 * entry): the event field plus five single-bit source/destination filter
 * selectors in config1 bits 0..4. Per the usage examples in the PMU
 * documentation, src_loc_cpu selects the local socket's CPUs, src_rem a
 * remote source, and dst_loc/dst_rem local/remote DRAM.
 */
static struct attribute *ucf_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_ATTR(src_loc_noncpu, "config1:0"),
|
||||
ARM_CSPMU_FORMAT_ATTR(src_loc_cpu, "config1:1"),
|
||||
ARM_CSPMU_FORMAT_ATTR(src_rem, "config1:2"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_loc, "config1:3"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_rem, "config1:4"),
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * perf format fields for the display PMU (used by the
 * "nvidia_display_pmu_%u" match entry): only the event field, no filter
 * field is exposed.
 */
static struct attribute *display_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * perf format fields for the UCF-GPU PMU (used by the
 * "nvidia_ucf_gpu_pmu_%u" match entry): only the event field, no filter
 * field is exposed.
 */
static struct attribute *ucf_gpu_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * perf format fields for the high-speed I/O (UPHY) PMU (used by the
 * "nvidia_uphy_pmu_%u" match entry): the event field plus one filter field
 * per I/O, packed into config1 bits 0..16. Every field is a single bit
 * except xusb, which spans six bits (config1:5-10).
 */
static struct attribute *uphy_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_ATTR(pcie_rp_1, "config1:0"),
|
||||
ARM_CSPMU_FORMAT_ATTR(pcie_rp_2, "config1:1"),
|
||||
ARM_CSPMU_FORMAT_ATTR(pcie_rp_3, "config1:2"),
|
||||
ARM_CSPMU_FORMAT_ATTR(pcie_rp_4, "config1:3"),
|
||||
ARM_CSPMU_FORMAT_ATTR(pcie_rp_5, "config1:4"),
|
||||
ARM_CSPMU_FORMAT_ATTR(xusb, "config1:5-10"),
|
||||
ARM_CSPMU_FORMAT_ATTR(mgbe_0, "config1:11"),
|
||||
ARM_CSPMU_FORMAT_ATTR(mgbe_1, "config1:12"),
|
||||
ARM_CSPMU_FORMAT_ATTR(mgbe_2, "config1:13"),
|
||||
ARM_CSPMU_FORMAT_ATTR(mgbe_3, "config1:14"),
|
||||
ARM_CSPMU_FORMAT_ATTR(eqos, "config1:15"),
|
||||
ARM_CSPMU_FORMAT_ATTR(ufs, "config1:16"),
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * perf format fields for the vision PMU (used by the "nvidia_vision_pmu_%u"
 * match entry): the event field plus one multi-bit filter field per
 * multimedia IP, packed into config1 bits 0..19. Per the PMU documentation,
 * each field is a bitmask selecting the AXI/hub interfaces of that IP
 * (e.g. vi_0=0x3 selects interfaces 0 and 1 of VI-0).
 */
static struct attribute *vision_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_ATTR(vi_0, "config1:0-1"),
|
||||
ARM_CSPMU_FORMAT_ATTR(vi_1, "config1:2-3"),
|
||||
ARM_CSPMU_FORMAT_ATTR(isp_0, "config1:4-7"),
|
||||
ARM_CSPMU_FORMAT_ATTR(isp_1, "config1:8-11"),
|
||||
ARM_CSPMU_FORMAT_ATTR(vic, "config1:12-13"),
|
||||
ARM_CSPMU_FORMAT_ATTR(pva, "config1:14-19"),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *generic_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_FILTER_ATTR,
|
||||
@@ -313,6 +437,56 @@ static const struct nv_cspmu_match nv_cspmu_match[] = {
|
||||
.event_attr = scf_pmu_event_attrs,
|
||||
.format_attr = scf_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0x2CF10000,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.filter_mask = NV_UCF_FILTER_ID_MASK,
|
||||
.filter_default_val = NV_UCF_FILTER_ID_MASK,
|
||||
.name_pattern = "nvidia_ucf_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.event_attr = ucf_pmu_event_attrs,
|
||||
.format_attr = ucf_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0x10800000,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.filter_mask = NV_UPHY_FILTER_ID_MASK,
|
||||
.filter_default_val = NV_UPHY_FILTER_ID_MASK,
|
||||
.name_pattern = "nvidia_uphy_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.event_attr = mcf_pmu_event_attrs,
|
||||
.format_attr = uphy_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0x10a00000,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.filter_mask = 0,
|
||||
.filter_default_val = NV_UCF_GPU_FILTER_ID_MASK,
|
||||
.name_pattern = "nvidia_ucf_gpu_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.event_attr = ucf_gpu_pmu_event_attrs,
|
||||
.format_attr = ucf_gpu_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0x10d00000,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.filter_mask = 0,
|
||||
.filter_default_val = NV_DISPLAY_FILTER_ID_MASK,
|
||||
.name_pattern = "nvidia_display_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.event_attr = display_pmu_event_attrs,
|
||||
.format_attr = display_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0x10e00000,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.filter_mask = NV_VISION_FILTER_ID_MASK,
|
||||
.filter_default_val = NV_VISION_FILTER_ID_MASK,
|
||||
.name_pattern = "nvidia_vision_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.event_attr = mcf_pmu_event_attrs,
|
||||
.format_attr = vision_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0,
|
||||
.prodid_mask = 0,
|
||||
|
||||
Reference in New Issue
Block a user