aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds2022-01-10 08:49:37 -0800
committerLinus Torvalds2022-01-10 08:49:37 -0800
commit9b9e211360044c12d7738973c944d6f300134881 (patch)
treef994a379135e70397df3c653e6578ad7e5d295fe /drivers
parenta7ac314061375c7805e0d3a26aad6eb0c41100df (diff)
parent945409a6ef442cfe5f2f14e5626d4306d53100f0 (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: - KCSAN enabled for arm64. - Additional kselftests to exercise the syscall ABI w.r.t. SVE/FPSIMD. - Some more SVE clean-ups and refactoring in preparation for SME support (scalable matrix extensions). - BTI clean-ups (SYM_FUNC macros etc.) - arm64 atomics clean-up and codegen improvements. - HWCAPs for FEAT_AFP (alternate floating point behaviour) and FEAT_RPRESS (increased precision of reciprocal estimate and reciprocal square root estimate). - Use SHA3 instructions to speed-up XOR. - arm64 unwind code refactoring/unification. - Avoid DC (data cache maintenance) instructions when DCZID_EL0.DZP == 1 (potentially set by a hypervisor; user-space already does this). - Perf updates for arm64: support for CI-700, HiSilicon PCIe PMU, Marvell CN10K LLC-TAD PMU, miscellaneous clean-ups. - Other fixes and clean-ups; highlights: fix the handling of erratum 1418040, correct the calculation of the nomap region boundaries, introduce io_stop_wc() mapped to the new DGH instruction (data gathering hint). * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (81 commits) arm64: Use correct method to calculate nomap region boundaries arm64: Drop outdated links in comments arm64: perf: Don't register user access sysctl handler multiple times drivers: perf: marvell_cn10k: fix an IS_ERR() vs NULL check perf/smmuv3: Fix unused variable warning when CONFIG_OF=n arm64: errata: Fix exec handling in erratum 1418040 workaround arm64: Unhash early pointer print plus improve comment asm-generic: introduce io_stop_wc() and add implementation for ARM64 arm64: Ensure that the 'bti' macro is defined where linkage.h is included arm64: remove __dma_*_area() aliases docs/arm64: delete a space from tagged-address-abi arm64: Enable KCSAN kselftest/arm64: Add pidbench for floating point syscall cases arm64/fp: Add comments documenting the usage of state restore functions kselftest/arm64: Add a test program to exercise the syscall ABI kselftest/arm64: Allow signal tests to trigger from a function kselftest/arm64: Parameterise ptrace vector length information arm64/sve: Minor clarification of ABI documentation arm64/sve: Generalise vector length configuration prctl() for SME arm64/sve: Make sysctl interface for SVE reusable by SME ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/perf/Kconfig9
-rw-r--r--drivers/perf/Makefile1
-rw-r--r--drivers/perf/arm-cmn.c1111
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c68
-rw-r--r--drivers/perf/hisilicon/Kconfig9
-rw-r--r--drivers/perf/hisilicon/Makefile2
-rw-r--r--drivers/perf/hisilicon/hisi_pcie_pmu.c948
-rw-r--r--drivers/perf/marvell_cn10k_tad_pmu.c429
8 files changed, 2213 insertions, 364 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 4374af292e6d..e1a0c44bc686 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -43,7 +43,7 @@ config ARM_CCN
config ARM_CMN
tristate "Arm CMN-600 PMU support"
- depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on ARM64 || COMPILE_TEST
help
Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh
Network interconnect.
@@ -139,6 +139,13 @@ config ARM_DMC620_PMU
Support for PMU events monitoring on the ARM DMC-620 memory
controller.
+config MARVELL_CN10K_TAD_PMU
+ tristate "Marvell CN10K LLC-TAD PMU"
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
+ help
+ Provides support for Last-Level cache Tag-and-data Units (LLC-TAD)
+ performance monitors on CN10K family silicons.
+
source "drivers/perf/hisilicon/Kconfig"
endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 5260b116c7da..2db5418d5b0a 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -14,3 +14,4 @@ obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
+obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index bc3cba5f8c5d..0e48adce57ef 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -5,8 +5,10 @@
#include <linux/acpi.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
+#include <linux/debugfs.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
@@ -23,7 +25,10 @@
#define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32)
#define CMN_NODEID_DEVID(reg) ((reg) & 3)
+#define CMN_NODEID_EXT_DEVID(reg) ((reg) & 1)
#define CMN_NODEID_PID(reg) (((reg) >> 2) & 1)
+#define CMN_NODEID_EXT_PID(reg) (((reg) >> 1) & 3)
+#define CMN_NODEID_1x1_PID(reg) (((reg) >> 2) & 7)
#define CMN_NODEID_X(reg, bits) ((reg) >> (3 + (bits)))
#define CMN_NODEID_Y(reg, bits) (((reg) >> 3) & ((1U << (bits)) - 1))
@@ -34,20 +39,28 @@
#define CMN_CHILD_NODE_ADDR GENMASK(27, 0)
#define CMN_CHILD_NODE_EXTERNAL BIT(31)
-#define CMN_ADDR_NODE_PTR GENMASK(27, 14)
+#define CMN_MAX_DIMENSION 8
+#define CMN_MAX_XPS (CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
+#define CMN_MAX_DTMS (CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
-#define CMN_NODE_PTR_DEVID(ptr) (((ptr) >> 2) & 3)
-#define CMN_NODE_PTR_PID(ptr) ((ptr) & 1)
-#define CMN_NODE_PTR_X(ptr, bits) ((ptr) >> (6 + (bits)))
-#define CMN_NODE_PTR_Y(ptr, bits) (((ptr) >> 6) & ((1U << (bits)) - 1))
-
-#define CMN_MAX_XPS (8 * 8)
-
-/* The CFG node has one other useful purpose */
+/* The CFG node has various info besides the discovery tree */
#define CMN_CFGM_PERIPH_ID_2 0x0010
#define CMN_CFGM_PID2_REVISION GENMASK(7, 4)
-/* PMU registers occupy the 3rd 4KB page of each node's 16KB space */
+#define CMN_CFGM_INFO_GLOBAL 0x900
+#define CMN_INFO_MULTIPLE_DTM_EN BIT_ULL(63)
+#define CMN_INFO_RSP_VC_NUM GENMASK_ULL(53, 52)
+#define CMN_INFO_DAT_VC_NUM GENMASK_ULL(51, 50)
+
+/* XPs also have some local topology info which has uses too */
+#define CMN_MXP__CONNECT_INFO_P0 0x0008
+#define CMN_MXP__CONNECT_INFO_P1 0x0010
+#define CMN_MXP__CONNECT_INFO_P2 0x0028
+#define CMN_MXP__CONNECT_INFO_P3 0x0030
+#define CMN_MXP__CONNECT_INFO_P4 0x0038
+#define CMN_MXP__CONNECT_INFO_P5 0x0040
+
+/* PMU registers occupy the 3rd 4KB page of each node's region */
#define CMN_PMU_OFFSET 0x2000
/* For most nodes, this is all there is */
@@ -57,6 +70,7 @@
/* DTMs live in the PMU space of XP registers */
#define CMN_DTM_WPn(n) (0x1A0 + (n) * 0x18)
#define CMN_DTM_WPn_CONFIG(n) (CMN_DTM_WPn(n) + 0x00)
+#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2 GENMASK_ULL(18,17)
#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(6)
#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(5)
#define CMN_DTM_WPn_CONFIG_WP_GRP BIT(4)
@@ -81,7 +95,11 @@
#define CMN_DTM_PMEVCNTSR 0x240
+#define CMN_DTM_UNIT_INFO 0x0910
+
#define CMN_DTM_NUM_COUNTERS 4
+/* Want more local counters? Why not replicate the whole DTM! Ugh... */
+#define CMN_DTM_OFFSET(n) ((n) * 0x200)
/* The DTC node is where the magic happens */
#define CMN_DT_DTC_CTL 0x0a00
@@ -122,11 +140,11 @@
/* Event attributes */
-#define CMN_CONFIG_TYPE GENMASK(15, 0)
-#define CMN_CONFIG_EVENTID GENMASK(23, 16)
-#define CMN_CONFIG_OCCUPID GENMASK(27, 24)
-#define CMN_CONFIG_BYNODEID BIT(31)
-#define CMN_CONFIG_NODEID GENMASK(47, 32)
+#define CMN_CONFIG_TYPE GENMASK_ULL(15, 0)
+#define CMN_CONFIG_EVENTID GENMASK_ULL(23, 16)
+#define CMN_CONFIG_OCCUPID GENMASK_ULL(27, 24)
+#define CMN_CONFIG_BYNODEID BIT_ULL(31)
+#define CMN_CONFIG_NODEID GENMASK_ULL(47, 32)
#define CMN_EVENT_TYPE(event) FIELD_GET(CMN_CONFIG_TYPE, (event)->attr.config)
#define CMN_EVENT_EVENTID(event) FIELD_GET(CMN_CONFIG_EVENTID, (event)->attr.config)
@@ -134,13 +152,13 @@
#define CMN_EVENT_BYNODEID(event) FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config)
#define CMN_EVENT_NODEID(event) FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config)
-#define CMN_CONFIG_WP_COMBINE GENMASK(27, 24)
-#define CMN_CONFIG_WP_DEV_SEL BIT(48)
-#define CMN_CONFIG_WP_CHN_SEL GENMASK(50, 49)
-#define CMN_CONFIG_WP_GRP BIT(52)
-#define CMN_CONFIG_WP_EXCLUSIVE BIT(53)
-#define CMN_CONFIG1_WP_VAL GENMASK(63, 0)
-#define CMN_CONFIG2_WP_MASK GENMASK(63, 0)
+#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24)
+#define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48)
+#define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51)
+#define CMN_CONFIG_WP_GRP BIT_ULL(56)
+#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57)
+#define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0)
+#define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0)
#define CMN_EVENT_WP_COMBINE(event) FIELD_GET(CMN_CONFIG_WP_COMBINE, (event)->attr.config)
#define CMN_EVENT_WP_DEV_SEL(event) FIELD_GET(CMN_CONFIG_WP_DEV_SEL, (event)->attr.config)
@@ -155,7 +173,13 @@
#define CMN_WP_DOWN 2
-/* r0px probably don't exist in silicon, thankfully */
+enum cmn_model {
+ CMN_ANY = -1,
+ CMN600 = 1,
+ CI700 = 2,
+};
+
+/* CMN-600 r0px shouldn't exist in silicon, thankfully */
enum cmn_revision {
CMN600_R1P0,
CMN600_R1P1,
@@ -163,6 +187,10 @@ enum cmn_revision {
CMN600_R1P3,
CMN600_R2P0,
CMN600_R3P0,
+ CMN600_R3P1,
+ CI700_R0P0 = 0,
+ CI700_R1P0,
+ CI700_R2P0,
};
enum cmn_node_type {
@@ -174,9 +202,12 @@ enum cmn_node_type {
CMN_TYPE_HNF,
CMN_TYPE_XP,
CMN_TYPE_SBSX,
- CMN_TYPE_RNI = 0xa,
+ CMN_TYPE_MPAM_S,
+ CMN_TYPE_MPAM_NS,
+ CMN_TYPE_RNI,
CMN_TYPE_RND = 0xd,
CMN_TYPE_RNSAM = 0xf,
+ CMN_TYPE_MTSX,
CMN_TYPE_CXRA = 0x100,
CMN_TYPE_CXHA = 0x101,
CMN_TYPE_CXLA = 0x102,
@@ -189,32 +220,32 @@ struct arm_cmn_node {
u16 id, logid;
enum cmn_node_type type;
+ int dtm;
union {
- /* Device node */
+ /* DN/HN-F/CXHA */
struct {
- int to_xp;
- /* DN/HN-F/CXHA */
- unsigned int occupid_val;
- unsigned int occupid_count;
+ u8 occupid_val;
+ u8 occupid_count;
};
/* XP */
- struct {
- int dtc;
- u32 pmu_config_low;
- union {
- u8 input_sel[4];
- __le32 pmu_config_high;
- };
- s8 wp_event[4];
- };
+ u8 dtc;
};
-
union {
u8 event[4];
__le32 event_sel;
};
};
+struct arm_cmn_dtm {
+ void __iomem *base;
+ u32 pmu_config_low;
+ union {
+ u8 input_sel[4];
+ __le32 pmu_config_high;
+ };
+ s8 wp_event[4];
+};
+
struct arm_cmn_dtc {
void __iomem *base;
int irq;
@@ -231,35 +262,238 @@ struct arm_cmn_dtc {
struct arm_cmn {
struct device *dev;
void __iomem *base;
+ unsigned int state;
enum cmn_revision rev;
+ enum cmn_model model;
u8 mesh_x;
u8 mesh_y;
u16 num_xps;
u16 num_dns;
+ bool multi_dtm;
+ u8 ports_used;
+ struct {
+ unsigned int rsp_vc_num : 2;
+ unsigned int dat_vc_num : 2;
+ };
+
struct arm_cmn_node *xps;
struct arm_cmn_node *dns;
+ struct arm_cmn_dtm *dtms;
struct arm_cmn_dtc *dtc;
unsigned int num_dtcs;
int cpu;
struct hlist_node cpuhp_node;
- unsigned int state;
struct pmu pmu;
+ struct dentry *debug;
};
#define to_cmn(p) container_of(p, struct arm_cmn, pmu)
static int arm_cmn_hp_state;
+struct arm_cmn_nodeid {
+ u8 x;
+ u8 y;
+ u8 port;
+ u8 dev;
+};
+
+static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
+{
+ int dim = max(cmn->mesh_x, cmn->mesh_y);
+
+ return dim > 4 ? 3 : 2;
+}
+
+static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
+{
+ struct arm_cmn_nodeid nid;
+
+ if (cmn->num_xps == 1) {
+ nid.x = 0;
+ nid.y = 0;
+ nid.port = CMN_NODEID_1x1_PID(id);
+ nid.dev = CMN_NODEID_DEVID(id);
+ } else {
+ int bits = arm_cmn_xyidbits(cmn);
+
+ nid.x = CMN_NODEID_X(id, bits);
+ nid.y = CMN_NODEID_Y(id, bits);
+ if (cmn->ports_used & 0xc) {
+ nid.port = CMN_NODEID_EXT_PID(id);
+ nid.dev = CMN_NODEID_EXT_DEVID(id);
+ } else {
+ nid.port = CMN_NODEID_PID(id);
+ nid.dev = CMN_NODEID_DEVID(id);
+ }
+ }
+ return nid;
+}
+
+static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
+ const struct arm_cmn_node *dn)
+{
+ struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+ int xp_idx = cmn->mesh_x * nid.y + nid.x;
+
+ return cmn->xps + xp_idx;
+}
+static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
+ enum cmn_node_type type)
+{
+ struct arm_cmn_node *dn;
+
+ for (dn = cmn->dns; dn->type; dn++)
+ if (dn->type == type)
+ return dn;
+ return NULL;
+}
+
+struct dentry *arm_cmn_debugfs;
+
+#ifdef CONFIG_DEBUG_FS
+static const char *arm_cmn_device_type(u8 type)
+{
+ switch(type) {
+ case 0x01: return " RN-I |";
+ case 0x02: return " RN-D |";
+ case 0x04: return " RN-F_B |";
+ case 0x05: return "RN-F_B_E|";
+ case 0x06: return " RN-F_A |";
+ case 0x07: return "RN-F_A_E|";
+ case 0x08: return " HN-T |";
+ case 0x09: return " HN-I |";
+ case 0x0a: return " HN-D |";
+ case 0x0c: return " SN-F |";
+ case 0x0d: return " SBSX |";
+ case 0x0e: return " HN-F |";
+ case 0x0f: return " SN-F_E |";
+ case 0x10: return " SN-F_D |";
+ case 0x11: return " CXHA |";
+ case 0x12: return " CXRA |";
+ case 0x13: return " CXRH |";
+ case 0x14: return " RN-F_D |";
+ case 0x15: return "RN-F_D_E|";
+ case 0x16: return " RN-F_C |";
+ case 0x17: return "RN-F_C_E|";
+ case 0x1c: return " MTSX |";
+ default: return " |";
+ }
+}
+
+static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
+{
+ struct arm_cmn *cmn = s->private;
+ struct arm_cmn_node *dn;
+
+ for (dn = cmn->dns; dn->type; dn++) {
+ struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+
+ if (dn->type == CMN_TYPE_XP)
+ continue;
+ /* Ignore the extra components that will overlap on some ports */
+ if (dn->type < CMN_TYPE_HNI)
+ continue;
+
+ if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
+ continue;
+
+ seq_printf(s, " #%-2d |", dn->logid);
+ return;
+ }
+ seq_puts(s, " |");
+}
+
+static int arm_cmn_map_show(struct seq_file *s, void *data)
+{
+ struct arm_cmn *cmn = s->private;
+ int x, y, p, pmax = fls(cmn->ports_used);
+
+ seq_puts(s, " X");
+ for (x = 0; x < cmn->mesh_x; x++)
+ seq_printf(s, " %d ", x);
+ seq_puts(s, "\nY P D+");
+ y = cmn->mesh_y;
+ while (y--) {
+ int xp_base = cmn->mesh_x * y;
+ u8 port[6][CMN_MAX_DIMENSION];
+
+ for (x = 0; x < cmn->mesh_x; x++)
+ seq_puts(s, "--------+");
+
+ seq_printf(s, "\n%d |", y);
+ for (x = 0; x < cmn->mesh_x; x++) {
+ struct arm_cmn_node *xp = cmn->xps + xp_base + x;
+ void __iomem *base = xp->pmu_base - CMN_PMU_OFFSET;
+
+ port[0][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P0);
+ port[1][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P1);
+ port[2][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P2);
+ port[3][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P3);
+ port[4][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P4);
+ port[5][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P5);
+ seq_printf(s, " XP #%-2d |", xp_base + x);
+ }
+
+ seq_puts(s, "\n |");
+ for (x = 0; x < cmn->mesh_x; x++) {
+ u8 dtc = cmn->xps[xp_base + x].dtc;
+
+ if (dtc & (dtc - 1))
+ seq_puts(s, " DTC ?? |");
+ else
+ seq_printf(s, " DTC %ld |", __ffs(dtc));
+ }
+ seq_puts(s, "\n |");
+ for (x = 0; x < cmn->mesh_x; x++)
+ seq_puts(s, "........|");
+
+ for (p = 0; p < pmax; p++) {
+ seq_printf(s, "\n %d |", p);
+ for (x = 0; x < cmn->mesh_x; x++)
+ seq_puts(s, arm_cmn_device_type(port[p][x]));
+ seq_puts(s, "\n 0|");
+ for (x = 0; x < cmn->mesh_x; x++)
+ arm_cmn_show_logid(s, x, y, p, 0);
+ seq_puts(s, "\n 1|");
+ for (x = 0; x < cmn->mesh_x; x++)
+ arm_cmn_show_logid(s, x, y, p, 1);
+ }
+ seq_puts(s, "\n-----+");
+ }
+ for (x = 0; x < cmn->mesh_x; x++)
+ seq_puts(s, "--------+");
+ seq_puts(s, "\n");
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(arm_cmn_map);
+
+static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id)
+{
+ const char *name = "map";
+
+ if (id > 0)
+ name = devm_kasprintf(cmn->dev, GFP_KERNEL, "map_%d", id);
+ if (!name)
+ return;
+
+ cmn->debug = debugfs_create_file(name, 0444, arm_cmn_debugfs, cmn, &arm_cmn_map_fops);
+}
+#else
+static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
+#endif
+
struct arm_cmn_hw_event {
struct arm_cmn_node *dn;
u64 dtm_idx[2];
unsigned int dtc_idx;
u8 dtcs_used;
u8 num_dns;
+ u8 dtm_offset;
};
#define for_each_hw_dn(hw, dn, i) \
@@ -283,6 +517,7 @@ static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos)
struct arm_cmn_event_attr {
struct device_attribute attr;
+ enum cmn_model model;
enum cmn_node_type type;
u8 eventid;
u8 occupid;
@@ -294,50 +529,22 @@ struct arm_cmn_format_attr {
int config;
};
-static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
-{
- return cmn->mesh_x > 4 || cmn->mesh_y > 4 ? 3 : 2;
-}
-
-static void arm_cmn_init_node_to_xp(const struct arm_cmn *cmn,
- struct arm_cmn_node *dn)
-{
- int bits = arm_cmn_xyidbits(cmn);
- int x = CMN_NODEID_X(dn->id, bits);
- int y = CMN_NODEID_Y(dn->id, bits);
- int xp_idx = cmn->mesh_x * y + x;
-
- dn->to_xp = (cmn->xps + xp_idx) - dn;
-}
-
-static struct arm_cmn_node *arm_cmn_node_to_xp(struct arm_cmn_node *dn)
-{
- return dn->type == CMN_TYPE_XP ? dn : dn + dn->to_xp;
-}
-
-static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
- enum cmn_node_type type)
-{
- int i;
-
- for (i = 0; i < cmn->num_dns; i++)
- if (cmn->dns[i].type == type)
- return &cmn->dns[i];
- return NULL;
-}
-
-#define CMN_EVENT_ATTR(_name, _type, _eventid, _occupid) \
+#define CMN_EVENT_ATTR(_model, _name, _type, _eventid, _occupid) \
(&((struct arm_cmn_event_attr[]) {{ \
.attr = __ATTR(_name, 0444, arm_cmn_event_show, NULL), \
+ .model = _model, \
.type = _type, \
.eventid = _eventid, \
.occupid = _occupid, \
}})[0].attr.attr)
-static bool arm_cmn_is_occup_event(enum cmn_node_type type, unsigned int id)
+static bool arm_cmn_is_occup_event(enum cmn_model model,
+ enum cmn_node_type type, unsigned int id)
{
- return (type == CMN_TYPE_DVM && id == 0x05) ||
- (type == CMN_TYPE_HNF && id == 0x0f);
+ if (type == CMN_TYPE_DVM)
+ return (model == CMN600 && id == 0x05) ||
+ (model == CI700 && id == 0x0c);
+ return type == CMN_TYPE_HNF && id == 0x0f;
}
static ssize_t arm_cmn_event_show(struct device *dev,
@@ -355,7 +562,7 @@ static ssize_t arm_cmn_event_show(struct device *dev,
"type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n",
eattr->type, eattr->eventid);
- if (arm_cmn_is_occup_event(eattr->type, eattr->eventid))
+ if (arm_cmn_is_occup_event(eattr->model, eattr->type, eattr->eventid))
return sysfs_emit(buf, "type=0x%x,eventid=0x%x,occupid=0x%x\n",
eattr->type, eattr->eventid, eattr->occupid);
@@ -370,60 +577,81 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
struct device *dev = kobj_to_dev(kobj);
struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
struct arm_cmn_event_attr *eattr;
- enum cmn_node_type type;
eattr = container_of(attr, typeof(*eattr), attr.attr);
- type = eattr->type;
- /* Watchpoints aren't nodes */
- if (type == CMN_TYPE_WP)
- type = CMN_TYPE_XP;
+ if (!(eattr->model & cmn->model))
+ return 0;
+
+ /* Watchpoints aren't nodes, so avoid confusion */
+ if (eattr->type == CMN_TYPE_WP)
+ return attr->mode;
+
+ /* Hide XP events for unused interfaces/channels */
+ if (eattr->type == CMN_TYPE_XP) {
+ unsigned int intf = (eattr->eventid >> 2) & 7;
+ unsigned int chan = eattr->eventid >> 5;
+
+ if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3)))
+ return 0;
+
+ if ((chan == 5 && cmn->rsp_vc_num < 2) ||
+ (chan == 6 && cmn->dat_vc_num < 2))
+ return 0;
+ }
/* Revision-specific differences */
- if (cmn->rev < CMN600_R1P2) {
- if (type == CMN_TYPE_HNF && eattr->eventid == 0x1b)
+ if (cmn->model == CMN600 && cmn->rev < CMN600_R1P2) {
+ if (eattr->type == CMN_TYPE_HNF && eattr->eventid == 0x1b)
return 0;
}
- if (!arm_cmn_node(cmn, type))
+ if (!arm_cmn_node(cmn, eattr->type))
return 0;
return attr->mode;
}
-#define _CMN_EVENT_DVM(_name, _event, _occup) \
- CMN_EVENT_ATTR(dn_##_name, CMN_TYPE_DVM, _event, _occup)
+#define _CMN_EVENT_DVM(_model, _name, _event, _occup) \
+ CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup)
#define CMN_EVENT_DTC(_name) \
- CMN_EVENT_ATTR(dtc_##_name, CMN_TYPE_DTC, 0, 0)
-#define _CMN_EVENT_HNF(_name, _event, _occup) \
- CMN_EVENT_ATTR(hnf_##_name, CMN_TYPE_HNF, _event, _occup)
+ CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0, 0)
+#define _CMN_EVENT_HNF(_model, _name, _event, _occup) \
+ CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event, _occup)
#define CMN_EVENT_HNI(_name, _event) \
- CMN_EVENT_ATTR(hni_##_name, CMN_TYPE_HNI, _event, 0)
+ CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event, 0)
#define __CMN_EVENT_XP(_name, _event) \
- CMN_EVENT_ATTR(mxp_##_name, CMN_TYPE_XP, _event, 0)
-#define CMN_EVENT_SBSX(_name, _event) \
- CMN_EVENT_ATTR(sbsx_##_name, CMN_TYPE_SBSX, _event, 0)
-#define CMN_EVENT_RNID(_name, _event) \
- CMN_EVENT_ATTR(rnid_##_name, CMN_TYPE_RNI, _event, 0)
-
-#define CMN_EVENT_DVM(_name, _event) \
- _CMN_EVENT_DVM(_name, _event, 0)
-#define CMN_EVENT_HNF(_name, _event) \
- _CMN_EVENT_HNF(_name, _event, 0)
+ CMN_EVENT_ATTR(CMN_ANY, mxp_##_name, CMN_TYPE_XP, _event, 0)
+#define CMN_EVENT_SBSX(_model, _name, _event) \
+ CMN_EVENT_ATTR(_model, sbsx_##_name, CMN_TYPE_SBSX, _event, 0)
+#define CMN_EVENT_RNID(_model, _name, _event) \
+ CMN_EVENT_ATTR(_model, rnid_##_name, CMN_TYPE_RNI, _event, 0)
+#define CMN_EVENT_MTSX(_name, _event) \
+ CMN_EVENT_ATTR(CMN_ANY, mtsx_##_name, CMN_TYPE_MTSX, _event, 0)
+
+#define CMN_EVENT_DVM(_model, _name, _event) \
+ _CMN_EVENT_DVM(_model, _name, _event, 0)
+#define CMN_EVENT_HNF(_model, _name, _event) \
+ _CMN_EVENT_HNF(_model, _name, _event, 0)
#define _CMN_EVENT_XP(_name, _event) \
__CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \
__CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)), \
__CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)), \
__CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)), \
__CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)), \
- __CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2))
+ __CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)), \
+ __CMN_EVENT_XP(p2_##_name, (_event) | (6 << 2)), \
+ __CMN_EVENT_XP(p3_##_name, (_event) | (7 << 2))
/* Good thing there are only 3 fundamental XP events... */
#define CMN_EVENT_XP(_name, _event) \
_CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)), \
_CMN_EVENT_XP(rsp_##_name, (_event) | (1 << 5)), \
_CMN_EVENT_XP(snp_##_name, (_event) | (2 << 5)), \
- _CMN_EVENT_XP(dat_##_name, (_event) | (3 << 5))
+ _CMN_EVENT_XP(dat_##_name, (_event) | (3 << 5)), \
+ _CMN_EVENT_XP(pub_##_name, (_event) | (4 << 5)), \
+ _CMN_EVENT_XP(rsp2_##_name, (_event) | (5 << 5)), \
+ _CMN_EVENT_XP(dat2_##_name, (_event) | (6 << 5))
static struct attribute *arm_cmn_event_attrs[] = {
@@ -434,115 +662,152 @@ static struct attribute *arm_cmn_event_attrs[] = {
* slot, but our lazy short-cut of using the DTM counter index for
* the PMU index as well happens to avoid that by construction.
*/
- CMN_EVENT_DVM(rxreq_dvmop, 0x01),
- CMN_EVENT_DVM(rxreq_dvmsync, 0x02),
- CMN_EVENT_DVM(rxreq_dvmop_vmid_filtered, 0x03),
- CMN_EVENT_DVM(rxreq_retried, 0x04),
- _CMN_EVENT_DVM(rxreq_trk_occupancy_all, 0x05, 0),
- _CMN_EVENT_DVM(rxreq_trk_occupancy_dvmop, 0x05, 1),
- _CMN_EVENT_DVM(rxreq_trk_occupancy_dvmsync, 0x05, 2),
-
- CMN_EVENT_HNF(cache_miss, 0x01),
- CMN_EVENT_HNF(slc_sf_cache_access, 0x02),
- CMN_EVENT_HNF(cache_fill, 0x03),
- CMN_EVENT_HNF(pocq_retry, 0x04),
- CMN_EVENT_HNF(pocq_reqs_recvd, 0x05),
- CMN_EVENT_HNF(sf_hit, 0x06),
- CMN_EVENT_HNF(sf_evictions, 0x07),
- CMN_EVENT_HNF(dir_snoops_sent, 0x08),
- CMN_EVENT_HNF(brd_snoops_sent, 0x09),
- CMN_EVENT_HNF(slc_eviction, 0x0a),
- CMN_EVENT_HNF(slc_fill_invalid_way, 0x0b),
- CMN_EVENT_HNF(mc_retries, 0x0c),
- CMN_EVENT_HNF(mc_reqs, 0x0d),
- CMN_EVENT_HNF(qos_hh_retry, 0x0e),
- _CMN_EVENT_HNF(qos_pocq_occupancy_all, 0x0f, 0),
- _CMN_EVENT_HNF(qos_pocq_occupancy_read, 0x0f, 1),
- _CMN_EVENT_HNF(qos_pocq_occupancy_write, 0x0f, 2),
- _CMN_EVENT_HNF(qos_pocq_occupancy_atomic, 0x0f, 3),
- _CMN_EVENT_HNF(qos_pocq_occupancy_stash, 0x0f, 4),
- CMN_EVENT_HNF(pocq_addrhaz, 0x10),
- CMN_EVENT_HNF(pocq_atomic_addrhaz, 0x11),
- CMN_EVENT_HNF(ld_st_swp_adq_full, 0x12),
- CMN_EVENT_HNF(cmp_adq_full, 0x13),
- CMN_EVENT_HNF(txdat_stall, 0x14),
- CMN_EVENT_HNF(txrsp_stall, 0x15),
- CMN_EVENT_HNF(seq_full, 0x16),
- CMN_EVENT_HNF(seq_hit, 0x17),
- CMN_EVENT_HNF(snp_sent, 0x18),
- CMN_EVENT_HNF(sfbi_dir_snp_sent, 0x19),
- CMN_EVENT_HNF(sfbi_brd_snp_sent, 0x1a),
- CMN_EVENT_HNF(snp_sent_untrk, 0x1b),
- CMN_EVENT_HNF(intv_dirty, 0x1c),
- CMN_EVENT_HNF(stash_snp_sent, 0x1d),
- CMN_EVENT_HNF(stash_data_pull, 0x1e),
- CMN_EVENT_HNF(snp_fwded, 0x1f),
-
- CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl, 0x20),
- CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl, 0x21),
- CMN_EVENT_HNI(rdt_rd_occ_cnt_ovfl, 0x22),
- CMN_EVENT_HNI(rdt_wr_occ_cnt_ovfl, 0x23),
- CMN_EVENT_HNI(wdb_occ_cnt_ovfl, 0x24),
- CMN_EVENT_HNI(rrt_rd_alloc, 0x25),
- CMN_EVENT_HNI(rrt_wr_alloc, 0x26),
- CMN_EVENT_HNI(rdt_rd_alloc, 0x27),
- CMN_EVENT_HNI(rdt_wr_alloc, 0x28),
- CMN_EVENT_HNI(wdb_alloc, 0x29),
- CMN_EVENT_HNI(txrsp_retryack, 0x2a),
- CMN_EVENT_HNI(arvalid_no_arready, 0x2b),
- CMN_EVENT_HNI(arready_no_arvalid, 0x2c),
- CMN_EVENT_HNI(awvalid_no_awready, 0x2d),
- CMN_EVENT_HNI(awready_no_awvalid, 0x2e),
- CMN_EVENT_HNI(wvalid_no_wready, 0x2f),
- CMN_EVENT_HNI(txdat_stall, 0x30),
- CMN_EVENT_HNI(nonpcie_serialization, 0x31),
- CMN_EVENT_HNI(pcie_serialization, 0x32),
-
- CMN_EVENT_XP(txflit_valid, 0x01),
- CMN_EVENT_XP(txflit_stall, 0x02),
- CMN_EVENT_XP(partial_dat_flit, 0x03),
+ CMN_EVENT_DVM(CMN600, rxreq_dvmop, 0x01),
+ CMN_EVENT_DVM(CMN600, rxreq_dvmsync, 0x02),
+ CMN_EVENT_DVM(CMN600, rxreq_dvmop_vmid_filtered, 0x03),
+ CMN_EVENT_DVM(CMN600, rxreq_retried, 0x04),
+ _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_all, 0x05, 0),
+ _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_dvmop, 0x05, 1),
+ _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_dvmsync, 0x05, 2),
+ CMN_EVENT_DVM(CI700, dvmop_tlbi, 0x01),
+ CMN_EVENT_DVM(CI700, dvmop_bpi, 0x02),
+ CMN_EVENT_DVM(CI700, dvmop_pici, 0x03),
+ CMN_EVENT_DVM(CI700, dvmop_vici, 0x04),
+ CMN_EVENT_DVM(CI700, dvmsync, 0x05),
+ CMN_EVENT_DVM(CI700, vmid_filtered, 0x06),
+ CMN_EVENT_DVM(CI700, rndop_filtered, 0x07),
+ CMN_EVENT_DVM(CI700, retry, 0x08),
+ CMN_EVENT_DVM(CI700, txsnp_flitv, 0x09),
+ CMN_EVENT_DVM(CI700, txsnp_stall, 0x0a),
+ CMN_EVENT_DVM(CI700, trkfull, 0x0b),
+ _CMN_EVENT_DVM(CI700, trk_occupancy_all, 0x0c, 0),
+ _CMN_EVENT_DVM(CI700, trk_occupancy_dvmop, 0x0c, 1),
+ _CMN_EVENT_DVM(CI700, trk_occupancy_dvmsync, 0x0c, 2),
+
+ CMN_EVENT_HNF(CMN_ANY, cache_miss, 0x01),
+ CMN_EVENT_HNF(CMN_ANY, slc_sf_cache_access, 0x02),
+ CMN_EVENT_HNF(CMN_ANY, cache_fill, 0x03),
+ CMN_EVENT_HNF(CMN_ANY, pocq_retry, 0x04),
+ CMN_EVENT_HNF(CMN_ANY, pocq_reqs_recvd, 0x05),
+ CMN_EVENT_HNF(CMN_ANY, sf_hit, 0x06),
+ CMN_EVENT_HNF(CMN_ANY, sf_evictions, 0x07),
+ CMN_EVENT_HNF(CMN_ANY, dir_snoops_sent, 0x08),
+ CMN_EVENT_HNF(CMN_ANY, brd_snoops_sent, 0x09),
+ CMN_EVENT_HNF(CMN_ANY, slc_eviction, 0x0a),
+ CMN_EVENT_HNF(CMN_ANY, slc_fill_invalid_way, 0x0b),
+ CMN_EVENT_HNF(CMN_ANY, mc_retries, 0x0c),
+ CMN_EVENT_HNF(CMN_ANY, mc_reqs, 0x0d),
+ CMN_EVENT_HNF(CMN_ANY, qos_hh_retry, 0x0e),
+ _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_all, 0x0f, 0),
+ _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_read, 0x0f, 1),
+ _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_write, 0x0f, 2),
+ _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_atomic, 0x0f, 3),
+ _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_stash, 0x0f, 4),
+ CMN_EVENT_HNF(CMN_ANY, pocq_addrhaz, 0x10),
+ CMN_EVENT_HNF(CMN_ANY, pocq_atomic_addrhaz, 0x11),
+ CMN_EVENT_HNF(CMN_ANY, ld_st_swp_adq_full, 0x12),
+ CMN_EVENT_HNF(CMN_ANY, cmp_adq_full, 0x13),
+ CMN_EVENT_HNF(CMN_ANY, txdat_stall, 0x14),
+ CMN_EVENT_HNF(CMN_ANY, txrsp_stall, 0x15),
+ CMN_EVENT_HNF(CMN_ANY, seq_full, 0x16),
+ CMN_EVENT_HNF(CMN_ANY, seq_hit, 0x17),
+ CMN_EVENT_HNF(CMN_ANY, snp_sent, 0x18),
+ CMN_EVENT_HNF(CMN_ANY, sfbi_dir_snp_sent, 0x19),
+ CMN_EVENT_HNF(CMN_ANY, sfbi_brd_snp_sent, 0x1a),
+ CMN_EVENT_HNF(CMN_ANY, snp_sent_untrk, 0x1b),
+ CMN_EVENT_HNF(CMN_ANY, intv_dirty, 0x1c),
+ CMN_EVENT_HNF(CMN_ANY, stash_snp_sent, 0x1d),
+ CMN_EVENT_HNF(CMN_ANY, stash_data_pull, 0x1e),
+ CMN_EVENT_HNF(CMN_ANY, snp_fwded, 0x1f),
+ CMN_EVENT_HNF(CI700, atomic_fwd, 0x20),
+ CMN_EVENT_HNF(CI700, mpam_hardlim, 0x21),
+ CMN_EVENT_HNF(CI700, mpam_softlim, 0x22),
+
+ CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl, 0x20),
+ CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl, 0x21),
+ CMN_EVENT_HNI(rdt_rd_occ_cnt_ovfl, 0x22),
+ CMN_EVENT_HNI(rdt_wr_occ_cnt_ovfl, 0x23),
+ CMN_EVENT_HNI(wdb_occ_cnt_ovfl, 0x24),
+ CMN_EVENT_HNI(rrt_rd_alloc, 0x25),
+ CMN_EVENT_HNI(rrt_wr_alloc, 0x26),
+ CMN_EVENT_HNI(rdt_rd_alloc, 0x27),
+ CMN_EVENT_HNI(rdt_wr_alloc, 0x28),
+ CMN_EVENT_HNI(wdb_alloc, 0x29),
+ CMN_EVENT_HNI(txrsp_retryack, 0x2a),
+ CMN_EVENT_HNI(arvalid_no_arready, 0x2b),
+ CMN_EVENT_HNI(arready_no_arvalid, 0x2c),
+ CMN_EVENT_HNI(awvalid_no_awready, 0x2d),
+ CMN_EVENT_HNI(awready_no_awvalid, 0x2e),
+ CMN_EVENT_HNI(wvalid_no_wready, 0x2f),
+ CMN_EVENT_HNI(txdat_stall, 0x30),
+ CMN_EVENT_HNI(nonpcie_serialization, 0x31),
+ CMN_EVENT_HNI(pcie_serialization, 0x32),
+
+ CMN_EVENT_XP(txflit_valid, 0x01),
+ CMN_EVENT_XP(txflit_stall, 0x02),
+ CMN_EVENT_XP(partial_dat_flit, 0x03),
/* We treat watchpoints as a special made-up class of XP events */
- CMN_EVENT_ATTR(watchpoint_up, CMN_TYPE_WP, 0, 0),
- CMN_EVENT_ATTR(watchpoint_down, CMN_TYPE_WP, 2, 0),
-
- CMN_EVENT_SBSX(rd_req, 0x01),
- CMN_EVENT_SBSX(wr_req, 0x02),
- CMN_EVENT_SBSX(cmo_req, 0x03),
- CMN_EVENT_SBSX(txrsp_retryack, 0x04),
- CMN_EVENT_SBSX(txdat_flitv, 0x05),
- CMN_EVENT_SBSX(txrsp_flitv, 0x06),
- CMN_EVENT_SBSX(rd_req_trkr_occ_cnt_ovfl, 0x11),
- CMN_EVENT_SBSX(wr_req_trkr_occ_cnt_ovfl, 0x12),
- CMN_EVENT_SBSX(cmo_req_trkr_occ_cnt_ovfl, 0x13),
- CMN_EVENT_SBSX(wdb_occ_cnt_ovfl, 0x14),
- CMN_EVENT_SBSX(rd_axi_trkr_occ_cnt_ovfl, 0x15),
- CMN_EVENT_SBSX(cmo_axi_trkr_occ_cnt_ovfl, 0x16),
- CMN_EVENT_SBSX(arvalid_no_arready, 0x21),
- CMN_EVENT_SBSX(awvalid_no_awready, 0x22),
- CMN_EVENT_SBSX(wvalid_no_wready, 0x23),
- CMN_EVENT_SBSX(txdat_stall, 0x24),
- CMN_EVENT_SBSX(txrsp_stall, 0x25),
-
- CMN_EVENT_RNID(s0_rdata_beats, 0x01),
- CMN_EVENT_RNID(s1_rdata_beats, 0x02),
- CMN_EVENT_RNID(s2_rdata_beats, 0x03),
- CMN_EVENT_RNID(rxdat_flits, 0x04),
- CMN_EVENT_RNID(txdat_flits, 0x05),
- CMN_EVENT_RNID(txreq_flits_total, 0x06),
- CMN_EVENT_RNID(txreq_flits_retried, 0x07),
- CMN_EVENT_RNID(rrt_occ_ovfl, 0x08),
- CMN_EVENT_RNID(wrt_occ_ovfl, 0x09),
- CMN_EVENT_RNID(txreq_flits_replayed, 0x0a),
- CMN_EVENT_RNID(wrcancel_sent, 0x0b),
- CMN_EVENT_RNID(s0_wdata_beats, 0x0c),
- CMN_EVENT_RNID(s1_wdata_beats, 0x0d),
- CMN_EVENT_RNID(s2_wdata_beats, 0x0e),
- CMN_EVENT_RNID(rrt_alloc, 0x0f),
- CMN_EVENT_RNID(wrt_alloc, 0x10),
- CMN_EVENT_RNID(rdb_unord, 0x11),
- CMN_EVENT_RNID(rdb_replay, 0x12),
- CMN_EVENT_RNID(rdb_hybrid, 0x13),
- CMN_EVENT_RNID(rdb_ord, 0x14),
+ CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP, 0),
+ CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN, 0),
+
+ CMN_EVENT_SBSX(CMN_ANY, rd_req, 0x01),
+ CMN_EVENT_SBSX(CMN_ANY, wr_req, 0x02),
+ CMN_EVENT_SBSX(CMN_ANY, cmo_req, 0x03),
+ CMN_EVENT_SBSX(CMN_ANY, txrsp_retryack, 0x04),
+ CMN_EVENT_SBSX(CMN_ANY, txdat_flitv, 0x05),
+ CMN_EVENT_SBSX(CMN_ANY, txrsp_flitv, 0x06),
+ CMN_EVENT_SBSX(CMN_ANY, rd_req_trkr_occ_cnt_ovfl, 0x11),
+ CMN_EVENT_SBSX(CMN_ANY, wr_req_trkr_occ_cnt_ovfl, 0x12),
+ CMN_EVENT_SBSX(CMN_ANY, cmo_req_trkr_occ_cnt_ovfl, 0x13),
+ CMN_EVENT_SBSX(CMN_ANY, wdb_occ_cnt_ovfl, 0x14),
+ CMN_EVENT_SBSX(CMN_ANY, rd_axi_trkr_occ_cnt_ovfl, 0x15),
+ CMN_EVENT_SBSX(CMN_ANY, cmo_axi_trkr_occ_cnt_ovfl, 0x16),
+ CMN_EVENT_SBSX(CI700, rdb_occ_cnt_ovfl, 0x17),
+ CMN_EVENT_SBSX(CMN_ANY, arvalid_no_arready, 0x21),
+ CMN_EVENT_SBSX(CMN_ANY, awvalid_no_awready, 0x22),
+ CMN_EVENT_SBSX(CMN_ANY, wvalid_no_wready, 0x23),
+ CMN_EVENT_SBSX(CMN_ANY, txdat_stall, 0x24),
+ CMN_EVENT_SBSX(CMN_ANY, txrsp_stall, 0x25),
+
+ CMN_EVENT_RNID(CMN_ANY, s0_rdata_beats, 0x01),
+ CMN_EVENT_RNID(CMN_ANY, s1_rdata_beats, 0x02),
+ CMN_EVENT_RNID(CMN_ANY, s2_rdata_beats, 0x03),
+ CMN_EVENT_RNID(CMN_ANY, rxdat_flits, 0x04),
+ CMN_EVENT_RNID(CMN_ANY, txdat_flits, 0x05),
+ CMN_EVENT_RNID(CMN_ANY, txreq_flits_total, 0x06),
+ CMN_EVENT_RNID(CMN_ANY, txreq_flits_retried, 0x07),
+ CMN_EVENT_RNID(CMN_ANY, rrt_occ_ovfl, 0x08),
+ CMN_EVENT_RNID(CMN_ANY, wrt_occ_ovfl, 0x09),
+ CMN_EVENT_RNID(CMN_ANY, txreq_flits_replayed, 0x0a),
+ CMN_EVENT_RNID(CMN_ANY, wrcancel_sent, 0x0b),
+ CMN_EVENT_RNID(CMN_ANY, s0_wdata_beats, 0x0c),
+ CMN_EVENT_RNID(CMN_ANY, s1_wdata_beats, 0x0d),
+ CMN_EVENT_RNID(CMN_ANY, s2_wdata_beats, 0x0e),
+ CMN_EVENT_RNID(CMN_ANY, rrt_alloc, 0x0f),
+ CMN_EVENT_RNID(CMN_ANY, wrt_alloc, 0x10),
+ CMN_EVENT_RNID(CMN600, rdb_unord, 0x11),
+ CMN_EVENT_RNID(CMN600, rdb_replay, 0x12),
+ CMN_EVENT_RNID(CMN600, rdb_hybrid, 0x13),
+ CMN_EVENT_RNID(CMN600, rdb_ord, 0x14),
+ CMN_EVENT_RNID(CI700, padb_occ_ovfl, 0x11),
+ CMN_EVENT_RNID(CI700, rpdb_occ_ovfl, 0x12),
+ CMN_EVENT_RNID(CI700, rrt_occup_ovfl_slice1, 0x13),
+ CMN_EVENT_RNID(CI700, rrt_occup_ovfl_slice2, 0x14),
+ CMN_EVENT_RNID(CI700, rrt_occup_ovfl_slice3, 0x15),
+ CMN_EVENT_RNID(CI700, wrt_throttled, 0x16),
+
+ CMN_EVENT_MTSX(tc_lookup, 0x01),
+ CMN_EVENT_MTSX(tc_fill, 0x02),
+ CMN_EVENT_MTSX(tc_miss, 0x03),
+ CMN_EVENT_MTSX(tdb_forward, 0x04),
+ CMN_EVENT_MTSX(tcq_hazard, 0x05),
+ CMN_EVENT_MTSX(tcq_rd_alloc, 0x06),
+ CMN_EVENT_MTSX(tcq_wr_alloc, 0x07),
+ CMN_EVENT_MTSX(tcq_cmo_alloc, 0x08),
+ CMN_EVENT_MTSX(axi_rd_req, 0x09),
+ CMN_EVENT_MTSX(axi_wr_req, 0x0a),
+ CMN_EVENT_MTSX(tcq_occ_cnt_ovfl, 0x0b),
+ CMN_EVENT_MTSX(tdb_occ_cnt_ovfl, 0x0c),
NULL
};
@@ -644,7 +909,8 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
- FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc);
+ FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc) |
+ FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1);
if (combine && !grp)
config |= CMN_DTM_WPn_CONFIG_WP_COMBINE;
@@ -679,18 +945,19 @@ static void arm_cmn_pmu_disable(struct pmu *pmu)
static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw,
bool snapshot)
{
+ struct arm_cmn_dtm *dtm = NULL;
struct arm_cmn_node *dn;
- unsigned int i, offset;
- u64 count = 0;
+ unsigned int i, offset, dtm_idx;
+ u64 reg, count = 0;
offset = snapshot ? CMN_DTM_PMEVCNTSR : CMN_DTM_PMEVCNT;
for_each_hw_dn(hw, dn, i) {
- struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn);
- int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
- u64 reg = readq_relaxed(xp->pmu_base + offset);
- u16 dtm_count = reg >> (dtm_idx * 16);
-
- count += dtm_count;
+ if (dtm != &cmn->dtms[dn->dtm]) {
+ dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
+ reg = readq_relaxed(dtm->base + offset);
+ }
+ dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
+ count += (u16)(reg >> (dtm_idx * 16));
}
return count;
}
@@ -774,8 +1041,10 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
u64 mask = CMN_EVENT_WP_MASK(event);
for_each_hw_dn(hw, dn, i) {
- writeq_relaxed(val, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx));
- writeq_relaxed(mask, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx));
+ void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
+
+ writeq_relaxed(val, base + CMN_DTM_WPn_VAL(wp_idx));
+ writeq_relaxed(mask, base + CMN_DTM_WPn_MASK(wp_idx));
}
} else for_each_hw_dn(hw, dn, i) {
int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
@@ -800,8 +1069,10 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
int wp_idx = arm_cmn_wp_idx(event);
for_each_hw_dn(hw, dn, i) {
- writeq_relaxed(0, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx));
- writeq_relaxed(~0ULL, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx));
+ void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
+
+ writeq_relaxed(0, base + CMN_DTM_WPn_MASK(wp_idx));
+ writeq_relaxed(~0ULL, base + CMN_DTM_WPn_VAL(wp_idx));
}
} else for_each_hw_dn(hw, dn, i) {
int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
@@ -814,14 +1085,15 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
}
struct arm_cmn_val {
- u8 dtm_count[CMN_MAX_XPS];
- u8 occupid[CMN_MAX_XPS];
- u8 wp[CMN_MAX_XPS][4];
+ u8 dtm_count[CMN_MAX_DTMS];
+ u8 occupid[CMN_MAX_DTMS];
+ u8 wp[CMN_MAX_DTMS][4];
int dtc_count;
bool cycles;
};
-static void arm_cmn_val_add_event(struct arm_cmn_val *val, struct perf_event *event)
+static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
+ struct perf_event *event)
{
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
struct arm_cmn_node *dn;
@@ -839,33 +1111,33 @@ static void arm_cmn_val_add_event(struct arm_cmn_val *val, struct perf_event *ev
}
val->dtc_count++;
- if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event)))
+ if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event)))
occupid = CMN_EVENT_OCCUPID(event) + 1;
else
occupid = 0;
for_each_hw_dn(hw, dn, i) {
- int wp_idx, xp = arm_cmn_node_to_xp(dn)->logid;
+ int wp_idx, dtm = dn->dtm;
- val->dtm_count[xp]++;
- val->occupid[xp] = occupid;
+ val->dtm_count[dtm]++;
+ val->occupid[dtm] = occupid;
if (type != CMN_TYPE_WP)
continue;
wp_idx = arm_cmn_wp_idx(event);
- val->wp[xp][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1;
+ val->wp[dtm][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1;
}
}
-static int arm_cmn_validate_group(struct perf_event *event)
+static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
{
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
struct arm_cmn_node *dn;
struct perf_event *sibling, *leader = event->group_leader;
enum cmn_node_type type;
- struct arm_cmn_val val;
- int i;
+ struct arm_cmn_val *val;
+ int i, ret = -EINVAL;
u8 occupid;
if (leader == event)
@@ -874,54 +1146,61 @@ static int arm_cmn_validate_group(struct perf_event *event)
if (event->pmu != leader->pmu && !is_software_event(leader))
return -EINVAL;
- memset(&val, 0, sizeof(val));
+ val = kzalloc(sizeof(*val), GFP_KERNEL);
+ if (!val)
+ return -ENOMEM;
- arm_cmn_val_add_event(&val, leader);
+ arm_cmn_val_add_event(cmn, val, leader);
for_each_sibling_event(sibling, leader)
- arm_cmn_val_add_event(&val, sibling);
+ arm_cmn_val_add_event(cmn, val, sibling);
type = CMN_EVENT_TYPE(event);
- if (type == CMN_TYPE_DTC)
- return val.cycles ? -EINVAL : 0;
+ if (type == CMN_TYPE_DTC) {
+ ret = val->cycles ? -EINVAL : 0;
+ goto done;
+ }
- if (val.dtc_count == CMN_DT_NUM_COUNTERS)
- return -EINVAL;
+ if (val->dtc_count == CMN_DT_NUM_COUNTERS)
+ goto done;
- if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event)))
+ if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event)))
occupid = CMN_EVENT_OCCUPID(event) + 1;
else
occupid = 0;
for_each_hw_dn(hw, dn, i) {
- int wp_idx, wp_cmb, xp = arm_cmn_node_to_xp(dn)->logid;
+ int wp_idx, wp_cmb, dtm = dn->dtm;
- if (val.dtm_count[xp] == CMN_DTM_NUM_COUNTERS)
- return -EINVAL;
+ if (val->dtm_count[dtm] == CMN_DTM_NUM_COUNTERS)
+ goto done;
- if (occupid && val.occupid[xp] && occupid != val.occupid[xp])
- return -EINVAL;
+ if (occupid && val->occupid[dtm] && occupid != val->occupid[dtm])
+ goto done;
if (type != CMN_TYPE_WP)
continue;
wp_idx = arm_cmn_wp_idx(event);
- if (val.wp[xp][wp_idx])
- return -EINVAL;
+ if (val->wp[dtm][wp_idx])
+ goto done;
- wp_cmb = val.wp[xp][wp_idx ^ 1];
+ wp_cmb = val->wp[dtm][wp_idx ^ 1];
if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1)
- return -EINVAL;
+ goto done;
}
- return 0;
+ ret = 0;
+done:
+ kfree(val);
+ return ret;
}
static int arm_cmn_event_init(struct perf_event *event)
{
struct arm_cmn *cmn = to_cmn(event->pmu);
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+ struct arm_cmn_node *dn;
enum cmn_node_type type;
- unsigned int i;
bool bynodeid;
u16 nodeid, eventid;
@@ -947,38 +1226,37 @@ static int arm_cmn_event_init(struct perf_event *event)
eventid = CMN_EVENT_EVENTID(event);
if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN)
return -EINVAL;
+ /* ...but the DTM may depend on which port we're watching */
+ if (cmn->multi_dtm)
+ hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
}
bynodeid = CMN_EVENT_BYNODEID(event);
nodeid = CMN_EVENT_NODEID(event);
hw->dn = arm_cmn_node(cmn, type);
- for (i = hw->dn - cmn->dns; i < cmn->num_dns && cmn->dns[i].type == type; i++) {
- if (!bynodeid) {
- hw->num_dns++;
- } else if (cmn->dns[i].id != nodeid) {
+ if (!hw->dn)
+ return -EINVAL;
+ for (dn = hw->dn; dn->type == type; dn++) {
+ if (bynodeid && dn->id != nodeid) {
hw->dn++;
- } else {
- hw->num_dns = 1;
- break;
+ continue;
}
+ hw->dtcs_used |= arm_cmn_node_to_xp(cmn, dn)->dtc;
+ hw->num_dns++;
+ if (bynodeid)
+ break;
}
if (!hw->num_dns) {
- int bits = arm_cmn_xyidbits(cmn);
+ struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid);
dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
- nodeid, CMN_NODEID_X(nodeid, bits), CMN_NODEID_Y(nodeid, bits),
- CMN_NODEID_PID(nodeid), CMN_NODEID_DEVID(nodeid), type);
+ nodeid, nid.x, nid.y, nid.port, nid.dev, type);
return -EINVAL;
}
- /*
- * By assuming events count in all DTC domains, we cunningly avoid
- * needing to know anything about how XPs are assigned to domains.
- */
- hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
- return arm_cmn_validate_group(event);
+ return arm_cmn_validate_group(cmn, event);
}
static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
@@ -988,17 +1266,17 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
enum cmn_node_type type = CMN_EVENT_TYPE(event);
while (i--) {
- struct arm_cmn_node *xp = arm_cmn_node_to_xp(hw->dn + i);
+ struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm] + hw->dtm_offset;
unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
if (type == CMN_TYPE_WP)
- hw->dn[i].wp_event[arm_cmn_wp_idx(event)] = -1;
+ dtm->wp_event[arm_cmn_wp_idx(event)] = -1;
- if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event)))
+ if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event)))
hw->dn[i].occupid_count--;
- xp->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx);
- writel_relaxed(xp->pmu_config_low, xp->pmu_base + CMN_DTM_PMU_CONFIG);
+ dtm->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx);
+ writel_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
}
memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
@@ -1040,12 +1318,12 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
/* ...then the local counters to feed it. */
for_each_hw_dn(hw, dn, i) {
- struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn);
+ struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
unsigned int dtm_idx, shift;
u64 reg;
dtm_idx = 0;
- while (xp->pmu_config_low & CMN__PMEVCNT_PAIRED(dtm_idx))
+ while (dtm->pmu_config_low & CMN__PMEVCNT_PAIRED(dtm_idx))
if (++dtm_idx == CMN_DTM_NUM_COUNTERS)
goto free_dtms;
@@ -1055,26 +1333,28 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
int tmp, wp_idx = arm_cmn_wp_idx(event);
u32 cfg = arm_cmn_wp_config(event);
- if (dn->wp_event[wp_idx] >= 0)
+ if (dtm->wp_event[wp_idx] >= 0)
goto free_dtms;
- tmp = dn->wp_event[wp_idx ^ 1];
+ tmp = dtm->wp_event[wp_idx ^ 1];
if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
goto free_dtms;
input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
- dn->wp_event[wp_idx] = dtc_idx;
- writel_relaxed(cfg, dn->pmu_base + CMN_DTM_WPn_CONFIG(wp_idx));
+ dtm->wp_event[wp_idx] = dtc_idx;
+ writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
} else {
- unsigned int port = CMN_NODEID_PID(dn->id);
- unsigned int dev = CMN_NODEID_DEVID(dn->id);
+ struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+
+ if (cmn->multi_dtm)
+ nid.port %= 2;
input_sel = CMN__PMEVCNT0_INPUT_SEL_DEV + dtm_idx +
- (port << 4) + (dev << 2);
+ (nid.port << 4) + (nid.dev << 2);
- if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) {
- int occupid = CMN_EVENT_OCCUPID(event);
+ if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event))) {
+ u8 occupid = CMN_EVENT_OCCUPID(event);
if (dn->occupid_count == 0) {
dn->occupid_val = occupid;
@@ -1089,13 +1369,13 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
arm_cmn_set_index(hw->dtm_idx, i, dtm_idx);
- xp->input_sel[dtm_idx] = input_sel;
+ dtm->input_sel[dtm_idx] = input_sel;
shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
- xp->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift);
- xp->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift;
- xp->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
- reg = (u64)le32_to_cpu(xp->pmu_config_high) << 32 | xp->pmu_config_low;
- writeq_relaxed(reg, xp->pmu_base + CMN_DTM_PMU_CONFIG);
+ dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift);
+ dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift;
+ dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
+ reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low;
+ writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG);
}
/* Go go go! */
@@ -1147,23 +1427,47 @@ static int arm_cmn_commit_txn(struct pmu *pmu)
return 0;
}
-static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+static void arm_cmn_migrate(struct arm_cmn *cmn, unsigned int cpu)
+{
+ unsigned int i;
+
+ perf_pmu_migrate_context(&cmn->pmu, cmn->cpu, cpu);
+ for (i = 0; i < cmn->num_dtcs; i++)
+ irq_set_affinity(cmn->dtc[i].irq, cpumask_of(cpu));
+ cmn->cpu = cpu;
+}
+
+static int arm_cmn_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
{
struct arm_cmn *cmn;
- unsigned int i, target;
+ int node;
- cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node);
- if (cpu != cmn->cpu)
- return 0;
+ cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node);
+ node = dev_to_node(cmn->dev);
+ if (node != NUMA_NO_NODE && cpu_to_node(cmn->cpu) != node && cpu_to_node(cpu) == node)
+ arm_cmn_migrate(cmn, cpu);
+ return 0;
+}
+
+static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct arm_cmn *cmn;
+ unsigned int target;
+ int node;
+ cpumask_t mask;
- target = cpumask_any_but(cpu_online_mask, cpu);
- if (target >= nr_cpu_ids)
+ cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node);
+ if (cpu != cmn->cpu)
return 0;
- perf_pmu_migrate_context(&cmn->pmu, cpu, target);
- for (i = 0; i < cmn->num_dtcs; i++)
- irq_set_affinity(cmn->dtc[i].irq, cpumask_of(target));
- cmn->cpu = target;
+ node = dev_to_node(cmn->dev);
+ if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
+ cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
+ target = cpumask_any(&mask);
+ else
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target < nr_cpu_ids)
+ arm_cmn_migrate(cmn, target);
return 0;
}
@@ -1231,23 +1535,22 @@ static int arm_cmn_init_irqs(struct arm_cmn *cmn)
return 0;
}
-static void arm_cmn_init_dtm(struct arm_cmn_node *xp)
+static void arm_cmn_init_dtm(struct arm_cmn_dtm *dtm, struct arm_cmn_node *xp, int idx)
{
int i;
+ dtm->base = xp->pmu_base + CMN_DTM_OFFSET(idx);
+ dtm->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN;
for (i = 0; i < 4; i++) {
- xp->wp_event[i] = -1;
- writeq_relaxed(0, xp->pmu_base + CMN_DTM_WPn_MASK(i));
- writeq_relaxed(~0ULL, xp->pmu_base + CMN_DTM_WPn_VAL(i));
+ dtm->wp_event[i] = -1;
+ writeq_relaxed(0, dtm->base + CMN_DTM_WPn_MASK(i));
+ writeq_relaxed(~0ULL, dtm->base + CMN_DTM_WPn_VAL(i));
}
- xp->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN;
- xp->dtc = -1;
}
static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int idx)
{
struct arm_cmn_dtc *dtc = cmn->dtc + idx;
- struct arm_cmn_node *xp;
dtc->base = dn->pmu_base - CMN_PMU_OFFSET;
dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx);
@@ -1258,10 +1561,6 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id
writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR);
writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
- /* We do at least know that a DTC's XP must be in that DTC's domain */
- xp = arm_cmn_node_to_xp(dn);
- xp->dtc = idx;
-
return 0;
}
@@ -1278,8 +1577,9 @@ static int arm_cmn_node_cmp(const void *a, const void *b)
static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
{
- struct arm_cmn_node *dn;
+ struct arm_cmn_node *dn, *xp;
int dtc_idx = 0;
+ u8 dtcs_present = (1 << cmn->num_dtcs) - 1;
cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL);
if (!cmn->dtc)
@@ -1289,14 +1589,26 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP);
- for (dn = cmn->dns; dn < cmn->dns + cmn->num_dns; dn++) {
- if (dn->type != CMN_TYPE_XP)
- arm_cmn_init_node_to_xp(cmn, dn);
- else if (cmn->num_dtcs == 1)
- dn->dtc = 0;
+ for (dn = cmn->dns; dn->type; dn++) {
+ if (dn->type == CMN_TYPE_XP) {
+ dn->dtc &= dtcs_present;
+ continue;
+ }
- if (dn->type == CMN_TYPE_DTC)
- arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+ xp = arm_cmn_node_to_xp(cmn, dn);
+ dn->dtm = xp->dtm;
+ if (cmn->multi_dtm)
+ dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
+
+ if (dn->type == CMN_TYPE_DTC) {
+ int err;
+ /* We do at least know that a DTC's XP must be in that DTC's domain */
+ if (xp->dtc == 0xf)
+ xp->dtc = 1 << dtc_idx;
+ err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+ if (err)
+ return err;
+ }
/* To the PMU, RN-Ds don't add anything over RN-Is, so smoosh them together */
if (dn->type == CMN_TYPE_RND)
@@ -1335,19 +1647,25 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
{
void __iomem *cfg_region;
struct arm_cmn_node cfg, *dn;
+ struct arm_cmn_dtm *dtm;
u16 child_count, child_poff;
u32 xp_offset[CMN_MAX_XPS];
u64 reg;
int i, j;
+ size_t sz;
+
+ arm_cmn_init_node_info(cmn, rgn_offset, &cfg);
+ if (cfg.type != CMN_TYPE_CFG)
+ return -ENODEV;
cfg_region = cmn->base + rgn_offset;
reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2);
cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
- dev_dbg(cmn->dev, "periph_id_2 revision: %d\n", cmn->rev);
- arm_cmn_init_node_info(cmn, rgn_offset, &cfg);
- if (cfg.type != CMN_TYPE_CFG)
- return -ENODEV;
+ reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL);
+ cmn->multi_dtm = reg & CMN_INFO_MULTIPLE_DTM_EN;
+ cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg);
+ cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg);
reg = readq_relaxed(cfg_region + CMN_CHILD_INFO);
child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg);
@@ -1365,20 +1683,28 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg);
}
- /* Cheeky +1 to help terminate pointer-based iteration */
- cmn->dns = devm_kcalloc(cmn->dev, cmn->num_dns + 1,
- sizeof(*cmn->dns), GFP_KERNEL);
- if (!cmn->dns)
+ /* Cheeky +1 to help terminate pointer-based iteration later */
+ dn = devm_kcalloc(cmn->dev, cmn->num_dns + 1, sizeof(*dn), GFP_KERNEL);
+ if (!dn)
+ return -ENOMEM;
+
+ /* Initial safe upper bound on DTMs for any possible mesh layout */
+ i = cmn->num_xps;
+ if (cmn->multi_dtm)
+ i += cmn->num_xps + 1;
+ dtm = devm_kcalloc(cmn->dev, i, sizeof(*dtm), GFP_KERNEL);
+ if (!dtm)
return -ENOMEM;
/* Pass 2: now we can actually populate the nodes */
- dn = cmn->dns;
+ cmn->dns = dn;
+ cmn->dtms = dtm;
for (i = 0; i < cmn->num_xps; i++) {
void __iomem *xp_region = cmn->base + xp_offset[i];
struct arm_cmn_node *xp = dn++;
+ unsigned int xp_ports = 0;
arm_cmn_init_node_info(cmn, xp_offset[i], xp);
- arm_cmn_init_dtm(xp);
/*
* Thanks to the order in which XP logical IDs seem to be
* assigned, we can handily infer the mesh X dimension by
@@ -1388,6 +1714,40 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
if (xp->id == (1 << 3))
cmn->mesh_x = xp->logid;
+ if (cmn->model == CMN600)
+ xp->dtc = 0xf;
+ else
+ xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO);
+
+ xp->dtm = dtm - cmn->dtms;
+ arm_cmn_init_dtm(dtm++, xp, 0);
+ /*
+ * Keeping track of connected ports will let us filter out
+ * unnecessary XP events easily. We can also reliably infer the
+ * "extra device ports" configuration for the node ID format
+ * from this, since in that case we will see at least one XP
+ * with port 2 connected, for the HN-D.
+ */
+ if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P0))
+ xp_ports |= BIT(0);
+ if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P1))
+ xp_ports |= BIT(1);
+ if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P2))
+ xp_ports |= BIT(2);
+ if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P3))
+ xp_ports |= BIT(3);
+ if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P4))
+ xp_ports |= BIT(4);
+ if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P5))
+ xp_ports |= BIT(5);
+
+ if (cmn->multi_dtm && (xp_ports & 0xc))
+ arm_cmn_init_dtm(dtm++, xp, 1);
+ if (cmn->multi_dtm && (xp_ports & 0x30))
+ arm_cmn_init_dtm(dtm++, xp, 2);
+
+ cmn->ports_used |= xp_ports;
+
reg = readq_relaxed(xp_region + CMN_CHILD_INFO);
child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg);
child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg);
@@ -1422,11 +1782,14 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_SBSX:
case CMN_TYPE_RNI:
case CMN_TYPE_RND:
+ case CMN_TYPE_MTSX:
case CMN_TYPE_CXRA:
case CMN_TYPE_CXHA:
dn++;
break;
/* Nothing to see here */
+ case CMN_TYPE_MPAM_S:
+ case CMN_TYPE_MPAM_NS:
case CMN_TYPE_RNSAM:
case CMN_TYPE_CXLA:
break;
@@ -1441,6 +1804,16 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
/* Correct for any nodes we skipped */
cmn->num_dns = dn - cmn->dns;
+ sz = (void *)(dn + 1) - (void *)cmn->dns;
+ dn = devm_krealloc(cmn->dev, cmn->dns, sz, GFP_KERNEL);
+ if (dn)
+ cmn->dns = dn;
+
+ sz = (void *)dtm - (void *)cmn->dtms;
+ dtm = devm_krealloc(cmn->dev, cmn->dtms, sz, GFP_KERNEL);
+ if (dtm)
+ cmn->dtms = dtm;
+
/*
* If mesh_x wasn't set during discovery then we never saw
* an XP at (0,1), thus we must have an Nx1 configuration.
@@ -1449,13 +1822,20 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
cmn->mesh_x = cmn->num_xps;
cmn->mesh_y = cmn->num_xps / cmn->mesh_x;
- dev_dbg(cmn->dev, "mesh %dx%d, ID width %d\n",
- cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn));
+ /* 1x1 config plays havoc with XP event encodings */
+ if (cmn->num_xps == 1)
+ dev_warn(cmn->dev, "1x1 config not fully supported, translate XP events manually\n");
+
+ dev_dbg(cmn->dev, "model %d, periph_id_2 revision %d\n", cmn->model, cmn->rev);
+ reg = cmn->ports_used;
+ dev_dbg(cmn->dev, "mesh %dx%d, ID width %d, ports %6pbl%s\n",
+ cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn), &reg,
+ cmn->multi_dtm ? ", multi-DTM" : "");
return 0;
}
-static int arm_cmn_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn)
+static int arm_cmn600_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn)
{
struct resource *cfg, *root;
@@ -1482,21 +1862,11 @@ static int arm_cmn_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn)
return root->start - cfg->start;
}
-static int arm_cmn_of_probe(struct platform_device *pdev, struct arm_cmn *cmn)
+static int arm_cmn600_of_probe(struct device_node *np)
{
- struct device_node *np = pdev->dev.of_node;
u32 rootnode;
- int ret;
-
- cmn->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(cmn->base))
- return PTR_ERR(cmn->base);
- ret = of_property_read_u32(np, "arm,root-node", &rootnode);
- if (ret)
- return ret;
-
- return rootnode;
+ return of_property_read_u32(np, "arm,root-node", &rootnode) ?: rootnode;
}
static int arm_cmn_probe(struct platform_device *pdev)
@@ -1504,19 +1874,26 @@ static int arm_cmn_probe(struct platform_device *pdev)
struct arm_cmn *cmn;
const char *name;
static atomic_t id;
- int err, rootnode;
+ int err, rootnode, this_id;
cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL);
if (!cmn)
return -ENOMEM;
cmn->dev = &pdev->dev;
+ cmn->model = (unsigned long)device_get_match_data(cmn->dev);
platform_set_drvdata(pdev, cmn);
- if (has_acpi_companion(cmn->dev))
- rootnode = arm_cmn_acpi_probe(pdev, cmn);
- else
- rootnode = arm_cmn_of_probe(pdev, cmn);
+ if (cmn->model == CMN600 && has_acpi_companion(cmn->dev)) {
+ rootnode = arm_cmn600_acpi_probe(pdev, cmn);
+ } else {
+ rootnode = 0;
+ cmn->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(cmn->base))
+ return PTR_ERR(cmn->base);
+ if (cmn->model == CMN600)
+ rootnode = arm_cmn600_of_probe(pdev->dev.of_node);
+ }
if (rootnode < 0)
return rootnode;
@@ -1532,7 +1909,7 @@ static int arm_cmn_probe(struct platform_device *pdev)
if (err)
return err;
- cmn->cpu = raw_smp_processor_id();
+ cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev));
cmn->pmu = (struct pmu) {
.module = THIS_MODULE,
.attr_groups = arm_cmn_attr_groups,
@@ -1551,7 +1928,8 @@ static int arm_cmn_probe(struct platform_device *pdev)
.cancel_txn = arm_cmn_end_txn,
};
- name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", atomic_fetch_inc(&id));
+ this_id = atomic_fetch_inc(&id);
+ name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id);
if (!name)
return -ENOMEM;
@@ -1561,7 +1939,10 @@ static int arm_cmn_probe(struct platform_device *pdev)
err = perf_pmu_register(&cmn->pmu, name, -1);
if (err)
- cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
+ cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node);
+ else
+ arm_cmn_debugfs_init(cmn, this_id);
+
return err;
}
@@ -1572,13 +1953,15 @@ static int arm_cmn_remove(struct platform_device *pdev)
writel_relaxed(0, cmn->dtc[0].base + CMN_DT_DTC_CTL);
perf_pmu_unregister(&cmn->pmu);
- cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
+ cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node);
+ debugfs_remove(cmn->debug);
return 0;
}
#ifdef CONFIG_OF
static const struct of_device_id arm_cmn_of_match[] = {
- { .compatible = "arm,cmn-600", },
+ { .compatible = "arm,cmn-600", .data = (void *)CMN600 },
+ { .compatible = "arm,ci-700", .data = (void *)CI700 },
{}
};
MODULE_DEVICE_TABLE(of, arm_cmn_of_match);
@@ -1586,7 +1969,7 @@ MODULE_DEVICE_TABLE(of, arm_cmn_of_match);
#ifdef CONFIG_ACPI
static const struct acpi_device_id arm_cmn_acpi_match[] = {
- { "ARMHC600", },
+ { "ARMHC600", CMN600 },
{}
};
MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);
@@ -1607,15 +1990,20 @@ static int __init arm_cmn_init(void)
int ret;
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
- "perf/arm/cmn:online", NULL,
+ "perf/arm/cmn:online",
+ arm_cmn_pmu_online_cpu,
arm_cmn_pmu_offline_cpu);
if (ret < 0)
return ret;
arm_cmn_hp_state = ret;
+ arm_cmn_debugfs = debugfs_create_dir("arm-cmn", NULL);
+
ret = platform_driver_register(&arm_cmn_driver);
- if (ret)
+ if (ret) {
cpuhp_remove_multi_state(arm_cmn_hp_state);
+ debugfs_remove(arm_cmn_debugfs);
+ }
return ret;
}
@@ -1623,6 +2011,7 @@ static void __exit arm_cmn_exit(void)
{
platform_driver_unregister(&arm_cmn_driver);
cpuhp_remove_multi_state(arm_cmn_hp_state);
+ debugfs_remove(arm_cmn_debugfs);
}
module_init(arm_cmn_init);
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 226348822ab3..1ae19f7301b2 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -47,6 +47,7 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/msi.h>
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
@@ -75,6 +76,10 @@
#define SMMU_PMCG_CR 0xE04
#define SMMU_PMCG_CR_ENABLE BIT(0)
#define SMMU_PMCG_IIDR 0xE08
+#define SMMU_PMCG_IIDR_PRODUCTID GENMASK(31, 20)
+#define SMMU_PMCG_IIDR_VARIANT GENMASK(19, 16)
+#define SMMU_PMCG_IIDR_REVISION GENMASK(15, 12)
+#define SMMU_PMCG_IIDR_IMPLEMENTER GENMASK(11, 0)
#define SMMU_PMCG_CEID0 0xE20
#define SMMU_PMCG_CEID1 0xE28
#define SMMU_PMCG_IRQ_CTRL 0xE50
@@ -83,6 +88,20 @@
#define SMMU_PMCG_IRQ_CFG1 0xE60
#define SMMU_PMCG_IRQ_CFG2 0xE64
+/* IMP-DEF ID registers */
+#define SMMU_PMCG_PIDR0 0xFE0
+#define SMMU_PMCG_PIDR0_PART_0 GENMASK(7, 0)
+#define SMMU_PMCG_PIDR1 0xFE4
+#define SMMU_PMCG_PIDR1_DES_0 GENMASK(7, 4)
+#define SMMU_PMCG_PIDR1_PART_1 GENMASK(3, 0)
+#define SMMU_PMCG_PIDR2 0xFE8
+#define SMMU_PMCG_PIDR2_REVISION GENMASK(7, 4)
+#define SMMU_PMCG_PIDR2_DES_1 GENMASK(2, 0)
+#define SMMU_PMCG_PIDR3 0xFEC
+#define SMMU_PMCG_PIDR3_REVAND GENMASK(7, 4)
+#define SMMU_PMCG_PIDR4 0xFD0
+#define SMMU_PMCG_PIDR4_DES_2 GENMASK(3, 0)
+
/* MSI config fields */
#define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
#define MSI_CFG2_MEMATTR_DEVICE_nGnRE 0x1
@@ -754,6 +773,41 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
dev_notice(smmu_pmu->dev, "option mask 0x%x\n", smmu_pmu->options);
}
+static bool smmu_pmu_coresight_id_regs(struct smmu_pmu *smmu_pmu)
+{
+ return of_device_is_compatible(smmu_pmu->dev->of_node,
+ "arm,mmu-600-pmcg");
+}
+
+static void smmu_pmu_get_iidr(struct smmu_pmu *smmu_pmu)
+{
+ u32 iidr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_IIDR);
+
+ if (!iidr && smmu_pmu_coresight_id_regs(smmu_pmu)) {
+ u32 pidr0 = readl(smmu_pmu->reg_base + SMMU_PMCG_PIDR0);
+ u32 pidr1 = readl(smmu_pmu->reg_base + SMMU_PMCG_PIDR1);
+ u32 pidr2 = readl(smmu_pmu->reg_base + SMMU_PMCG_PIDR2);
+ u32 pidr3 = readl(smmu_pmu->reg_base + SMMU_PMCG_PIDR3);
+ u32 pidr4 = readl(smmu_pmu->reg_base + SMMU_PMCG_PIDR4);
+
+ u32 productid = FIELD_GET(SMMU_PMCG_PIDR0_PART_0, pidr0) |
+ (FIELD_GET(SMMU_PMCG_PIDR1_PART_1, pidr1) << 8);
+ u32 variant = FIELD_GET(SMMU_PMCG_PIDR2_REVISION, pidr2);
+ u32 revision = FIELD_GET(SMMU_PMCG_PIDR3_REVAND, pidr3);
+ u32 implementer =
+ FIELD_GET(SMMU_PMCG_PIDR1_DES_0, pidr1) |
+ (FIELD_GET(SMMU_PMCG_PIDR2_DES_1, pidr2) << 4) |
+ (FIELD_GET(SMMU_PMCG_PIDR4_DES_2, pidr4) << 8);
+
+ iidr = FIELD_PREP(SMMU_PMCG_IIDR_PRODUCTID, productid) |
+ FIELD_PREP(SMMU_PMCG_IIDR_VARIANT, variant) |
+ FIELD_PREP(SMMU_PMCG_IIDR_REVISION, revision) |
+ FIELD_PREP(SMMU_PMCG_IIDR_IMPLEMENTER, implementer);
+ }
+
+ smmu_pmu->iidr = iidr;
+}
+
static int smmu_pmu_probe(struct platform_device *pdev)
{
struct smmu_pmu *smmu_pmu;
@@ -825,7 +879,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
return err;
}
- smmu_pmu->iidr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_IIDR);
+ smmu_pmu_get_iidr(smmu_pmu);
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx",
(res_0->start) >> SMMU_PMCG_PA_SHIFT);
@@ -834,7 +888,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
return -EINVAL;
}
- smmu_pmu_get_acpi_options(smmu_pmu);
+ if (!dev->of_node)
+ smmu_pmu_get_acpi_options(smmu_pmu);
/* Pick one CPU to be the preferred one to use */
smmu_pmu->on_cpu = raw_smp_processor_id();
@@ -884,9 +939,18 @@ static void smmu_pmu_shutdown(struct platform_device *pdev)
smmu_pmu_disable(&smmu_pmu->pmu);
}
+#ifdef CONFIG_OF
+static const struct of_device_id smmu_pmu_of_match[] = {
+ { .compatible = "arm,smmu-v3-pmcg" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, smmu_pmu_of_match);
+#endif
+
static struct platform_driver smmu_pmu_driver = {
.driver = {
.name = "arm-smmu-v3-pmcg",
+ .of_match_table = of_match_ptr(smmu_pmu_of_match),
.suppress_bind_attrs = true,
},
.probe = smmu_pmu_probe,
diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig
index c5d1b7019fff..5546218b5598 100644
--- a/drivers/perf/hisilicon/Kconfig
+++ b/drivers/perf/hisilicon/Kconfig
@@ -5,3 +5,12 @@ config HISI_PMU
help
Support for HiSilicon SoC L3 Cache performance monitor, Hydra Home
Agent performance monitor and DDR Controller performance monitor.
+
+config HISI_PCIE_PMU
+ tristate "HiSilicon PCIE PERF PMU"
+ depends on PCI && ARM64
+ help
+ Provide support for HiSilicon PCIe performance monitoring unit (PMU)
+ RCiEP devices.
+ Adds the PCIe PMU into perf events system for monitoring latency,
+ bandwidth etc.
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index 7643c9f93e36..506ed39e3266 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -2,3 +2,5 @@
obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
hisi_uncore_pa_pmu.o
+
+obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
new file mode 100644
index 000000000000..21771708597d
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -0,0 +1,948 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This driver adds support for PCIe PMU RCiEP device. Related
+ * perf events are bandwidth, latency etc.
+ *
+ * Copyright (C) 2021 HiSilicon Limited
+ * Author: Qi Liu <liuqi115@huawei.com>
+ */
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
+
+#define DRV_NAME "hisi_pcie_pmu"
+/* Define registers */
+#define HISI_PCIE_GLOBAL_CTRL 0x00
+#define HISI_PCIE_EVENT_CTRL 0x010
+#define HISI_PCIE_CNT 0x090
+#define HISI_PCIE_EXT_CNT 0x110
+#define HISI_PCIE_INT_STAT 0x150
+#define HISI_PCIE_INT_MASK 0x154
+#define HISI_PCIE_REG_BDF 0xfe0
+#define HISI_PCIE_REG_VERSION 0xfe4
+#define HISI_PCIE_REG_INFO 0xfe8
+
+/* Define command in HISI_PCIE_GLOBAL_CTRL */
+#define HISI_PCIE_GLOBAL_EN 0x01
+#define HISI_PCIE_GLOBAL_NONE 0
+
+/* Define command in HISI_PCIE_EVENT_CTRL */
+#define HISI_PCIE_EVENT_EN BIT_ULL(20)
+#define HISI_PCIE_RESET_CNT BIT_ULL(22)
+#define HISI_PCIE_INIT_SET BIT_ULL(34)
+#define HISI_PCIE_THR_EN BIT_ULL(26)
+#define HISI_PCIE_TARGET_EN BIT_ULL(32)
+#define HISI_PCIE_TRIG_EN BIT_ULL(52)
+
+/* Define offsets in HISI_PCIE_EVENT_CTRL */
+#define HISI_PCIE_EVENT_M GENMASK_ULL(15, 0)
+#define HISI_PCIE_THR_MODE_M GENMASK_ULL(27, 27)
+#define HISI_PCIE_THR_M GENMASK_ULL(31, 28)
+#define HISI_PCIE_TARGET_M GENMASK_ULL(52, 36)
+#define HISI_PCIE_TRIG_MODE_M GENMASK_ULL(53, 53)
+#define HISI_PCIE_TRIG_M GENMASK_ULL(59, 56)
+
+#define HISI_PCIE_MAX_COUNTERS 8
+#define HISI_PCIE_REG_STEP 8
+#define HISI_PCIE_THR_MAX_VAL 10
+#define HISI_PCIE_TRIG_MAX_VAL 10
+#define HISI_PCIE_MAX_PERIOD (GENMASK_ULL(63, 0))
+#define HISI_PCIE_INIT_VAL BIT_ULL(63)
+
+struct hisi_pcie_pmu {
+ struct perf_event *hw_events[HISI_PCIE_MAX_COUNTERS];
+ struct hlist_node node;
+ struct pci_dev *pdev;
+ struct pmu pmu;
+ void __iomem *base;
+ int irq;
+ u32 identifier;
+ /* Minimum and maximum BDF of root ports monitored by PMU */
+ u16 bdf_min;
+ u16 bdf_max;
+ int on_cpu;
+};
+
+struct hisi_pcie_reg_pair {
+ u16 lo;
+ u16 hi;
+};
+
+#define to_pcie_pmu(p) (container_of((p), struct hisi_pcie_pmu, pmu))
+#define GET_PCI_DEVFN(bdf) ((bdf) & 0xff)
+
+#define HISI_PCIE_PMU_FILTER_ATTR(_name, _config, _hi, _lo) \
+ static u64 hisi_pcie_get_##_name(struct perf_event *event) \
+ { \
+ return FIELD_GET(GENMASK(_hi, _lo), event->attr._config); \
+ } \
+
+HISI_PCIE_PMU_FILTER_ATTR(event, config, 16, 0);
+HISI_PCIE_PMU_FILTER_ATTR(thr_len, config1, 3, 0);
+HISI_PCIE_PMU_FILTER_ATTR(thr_mode, config1, 4, 4);
+HISI_PCIE_PMU_FILTER_ATTR(trig_len, config1, 8, 5);
+HISI_PCIE_PMU_FILTER_ATTR(trig_mode, config1, 9, 9);
+HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0);
+HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16);
+
+static ssize_t hisi_pcie_format_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct dev_ext_attribute *eattr;
+
+ eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+ return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+static ssize_t hisi_pcie_event_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct perf_pmu_events_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_attr, attr);
+
+ return sysfs_emit(buf, "config=0x%llx\n", pmu_attr->id);
+}
+
+#define HISI_PCIE_PMU_FORMAT_ATTR(_name, _format) \
+ (&((struct dev_ext_attribute[]){ \
+ { .attr = __ATTR(_name, 0444, hisi_pcie_format_sysfs_show, \
+ NULL), \
+ .var = (void *)_format } \
+ })[0].attr.attr)
+
+#define HISI_PCIE_PMU_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, hisi_pcie_event_sysfs_show, _id)
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(pcie_pmu->on_cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static ssize_t identifier_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%#x\n", pcie_pmu->identifier);
+}
+static DEVICE_ATTR_RO(identifier);
+
+static ssize_t bus_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%#04x\n", PCI_BUS_NUM(pcie_pmu->bdf_min));
+}
+static DEVICE_ATTR_RO(bus);
+
+static struct hisi_pcie_reg_pair
+hisi_pcie_parse_reg_value(struct hisi_pcie_pmu *pcie_pmu, u32 reg_off)
+{
+ u32 val = readl_relaxed(pcie_pmu->base + reg_off);
+ struct hisi_pcie_reg_pair regs = {
+ .lo = val,
+ .hi = val >> 16,
+ };
+
+ return regs;
+}
+
+/*
+ * Hardware counter and ext_counter work together for bandwidth, latency, bus
+ * utilization and buffer occupancy events. For example, RX memory write latency
+ * events(index = 0x0010), counter counts total delay cycles and ext_counter
+ * counts RX memory write PCIe packets number.
+ *
+ * As we don't want PMU driver to process these two data, "delay cycles" can
+ * be treated as an independent event(index = 0x0010), "RX memory write packets
+ * number" as another(index = 0x10010). BIT 16 is used to distinguish and 0-15
+ * bits are "real" event index, which can be used to set HISI_PCIE_EVENT_CTRL.
+ */
+#define EXT_COUNTER_IS_USED(idx) ((idx) & BIT(16))
+
+static u32 hisi_pcie_get_real_event(struct perf_event *event)
+{
+ return hisi_pcie_get_event(event) & GENMASK(15, 0);
+}
+
+static u32 hisi_pcie_pmu_get_offset(u32 offset, u32 idx)
+{
+ return offset + HISI_PCIE_REG_STEP * idx;
+}
+
+static u32 hisi_pcie_pmu_readl(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset,
+ u32 idx)
+{
+ u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx);
+
+ return readl_relaxed(pcie_pmu->base + offset);
+}
+
+static void hisi_pcie_pmu_writel(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, u32 idx, u32 val)
+{
+ u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx);
+
+ writel_relaxed(val, pcie_pmu->base + offset);
+}
+
+static u64 hisi_pcie_pmu_readq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, u32 idx)
+{
+ u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx);
+
+ return readq_relaxed(pcie_pmu->base + offset);
+}
+
+static void hisi_pcie_pmu_writeq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, u32 idx, u64 val)
+{
+ u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx);
+
+ writeq_relaxed(val, pcie_pmu->base + offset);
+}
+
+static void hisi_pcie_pmu_config_filter(struct perf_event *event)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 reg = HISI_PCIE_INIT_SET;
+ u64 port, trig_len, thr_len;
+
+ /* Config HISI_PCIE_EVENT_CTRL according to event. */
+ reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event));
+
+ /* Config HISI_PCIE_EVENT_CTRL according to root port or EP device. */
+ port = hisi_pcie_get_port(event);
+ if (port)
+ reg |= FIELD_PREP(HISI_PCIE_TARGET_M, port);
+ else
+ reg |= HISI_PCIE_TARGET_EN |
+ FIELD_PREP(HISI_PCIE_TARGET_M, hisi_pcie_get_bdf(event));
+
+ /* Config HISI_PCIE_EVENT_CTRL according to trigger condition. */
+ trig_len = hisi_pcie_get_trig_len(event);
+ if (trig_len) {
+ reg |= FIELD_PREP(HISI_PCIE_TRIG_M, trig_len);
+ reg |= FIELD_PREP(HISI_PCIE_TRIG_MODE_M, hisi_pcie_get_trig_mode(event));
+ reg |= HISI_PCIE_TRIG_EN;
+ }
+
+ /* Config HISI_PCIE_EVENT_CTRL according to threshold condition. */
+ thr_len = hisi_pcie_get_thr_len(event);
+ if (thr_len) {
+ reg |= FIELD_PREP(HISI_PCIE_THR_M, thr_len);
+ reg |= FIELD_PREP(HISI_PCIE_THR_MODE_M, hisi_pcie_get_thr_mode(event));
+ reg |= HISI_PCIE_THR_EN;
+ }
+
+ hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg);
+}
+
+static void hisi_pcie_pmu_clear_filter(struct perf_event *event)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, HISI_PCIE_INIT_SET);
+}
+
+static bool hisi_pcie_pmu_valid_requester_id(struct hisi_pcie_pmu *pcie_pmu, u32 bdf)
+{
+ struct pci_dev *root_port, *pdev;
+ u16 rp_bdf;
+
+ pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pcie_pmu->pdev->bus), PCI_BUS_NUM(bdf),
+ GET_PCI_DEVFN(bdf));
+ if (!pdev)
+ return false;
+
+ root_port = pcie_find_root_port(pdev);
+ if (!root_port) {
+ pci_dev_put(pdev);
+ return false;
+ }
+
+ pci_dev_put(pdev);
+ rp_bdf = pci_dev_id(root_port);
+ return rp_bdf >= pcie_pmu->bdf_min && rp_bdf <= pcie_pmu->bdf_max;
+}
+
+static bool hisi_pcie_pmu_valid_filter(struct perf_event *event,
+ struct hisi_pcie_pmu *pcie_pmu)
+{
+ u32 requester_id = hisi_pcie_get_bdf(event);
+
+ if (hisi_pcie_get_thr_len(event) > HISI_PCIE_THR_MAX_VAL)
+ return false;
+
+ if (hisi_pcie_get_trig_len(event) > HISI_PCIE_TRIG_MAX_VAL)
+ return false;
+
+ if (requester_id) {
+ if (!hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id))
+ return false;
+ }
+
+ return true;
+}
+
+static bool hisi_pcie_pmu_cmp_event(struct perf_event *target,
+ struct perf_event *event)
+{
+ return hisi_pcie_get_real_event(target) == hisi_pcie_get_real_event(event);
+}
+
+static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct perf_event *event_group[HISI_PCIE_MAX_COUNTERS];
+ int counters = 1;
+ int num;
+
+ event_group[0] = leader;
+ if (!is_software_event(leader)) {
+ if (leader->pmu != event->pmu)
+ return false;
+
+ if (leader != event && !hisi_pcie_pmu_cmp_event(leader, event))
+ event_group[counters++] = event;
+ }
+
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (is_software_event(sibling))
+ continue;
+
+ if (sibling->pmu != event->pmu)
+ return false;
+
+ for (num = 0; num < counters; num++) {
+ if (hisi_pcie_pmu_cmp_event(event_group[num], sibling))
+ break;
+ }
+
+ if (num == counters)
+ event_group[counters++] = sibling;
+ }
+
+ return counters <= HISI_PCIE_MAX_COUNTERS;
+}
+
+static int hisi_pcie_pmu_event_init(struct perf_event *event)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ event->cpu = pcie_pmu->on_cpu;
+
+ if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event)))
+ hwc->event_base = HISI_PCIE_EXT_CNT;
+ else
+ hwc->event_base = HISI_PCIE_CNT;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* Sampling is not supported. */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EOPNOTSUPP;
+
+ if (!hisi_pcie_pmu_valid_filter(event, pcie_pmu))
+ return -EINVAL;
+
+ if (!hisi_pcie_pmu_validate_event_group(event))
+ return -EINVAL;
+
+ return 0;
+}
+
+static u64 hisi_pcie_pmu_read_counter(struct perf_event *event)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ u32 idx = event->hw.idx;
+
+ return hisi_pcie_pmu_readq(pcie_pmu, event->hw.event_base, idx);
+}
+
+static int hisi_pcie_pmu_find_related_event(struct hisi_pcie_pmu *pcie_pmu,
+ struct perf_event *event)
+{
+ struct perf_event *sibling;
+ int idx;
+
+ for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) {
+ sibling = pcie_pmu->hw_events[idx];
+ if (!sibling)
+ continue;
+
+ if (!hisi_pcie_pmu_cmp_event(sibling, event))
+ continue;
+
+ /* Related events must be used in group */
+ if (sibling->group_leader == event->group_leader)
+ return idx;
+ else
+ return -EINVAL;
+ }
+
+ return idx;
+}
+
+static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu)
+{
+ int idx;
+
+ for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) {
+ if (!pcie_pmu->hw_events[idx])
+ return idx;
+ }
+
+ return -EINVAL;
+}
+
+static void hisi_pcie_pmu_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 new_cnt, prev_cnt, delta;
+
+ do {
+ prev_cnt = local64_read(&hwc->prev_count);
+ new_cnt = hisi_pcie_pmu_read_counter(event);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_cnt,
+ new_cnt) != prev_cnt);
+
+ delta = (new_cnt - prev_cnt) & HISI_PCIE_MAX_PERIOD;
+ local64_add(delta, &event->count);
+}
+
+static void hisi_pcie_pmu_read(struct perf_event *event)
+{
+ hisi_pcie_pmu_event_update(event);
+}
+
+static void hisi_pcie_pmu_set_period(struct perf_event *event)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ local64_set(&hwc->prev_count, HISI_PCIE_INIT_VAL);
+ hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_CNT, idx, HISI_PCIE_INIT_VAL);
+ hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EXT_CNT, idx, HISI_PCIE_INIT_VAL);
+}
+
+static void hisi_pcie_pmu_enable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc)
+{
+ u32 idx = hwc->idx;
+ u64 val;
+
+ val = hisi_pcie_pmu_readq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx);
+ val |= HISI_PCIE_EVENT_EN;
+ hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, val);
+}
+
+static void hisi_pcie_pmu_disable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc)
+{
+ u32 idx = hwc->idx;
+ u64 val;
+
+ val = hisi_pcie_pmu_readq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx);
+ val &= ~HISI_PCIE_EVENT_EN;
+ hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, val);
+}
+
+static void hisi_pcie_pmu_enable_int(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc)
+{
+ u32 idx = hwc->idx;
+
+ hisi_pcie_pmu_writel(pcie_pmu, HISI_PCIE_INT_MASK, idx, 0);
+}
+
+static void hisi_pcie_pmu_disable_int(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc)
+{
+ u32 idx = hwc->idx;
+
+ hisi_pcie_pmu_writel(pcie_pmu, HISI_PCIE_INT_MASK, idx, 1);
+}
+
+static void hisi_pcie_pmu_reset_counter(struct hisi_pcie_pmu *pcie_pmu, int idx)
+{
+ hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, HISI_PCIE_RESET_CNT);
+ hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, HISI_PCIE_INIT_SET);
+}
+
+static void hisi_pcie_pmu_start(struct perf_event *event, int flags)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u64 prev_cnt;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ hisi_pcie_pmu_config_filter(event);
+ hisi_pcie_pmu_enable_counter(pcie_pmu, hwc);
+ hisi_pcie_pmu_enable_int(pcie_pmu, hwc);
+ hisi_pcie_pmu_set_period(event);
+
+ if (flags & PERF_EF_RELOAD) {
+ prev_cnt = local64_read(&hwc->prev_count);
+ hisi_pcie_pmu_writeq(pcie_pmu, hwc->event_base, idx, prev_cnt);
+ }
+
+ perf_event_update_userpage(event);
+}
+
+static void hisi_pcie_pmu_stop(struct perf_event *event, int flags)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ hisi_pcie_pmu_event_update(event);
+ hisi_pcie_pmu_disable_int(pcie_pmu, hwc);
+ hisi_pcie_pmu_disable_counter(pcie_pmu, hwc);
+ hisi_pcie_pmu_clear_filter(event);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int hisi_pcie_pmu_add(struct perf_event *event, int flags)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+ /* Check all working events to find a related event. */
+ idx = hisi_pcie_pmu_find_related_event(pcie_pmu, event);
+ if (idx < 0)
+ return idx;
+
+ /* Current event shares an enabled counter with the related event */
+ if (idx < HISI_PCIE_MAX_COUNTERS) {
+ hwc->idx = idx;
+ goto start_count;
+ }
+
+ idx = hisi_pcie_pmu_get_event_idx(pcie_pmu);
+ if (idx < 0)
+ return idx;
+
+ hwc->idx = idx;
+ pcie_pmu->hw_events[idx] = event;
+ /* Reset Counter to avoid previous statistic interference. */
+ hisi_pcie_pmu_reset_counter(pcie_pmu, idx);
+
+start_count:
+ if (flags & PERF_EF_START)
+ hisi_pcie_pmu_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void hisi_pcie_pmu_del(struct perf_event *event, int flags)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ hisi_pcie_pmu_stop(event, PERF_EF_UPDATE);
+ pcie_pmu->hw_events[hwc->idx] = NULL;
+ perf_event_update_userpage(event);
+}
+
+static void hisi_pcie_pmu_enable(struct pmu *pmu)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(pmu);
+ int num;
+
+ for (num = 0; num < HISI_PCIE_MAX_COUNTERS; num++) {
+ if (pcie_pmu->hw_events[num])
+ break;
+ }
+
+ if (num == HISI_PCIE_MAX_COUNTERS)
+ return;
+
+ writel(HISI_PCIE_GLOBAL_EN, pcie_pmu->base + HISI_PCIE_GLOBAL_CTRL);
+}
+
+static void hisi_pcie_pmu_disable(struct pmu *pmu)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(pmu);
+
+ writel(HISI_PCIE_GLOBAL_NONE, pcie_pmu->base + HISI_PCIE_GLOBAL_CTRL);
+}
+
+static irqreturn_t hisi_pcie_pmu_irq(int irq, void *data)
+{
+ struct hisi_pcie_pmu *pcie_pmu = data;
+ irqreturn_t ret = IRQ_NONE;
+ struct perf_event *event;
+ u32 overflown;
+ int idx;
+
+ for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) {
+ overflown = hisi_pcie_pmu_readl(pcie_pmu, HISI_PCIE_INT_STAT, idx);
+ if (!overflown)
+ continue;
+
+ /* Clear status of interrupt. */
+ hisi_pcie_pmu_writel(pcie_pmu, HISI_PCIE_INT_STAT, idx, 1);
+ event = pcie_pmu->hw_events[idx];
+ if (!event)
+ continue;
+
+ hisi_pcie_pmu_event_update(event);
+ hisi_pcie_pmu_set_period(event);
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+static int hisi_pcie_pmu_irq_register(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu)
+{
+ int irq, ret;
+
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+ if (ret < 0) {
+ pci_err(pdev, "Failed to enable MSI vectors: %d\n", ret);
+ return ret;
+ }
+
+ irq = pci_irq_vector(pdev, 0);
+ ret = request_irq(irq, hisi_pcie_pmu_irq, IRQF_NOBALANCING | IRQF_NO_THREAD, DRV_NAME,
+ pcie_pmu);
+ if (ret) {
+ pci_err(pdev, "Failed to register IRQ: %d\n", ret);
+ pci_free_irq_vectors(pdev);
+ return ret;
+ }
+
+ pcie_pmu->irq = irq;
+
+ return 0;
+}
+
+static void hisi_pcie_pmu_irq_unregister(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu)
+{
+ free_irq(pcie_pmu->irq, pcie_pmu);
+ pci_free_irq_vectors(pdev);
+}
+
+static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
+
+ if (pcie_pmu->on_cpu == -1) {
+ pcie_pmu->on_cpu = cpu;
+ WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
+ }
+
+ return 0;
+}
+
+static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
+ unsigned int target;
+
+ /* Nothing to do if this CPU doesn't own the PMU */
+ if (pcie_pmu->on_cpu != cpu)
+ return 0;
+
+ pcie_pmu->on_cpu = -1;
+ /* Choose a new CPU from all online cpus. */
+ target = cpumask_first(cpu_online_mask);
+ if (target >= nr_cpu_ids) {
+ pci_err(pcie_pmu->pdev, "There is no CPU to set\n");
+ return 0;
+ }
+
+ perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target);
+ /* Use this CPU for event counting */
+ pcie_pmu->on_cpu = target;
+ WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(target)));
+
+ return 0;
+}
+
+static struct attribute *hisi_pcie_pmu_events_attr[] = {
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_latency, 0x0010),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_cnt, 0x10010),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_latency, 0x0210),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x1005),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x11005),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x2004),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x12004),
+ NULL
+};
+
+static struct attribute_group hisi_pcie_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_pcie_pmu_events_attr,
+};
+
+static struct attribute *hisi_pcie_pmu_format_attr[] = {
+ HISI_PCIE_PMU_FORMAT_ATTR(event, "config:0-16"),
+ HISI_PCIE_PMU_FORMAT_ATTR(thr_len, "config1:0-3"),
+ HISI_PCIE_PMU_FORMAT_ATTR(thr_mode, "config1:4"),
+ HISI_PCIE_PMU_FORMAT_ATTR(trig_len, "config1:5-8"),
+ HISI_PCIE_PMU_FORMAT_ATTR(trig_mode, "config1:9"),
+ HISI_PCIE_PMU_FORMAT_ATTR(port, "config2:0-15"),
+ HISI_PCIE_PMU_FORMAT_ATTR(bdf, "config2:16-31"),
+ NULL
+};
+
+static const struct attribute_group hisi_pcie_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_pcie_pmu_format_attr,
+};
+
+static struct attribute *hisi_pcie_pmu_bus_attrs[] = {
+ &dev_attr_bus.attr,
+ NULL
+};
+
+static const struct attribute_group hisi_pcie_pmu_bus_attr_group = {
+ .attrs = hisi_pcie_pmu_bus_attrs,
+};
+
+static struct attribute *hisi_pcie_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static const struct attribute_group hisi_pcie_pmu_cpumask_attr_group = {
+ .attrs = hisi_pcie_pmu_cpumask_attrs,
+};
+
+static struct attribute *hisi_pcie_pmu_identifier_attrs[] = {
+ &dev_attr_identifier.attr,
+ NULL
+};
+
+static const struct attribute_group hisi_pcie_pmu_identifier_attr_group = {
+ .attrs = hisi_pcie_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_pcie_pmu_attr_groups[] = {
+ &hisi_pcie_pmu_events_group,
+ &hisi_pcie_pmu_format_group,
+ &hisi_pcie_pmu_bus_attr_group,
+ &hisi_pcie_pmu_cpumask_attr_group,
+ &hisi_pcie_pmu_identifier_attr_group,
+ NULL
+};
+
+static int hisi_pcie_alloc_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu)
+{
+ struct hisi_pcie_reg_pair regs;
+ u16 sicl_id, core_id;
+ char *name;
+
+ regs = hisi_pcie_parse_reg_value(pcie_pmu, HISI_PCIE_REG_BDF);
+ pcie_pmu->bdf_min = regs.lo;
+ pcie_pmu->bdf_max = regs.hi;
+
+ regs = hisi_pcie_parse_reg_value(pcie_pmu, HISI_PCIE_REG_INFO);
+ sicl_id = regs.hi;
+ core_id = regs.lo;
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_pcie%u_core%u", sicl_id, core_id);
+ if (!name)
+ return -ENOMEM;
+
+ pcie_pmu->pdev = pdev;
+ pcie_pmu->on_cpu = -1;
+ pcie_pmu->identifier = readl(pcie_pmu->base + HISI_PCIE_REG_VERSION);
+ pcie_pmu->pmu = (struct pmu) {
+ .name = name,
+ .module = THIS_MODULE,
+ .event_init = hisi_pcie_pmu_event_init,
+ .pmu_enable = hisi_pcie_pmu_enable,
+ .pmu_disable = hisi_pcie_pmu_disable,
+ .add = hisi_pcie_pmu_add,
+ .del = hisi_pcie_pmu_del,
+ .start = hisi_pcie_pmu_start,
+ .stop = hisi_pcie_pmu_stop,
+ .read = hisi_pcie_pmu_read,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = hisi_pcie_pmu_attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ };
+
+ return 0;
+}
+
+static int hisi_pcie_init_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu)
+{
+ int ret;
+
+ pcie_pmu->base = pci_ioremap_bar(pdev, 2);
+ if (!pcie_pmu->base) {
+ pci_err(pdev, "Ioremap failed for pcie_pmu resource\n");
+ return -ENOMEM;
+ }
+
+ ret = hisi_pcie_alloc_pmu(pdev, pcie_pmu);
+ if (ret)
+ goto err_iounmap;
+
+ ret = hisi_pcie_pmu_irq_register(pdev, pcie_pmu);
+ if (ret)
+ goto err_iounmap;
+
+ ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node);
+ if (ret) {
+ pci_err(pdev, "Failed to register hotplug: %d\n", ret);
+ goto err_irq_unregister;
+ }
+
+ ret = perf_pmu_register(&pcie_pmu->pmu, pcie_pmu->pmu.name, -1);
+ if (ret) {
+ pci_err(pdev, "Failed to register PCIe PMU: %d\n", ret);
+ goto err_hotplug_unregister;
+ }
+
+ return ret;
+
+err_hotplug_unregister:
+ cpuhp_state_remove_instance_nocalls(
+ CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node);
+
+err_irq_unregister:
+ hisi_pcie_pmu_irq_unregister(pdev, pcie_pmu);
+
+err_iounmap:
+ iounmap(pcie_pmu->base);
+
+ return ret;
+}
+
+static void hisi_pcie_uninit_pmu(struct pci_dev *pdev)
+{
+ struct hisi_pcie_pmu *pcie_pmu = pci_get_drvdata(pdev);
+
+ perf_pmu_unregister(&pcie_pmu->pmu);
+ cpuhp_state_remove_instance_nocalls(
+ CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node);
+ hisi_pcie_pmu_irq_unregister(pdev, pcie_pmu);
+ iounmap(pcie_pmu->base);
+}
+
+static int hisi_pcie_init_dev(struct pci_dev *pdev)
+{
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ pci_err(pdev, "Failed to enable PCI device: %d\n", ret);
+ return ret;
+ }
+
+ ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
+ if (ret < 0) {
+ pci_err(pdev, "Failed to request PCI mem regions: %d\n", ret);
+ return ret;
+ }
+
+ pci_set_master(pdev);
+
+ return 0;
+}
+
+static int hisi_pcie_pmu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct hisi_pcie_pmu *pcie_pmu;
+ int ret;
+
+ pcie_pmu = devm_kzalloc(&pdev->dev, sizeof(*pcie_pmu), GFP_KERNEL);
+ if (!pcie_pmu)
+ return -ENOMEM;
+
+ ret = hisi_pcie_init_dev(pdev);
+ if (ret)
+ return ret;
+
+ ret = hisi_pcie_init_pmu(pdev, pcie_pmu);
+ if (ret)
+ return ret;
+
+ pci_set_drvdata(pdev, pcie_pmu);
+
+ return ret;
+}
+
+static void hisi_pcie_pmu_remove(struct pci_dev *pdev)
+{
+ hisi_pcie_uninit_pmu(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static const struct pci_device_id hisi_pcie_pmu_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12d) },
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, hisi_pcie_pmu_ids);
+
+static struct pci_driver hisi_pcie_pmu_driver = {
+ .name = DRV_NAME,
+ .id_table = hisi_pcie_pmu_ids,
+ .probe = hisi_pcie_pmu_probe,
+ .remove = hisi_pcie_pmu_remove,
+};
+
+static int __init hisi_pcie_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE,
+ "AP_PERF_ARM_HISI_PCIE_PMU_ONLINE",
+ hisi_pcie_pmu_online_cpu,
+ hisi_pcie_pmu_offline_cpu);
+ if (ret) {
+ pr_err("Failed to setup PCIe PMU hotplug: %d\n", ret);
+ return ret;
+ }
+
+ ret = pci_register_driver(&hisi_pcie_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE);
+
+ return ret;
+}
+module_init(hisi_pcie_module_init);
+
+static void __exit hisi_pcie_module_exit(void)
+{
+ pci_unregister_driver(&hisi_pcie_pmu_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE);
+}
+module_exit(hisi_pcie_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon PCIe PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
new file mode 100644
index 000000000000..7f4d292658e3
--- /dev/null
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell CN10K LLC-TAD perf driver
+ *
+ * Copyright (C) 2021 Marvell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "tad_pmu: " fmt
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/cpuhotplug.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+#define TAD_PFC_OFFSET 0x0
+#define TAD_PFC(counter) (TAD_PFC_OFFSET | (counter << 3))
+#define TAD_PRF_OFFSET 0x100
+#define TAD_PRF(counter) (TAD_PRF_OFFSET | (counter << 3))
+#define TAD_PRF_CNTSEL_MASK 0xFF
+#define TAD_MAX_COUNTERS 8
+
+#define to_tad_pmu(p) (container_of(p, struct tad_pmu, pmu))
+
+struct tad_region {
+ void __iomem *base;
+};
+
+struct tad_pmu {
+ struct pmu pmu;
+ struct tad_region *regions;
+ u32 region_cnt;
+ unsigned int cpu;
+ struct hlist_node node;
+ struct perf_event *events[TAD_MAX_COUNTERS];
+ DECLARE_BITMAP(counters_map, TAD_MAX_COUNTERS);
+};
+
+static int tad_pmu_cpuhp_state;
+
+static void tad_pmu_event_counter_read(struct perf_event *event)
+{
+ struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u32 counter_idx = hwc->idx;
+ u64 prev, new;
+ int i;
+
+ do {
+ prev = local64_read(&hwc->prev_count);
+ for (i = 0, new = 0; i < tad_pmu->region_cnt; i++)
+ new += readq(tad_pmu->regions[i].base +
+ TAD_PFC(counter_idx));
+ } while (local64_cmpxchg(&hwc->prev_count, prev, new) != prev);
+
+ local64_add(new - prev, &event->count);
+}
+
+static void tad_pmu_event_counter_stop(struct perf_event *event, int flags)
+{
+ struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u32 counter_idx = hwc->idx;
+ int i;
+
+ /* TAD()_PFC() stop counting on the write
+ * which sets TAD()_PRF()[CNTSEL] == 0
+ */
+ for (i = 0; i < tad_pmu->region_cnt; i++) {
+ writeq_relaxed(0, tad_pmu->regions[i].base +
+ TAD_PRF(counter_idx));
+ }
+
+ tad_pmu_event_counter_read(event);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static void tad_pmu_event_counter_start(struct perf_event *event, int flags)
+{
+ struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u32 event_idx = event->attr.config;
+ u32 counter_idx = hwc->idx;
+ u64 reg_val;
+ int i;
+
+ hwc->state = 0;
+
+ /* Typically TAD_PFC() are zeroed to start counting */
+ for (i = 0; i < tad_pmu->region_cnt; i++)
+ writeq_relaxed(0, tad_pmu->regions[i].base +
+ TAD_PFC(counter_idx));
+
+ /* TAD()_PFC() start counting on the write
+ * which sets TAD()_PRF()[CNTSEL] != 0
+ */
+ for (i = 0; i < tad_pmu->region_cnt; i++) {
+ reg_val = readq_relaxed(tad_pmu->regions[i].base +
+ TAD_PRF(counter_idx));
+ reg_val |= (event_idx & 0xFF);
+ writeq_relaxed(reg_val, tad_pmu->regions[i].base +
+ TAD_PRF(counter_idx));
+ }
+}
+
+static void tad_pmu_event_counter_del(struct perf_event *event, int flags)
+{
+ struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ tad_pmu_event_counter_stop(event, flags | PERF_EF_UPDATE);
+ tad_pmu->events[idx] = NULL;
+ clear_bit(idx, tad_pmu->counters_map);
+}
+
+static int tad_pmu_event_counter_add(struct perf_event *event, int flags)
+{
+ struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ /* Get a free counter for this event */
+ idx = find_first_zero_bit(tad_pmu->counters_map, TAD_MAX_COUNTERS);
+ if (idx == TAD_MAX_COUNTERS)
+ return -EAGAIN;
+
+ set_bit(idx, tad_pmu->counters_map);
+
+ hwc->idx = idx;
+ hwc->state = PERF_HES_STOPPED;
+ tad_pmu->events[idx] = event;
+
+ if (flags & PERF_EF_START)
+ tad_pmu_event_counter_start(event, flags);
+
+ return 0;
+}
+
+static int tad_pmu_event_init(struct perf_event *event)
+{
+ struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+
+ if (!event->attr.disabled)
+ return -EINVAL;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (event->state != PERF_EVENT_STATE_OFF)
+ return -EINVAL;
+
+ event->cpu = tad_pmu->cpu;
+ event->hw.idx = -1;
+ event->hw.config_base = event->attr.config;
+
+ return 0;
+}
+
+static ssize_t tad_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define TAD_PMU_EVENT_ATTR(name, config) \
+ PMU_EVENT_ATTR_ID(name, tad_pmu_event_show, config)
+
+static struct attribute *tad_pmu_event_attrs[] = {
+ TAD_PMU_EVENT_ATTR(tad_none, 0x0),
+ TAD_PMU_EVENT_ATTR(tad_req_msh_in_any, 0x1),
+ TAD_PMU_EVENT_ATTR(tad_req_msh_in_mn, 0x2),
+ TAD_PMU_EVENT_ATTR(tad_req_msh_in_exlmn, 0x3),
+ TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_any, 0x4),
+ TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_mn, 0x5),
+ TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_exlmn, 0x6),
+ TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_dss, 0x7),
+ TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_retry_dss, 0x8),
+ TAD_PMU_EVENT_ATTR(tad_dat_msh_in_any, 0x9),
+ TAD_PMU_EVENT_ATTR(tad_dat_msh_in_dss, 0xa),
+ TAD_PMU_EVENT_ATTR(tad_req_msh_out_any, 0xb),
+ TAD_PMU_EVENT_ATTR(tad_req_msh_out_dss_rd, 0xc),
+ TAD_PMU_EVENT_ATTR(tad_req_msh_out_dss_wr, 0xd),
+ TAD_PMU_EVENT_ATTR(tad_req_msh_out_evict, 0xe),
+ TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_any, 0xf),
+ TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_retry_exlmn, 0x10),
+ TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_retry_mn, 0x11),
+ TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_exlmn, 0x12),
+ TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_mn, 0x13),
+ TAD_PMU_EVENT_ATTR(tad_snp_msh_out_any, 0x14),
+ TAD_PMU_EVENT_ATTR(tad_snp_msh_out_mn, 0x15),
+ TAD_PMU_EVENT_ATTR(tad_snp_msh_out_exlmn, 0x16),
+ TAD_PMU_EVENT_ATTR(tad_dat_msh_out_any, 0x17),
+ TAD_PMU_EVENT_ATTR(tad_dat_msh_out_fill, 0x18),
+ TAD_PMU_EVENT_ATTR(tad_dat_msh_out_dss, 0x19),
+ TAD_PMU_EVENT_ATTR(tad_alloc_dtg, 0x1a),
+ TAD_PMU_EVENT_ATTR(tad_alloc_ltg, 0x1b),
+ TAD_PMU_EVENT_ATTR(tad_alloc_any, 0x1c),
+ TAD_PMU_EVENT_ATTR(tad_hit_dtg, 0x1d),
+ TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e),
+ TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f),
+ TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20),
+ TAD_PMU_EVENT_ATTR(tad_dat_rd, 0x21),
+ TAD_PMU_EVENT_ATTR(tad_dat_rd_byp, 0x22),
+ TAD_PMU_EVENT_ATTR(tad_ifb_occ, 0x23),
+ TAD_PMU_EVENT_ATTR(tad_req_occ, 0x24),
+ NULL
+};
+
+static const struct attribute_group tad_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = tad_pmu_event_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *tad_pmu_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL
+};
+
+static struct attribute_group tad_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = tad_pmu_format_attrs,
+};
+
+static ssize_t tad_pmu_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct tad_pmu *tad_pmu = to_tad_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(tad_pmu->cpu));
+}
+
+static DEVICE_ATTR(cpumask, 0444, tad_pmu_cpumask_show, NULL);
+
+static struct attribute *tad_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static struct attribute_group tad_pmu_cpumask_attr_group = {
+ .attrs = tad_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *tad_pmu_attr_groups[] = {
+ &tad_pmu_events_attr_group,
+ &tad_pmu_format_attr_group,
+ &tad_pmu_cpumask_attr_group,
+ NULL
+};
+
+static int tad_pmu_probe(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct tad_region *regions;
+ struct tad_pmu *tad_pmu;
+ struct resource *res;
+ u32 tad_pmu_page_size;
+ u32 tad_page_size;
+ u32 tad_cnt;
+ int i, ret;
+ char *name;
+
+ tad_pmu = devm_kzalloc(&pdev->dev, sizeof(*tad_pmu), GFP_KERNEL);
+ if (!tad_pmu)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, tad_pmu);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "Mem resource not found\n");
+ return -ENODEV;
+ }
+
+ ret = of_property_read_u32(node, "marvell,tad-page-size",
+ &tad_page_size);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't find tad-page-size property\n");
+ return ret;
+ }
+
+ ret = of_property_read_u32(node, "marvell,tad-pmu-page-size",
+ &tad_pmu_page_size);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't find tad-pmu-page-size property\n");
+ return ret;
+ }
+
+ ret = of_property_read_u32(node, "marvell,tad-cnt", &tad_cnt);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't find tad-cnt property\n");
+ return ret;
+ }
+
+ regions = devm_kcalloc(&pdev->dev, tad_cnt,
+ sizeof(*regions), GFP_KERNEL);
+ if (!regions)
+ return -ENOMEM;
+
+ /* ioremap the distributed TAD pmu regions */
+ for (i = 0; i < tad_cnt && res->start < res->end; i++) {
+ regions[i].base = devm_ioremap(&pdev->dev,
+ res->start,
+ tad_pmu_page_size);
+ if (!regions[i].base) {
+ dev_err(&pdev->dev, "TAD%d ioremap fail\n", i);
+ return -ENOMEM;
+ }
+ res->start += tad_page_size;
+ }
+
+ tad_pmu->regions = regions;
+ tad_pmu->region_cnt = tad_cnt;
+
+ tad_pmu->pmu = (struct pmu) {
+
+ .module = THIS_MODULE,
+ .attr_groups = tad_pmu_attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE |
+ PERF_PMU_CAP_NO_INTERRUPT,
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = tad_pmu_event_init,
+ .add = tad_pmu_event_counter_add,
+ .del = tad_pmu_event_counter_del,
+ .start = tad_pmu_event_counter_start,
+ .stop = tad_pmu_event_counter_stop,
+ .read = tad_pmu_event_counter_read,
+ };
+
+ tad_pmu->cpu = raw_smp_processor_id();
+
+ /* Register pmu instance for cpu hotplug */
+ ret = cpuhp_state_add_instance_nocalls(tad_pmu_cpuhp_state,
+ &tad_pmu->node);
+ if (ret) {
+ dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+ return ret;
+ }
+
+ name = "tad";
+ ret = perf_pmu_register(&tad_pmu->pmu, name, -1);
+ if (ret)
+ cpuhp_state_remove_instance_nocalls(tad_pmu_cpuhp_state,
+ &tad_pmu->node);
+
+ return ret;
+}
+
+static int tad_pmu_remove(struct platform_device *pdev)
+{
+ struct tad_pmu *pmu = platform_get_drvdata(pdev);
+
+ cpuhp_state_remove_instance_nocalls(tad_pmu_cpuhp_state,
+ &pmu->node);
+ perf_pmu_unregister(&pmu->pmu);
+
+ return 0;
+}
+
+static const struct of_device_id tad_pmu_of_match[] = {
+ { .compatible = "marvell,cn10k-tad-pmu", },
+ {},
+};
+
+static struct platform_driver tad_pmu_driver = {
+ .driver = {
+ .name = "cn10k_tad_pmu",
+ .of_match_table = of_match_ptr(tad_pmu_of_match),
+ .suppress_bind_attrs = true,
+ },
+ .probe = tad_pmu_probe,
+ .remove = tad_pmu_remove,
+};
+
+static int tad_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct tad_pmu *pmu = hlist_entry_safe(node, struct tad_pmu, node);
+ unsigned int target;
+
+ if (cpu != pmu->cpu)
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+ pmu->cpu = target;
+
+ return 0;
+}
+
+static int __init tad_pmu_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/cn10k/tadpmu:online",
+ NULL,
+ tad_pmu_offline_cpu);
+ if (ret < 0)
+ return ret;
+ tad_pmu_cpuhp_state = ret;
+ return platform_driver_register(&tad_pmu_driver);
+}
+
+static void __exit tad_pmu_exit(void)
+{
+ platform_driver_unregister(&tad_pmu_driver);
+ cpuhp_remove_multi_state(tad_pmu_cpuhp_state);
+}
+
+module_init(tad_pmu_init);
+module_exit(tad_pmu_exit);
+
+MODULE_DESCRIPTION("Marvell CN10K LLC-TAD Perf driver");
+MODULE_AUTHOR("Bhaskara Budiredla <bbudiredla@marvell.com>");
+MODULE_LICENSE("GPL v2");