aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds2016-03-24 10:02:14 -0700
committerLinus Torvalds2016-03-24 10:02:14 -0700
commit3fa2fe2ce09c5a16be69c5319eb3347271a99735 (patch)
tree7067760e7006dec7ac6fe18fe3d09851293b63ea /arch/x86
parentd88f48e12821ab4b2244124d50ac094568f48db5 (diff)
parent05f5ece76a88a2cd4859bc93f90379733dd8b4a3 (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar: "This tree contains various perf fixes on the kernel side, plus three hw/event-enablement late additions: - Intel Memory Bandwidth Monitoring events and handling - the AMD Accumulated Power Mechanism reporting facility - more IOMMU events ... and a final round of perf tooling updates/fixes" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits) perf llvm: Use strerror_r instead of the thread unsafe strerror one perf llvm: Use realpath to canonicalize paths perf tools: Unexport some methods unused outside strbuf.c perf probe: No need to use formatting strbuf method perf help: Use asprintf instead of adhoc equivalents perf tools: Remove unused perf_pathdup, xstrdup functions perf tools: Do not include stringify.h from the kernel sources tools include: Copy linux/stringify.h from the kernel tools lib traceevent: Remove redundant CPU output perf tools: Remove needless 'extern' from function prototypes perf tools: Simplify die() mechanism perf tools: Remove unused DIE_IF macro perf script: Remove lots of unused arguments perf thread: Rename perf_event__preprocess_sample_addr to thread__resolve perf machine: Rename perf_event__preprocess_sample to machine__resolve perf tools: Add cpumode to struct perf_sample perf tests: Forward the perf_sample in the dwarf unwind test perf tools: Remove misplaced __maybe_unused perf list: Fix documentation of :ppp perf bench numa: Fix assertion for nodes bitfield ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig9
-rw-r--r--arch/x86/events/Makefile1
-rw-r--r--arch/x86/events/amd/ibs.c37
-rw-r--r--arch/x86/events/amd/iommu.c5
-rw-r--r--arch/x86/events/amd/power.c353
-rw-r--r--arch/x86/events/core.c4
-rw-r--r--arch/x86/events/intel/cqm.c454
-rw-r--r--arch/x86/events/intel/ds.c5
-rw-r--r--arch/x86/events/intel/rapl.c2
-rw-r--r--arch/x86/events/intel/uncore_snbep.c7
-rw-r--r--arch/x86/include/asm/cpufeatures.h4
-rw-r--r--arch/x86/kernel/cpu/amd.c18
-rw-r--r--arch/x86/kernel/cpu/common.c4
13 files changed, 864 insertions, 39 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 54478b7635de..2dc18605831f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1210,6 +1210,15 @@ config MICROCODE_OLD_INTERFACE
def_bool y
depends on MICROCODE
+config PERF_EVENTS_AMD_POWER
+ depends on PERF_EVENTS && CPU_SUP_AMD
+ tristate "AMD Processor Power Reporting Mechanism"
+ ---help---
+ Provide power reporting mechanism support for AMD processors.
+ Currently, it leverages X86_FEATURE_ACC_POWER
+ (CPUID Fn8000_0007_EDX[12]) interface to calculate the
+ average power consumption on Family 15h processors.
+
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
---help---
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index fdfea1511cc0..f59618a39990 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,6 +1,7 @@
obj-y += core.o
obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o
+obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += amd/power.o
obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o
ifdef CONFIG_AMD_IOMMU
obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 51087c29b2c2..3ea25c3917c0 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -376,7 +376,13 @@ static void perf_ibs_start(struct perf_event *event, int flags)
hwc->state = 0;
perf_ibs_set_period(perf_ibs, hwc, &period);
+ /*
+ * Set STARTED before enabling the hardware, such that
+ * a subsequent NMI must observe it. Then clear STOPPING
+ * such that we don't consume NMIs by accident.
+ */
set_bit(IBS_STARTED, pcpu->state);
+ clear_bit(IBS_STOPPING, pcpu->state);
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
perf_event_update_userpage(event);
@@ -390,7 +396,7 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
u64 config;
int stopping;
- stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+ stopping = test_bit(IBS_STARTED, pcpu->state);
if (!stopping && (hwc->state & PERF_HES_UPTODATE))
return;
@@ -398,8 +404,24 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
rdmsrl(hwc->config_base, config);
if (stopping) {
+ /*
+ * Set STOPPING before disabling the hardware, such that it
+ * must be visible to NMIs the moment we clear the EN bit,
+ * at which point we can generate an !VALID sample which
+ * we need to consume.
+ */
set_bit(IBS_STOPPING, pcpu->state);
perf_ibs_disable_event(perf_ibs, hwc, config);
+ /*
+ * Clear STARTED after disabling the hardware; if it were
+ * cleared before an NMI hitting after the clear but before
+ * clearing the EN bit might think it a spurious NMI and not
+ * handle it.
+ *
+ * Clearing it after, however, creates the problem of the NMI
+ * handler seeing STARTED but not having a valid sample.
+ */
+ clear_bit(IBS_STARTED, pcpu->state);
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
}
@@ -527,20 +549,24 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
u64 *buf, *config, period;
if (!test_bit(IBS_STARTED, pcpu->state)) {
+fail:
/*
* Catch spurious interrupts after stopping IBS: After
* disabling IBS there could be still incoming NMIs
* with samples that even have the valid bit cleared.
* Mark all this NMIs as handled.
*/
- return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+ if (test_and_clear_bit(IBS_STOPPING, pcpu->state))
+ return 1;
+
+ return 0;
}
msr = hwc->config_base;
buf = ibs_data.regs;
rdmsrl(msr, *buf);
if (!(*buf++ & perf_ibs->valid_mask))
- return 0;
+ goto fail;
config = &ibs_data.regs[0];
perf_ibs_event_update(perf_ibs, event, config);
@@ -599,7 +625,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
throttle = perf_event_overflow(event, &data, &regs);
out:
if (throttle)
- perf_ibs_disable_event(perf_ibs, hwc, *config);
+ perf_ibs_stop(event, 0);
else
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
@@ -611,6 +637,7 @@ out:
static int
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
+ u64 stamp = sched_clock();
int handled = 0;
handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
@@ -619,6 +646,8 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
if (handled)
inc_irq_stat(apic_perf_irqs);
+ perf_sample_event_took(sched_clock() - stamp);
+
return handled;
}
NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 635e5eba0caf..40625ca7a190 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -118,6 +118,11 @@ static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"),
AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"),
AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"),
+ AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h, "csource=0x14"),
+ AMD_IOMMU_EVENT_DESC(vapic_int_non_guest, "csource=0x15"),
+ AMD_IOMMU_EVENT_DESC(vapic_int_guest, "csource=0x16"),
+ AMD_IOMMU_EVENT_DESC(smi_recv, "csource=0x17"),
+ AMD_IOMMU_EVENT_DESC(smi_blk, "csource=0x18"),
{ /* end: all zeroes */ },
};
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
new file mode 100644
index 000000000000..55a3529dbf12
--- /dev/null
+++ b/arch/x86/events/amd/power.c
@@ -0,0 +1,353 @@
+/*
+ * Performance events - AMD Processor Power Reporting Mechanism
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Huang Rui <ray.huang@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a
+#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
+#define MSR_F15H_PTSC 0xc0010280
+
+/* Event code: LSB 8 bits, passed in attr->config any other bit is reserved. */
+#define AMD_POWER_EVENT_MASK 0xFFULL
+
+/*
+ * Accumulated power status counters.
+ */
+#define AMD_POWER_EVENTSEL_PKG 1
+
+/*
+ * The ratio of compute unit power accumulator sample period to the
+ * PTSC period.
+ */
+static unsigned int cpu_pwr_sample_ratio;
+
+/* Maximum accumulated power of a compute unit. */
+static u64 max_cu_acc_power;
+
+static struct pmu pmu_class;
+
+/*
+ * Accumulated power represents the sum of each compute unit's (CU) power
+ * consumption. On any core of each CU we read the total accumulated power from
+ * MSR_F15H_CU_PWR_ACCUMULATOR. cpu_mask represents CPU bit map of all cores
+ * which are picked to measure the power for the CUs they belong to.
+ */
+static cpumask_t cpu_mask;
+
+static void event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_pwr_acc, new_pwr_acc, prev_ptsc, new_ptsc;
+ u64 delta, tdelta;
+
+ prev_pwr_acc = hwc->pwr_acc;
+ prev_ptsc = hwc->ptsc;
+ rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc);
+ rdmsrl(MSR_F15H_PTSC, new_ptsc);
+
+ /*
+ * Calculate the CU power consumption over a time period, the unit of
+ * final value (delta) is micro-Watts. Then add it to the event count.
+ */
+ if (new_pwr_acc < prev_pwr_acc) {
+ delta = max_cu_acc_power + new_pwr_acc;
+ delta -= prev_pwr_acc;
+ } else
+ delta = new_pwr_acc - prev_pwr_acc;
+
+ delta *= cpu_pwr_sample_ratio * 1000;
+ tdelta = new_ptsc - prev_ptsc;
+
+ do_div(delta, tdelta);
+ local64_add(delta, &event->count);
+}
+
+static void __pmu_event_start(struct perf_event *event)
+{
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ event->hw.state = 0;
+
+ rdmsrl(MSR_F15H_PTSC, event->hw.ptsc);
+ rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc);
+}
+
+static void pmu_event_start(struct perf_event *event, int mode)
+{
+ __pmu_event_start(event);
+}
+
+static void pmu_event_stop(struct perf_event *event, int mode)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* Mark event as deactivated and stopped. */
+ if (!(hwc->state & PERF_HES_STOPPED))
+ hwc->state |= PERF_HES_STOPPED;
+
+ /* Check if software counter update is necessary. */
+ if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ /*
+ * Drain the remaining delta count out of an event
+ * that we are disabling:
+ */
+ event_update(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int pmu_event_add(struct perf_event *event, int mode)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (mode & PERF_EF_START)
+ __pmu_event_start(event);
+
+ return 0;
+}
+
+static void pmu_event_del(struct perf_event *event, int flags)
+{
+ pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int pmu_event_init(struct perf_event *event)
+{
+ u64 cfg = event->attr.config & AMD_POWER_EVENT_MASK;
+
+ /* Only look at AMD power events. */
+ if (event->attr.type != pmu_class.type)
+ return -ENOENT;
+
+ /* Unsupported modes and filters. */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ /* no sampling */
+ event->attr.sample_period)
+ return -EINVAL;
+
+ if (cfg != AMD_POWER_EVENTSEL_PKG)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void pmu_event_read(struct perf_event *event)
+{
+ event_update(event);
+}
+
+static ssize_t
+get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, get_attr_cpumask, NULL);
+
+static struct attribute *pmu_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group pmu_attr_group = {
+ .attrs = pmu_attrs,
+};
+
+/*
+ * Currently it only supports to report the power of each
+ * processor/package.
+ */
+EVENT_ATTR_STR(power-pkg, power_pkg, "event=0x01");
+
+EVENT_ATTR_STR(power-pkg.unit, power_pkg_unit, "mWatts");
+
+/* Convert the count from micro-Watts to milli-Watts. */
+EVENT_ATTR_STR(power-pkg.scale, power_pkg_scale, "1.000000e-3");
+
+static struct attribute *events_attr[] = {
+ EVENT_PTR(power_pkg),
+ EVENT_PTR(power_pkg_unit),
+ EVENT_PTR(power_pkg_scale),
+ NULL,
+};
+
+static struct attribute_group pmu_events_group = {
+ .name = "events",
+ .attrs = events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *formats_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group pmu_format_group = {
+ .name = "format",
+ .attrs = formats_attr,
+};
+
+static const struct attribute_group *attr_groups[] = {
+ &pmu_attr_group,
+ &pmu_format_group,
+ &pmu_events_group,
+ NULL,
+};
+
+static struct pmu pmu_class = {
+ .attr_groups = attr_groups,
+ /* system-wide only */
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = pmu_event_init,
+ .add = pmu_event_add,
+ .del = pmu_event_del,
+ .start = pmu_event_start,
+ .stop = pmu_event_stop,
+ .read = pmu_event_read,
+};
+
+static void power_cpu_exit(int cpu)
+{
+ int target;
+
+ if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask))
+ return;
+
+ /*
+ * Find a new CPU on the same compute unit, if was set in cpumask
+ * and still some CPUs on compute unit. Then migrate event and
+ * context to new CPU.
+ */
+ target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+ if (target < nr_cpumask_bits) {
+ cpumask_set_cpu(target, &cpu_mask);
+ perf_pmu_migrate_context(&pmu_class, cpu, target);
+ }
+}
+
+static void power_cpu_init(int cpu)
+{
+ int target;
+
+ /*
+ * 1) If any CPU is set at cpu_mask in the same compute unit, do
+ * nothing.
+ * 2) If no CPU is set at cpu_mask in the same compute unit,
+ * set current STARTING CPU.
+ *
+ * Note: if there is a CPU aside of the new one already in the
+ * sibling mask, then it is also in cpu_mask.
+ */
+ target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+ if (target >= nr_cpumask_bits)
+ cpumask_set_cpu(cpu, &cpu_mask);
+}
+
+static int
+power_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_FAILED:
+ case CPU_STARTING:
+ power_cpu_init(cpu);
+ break;
+ case CPU_DOWN_PREPARE:
+ power_cpu_exit(cpu);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block power_cpu_notifier_nb = {
+ .notifier_call = power_cpu_notifier,
+ .priority = CPU_PRI_PERF,
+};
+
+static const struct x86_cpu_id cpu_match[] = {
+ { .vendor = X86_VENDOR_AMD, .family = 0x15 },
+ {},
+};
+
+static int __init amd_power_pmu_init(void)
+{
+ int cpu, target, ret;
+
+ if (!x86_match_cpu(cpu_match))
+ return 0;
+
+ if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
+ return -ENODEV;
+
+ cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);
+
+ if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) {
+ pr_err("Failed to read max compute unit power accumulator MSR\n");
+ return -ENODEV;
+ }
+
+ cpu_notifier_register_begin();
+
+ /* Choose one online core of each compute unit. */
+ for_each_online_cpu(cpu) {
+ target = cpumask_first(topology_sibling_cpumask(cpu));
+ if (!cpumask_test_cpu(target, &cpu_mask))
+ cpumask_set_cpu(target, &cpu_mask);
+ }
+
+ ret = perf_pmu_register(&pmu_class, "power", -1);
+ if (WARN_ON(ret)) {
+ pr_warn("AMD Power PMU registration failed\n");
+ goto out;
+ }
+
+ __register_cpu_notifier(&power_cpu_notifier_nb);
+
+ pr_info("AMD Power PMU detected\n");
+
+out:
+ cpu_notifier_register_done();
+
+ return ret;
+}
+module_init(amd_power_pmu_init);
+
+static void __exit amd_power_pmu_exit(void)
+{
+ cpu_notifier_register_begin();
+ __unregister_cpu_notifier(&power_cpu_notifier_nb);
+ cpu_notifier_register_done();
+
+ perf_pmu_unregister(&pmu_class);
+}
+module_exit(amd_power_pmu_exit);
+
+MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
+MODULE_DESCRIPTION("AMD Processor Power Reporting Mechanism");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 9b6ad08aa51a..041e442a3e28 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1602,8 +1602,7 @@ __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
return new;
}
-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
- char *page)
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
{
struct perf_pmu_events_attr *pmu_attr = \
container_of(attr, struct perf_pmu_events_attr, attr);
@@ -1615,6 +1614,7 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
return x86_pmu.events_sysfs_show(page, config);
}
+EXPORT_SYMBOL_GPL(events_sysfs_show);
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 93cb412a5579..7b5fd811ef45 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -13,8 +13,16 @@
#define MSR_IA32_QM_CTR 0x0c8e
#define MSR_IA32_QM_EVTSEL 0x0c8d
+#define MBM_CNTR_WIDTH 24
+/*
+ * Guaranteed time in ms as per SDM where MBM counters will not overflow.
+ */
+#define MBM_CTR_OVERFLOW_TIME 1000
+
static u32 cqm_max_rmid = -1;
static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+static bool cqm_enabled, mbm_enabled;
+unsigned int mbm_socket_max;
/**
* struct intel_pqr_state - State cache for the PQR MSR
@@ -42,8 +50,37 @@ struct intel_pqr_state {
* interrupts disabled, which is sufficient for the protection.
*/
static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+static struct hrtimer *mbm_timers;
+/**
+ * struct sample - mbm event's (local or total) data
+ * @total_bytes #bytes since we began monitoring
+ * @prev_msr previous value of MSR
+ */
+struct sample {
+ u64 total_bytes;
+ u64 prev_msr;
+};
/*
+ * samples profiled for total memory bandwidth type events
+ */
+static struct sample *mbm_total;
+/*
+ * samples profiled for local memory bandwidth type events
+ */
+static struct sample *mbm_local;
+
+#define pkg_id topology_physical_package_id(smp_processor_id())
+/*
+ * rmid_2_index returns the index for the rmid in mbm_local/mbm_total array.
+ * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of
+ * rmids per socket, an example is given below
+ * RMID1 of Socket0: vrmid = 1
+ * RMID1 of Socket1: vrmid = 1 * (cqm_max_rmid + 1) + 1
+ * RMID1 of Socket2: vrmid = 2 * (cqm_max_rmid + 1) + 1
+ */
+#define rmid_2_index(rmid) ((pkg_id * (cqm_max_rmid + 1)) + rmid)
+/*
* Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
* Also protects event->hw.cqm_rmid
*
@@ -65,9 +102,13 @@ static cpumask_t cqm_cpumask;
#define RMID_VAL_ERROR (1ULL << 63)
#define RMID_VAL_UNAVAIL (1ULL << 62)
-#define QOS_L3_OCCUP_EVENT_ID (1 << 0)
-
-#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID
+/*
+ * Event IDs are used to program IA32_QM_EVTSEL before reading event
+ * counter from IA32_QM_CTR
+ */
+#define QOS_L3_OCCUP_EVENT_ID 0x01
+#define QOS_MBM_TOTAL_EVENT_ID 0x02
+#define QOS_MBM_LOCAL_EVENT_ID 0x03
/*
* This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
@@ -211,6 +252,21 @@ static void __put_rmid(u32 rmid)
list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
}
+static void cqm_cleanup(void)
+{
+ int i;
+
+ if (!cqm_rmid_ptrs)
+ return;
+
+ for (i = 0; i < cqm_max_rmid; i++)
+ kfree(cqm_rmid_ptrs[i]);
+
+ kfree(cqm_rmid_ptrs);
+ cqm_rmid_ptrs = NULL;
+ cqm_enabled = false;
+}
+
static int intel_cqm_setup_rmid_cache(void)
{
struct cqm_rmid_entry *entry;
@@ -218,7 +274,7 @@ static int intel_cqm_setup_rmid_cache(void)
int r = 0;
nr_rmids = cqm_max_rmid + 1;
- cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
+ cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) *
nr_rmids, GFP_KERNEL);
if (!cqm_rmid_ptrs)
return -ENOMEM;
@@ -249,11 +305,9 @@ static int intel_cqm_setup_rmid_cache(void)
mutex_unlock(&cache_mutex);
return 0;
-fail:
- while (r--)
- kfree(cqm_rmid_ptrs[r]);
- kfree(cqm_rmid_ptrs);
+fail:
+ cqm_cleanup();
return -ENOMEM;
}
@@ -281,9 +335,13 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
/*
* Events that target same task are placed into the same cache group.
+ * Mark it as a multi event group, so that we update ->count
+ * for every event rather than just the group leader later.
*/
- if (a->hw.target == b->hw.target)
+ if (a->hw.target == b->hw.target) {
+ b->hw.is_group_event = true;
return true;
+ }
/*
* Are we an inherited event?
@@ -392,10 +450,26 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b)
struct rmid_read {
u32 rmid;
+ u32 evt_type;
atomic64_t value;
};
static void __intel_cqm_event_count(void *info);
+static void init_mbm_sample(u32 rmid, u32 evt_type);
+static void __intel_mbm_event_count(void *info);
+
+static bool is_mbm_event(int e)
+{
+ return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
+}
+
+static void cqm_mask_call(struct rmid_read *rr)
+{
+ if (is_mbm_event(rr->evt_type))
+ on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_count, rr, 1);
+ else
+ on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, rr, 1);
+}
/*
* Exchange the RMID of a group of events.
@@ -413,12 +487,12 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
*/
if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
struct rmid_read rr = {
- .value = ATOMIC64_INIT(0),
.rmid = old_rmid,
+ .evt_type = group->attr.config,
+ .value = ATOMIC64_INIT(0),
};
- on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
- &rr, 1);
+ cqm_mask_call(&rr);
local64_set(&group->count, atomic64_read(&rr.value));
}
@@ -430,6 +504,22 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
raw_spin_unlock_irq(&cache_lock);
+ /*
+ * If the allocation is for mbm, init the mbm stats.
+ * Need to check if each event in the group is mbm event
+ * because there could be multiple type of events in the same group.
+ */
+ if (__rmid_valid(rmid)) {
+ event = group;
+ if (is_mbm_event(event->attr.config))
+ init_mbm_sample(rmid, event->attr.config);
+
+ list_for_each_entry(event, head, hw.cqm_group_entry) {
+ if (is_mbm_event(event->attr.config))
+ init_mbm_sample(rmid, event->attr.config);
+ }
+ }
+
return old_rmid;
}
@@ -837,6 +927,72 @@ static void intel_cqm_rmid_rotate(struct work_struct *work)
schedule_delayed_work(&intel_cqm_rmid_work, delay);
}
+static u64 update_sample(unsigned int rmid, u32 evt_type, int first)
+{
+ struct sample *mbm_current;
+ u32 vrmid = rmid_2_index(rmid);
+ u64 val, bytes, shift;
+ u32 eventid;
+
+ if (evt_type == QOS_MBM_LOCAL_EVENT_ID) {
+ mbm_current = &mbm_local[vrmid];
+ eventid = QOS_MBM_LOCAL_EVENT_ID;
+ } else {
+ mbm_current = &mbm_total[vrmid];
+ eventid = QOS_MBM_TOTAL_EVENT_ID;
+ }
+
+ wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+ rdmsrl(MSR_IA32_QM_CTR, val);
+ if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+ return mbm_current->total_bytes;
+
+ if (first) {
+ mbm_current->prev_msr = val;
+ mbm_current->total_bytes = 0;
+ return mbm_current->total_bytes;
+ }
+
+ /*
+ * The h/w guarantees that counters will not overflow
+ * so long as we poll them at least once per second.
+ */
+ shift = 64 - MBM_CNTR_WIDTH;
+ bytes = (val << shift) - (mbm_current->prev_msr << shift);
+ bytes >>= shift;
+
+ bytes *= cqm_l3_scale;
+
+ mbm_current->total_bytes += bytes;
+ mbm_current->prev_msr = val;
+
+ return mbm_current->total_bytes;
+}
+
+static u64 rmid_read_mbm(unsigned int rmid, u32 evt_type)
+{
+ return update_sample(rmid, evt_type, 0);
+}
+
+static void __intel_mbm_event_init(void *info)
+{
+ struct rmid_read *rr = info;
+
+ update_sample(rr->rmid, rr->evt_type, 1);
+}
+
+static void init_mbm_sample(u32 rmid, u32 evt_type)
+{
+ struct rmid_read rr = {
+ .rmid = rmid,
+ .evt_type = evt_type,
+ .value = ATOMIC64_INIT(0),
+ };
+
+ /* on each socket, init sample */
+ on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
+}
+
/*
* Find a group and setup RMID.
*
@@ -849,6 +1005,7 @@ static void intel_cqm_setup_event(struct perf_event *event,
bool conflict = false;
u32 rmid;
+ event->hw.is_group_event = false;
list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
rmid = iter->hw.cqm_rmid;
@@ -856,6 +1013,8 @@ static void intel_cqm_setup_event(struct perf_event *event,
/* All tasks in a group share an RMID */
event->hw.cqm_rmid = rmid;
*group = iter;
+ if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
+ init_mbm_sample(rmid, event->attr.config);
return;
}
@@ -872,6 +1031,9 @@ static void intel_cqm_setup_event(struct perf_event *event,
else
rmid = __get_rmid();
+ if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
+ init_mbm_sample(rmid, event->attr.config);
+
event->hw.cqm_rmid = rmid;
}
@@ -893,7 +1055,10 @@ static void intel_cqm_event_read(struct perf_event *event)
if (!__rmid_valid(rmid))
goto out;
- val = __rmid_read(rmid);
+ if (is_mbm_event(event->attr.config))
+ val = rmid_read_mbm(rmid, event->attr.config);
+ else
+ val = __rmid_read(rmid);
/*
* Ignore this reading on error states and do not update the value.
@@ -924,10 +1089,100 @@ static inline bool cqm_group_leader(struct perf_event *event)
return !list_empty(&event->hw.cqm_groups_entry);
}
+static void __intel_mbm_event_count(void *info)
+{
+ struct rmid_read *rr = info;
+ u64 val;
+
+ val = rmid_read_mbm(rr->rmid, rr->evt_type);
+ if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+ return;
+ atomic64_add(val, &rr->value);
+}
+
+static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer)
+{
+ struct perf_event *iter, *iter1;
+ int ret = HRTIMER_RESTART;
+ struct list_head *head;
+ unsigned long flags;
+ u32 grp_rmid;
+
+ /*
+ * Need to cache_lock as the timer Event Select MSR reads
+ * can race with the mbm/cqm count() and mbm_init() reads.
+ */
+ raw_spin_lock_irqsave(&cache_lock, flags);
+
+ if (list_empty(&cache_groups)) {
+ ret = HRTIMER_NORESTART;
+ goto out;
+ }
+
+ list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+ grp_rmid = iter->hw.cqm_rmid;
+ if (!__rmid_valid(grp_rmid))
+ continue;
+ if (is_mbm_event(iter->attr.config))
+ update_sample(grp_rmid, iter->attr.config, 0);
+
+ head = &iter->hw.cqm_group_entry;
+ if (list_empty(head))
+ continue;
+ list_for_each_entry(iter1, head, hw.cqm_group_entry) {
+ if (!iter1->hw.is_group_event)
+ break;
+ if (is_mbm_event(iter1->attr.config))
+ update_sample(iter1->hw.cqm_rmid,
+ iter1->attr.config, 0);
+ }
+ }
+
+ hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME));
+out:
+ raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+ return ret;
+}
+
+static void __mbm_start_timer(void *info)
+{
+ hrtimer_start(&mbm_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME),
+ HRTIMER_MODE_REL_PINNED);
+}
+
+static void __mbm_stop_timer(void *info)
+{
+ hrtimer_cancel(&mbm_timers[pkg_id]);
+}
+
+static void mbm_start_timers(void)
+{
+ on_each_cpu_mask(&cqm_cpumask, __mbm_start_timer, NULL, 1);
+}
+
+static void mbm_stop_timers(void)
+{
+ on_each_cpu_mask(&cqm_cpumask, __mbm_stop_timer, NULL, 1);
+}
+
+static void mbm_hrtimer_init(void)
+{
+ struct hrtimer *hr;
+ int i;
+
+ for (i = 0; i < mbm_socket_max; i++) {
+ hr = &mbm_timers[i];
+ hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hr->function = mbm_hrtimer_handle;
+ }
+}
+
static u64 intel_cqm_event_count(struct perf_event *event)
{
unsigned long flags;
struct rmid_read rr = {
+ .evt_type = event->attr.config,
.value = ATOMIC64_INIT(0),
};
@@ -940,7 +1195,9 @@ static u64 intel_cqm_event_count(struct perf_event *event)
return __perf_event_count(event);
/*
- * Only the group leader gets to report values. This stops us
+ * Only the group leader gets to report values except in case of
+ * multiple events in the same group, we still need to read the
+ * other events.This stops us
* reporting duplicate values to userspace, and gives us a clear
* rule for which task gets to report the values.
*
@@ -948,7 +1205,7 @@ static u64 intel_cqm_event_count(struct perf_event *event)
* specific packages - we forfeit that ability when we create
* task events.
*/
- if (!cqm_group_leader(event))
+ if (!cqm_group_leader(event) && !event->hw.is_group_event)
return 0;
/*
@@ -975,7 +1232,7 @@ static u64 intel_cqm_event_count(struct perf_event *event)
if (!__rmid_valid(rr.rmid))
goto out;
- on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+ cqm_mask_call(&rr);
raw_spin_lock_irqsave(&cache_lock, flags);
if (event->hw.cqm_rmid == rr.rmid)
@@ -1046,8 +1303,14 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
static void intel_cqm_event_destroy(struct perf_event *event)
{
struct perf_event *group_other = NULL;
+ unsigned long flags;
mutex_lock(&cache_mutex);
+ /*
+ * Hold the cache_lock as mbm timer handlers could be
+ * scanning the list of events.
+ */
+ raw_spin_lock_irqsave(&cache_lock, flags);
/*
* If there's another event in this group...
@@ -1079,6 +1342,14 @@ static void intel_cqm_event_destroy(struct perf_event *event)
}
}
+ raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+ /*
+ * Stop the mbm overflow timers when the last event is destroyed.
+ */
+ if (mbm_enabled && list_empty(&cache_groups))
+ mbm_stop_timers();
+
mutex_unlock(&cache_mutex);
}
@@ -1086,11 +1357,13 @@ static int intel_cqm_event_init(struct perf_event *event)
{
struct perf_event *group = NULL;
bool rotate = false;
+ unsigned long flags;
if (event->attr.type != intel_cqm_pmu.type)
return -ENOENT;
- if (event->attr.config & ~QOS_EVENT_MASK)
+ if ((event->attr.config < QOS_L3_OCCUP_EVENT_ID) ||
+ (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
return -EINVAL;
/* unsupported modes and filters */
@@ -1110,9 +1383,21 @@ static int intel_cqm_event_init(struct perf_event *event)
mutex_lock(&cache_mutex);
+ /*
+ * Start the mbm overflow timers when the first event is created.
+ */
+ if (mbm_enabled && list_empty(&cache_groups))
+ mbm_start_timers();
+
/* Will also set rmid */
intel_cqm_setup_event(event, &group);
+ /*
+ * Hold the cache_lock as mbm timer handlers be
+ * scanning the list of events.
+ */
+ raw_spin_lock_irqsave(&cache_lock, flags);
+
if (group) {
list_add_tail(&event->hw.cqm_group_entry,
&group->hw.cqm_group_entry);
@@ -1131,6 +1416,7 @@ static int intel_cqm_event_init(struct perf_event *event)
rotate = true;
}
+ raw_spin_unlock_irqrestore(&cache_lock, flags);
mutex_unlock(&cache_mutex);
if (rotate)
@@ -1145,6 +1431,16 @@ EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
+EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02");
+EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1");
+EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB");
+EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6");
+
+EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03");
+EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1");
+EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB");
+EVENT_ATTR_STR(local_bytes.scale, intel_cqm_local_bytes_scale, "1e-6");
+
static struct attribute *intel_cqm_events_attr[] = {
EVENT_PTR(intel_cqm_llc),
EVENT_PTR(intel_cqm_llc_pkg),
@@ -1154,9 +1450,38 @@ static struct attribute *intel_cqm_events_attr[] = {
NULL,
};
+static struct attribute *intel_mbm_events_attr[] = {
+ EVENT_PTR(intel_cqm_total_bytes),
+ EVENT_PTR(intel_cqm_local_bytes),
+ EVENT_PTR(intel_cqm_total_bytes_pkg),
+ EVENT_PTR(intel_cqm_local_bytes_pkg),
+ EVENT_PTR(intel_cqm_total_bytes_unit),
+ EVENT_PTR(intel_cqm_local_bytes_unit),
+ EVENT_PTR(intel_cqm_total_bytes_scale),
+ EVENT_PTR(intel_cqm_local_bytes_scale),
+ NULL,
+};
+
+static struct attribute *intel_cmt_mbm_events_attr[] = {
+ EVENT_PTR(intel_cqm_llc),
+ EVENT_PTR(intel_cqm_total_bytes),
+ EVENT_PTR(intel_cqm_local_bytes),
+ EVENT_PTR(intel_cqm_llc_pkg),
+ EVENT_PTR(intel_cqm_total_bytes_pkg),
+ EVENT_PTR(intel_cqm_local_bytes_pkg),
+ EVENT_PTR(intel_cqm_llc_unit),
+ EVENT_PTR(intel_cqm_total_bytes_unit),
+ EVENT_PTR(intel_cqm_local_bytes_unit),
+ EVENT_PTR(intel_cqm_llc_scale),
+ EVENT_PTR(intel_cqm_total_bytes_scale),
+ EVENT_PTR(intel_cqm_local_bytes_scale),
+ EVENT_PTR(intel_cqm_llc_snapshot),
+ NULL,
+};
+
static struct attribute_group intel_cqm_events_group = {
.name = "events",
- .attrs = intel_cqm_events_attr,
+ .attrs = NULL,
};
PMU_FORMAT_ATTR(event, "config:0-7");
@@ -1303,12 +1628,70 @@ static const struct x86_cpu_id intel_cqm_match[] = {
{}
};
+static void mbm_cleanup(void)
+{
+ if (!mbm_enabled)
+ return;
+
+ kfree(mbm_local);
+ kfree(mbm_total);
+ mbm_enabled = false;
+}
+
+static const struct x86_cpu_id intel_mbm_local_match[] = {
+ { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_LOCAL },
+ {}
+};
+
+static const struct x86_cpu_id intel_mbm_total_match[] = {
+ { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_TOTAL },
+ {}
+};
+
+static int intel_mbm_init(void)
+{
+ int ret = 0, array_size, maxid = cqm_max_rmid + 1;
+
+ mbm_socket_max = topology_max_packages();
+ array_size = sizeof(struct sample) * maxid * mbm_socket_max;
+ mbm_local = kmalloc(array_size, GFP_KERNEL);
+ if (!mbm_local)
+ return -ENOMEM;
+
+ mbm_total = kmalloc(array_size, GFP_KERNEL);
+ if (!mbm_total) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ array_size = sizeof(struct hrtimer) * mbm_socket_max;
+ mbm_timers = kmalloc(array_size, GFP_KERNEL);
+ if (!mbm_timers) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ mbm_hrtimer_init();
+
+out:
+ if (ret)
+ mbm_cleanup();
+
+ return ret;
+}
+
static int __init intel_cqm_init(void)
{
- char *str, scale[20];
+ char *str = NULL, scale[20];
int i, cpu, ret;
- if (!x86_match_cpu(intel_cqm_match))
+ if (x86_match_cpu(intel_cqm_match))
+ cqm_enabled = true;
+
+ if (x86_match_cpu(intel_mbm_local_match) &&
+ x86_match_cpu(intel_mbm_total_match))
+ mbm_enabled = true;
+
+ if (!cqm_enabled && !mbm_enabled)
return -ENODEV;
cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
@@ -1365,16 +1748,41 @@ static int __init intel_cqm_init(void)
cqm_pick_event_reader(i);
}
- __perf_cpu_notifier(intel_cqm_cpu_notifier);
+ if (mbm_enabled)
+ ret = intel_mbm_init();
+ if (ret && !cqm_enabled)
+ goto out;
+
+ if (cqm_enabled && mbm_enabled)
+ intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr;
+ else if (!cqm_enabled && mbm_enabled)
+ intel_cqm_events_group.attrs = intel_mbm_events_attr;
+ else if (cqm_enabled && !mbm_enabled)
+ intel_cqm_events_group.attrs = intel_cqm_events_attr;
ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
- if (ret)
+ if (ret) {
pr_err("Intel CQM perf registration failed: %d\n", ret);
- else
+ goto out;
+ }
+
+ if (cqm_enabled)
pr_info("Intel CQM monitoring enabled\n");
+ if (mbm_enabled)
+ pr_info("Intel MBM enabled\n");
+ /*
+ * Register the hot cpu notifier once we are sure cqm
+ * is enabled to avoid notifier leak.
+ */
+ __perf_cpu_notifier(intel_cqm_cpu_notifier);
out:
cpu_notifier_register_done();
+ if (ret) {
+ kfree(str);
+ cqm_cleanup();
+ mbm_cleanup();
+ }
return ret;
}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ce7211a07c0b..8584b90d8e0b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -570,11 +570,12 @@ int intel_pmu_drain_bts_buffer(void)
* We will overwrite the from and to address before we output
* the sample.
*/
+ rcu_read_lock();
perf_prepare_sample(&header, &data, event, &regs);
if (perf_output_begin(&handle, event, header.size *
(top - base - skip)))
- return 1;
+ goto unlock;
for (at = base; at < top; at++) {
/* Filter out any records that contain kernel addresses. */
@@ -593,6 +594,8 @@ int intel_pmu_drain_bts_buffer(void)
/* There's new data available. */
event->hw.interrupts++;
event->pending_kill = POLL_IN;
+unlock:
+ rcu_read_unlock();
return 1;
}
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index b834a3f55a01..70c93f9b03ac 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -711,6 +711,7 @@ static int __init rapl_pmu_init(void)
rapl_pmu_events_group.attrs = rapl_events_cln_attr;
break;
case 63: /* Haswell-Server */
+ case 79: /* Broadwell-Server */
apply_quirk = true;
rapl_cntr_mask = RAPL_IDX_SRV;
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
@@ -718,6 +719,7 @@ static int __init rapl_pmu_init(void)
case 60: /* Haswell */
case 69: /* Haswell-Celeron */
case 61: /* Broadwell */
+ case 71: /* Broadwell-H */
rapl_cntr_mask = RAPL_IDX_HSW;
rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
break;
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 93f6bd9bf761..ab2bcaaebe38 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -46,7 +46,6 @@
(SNBEP_PMON_CTL_EV_SEL_MASK | \
SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PMON_CTL_EV_SEL_EXT | \
SNBEP_PMON_CTL_INVERT | \
SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
@@ -148,7 +147,6 @@
/* IVBEP PCU */
#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK \
(SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_EV_SEL_EXT | \
SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
SNBEP_PMON_CTL_EDGE_DET | \
SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
@@ -258,7 +256,6 @@
SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
SNBEP_PMON_CTL_EDGE_DET | \
SNBEP_CBO_PMON_CTL_TID_EN | \
- SNBEP_PMON_CTL_EV_SEL_EXT | \
SNBEP_PMON_CTL_INVERT | \
KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
@@ -472,7 +469,7 @@ static struct attribute *snbep_uncore_cbox_formats_attr[] = {
};
static struct attribute *snbep_uncore_pcu_formats_attr[] = {
- &format_attr_event_ext.attr,
+ &format_attr_event.attr,
&format_attr_occ_sel.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
@@ -1313,7 +1310,7 @@ static struct attribute *ivbep_uncore_cbox_formats_attr[] = {
};
static struct attribute *ivbep_uncore_pcu_formats_attr[] = {
- &format_attr_event_ext.attr,
+ &format_attr_event.attr,
&format_attr_occ_sel.attr,
&format_attr_edge.attr,
&format_attr_thresh5.attr,
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 3d1a84383162..8f9afefd2dc5 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -94,7 +94,7 @@
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
+#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
@@ -245,6 +245,8 @@
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e51021c9207a..6e47e3a916f1 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -309,7 +309,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
- nodes_per_socket = ((ecx >> 8) & 7) + 1;
node_id = ecx & 7;
/* get compute unit information */
@@ -320,7 +319,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
- nodes_per_socket = ((value >> 3) & 7) + 1;
node_id = value & 7;
} else
return;
@@ -522,6 +520,18 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_MWAITX))
use_mwaitx_delay();
+
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ u32 ecx;
+
+ ecx = cpuid_ecx(0x8000001e);
+ nodes_per_socket = ((ecx >> 8) & 7) + 1;
+ } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
+ u64 value;
+
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ nodes_per_socket = ((value >> 3) & 7) + 1;
+ }
}
static void early_init_amd(struct cpuinfo_x86 *c)
@@ -539,6 +549,10 @@ static void early_init_amd(struct cpuinfo_x86 *c)
set_sched_clock_stable();
}
+ /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */
+ if (c->x86_power & BIT(12))
+ set_cpu_cap(c, X86_FEATURE_ACC_POWER);
+
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
#else
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9988caf42161..8394b3d1f94f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -692,7 +692,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_F_1_EDX] = edx;
- if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
+ if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) ||
+ ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) ||
+ (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) {
c->x86_cache_max_rmid = ecx;
c->x86_cache_occ_scale = ebx;
}