aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/acpi/boot.c404
-rw-r--r--arch/x86/kernel/apb_timer.c6
-rw-r--r--arch/x86/kernel/apic/apic.c83
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c16
-rw-r--r--arch/x86/kernel/apic/apic_noop.c23
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c10
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c14
-rw-r--r--arch/x86/kernel/apic/io_apic.c789
-rw-r--r--arch/x86/kernel/apic/probe_32.c33
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c8
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c10
-rw-r--r--arch/x86/kernel/cpu/Makefile14
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c55
-rw-r--r--arch/x86/kernel/cpu/intel.c37
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c52
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c40
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/amd_early.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/intel_early.c10
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event.c28
-rw-r--r--arch/x86/kernel/cpu/perf_event.h48
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c74
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c205
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c20
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c12
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c3175
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h439
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c1221
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c636
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c2258
-rw-r--r--arch/x86/kernel/cpu/perf_event_knc.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c6
-rw-r--r--arch/x86/kernel/crash.c563
-rw-r--r--arch/x86/kernel/devicetree.c207
-rw-r--r--arch/x86/kernel/e820.c7
-rw-r--r--arch/x86/kernel/entry_32.S18
-rw-r--r--arch/x86/kernel/entry_64.S51
-rw-r--r--arch/x86/kernel/hw_breakpoint.c8
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/i8259.c3
-rw-r--r--arch/x86/kernel/iosf_mbi.c93
-rw-r--r--arch/x86/kernel/irq_64.c6
-rw-r--r--arch/x86/kernel/irq_work.c2
-rw-r--r--arch/x86/kernel/irqinit.c17
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c554
-rw-r--r--arch/x86/kernel/kprobes/opt.c4
-rw-r--r--arch/x86/kernel/kvm.c30
-rw-r--r--arch/x86/kernel/machine_kexec_32.c3
-rw-r--r--arch/x86/kernel/machine_kexec_64.c250
-rw-r--r--arch/x86/kernel/mpparse.c111
-rw-r--r--arch/x86/kernel/pmc_atom.c11
-rw-r--r--arch/x86/kernel/preempt.S25
-rw-r--r--arch/x86/kernel/process.c17
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c3
-rw-r--r--arch/x86/kernel/ptrace.c165
-rw-r--r--arch/x86/kernel/quirks.c18
-rw-r--r--arch/x86/kernel/setup.c11
-rw-r--r--arch/x86/kernel/signal.c5
-rw-r--r--arch/x86/kernel/smpboot.c187
-rw-r--r--arch/x86/kernel/time.c2
-rw-r--r--arch/x86/kernel/tsc.c26
-rw-r--r--arch/x86/kernel/vsmp_64.c4
-rw-r--r--arch/x86/kernel/vsyscall_64.c2
-rw-r--r--arch/x86/kernel/vsyscall_gtod.c23
-rw-r--r--arch/x86/kernel/xsave.c125
75 files changed, 7372 insertions, 4971 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bde3993624f1..8f1e77440b2b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -39,8 +39,6 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
-obj-$(CONFIG_PREEMPT) += preempt.o
-
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
@@ -71,6 +69,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_X86_TSC) += trace_clock.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
+obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a531f6564ed0..a142e77693e1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -31,6 +31,7 @@
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
@@ -43,6 +44,7 @@
#include <asm/io.h>
#include <asm/mpspec.h>
#include <asm/smp.h>
+#include <asm/i8259.h>
#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
static int __initdata acpi_force = 0;
@@ -93,44 +95,7 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
-static unsigned int gsi_to_irq(unsigned int gsi)
-{
- unsigned int irq = gsi + NR_IRQS_LEGACY;
- unsigned int i;
-
- for (i = 0; i < NR_IRQS_LEGACY; i++) {
- if (isa_irq_to_gsi[i] == gsi) {
- return i;
- }
- }
-
- /* Provide an identity mapping of gsi == irq
- * except on truly weird platforms that have
- * non isa irqs in the first 16 gsis.
- */
- if (gsi >= NR_IRQS_LEGACY)
- irq = gsi;
- else
- irq = gsi_top + gsi;
-
- return irq;
-}
-
-static u32 irq_to_gsi(int irq)
-{
- unsigned int gsi;
-
- if (irq < NR_IRQS_LEGACY)
- gsi = isa_irq_to_gsi[irq];
- else if (irq < gsi_top)
- gsi = irq;
- else if (irq < (gsi_top + NR_IRQS_LEGACY))
- gsi = irq - gsi_top;
- else
- gsi = 0xffffffff;
-
- return gsi;
-}
+#define ACPI_INVALID_GSI INT_MIN
/*
* This is just a simple wrapper around early_ioremap(),
@@ -341,11 +306,145 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
#endif /*CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
+#define MP_ISA_BUS 0
+
+static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
+ u32 gsi)
+{
+ int ioapic;
+ int pin;
+ struct mpc_intsrc mp_irq;
+
+ /*
+ * Convert 'gsi' to 'ioapic.pin'.
+ */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return;
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+
+ /*
+ * TBD: This check is for faulty timer entries, where the override
+ * erroneously sets the trigger to level, resulting in a HUGE
+ * increase of timer interrupts!
+ */
+ if ((bus_irq == 0) && (trigger == 3))
+ trigger = 1;
+
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger << 2) | polarity;
+ mp_irq.srcbus = MP_ISA_BUS;
+ mp_irq.srcbusirq = bus_irq; /* IRQ */
+ mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
+ mp_irq.dstirq = pin; /* INTIN# */
+
+ mp_save_irq(&mp_irq);
+
+ /*
+ * Reset default identity mapping if gsi is also an legacy IRQ,
+ * otherwise there will be more than one entry with the same GSI
+ * and acpi_isa_irq_to_gsi() may give wrong result.
+ */
+ if (gsi < nr_legacy_irqs() && isa_irq_to_gsi[gsi] == gsi)
+ isa_irq_to_gsi[gsi] = ACPI_INVALID_GSI;
+ isa_irq_to_gsi[bus_irq] = gsi;
+}
+
+static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+ int polarity)
+{
+#ifdef CONFIG_X86_MPPARSE
+ struct mpc_intsrc mp_irq;
+ struct pci_dev *pdev;
+ unsigned char number;
+ unsigned int devfn;
+ int ioapic;
+ u8 pin;
+
+ if (!acpi_ioapic)
+ return 0;
+ if (!dev || !dev_is_pci(dev))
+ return 0;
+
+ pdev = to_pci_dev(dev);
+ number = pdev->bus->number;
+ devfn = pdev->devfn;
+ pin = pdev->pin;
+ /* print the entry should happen on mptable identically */
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+ (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+ mp_irq.srcbus = number;
+ mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+ ioapic = mp_find_ioapic(gsi);
+ mp_irq.dstapic = mpc_ioapic_id(ioapic);
+ mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
+
+ mp_save_irq(&mp_irq);
+#endif
+ return 0;
+}
+
+static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
+ int polarity)
+{
+ int irq, node;
+
+ if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+ return gsi;
+
+ /* Don't set up the ACPI SCI because it's already set up */
+ if (acpi_gbl_FADT.sci_interrupt == gsi)
+ return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
+
+ trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
+ polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
+ node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
+ if (mp_set_gsi_attr(gsi, trigger, polarity, node)) {
+ pr_warn("Failed to set pin attr for GSI%d\n", gsi);
+ return -1;
+ }
+
+ irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
+ if (irq < 0)
+ return irq;
+
+ if (enable_update_mptable)
+ mp_config_acpi_gsi(dev, gsi, trigger, polarity);
+
+ return irq;
+}
+
+static void mp_unregister_gsi(u32 gsi)
+{
+ int irq;
+
+ if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+ return;
+
+ if (acpi_gbl_FADT.sci_interrupt == gsi)
+ return;
+
+ irq = mp_map_gsi_to_irq(gsi, 0);
+ if (irq > 0)
+ mp_unmap_irq(irq);
+}
+
+static struct irq_domain_ops acpi_irqdomain_ops = {
+ .map = mp_irqdomain_map,
+ .unmap = mp_irqdomain_unmap,
+};
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
struct acpi_madt_io_apic *ioapic = NULL;
+ struct ioapic_domain_cfg cfg = {
+ .type = IOAPIC_DOMAIN_DYNAMIC,
+ .ops = &acpi_irqdomain_ops,
+ };
ioapic = (struct acpi_madt_io_apic *)header;
@@ -354,8 +453,12 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
acpi_table_print_madt_entry(header);
- mp_register_ioapic(ioapic->id,
- ioapic->address, ioapic->global_irq_base);
+ /* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */
+ if (ioapic->global_irq_base < nr_legacy_irqs())
+ cfg.type = IOAPIC_DOMAIN_LEGACY;
+
+ mp_register_ioapic(ioapic->id, ioapic->address, ioapic->global_irq_base,
+ &cfg);
return 0;
}
@@ -378,11 +481,6 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
- /*
- * mp_config_acpi_legacy_irqs() already setup IRQs < 16
- * If GSI is < 16, this will update its flags,
- * else it will create a new mp_irqs[] entry.
- */
mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
/*
@@ -504,25 +602,32 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
outb(new >> 8, 0x4d1);
}
-int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
{
- *irq = gsi_to_irq(gsi);
-
-#ifdef CONFIG_X86_IO_APIC
- if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC)
- setup_IO_APIC_irq_extra(gsi);
-#endif
+ int irq;
+ if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+ *irqp = gsi;
+ } else {
+ irq = mp_map_gsi_to_irq(gsi,
+ IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
+ if (irq < 0)
+ return -1;
+ *irqp = irq;
+ }
return 0;
}
EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
{
- if (isa_irq >= 16)
- return -1;
- *gsi = irq_to_gsi(isa_irq);
- return 0;
+ if (isa_irq < nr_legacy_irqs() &&
+ isa_irq_to_gsi[isa_irq] != ACPI_INVALID_GSI) {
+ *gsi = isa_irq_to_gsi[isa_irq];
+ return 0;
+ }
+
+ return -1;
}
static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
@@ -542,15 +647,25 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
int trigger, int polarity)
{
+ int irq = gsi;
+
#ifdef CONFIG_X86_IO_APIC
- gsi = mp_register_gsi(dev, gsi, trigger, polarity);
+ irq = mp_register_gsi(dev, gsi, trigger, polarity);
#endif
- return gsi;
+ return irq;
+}
+
+static void acpi_unregister_gsi_ioapic(u32 gsi)
+{
+#ifdef CONFIG_X86_IO_APIC
+ mp_unregister_gsi(gsi);
+#endif
}
int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity) = acpi_register_gsi_pic;
+void (*__acpi_unregister_gsi)(u32 gsi) = NULL;
#ifdef CONFIG_ACPI_SLEEP
int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel;
@@ -564,32 +679,22 @@ int (*acpi_suspend_lowlevel)(void);
*/
int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
{
- unsigned int irq;
- unsigned int plat_gsi = gsi;
-
- plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity);
- irq = gsi_to_irq(plat_gsi);
-
- return irq;
+ return __acpi_register_gsi(dev, gsi, trigger, polarity);
}
EXPORT_SYMBOL_GPL(acpi_register_gsi);
void acpi_unregister_gsi(u32 gsi)
{
+ if (__acpi_unregister_gsi)
+ __acpi_unregister_gsi(gsi);
}
EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
-void __init acpi_set_irq_model_pic(void)
-{
- acpi_irq_model = ACPI_IRQ_MODEL_PIC;
- __acpi_register_gsi = acpi_register_gsi_pic;
- acpi_ioapic = 0;
-}
-
-void __init acpi_set_irq_model_ioapic(void)
+static void __init acpi_set_irq_model_ioapic(void)
{
acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
__acpi_register_gsi = acpi_register_gsi_ioapic;
+ __acpi_unregister_gsi = acpi_unregister_gsi_ioapic;
acpi_ioapic = 1;
}
@@ -825,9 +930,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
* and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
*/
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
- acpi_parse_lapic_addr_ovr, 0);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
+ acpi_parse_lapic_addr_ovr, 0);
if (count < 0) {
printk(KERN_ERR PREFIX
"Error parsing LAPIC address override entry\n");
@@ -852,9 +956,8 @@ static int __init acpi_parse_madt_lapic_entries(void)
* and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
*/
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
- acpi_parse_lapic_addr_ovr, 0);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
+ acpi_parse_lapic_addr_ovr, 0);
if (count < 0) {
printk(KERN_ERR PREFIX
"Error parsing LAPIC address override entry\n");
@@ -882,11 +985,10 @@ static int __init acpi_parse_madt_lapic_entries(void)
return count;
}
- x2count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
- acpi_parse_x2apic_nmi, 0);
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0);
+ x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
+ acpi_parse_x2apic_nmi, 0);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
+ acpi_parse_lapic_nmi, 0);
if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
/* TBD: Cleanup to allow fallback to MPS */
@@ -897,44 +999,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
-#define MP_ISA_BUS 0
-
-void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
-{
- int ioapic;
- int pin;
- struct mpc_intsrc mp_irq;
-
- /*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = mp_find_ioapic_pin(ioapic, gsi);
-
- /*
- * TBD: This check is for faulty timer entries, where the override
- * erroneously sets the trigger to level, resulting in a HUGE
- * increase of timer interrupts!
- */
- if ((bus_irq == 0) && (trigger == 3))
- trigger = 1;
-
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (trigger << 2) | polarity;
- mp_irq.srcbus = MP_ISA_BUS;
- mp_irq.srcbusirq = bus_irq; /* IRQ */
- mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
- mp_irq.dstirq = pin; /* INTIN# */
-
- mp_save_irq(&mp_irq);
-
- isa_irq_to_gsi[bus_irq] = gsi;
-}
-
-void __init mp_config_acpi_legacy_irqs(void)
+static void __init mp_config_acpi_legacy_irqs(void)
{
int i;
struct mpc_intsrc mp_irq;
@@ -952,7 +1017,7 @@ void __init mp_config_acpi_legacy_irqs(void)
* Use the default configuration for the IRQs 0-15. Unless
* overridden by (MADT) interrupt source override entries.
*/
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < nr_legacy_irqs(); i++) {
int ioapic, pin;
unsigned int dstapic;
int idx;
@@ -1000,84 +1065,6 @@ void __init mp_config_acpi_legacy_irqs(void)
}
}
-static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
- int polarity)
-{
-#ifdef CONFIG_X86_MPPARSE
- struct mpc_intsrc mp_irq;
- struct pci_dev *pdev;
- unsigned char number;
- unsigned int devfn;
- int ioapic;
- u8 pin;
-
- if (!acpi_ioapic)
- return 0;
- if (!dev || !dev_is_pci(dev))
- return 0;
-
- pdev = to_pci_dev(dev);
- number = pdev->bus->number;
- devfn = pdev->devfn;
- pin = pdev->pin;
- /* print the entry should happen on mptable identically */
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
- (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
- mp_irq.srcbus = number;
- mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
- ioapic = mp_find_ioapic(gsi);
- mp_irq.dstapic = mpc_ioapic_id(ioapic);
- mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
-
- mp_save_irq(&mp_irq);
-#endif
- return 0;
-}
-
-int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
-{
- int ioapic;
- int ioapic_pin;
- struct io_apic_irq_attr irq_attr;
- int ret;
-
- if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
- return gsi;
-
- /* Don't set up the ACPI SCI because it's already set up */
- if (acpi_gbl_FADT.sci_interrupt == gsi)
- return gsi;
-
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0) {
- printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
- return gsi;
- }
-
- ioapic_pin = mp_find_ioapic_pin(ioapic, gsi);
-
- if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
- printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mpc_ioapic_id(ioapic),
- ioapic_pin);
- return gsi;
- }
-
- if (enable_update_mptable)
- mp_config_acpi_gsi(dev, gsi, trigger, polarity);
-
- set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
- trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
- ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr);
- if (ret < 0)
- gsi = INT_MIN;
-
- return gsi;
-}
-
/*
* Parse IOAPIC related entries in MADT
* returns 0 on success, < 0 on error
@@ -1107,9 +1094,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
return -ENODEV;
}
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic,
- MAX_IO_APICS);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic,
+ MAX_IO_APICS);
if (!count) {
printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
return -ENODEV;
@@ -1118,9 +1104,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
return count;
}
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
- nr_irqs);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
+ acpi_parse_int_src_ovr, nr_irqs);
if (count < 0) {
printk(KERN_ERR PREFIX
"Error parsing interrupt source overrides entry\n");
@@ -1139,9 +1124,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
/* Fill in identity legacy mappings where no override */
mp_config_acpi_legacy_irqs();
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
- nr_irqs);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE,
+ acpi_parse_nmi_src, nr_irqs);
if (count < 0) {
printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
/* TBD: Cleanup to allow fallback to MPS */
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index af5b08ab3b71..b708738d016e 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -146,7 +146,7 @@ static inline int is_apbt_capable(void)
static int __init apbt_clockevent_register(void)
{
struct sfi_timer_table_entry *mtmr;
- struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev);
+ struct apbt_dev *adev = this_cpu_ptr(&cpu_apbt_dev);
mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
if (mtmr == NULL) {
@@ -185,8 +185,6 @@ static void apbt_setup_irq(struct apbt_dev *adev)
irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
- /* APB timer irqs are set up as mp_irqs, timer is edge type */
- __irq_set_handler(adev->irq, handle_edge_irq, 0, "edge");
}
/* Should be called with per cpu */
@@ -200,7 +198,7 @@ void apbt_setup_secondary_clock(void)
if (!cpu)
return;
- adev = &__get_cpu_var(cpu_apbt_dev);
+ adev = this_cpu_ptr(&cpu_apbt_dev);
if (!adev->timer) {
adev->timer = dw_apb_clockevent_init(cpu, adev->name,
APBT_CLOCKEVENT_RATING, adev_virt_addr(adev),
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ad28db7e6bde..ba6cc041edb1 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -67,7 +67,7 @@ EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
/*
* The highest APIC ID seen during enumeration.
*/
-unsigned int max_physical_apicid;
+static unsigned int max_physical_apicid;
/*
* Bitmask of physically existing CPUs:
@@ -561,7 +561,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
*/
static void setup_APIC_timer(void)
{
- struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+ struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
if (this_cpu_has(X86_FEATURE_ARAT)) {
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
@@ -696,7 +696,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
static int __init calibrate_APIC_clock(void)
{
- struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+ struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
void (*real_handler)(struct clock_event_device *dev);
unsigned long deltaj;
long delta, deltatsc;
@@ -1297,7 +1297,7 @@ void setup_local_APIC(void)
unsigned int value, queued;
int i, j, acked = 0;
unsigned long long tsc = 0, ntsc;
- long long max_loops = cpu_khz;
+ long long max_loops = cpu_khz ? cpu_khz : 1000000;
if (cpu_has_tsc)
rdtscll(tsc);
@@ -1342,17 +1342,6 @@ void setup_local_APIC(void)
/* always use the value from LDR */
early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
logical_smp_processor_id();
-
- /*
- * Some NUMA implementations (NUMAQ) don't initialize apicid to
- * node mapping during NUMA init. Now that logical apicid is
- * guaranteed to be known, give it another chance. This is already
- * a bit too late - percpu allocation has already happened without
- * proper NUMA affinity.
- */
- if (apic->x86_32_numa_cpu_node)
- set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
- apic->x86_32_numa_cpu_node(cpu));
#endif
/*
@@ -1394,7 +1383,7 @@ void setup_local_APIC(void)
break;
}
if (queued) {
- if (cpu_has_tsc) {
+ if (cpu_has_tsc && cpu_khz) {
rdtscll(ntsc);
max_loops = (cpu_khz << 10) - (ntsc - tsc);
} else
@@ -2053,8 +2042,6 @@ void __init connect_bsp_APIC(void)
imcr_pic_to_apic();
}
#endif
- if (apic->enable_apic_mode)
- apic->enable_apic_mode();
}
/**
@@ -2451,51 +2438,6 @@ static void apic_pm_activate(void) { }
#ifdef CONFIG_X86_64
-static int apic_cluster_num(void)
-{
- int i, clusters, zeros;
- unsigned id;
- u16 *bios_cpu_apicid;
- DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
- bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
- bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
- for (i = 0; i < nr_cpu_ids; i++) {
- /* are we being called early in kernel startup? */
- if (bios_cpu_apicid) {
- id = bios_cpu_apicid[i];
- } else if (i < nr_cpu_ids) {
- if (cpu_present(i))
- id = per_cpu(x86_bios_cpu_apicid, i);
- else
- continue;
- } else
- break;
-
- if (id != BAD_APICID)
- __set_bit(APIC_CLUSTERID(id), clustermap);
- }
-
- /* Problem: Partially populated chassis may not have CPUs in some of
- * the APIC clusters they have been allocated. Only present CPUs have
- * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
- * Since clusters are allocated sequentially, count zeros only if
- * they are bounded by ones.
- */
- clusters = 0;
- zeros = 0;
- for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
- if (test_bit(i, clustermap)) {
- clusters += 1 + zeros;
- zeros = 0;
- } else
- ++zeros;
- }
-
- return clusters;
-}
-
static int multi_checked;
static int multi;
@@ -2540,20 +2482,7 @@ static void dmi_check_multi(void)
int apic_is_clustered_box(void)
{
dmi_check_multi();
- if (multi)
- return 1;
-
- if (!is_vsmp_box())
- return 0;
-
- /*
- * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
- * not guaranteed to be synced between boards
- */
- if (apic_cluster_num() > 1)
- return 1;
-
- return 0;
+ return multi;
}
#endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 7c1b29479513..de918c410eae 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -168,21 +168,16 @@ static struct apic apic_flat = {
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
- .check_apicid_present = NULL,
.vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = flat_init_apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
.phys_pkg_id = flat_phys_pkg_id,
- .mps_oem_check = NULL,
.get_apic_id = flat_get_apic_id,
.set_apic_id = set_apic_id,
@@ -196,10 +191,7 @@ static struct apic apic_flat = {
.send_IPI_all = flat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = false,
- .smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
@@ -283,7 +275,6 @@ static struct apic apic_physflat = {
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
- .check_apicid_present = NULL,
.vector_allocation_domain = default_vector_allocation_domain,
/* not needed, but shouldn't hurt: */
@@ -291,14 +282,10 @@ static struct apic apic_physflat = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
.phys_pkg_id = flat_phys_pkg_id,
- .mps_oem_check = NULL,
.get_apic_id = flat_get_apic_id,
.set_apic_id = set_apic_id,
@@ -312,10 +299,7 @@ static struct apic apic_physflat = {
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = false,
- .smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 8c7c98249c20..b205cdbdbe6a 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -89,16 +89,6 @@ static const struct cpumask *noop_target_cpus(void)
return cpumask_of(0);
}
-static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid)
-{
- return physid_isset(apicid, *map);
-}
-
-static unsigned long noop_check_apicid_present(int bit)
-{
- return physid_isset(bit, phys_cpu_present_map);
-}
-
static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
const struct cpumask *mask)
{
@@ -133,27 +123,21 @@ struct apic apic_noop = {
.target_cpus = noop_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = noop_check_apicid_used,
- .check_apicid_present = noop_check_apicid_present,
+ .check_apicid_used = default_check_apicid_used,
.vector_allocation_domain = noop_vector_allocation_domain,
.init_apic_ldr = noop_init_apic_ldr,
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
- .setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
.phys_pkg_id = noop_phys_pkg_id,
- .mps_oem_check = NULL,
-
.get_apic_id = noop_get_apic_id,
.set_apic_id = NULL,
.apic_id_mask = 0x0F << 24,
@@ -168,12 +152,7 @@ struct apic apic_noop = {
.wakeup_secondary_cpu = noop_wakeup_secondary_cpu,
- /* should be safe */
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-
.wait_for_init_deassert = false,
- .smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL,
.read = noop_apic_read,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index a5b45df8bc88..4128b5fcb559 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -32,7 +32,7 @@
static int numachip_system __read_mostly;
-static const struct apic apic_numachip __read_mostly;
+static const struct apic apic_numachip;
static unsigned int get_apic_id(unsigned long x)
{
@@ -217,21 +217,16 @@ static const struct apic apic_numachip __refconst = {
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
- .check_apicid_present = NULL,
.vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = flat_init_apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
.phys_pkg_id = numachip_phys_pkg_id,
- .mps_oem_check = NULL,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
@@ -246,10 +241,7 @@ static const struct apic apic_numachip __refconst = {
.send_IPI_self = numachip_send_IPI_self,
.wakeup_secondary_cpu = numachip_wakeup_secondary,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = false,
- .smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL, /* REMRD not supported */
.read = native_apic_mem_read,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index e4840aa7a255..c4a8d63f8220 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -31,11 +31,6 @@ static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
return 0;
}
-static unsigned long bigsmp_check_apicid_present(int bit)
-{
- return 1;
-}
-
static int bigsmp_early_logical_apicid(int cpu)
{
/* on bigsmp, logical apicid is the same as physical */
@@ -168,21 +163,16 @@ static struct apic apic_bigsmp = {
.disable_esr = 1,
.dest_logical = 0,
.check_apicid_used = bigsmp_check_apicid_used,
- .check_apicid_present = bigsmp_check_apicid_present,
.vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = bigsmp_init_apic_ldr,
.ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
.setup_apic_routing = bigsmp_setup_apic_routing,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
- .setup_portio_remap = NULL,
.check_phys_apicid_present = bigsmp_check_phys_apicid_present,
- .enable_apic_mode = NULL,
.phys_pkg_id = bigsmp_phys_pkg_id,
- .mps_oem_check = NULL,
.get_apic_id = bigsmp_get_apic_id,
.set_apic_id = NULL,
@@ -196,11 +186,7 @@ static struct apic apic_bigsmp = {
.send_IPI_all = bigsmp_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-
.wait_for_init_deassert = true,
- .smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 81e08eff05ee..1183d545da1e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -31,6 +31,7 @@
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/syscore_ops.h>
+#include <linux/irqdomain.h>
#include <linux/msi.h>
#include <linux/htirq.h>
#include <linux/freezer.h>
@@ -62,6 +63,16 @@
#define __apicdebuginit(type) static type __init
+#define for_each_ioapic(idx) \
+ for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
+#define for_each_ioapic_reverse(idx) \
+ for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--)
+#define for_each_pin(idx, pin) \
+ for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++)
+#define for_each_ioapic_pin(idx, pin) \
+ for_each_ioapic((idx)) \
+ for_each_pin((idx), (pin))
+
#define for_each_irq_pin(entry, head) \
for (entry = head; entry; entry = entry->next)
@@ -73,6 +84,17 @@ int sis_apic_bug = -1;
static DEFINE_RAW_SPINLOCK(ioapic_lock);
static DEFINE_RAW_SPINLOCK(vector_lock);
+static DEFINE_MUTEX(ioapic_mutex);
+static unsigned int ioapic_dynirq_base;
+static int ioapic_initialized;
+
+struct mp_pin_info {
+ int trigger;
+ int polarity;
+ int node;
+ int set;
+ u32 count;
+};
static struct ioapic {
/*
@@ -87,7 +109,9 @@ static struct ioapic {
struct mpc_ioapic mp_config;
/* IO APIC gsi routing info */
struct mp_ioapic_gsi gsi_config;
- DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+ struct ioapic_domain_cfg irqdomain_cfg;
+ struct irq_domain *irqdomain;
+ struct mp_pin_info *pin_info;
} ioapics[MAX_IO_APICS];
#define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver
@@ -107,6 +131,41 @@ struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
return &ioapics[ioapic_idx].gsi_config;
}
+static inline int mp_ioapic_pin_count(int ioapic)
+{
+ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+
+ return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+}
+
+u32 mp_pin_to_gsi(int ioapic, int pin)
+{
+ return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin;
+}
+
+/*
+ * Initialize all legacy IRQs and all pins on the first IOAPIC
+ * if we have legacy interrupt controller. Kernel boot option "pirq="
+ * may rely on non-legacy pins on the first IOAPIC.
+ */
+static inline int mp_init_irq_at_boot(int ioapic, int irq)
+{
+ if (!nr_legacy_irqs())
+ return 0;
+
+ return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs());
+}
+
+static inline struct mp_pin_info *mp_pin_info(int ioapic_idx, int pin)
+{
+ return ioapics[ioapic_idx].pin_info + pin;
+}
+
+static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
+{
+ return ioapics[ioapic].irqdomain;
+}
+
int nr_ioapics;
/* The one past the highest gsi number used */
@@ -118,9 +177,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
-/* GSI interrupts */
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
-
#ifdef CONFIG_EISA
int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif
@@ -149,8 +205,7 @@ static int __init parse_noapic(char *str)
}
early_param("noapic", parse_noapic);
-static int io_apic_setup_irq_pin(unsigned int irq, int node,
- struct io_apic_irq_attr *attr);
+static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
void mp_save_irq(struct mpc_intsrc *m)
@@ -182,19 +237,15 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
}
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
-
int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
- int count, node, i;
+ int i, node = cpu_to_node(0);
- if (!legacy_pic->nr_legacy_irqs)
+ if (!nr_legacy_irqs())
io_apic_irqs = ~0UL;
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i) {
ioapics[i].saved_registers =
kzalloc(sizeof(struct IO_APIC_route_entry) *
ioapics[i].nr_registers, GFP_KERNEL);
@@ -202,28 +253,20 @@ int __init arch_early_irq_init(void)
pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
}
- cfg = irq_cfgx;
- count = ARRAY_SIZE(irq_cfgx);
- node = cpu_to_node(0);
-
- for (i = 0; i < count; i++) {
- irq_set_chip_data(i, &cfg[i]);
- zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
- zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
- /*
- * For legacy IRQ's, start with assigning irq0 to irq15 to
- * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
- */
- if (i < legacy_pic->nr_legacy_irqs) {
- cfg[i].vector = IRQ0_VECTOR + i;
- cpumask_setall(cfg[i].domain);
- }
+ /*
+ * For legacy IRQ's, start with assigning irq0 to irq15 to
+ * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
+ */
+ for (i = 0; i < nr_legacy_irqs(); i++) {
+ cfg = alloc_irq_and_cfg_at(i, node);
+ cfg->vector = IRQ0_VECTOR + i;
+ cpumask_setall(cfg->domain);
}
return 0;
}
-static struct irq_cfg *irq_cfg(unsigned int irq)
+static inline struct irq_cfg *irq_cfg(unsigned int irq)
{
return irq_get_chip_data(irq);
}
@@ -265,7 +308,7 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
if (res < 0) {
if (res != -EEXIST)
return NULL;
- cfg = irq_get_chip_data(at);
+ cfg = irq_cfg(at);
if (cfg)
return cfg;
}
@@ -425,6 +468,21 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi
return 0;
}
+static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
+{
+ struct irq_pin_list **last, *entry;
+
+ last = &cfg->irq_2_pin;
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ if (entry->apic == apic && entry->pin == pin) {
+ *last = entry->next;
+ kfree(entry);
+ return;
+ } else {
+ last = &entry->next;
+ }
+}
+
static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
if (__add_pin_to_irq_node(cfg, node, apic, pin))
@@ -627,9 +685,8 @@ static void clear_IO_APIC (void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++)
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
- clear_IO_APIC_pin(apic, pin);
+ for_each_ioapic_pin(apic, pin)
+ clear_IO_APIC_pin(apic, pin);
}
#ifdef CONFIG_X86_32
@@ -678,13 +735,13 @@ int save_ioapic_entries(void)
int apic, pin;
int err = 0;
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for_each_ioapic(apic) {
if (!ioapics[apic].saved_registers) {
err = -ENOMEM;
continue;
}
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
+ for_each_pin(apic, pin)
ioapics[apic].saved_registers[pin] =
ioapic_read_entry(apic, pin);
}
@@ -699,11 +756,11 @@ void mask_ioapic_entries(void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for_each_ioapic(apic) {
if (!ioapics[apic].saved_registers)
continue;
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
+ for_each_pin(apic, pin) {
struct IO_APIC_route_entry entry;
entry = ioapics[apic].saved_registers[pin];
@@ -722,11 +779,11 @@ int restore_ioapic_entries(void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for_each_ioapic(apic) {
if (!ioapics[apic].saved_registers)
continue;
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
+ for_each_pin(apic, pin)
ioapic_write_entry(apic, pin,
ioapics[apic].saved_registers[pin]);
}
@@ -785,7 +842,7 @@ static int __init find_isa_irq_apic(int irq, int type)
if (i < mp_irq_entries) {
int ioapic_idx;
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ for_each_ioapic(ioapic_idx)
if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
return ioapic_idx;
}
@@ -799,7 +856,7 @@ static int __init find_isa_irq_apic(int irq, int type)
*/
static int EISA_ELCR(unsigned int irq)
{
- if (irq < legacy_pic->nr_legacy_irqs) {
+ if (irq < nr_legacy_irqs()) {
unsigned int port = 0x4d0 + (irq >> 3);
return (inb(port) >> (irq & 7)) & 1;
}
@@ -939,29 +996,106 @@ static int irq_trigger(int idx)
return trigger;
}
-static int pin_2_irq(int idx, int apic, int pin)
+static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin)
+{
+ int irq = -1;
+ int ioapic = (int)(long)domain->host_data;
+ int type = ioapics[ioapic].irqdomain_cfg.type;
+
+ switch (type) {
+ case IOAPIC_DOMAIN_LEGACY:
+ /*
+ * Dynamically allocate IRQ number for non-ISA IRQs in the first 16
+ * GSIs on some weird platforms.
+ */
+ if (gsi < nr_legacy_irqs())
+ irq = irq_create_mapping(domain, pin);
+ else if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
+ irq = gsi;
+ break;
+ case IOAPIC_DOMAIN_STRICT:
+ if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
+ irq = gsi;
+ break;
+ case IOAPIC_DOMAIN_DYNAMIC:
+ irq = irq_create_mapping(domain, pin);
+ break;
+ default:
+ WARN(1, "ioapic: unknown irqdomain type %d\n", type);
+ break;
+ }
+
+ return irq > 0 ? irq : -1;
+}
+
+static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
+ unsigned int flags)
{
int irq;
- int bus = mp_irqs[idx].srcbus;
- struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic);
+ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+ struct mp_pin_info *info = mp_pin_info(ioapic, pin);
+
+ if (!domain)
+ return -1;
+
+ mutex_lock(&ioapic_mutex);
/*
- * Debugging check, we are in big trouble if this message pops up!
+ * Don't use irqdomain to manage ISA IRQs because there may be
+ * multiple IOAPIC pins sharing the same ISA IRQ number and
+ * irqdomain only supports 1:1 mapping between IOAPIC pin and
+ * IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are used
+ * for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H).
+ * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are
+ * available, and some BIOSes may use MP Interrupt Source records
+ * to override IRQ numbers for PIRQs instead of reprogramming
+ * the interrupt routing logic. Thus there may be multiple pins
+ * sharing the same legacy IRQ number when ACPI is disabled.
*/
- if (mp_irqs[idx].dstirq != pin)
- pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
-
- if (test_bit(bus, mp_bus_not_pci)) {
+ if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
irq = mp_irqs[idx].srcbusirq;
+ if (flags & IOAPIC_MAP_ALLOC) {
+ if (info->count == 0 &&
+ mp_irqdomain_map(domain, irq, pin) != 0)
+ irq = -1;
+
+ /* special handling for timer IRQ0 */
+ if (irq == 0)
+ info->count++;
+ }
} else {
- u32 gsi = gsi_cfg->gsi_base + pin;
+ irq = irq_find_mapping(domain, pin);
+ if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC))
+ irq = alloc_irq_from_domain(domain, gsi, pin);
+ }
- if (gsi >= NR_IRQS_LEGACY)
- irq = gsi;
- else
- irq = gsi_top + gsi;
+ if (flags & IOAPIC_MAP_ALLOC) {
+ /* special handling for legacy IRQs */
+ if (irq < nr_legacy_irqs() && info->count == 1 &&
+ mp_irqdomain_map(domain, irq, pin) != 0)
+ irq = -1;
+
+ if (irq > 0)
+ info->count++;
+ else if (info->count == 0)
+ info->set = 0;
}
+ mutex_unlock(&ioapic_mutex);
+
+ return irq > 0 ? irq : -1;
+}
+
+static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)
+{
+ u32 gsi = mp_pin_to_gsi(ioapic, pin);
+
+ /*
+ * Debugging check, we are in big trouble if this message pops up!
+ */
+ if (mp_irqs[idx].dstirq != pin)
+ pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
+
#ifdef CONFIG_X86_32
/*
* PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -972,16 +1106,58 @@ static int pin_2_irq(int idx, int apic, int pin)
apic_printk(APIC_VERBOSE, KERN_DEBUG
"disabling PIRQ%d\n", pin-16);
} else {
- irq = pirq_entries[pin-16];
+ int irq = pirq_entries[pin-16];
apic_printk(APIC_VERBOSE, KERN_DEBUG
"using PIRQ%d -> IRQ %d\n",
pin-16, irq);
+ return irq;
}
}
}
#endif
- return irq;
+ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
+}
+
+int mp_map_gsi_to_irq(u32 gsi, unsigned int flags)
+{
+ int ioapic, pin, idx;
+
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return -1;
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+ idx = find_irq_entry(ioapic, pin, mp_INT);
+ if ((flags & IOAPIC_MAP_CHECK) && idx < 0)
+ return -1;
+
+ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
+}
+
+void mp_unmap_irq(int irq)
+{
+ struct irq_data *data = irq_get_irq_data(irq);
+ struct mp_pin_info *info;
+ int ioapic, pin;
+
+ if (!data || !data->domain)
+ return;
+
+ ioapic = (int)(long)data->domain->host_data;
+ pin = (int)data->hwirq;
+ info = mp_pin_info(ioapic, pin);
+
+ mutex_lock(&ioapic_mutex);
+ if (--info->count == 0) {
+ info->set = 0;
+ if (irq < nr_legacy_irqs() &&
+ ioapics[ioapic].irqdomain_cfg.type == IOAPIC_DOMAIN_LEGACY)
+ mp_irqdomain_unmap(data->domain, irq);
+ else
+ irq_dispose_mapping(irq);
+ }
+ mutex_unlock(&ioapic_mutex);
}
/*
@@ -991,7 +1167,7 @@ static int pin_2_irq(int idx, int apic, int pin)
int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
struct io_apic_irq_attr *irq_attr)
{
- int ioapic_idx, i, best_guess = -1;
+ int irq, i, best_ioapic = -1, best_idx = -1;
apic_printk(APIC_DEBUG,
"querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
@@ -1001,44 +1177,56 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
"PCI BIOS passed nonexistent PCI bus %d!\n", bus);
return -1;
}
+
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].srcbus;
+ int ioapic_idx, found = 0;
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ if (bus != lbus || mp_irqs[i].irqtype != mp_INT ||
+ slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f))
+ continue;
+
+ for_each_ioapic(ioapic_idx)
if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
- mp_irqs[i].dstapic == MP_APIC_ALL)
+ mp_irqs[i].dstapic == MP_APIC_ALL) {
+ found = 1;
break;
+ }
+ if (!found)
+ continue;
- if (!test_bit(lbus, mp_bus_not_pci) &&
- !mp_irqs[i].irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq);
+ /* Skip ISA IRQs */
+ irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0);
+ if (irq > 0 && !IO_APIC_IRQ(irq))
+ continue;
- if (!(ioapic_idx || IO_APIC_IRQ(irq)))
- continue;
+ if (pin == (mp_irqs[i].srcbusirq & 3)) {
+ best_idx = i;
+ best_ioapic = ioapic_idx;
+ goto out;
+ }
- if (pin == (mp_irqs[i].srcbusirq & 3)) {
- set_io_apic_irq_attr(irq_attr, ioapic_idx,
- mp_irqs[i].dstirq,
- irq_trigger(i),
- irq_polarity(i));
- return irq;
- }
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0) {
- set_io_apic_irq_attr(irq_attr, ioapic_idx,
- mp_irqs[i].dstirq,
- irq_trigger(i),
- irq_polarity(i));
- best_guess = irq;
- }
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_idx < 0) {
+ best_idx = i;
+ best_ioapic = ioapic_idx;
}
}
- return best_guess;
+ if (best_idx < 0)
+ return -1;
+
+out:
+ irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
+ IOAPIC_MAP_ALLOC);
+ if (irq > 0)
+ set_io_apic_irq_attr(irq_attr, best_ioapic,
+ mp_irqs[best_idx].dstirq,
+ irq_trigger(best_idx),
+ irq_polarity(best_idx));
+ return irq;
}
EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
@@ -1198,7 +1386,7 @@ void __setup_vector_irq(int cpu)
raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
for_each_active_irq(irq) {
- cfg = irq_get_chip_data(irq);
+ cfg = irq_cfg(irq);
if (!cfg)
continue;
@@ -1227,12 +1415,10 @@ static inline int IO_APIC_irq_trigger(int irq)
{
int apic, idx, pin;
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
- idx = find_irq_entry(apic, pin, mp_INT);
- if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
- return irq_trigger(idx);
- }
+ for_each_ioapic_pin(apic, pin) {
+ idx = find_irq_entry(apic, pin, mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0)))
+ return irq_trigger(idx);
}
/*
* nonexistent IRQs are edge default
@@ -1330,95 +1516,29 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
}
ioapic_register_intr(irq, cfg, attr->trigger);
- if (irq < legacy_pic->nr_legacy_irqs)
+ if (irq < nr_legacy_irqs())
legacy_pic->mask(irq);
ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
}
-static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin)
-{
- if (idx != -1)
- return false;
-
- apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
- mpc_ioapic_id(ioapic_idx), pin);
- return true;
-}
-
-static void __init __io_apic_setup_irqs(unsigned int ioapic_idx)
-{
- int idx, node = cpu_to_node(0);
- struct io_apic_irq_attr attr;
- unsigned int pin, irq;
-
- for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) {
- idx = find_irq_entry(ioapic_idx, pin, mp_INT);
- if (io_apic_pin_not_connected(idx, ioapic_idx, pin))
- continue;
-
- irq = pin_2_irq(idx, ioapic_idx, pin);
-
- if ((ioapic_idx > 0) && (irq > 16))
- continue;
-
- /*
- * Skip the timer IRQ if there's a quirk handler
- * installed and if it returns 1:
- */
- if (apic->multi_timer_check &&
- apic->multi_timer_check(ioapic_idx, irq))
- continue;
-
- set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
- irq_polarity(idx));
-
- io_apic_setup_irq_pin(irq, node, &attr);
- }
-}
-
static void __init setup_IO_APIC_irqs(void)
{
- unsigned int ioapic_idx;
+ unsigned int ioapic, pin;
+ int idx;
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
- __io_apic_setup_irqs(ioapic_idx);
-}
-
-/*
- * for the gsit that is not in first ioapic
- * but could not use acpi_register_gsi()
- * like some special sci in IBM x3330
- */
-void setup_IO_APIC_irq_extra(u32 gsi)
-{
- int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0);
- struct io_apic_irq_attr attr;
-
- /*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic_idx = mp_find_ioapic(gsi);
- if (ioapic_idx < 0)
- return;
-
- pin = mp_find_ioapic_pin(ioapic_idx, gsi);
- idx = find_irq_entry(ioapic_idx, pin, mp_INT);
- if (idx == -1)
- return;
-
- irq = pin_2_irq(idx, ioapic_idx, pin);
-
- /* Only handle the non legacy irqs on secondary ioapics */
- if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY)
- return;
-
- set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
- irq_polarity(idx));
-
- io_apic_setup_irq_pin_once(irq, node, &attr);
+ for_each_ioapic_pin(ioapic, pin) {
+ idx = find_irq_entry(ioapic, pin, mp_INT);
+ if (idx < 0)
+ apic_printk(APIC_VERBOSE,
+ KERN_DEBUG " apic %d pin %d not connected\n",
+ mpc_ioapic_id(ioapic), pin);
+ else
+ pin_2_irq(idx, ioapic, pin,
+ ioapic ? 0 : IOAPIC_MAP_ALLOC);
+ }
}
/*
@@ -1586,7 +1706,7 @@ __apicdebuginit(void) print_IO_APICs(void)
struct irq_chip *chip;
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ for_each_ioapic(ioapic_idx)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
mpc_ioapic_id(ioapic_idx),
ioapics[ioapic_idx].nr_registers);
@@ -1597,7 +1717,7 @@ __apicdebuginit(void) print_IO_APICs(void)
*/
printk(KERN_INFO "testing the IO APIC.......................\n");
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ for_each_ioapic(ioapic_idx)
print_IO_APIC(ioapic_idx);
printk(KERN_DEBUG "IRQ to pin mappings:\n");
@@ -1608,7 +1728,7 @@ __apicdebuginit(void) print_IO_APICs(void)
if (chip != &ioapic_chip)
continue;
- cfg = irq_get_chip_data(irq);
+ cfg = irq_cfg(irq);
if (!cfg)
continue;
entry = cfg->irq_2_pin;
@@ -1758,7 +1878,7 @@ __apicdebuginit(void) print_PIC(void)
unsigned int v;
unsigned long flags;
- if (!legacy_pic->nr_legacy_irqs)
+ if (!nr_legacy_irqs())
return;
printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1828,26 +1948,22 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
void __init enable_IO_APIC(void)
{
int i8259_apic, i8259_pin;
- int apic;
+ int apic, pin;
- if (!legacy_pic->nr_legacy_irqs)
+ if (!nr_legacy_irqs())
return;
- for(apic = 0; apic < nr_ioapics; apic++) {
- int pin;
+ for_each_ioapic_pin(apic, pin) {
/* See if any of the pins is in ExtINT mode */
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
- struct IO_APIC_route_entry entry;
- entry = ioapic_read_entry(apic, pin);
+ struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin);
- /* If the interrupt line is enabled and in ExtInt mode
- * I have found the pin where the i8259 is connected.
- */
- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
- ioapic_i8259.apic = apic;
- ioapic_i8259.pin = pin;
- goto found_i8259;
- }
+ /* If the interrupt line is enabled and in ExtInt mode
+ * I have found the pin where the i8259 is connected.
+ */
+ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+ ioapic_i8259.apic = apic;
+ ioapic_i8259.pin = pin;
+ goto found_i8259;
}
}
found_i8259:
@@ -1919,7 +2035,7 @@ void disable_IO_APIC(void)
*/
clear_IO_APIC();
- if (!legacy_pic->nr_legacy_irqs)
+ if (!nr_legacy_irqs())
return;
x86_io_apic_ops.disable();
@@ -1950,7 +2066,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
/*
* Set the IOAPIC ID to the value stored in the MPC table.
*/
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
+ for_each_ioapic(ioapic_idx) {
/* Read the register 0 value */
raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic_idx, 0);
@@ -2123,7 +2239,7 @@ static unsigned int startup_ioapic_irq(struct irq_data *data)
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < legacy_pic->nr_legacy_irqs) {
+ if (irq < nr_legacy_irqs()) {
legacy_pic->mask(irq);
if (legacy_pic->irq_pending(irq))
was_pending = 1;
@@ -2225,7 +2341,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
goto unlock;
}
- __this_cpu_write(vector_irq[vector], -1);
+ __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
unlock:
raw_spin_unlock(&desc->lock);
}
@@ -2253,7 +2369,7 @@ static void irq_complete_move(struct irq_cfg *cfg)
void irq_force_complete_move(int irq)
{
- struct irq_cfg *cfg = irq_get_chip_data(irq);
+ struct irq_cfg *cfg = irq_cfg(irq);
if (!cfg)
return;
@@ -2507,6 +2623,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_eoi = ack_apic_level,
.irq_set_affinity = native_ioapic_set_affinity,
.irq_retrigger = ioapic_retrigger_irq,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
};
static inline void init_IO_APIC_traps(void)
@@ -2514,26 +2631,15 @@ static inline void init_IO_APIC_traps(void)
struct irq_cfg *cfg;
unsigned int irq;
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
for_each_active_irq(irq) {
- cfg = irq_get_chip_data(irq);
+ cfg = irq_cfg(irq);
if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
* so default to an old-fashioned 8259
* interrupt if we can..
*/
- if (irq < legacy_pic->nr_legacy_irqs)
+ if (irq < nr_legacy_irqs())
legacy_pic->make_irq(irq);
else
/* Strange. Oh, well.. */
@@ -2649,8 +2755,6 @@ static int __init disable_timer_pin_setup(char *arg)
}
early_param("disable_timer_pin_1", disable_timer_pin_setup);
-int timer_through_8259 __initdata;
-
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
@@ -2661,7 +2765,7 @@ int timer_through_8259 __initdata;
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = irq_get_chip_data(0);
+ struct irq_cfg *cfg = irq_cfg(0);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
unsigned long flags;
@@ -2755,7 +2859,6 @@ static inline void __init check_timer(void)
legacy_pic->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
- timer_through_8259 = 1;
goto out;
}
/*
@@ -2827,15 +2930,54 @@ out:
*/
#define PIC_IRQS (1UL << PIC_CASCADE_IR)
+static int mp_irqdomain_create(int ioapic)
+{
+ size_t size;
+ int hwirqs = mp_ioapic_pin_count(ioapic);
+ struct ioapic *ip = &ioapics[ioapic];
+ struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
+ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+
+ size = sizeof(struct mp_pin_info) * mp_ioapic_pin_count(ioapic);
+ ip->pin_info = kzalloc(size, GFP_KERNEL);
+ if (!ip->pin_info)
+ return -ENOMEM;
+
+ if (cfg->type == IOAPIC_DOMAIN_INVALID)
+ return 0;
+
+ ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops,
+ (void *)(long)ioapic);
+ if(!ip->irqdomain) {
+ kfree(ip->pin_info);
+ ip->pin_info = NULL;
+ return -ENOMEM;
+ }
+
+ if (cfg->type == IOAPIC_DOMAIN_LEGACY ||
+ cfg->type == IOAPIC_DOMAIN_STRICT)
+ ioapic_dynirq_base = max(ioapic_dynirq_base,
+ gsi_cfg->gsi_end + 1);
+
+ if (gsi_cfg->gsi_base == 0)
+ irq_set_default_host(ip->irqdomain);
+
+ return 0;
+}
+
void __init setup_IO_APIC(void)
{
+ int ioapic;
/*
* calling enable_IO_APIC() is moved to setup_local_APIC for BP
*/
- io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
+ io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+ for_each_ioapic(ioapic)
+ BUG_ON(mp_irqdomain_create(ioapic));
+
/*
* Set up IO-APIC IRQ routing.
*/
@@ -2844,8 +2986,10 @@ void __init setup_IO_APIC(void)
sync_Arb_IDs();
setup_IO_APIC_irqs();
init_IO_APIC_traps();
- if (legacy_pic->nr_legacy_irqs)
+ if (nr_legacy_irqs())
check_timer();
+
+ ioapic_initialized = 1;
}
/*
@@ -2880,7 +3024,7 @@ static void ioapic_resume(void)
{
int ioapic_idx;
- for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--)
+ for_each_ioapic_reverse(ioapic_idx)
resume_ioapic_id(ioapic_idx);
restore_ioapic_entries();
@@ -2926,7 +3070,7 @@ int arch_setup_hwirq(unsigned int irq, int node)
void arch_teardown_hwirq(unsigned int irq)
{
- struct irq_cfg *cfg = irq_get_chip_data(irq);
+ struct irq_cfg *cfg = irq_cfg(irq);
unsigned long flags;
free_remapped_irq(irq);
@@ -3030,6 +3174,7 @@ static struct irq_chip msi_chip = {
.irq_ack = ack_apic_edge,
.irq_set_affinity = msi_set_affinity,
.irq_retrigger = ioapic_retrigger_irq,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
};
int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
@@ -3053,7 +3198,7 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
if (!irq_offset)
write_msi_msg(irq, &msg);
- setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
+ setup_remapped_irq(irq, irq_cfg(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
@@ -3128,6 +3273,7 @@ static struct irq_chip dmar_msi_type = {
.irq_ack = ack_apic_edge,
.irq_set_affinity = dmar_msi_set_affinity,
.irq_retrigger = ioapic_retrigger_irq,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
};
int arch_setup_dmar_msi(unsigned int irq)
@@ -3178,6 +3324,7 @@ static struct irq_chip hpet_msi_type = {
.irq_ack = ack_apic_edge,
.irq_set_affinity = hpet_msi_set_affinity,
.irq_retrigger = ioapic_retrigger_irq,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
};
int default_setup_hpet_msi(unsigned int irq, unsigned int id)
@@ -3192,7 +3339,7 @@ int default_setup_hpet_msi(unsigned int irq, unsigned int id)
hpet_msi_write(irq_get_handler_data(irq), &msg);
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
+ setup_remapped_irq(irq, irq_cfg(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
@@ -3241,6 +3388,7 @@ static struct irq_chip ht_irq_chip = {
.irq_ack = ack_apic_edge,
.irq_set_affinity = ht_set_affinity,
.irq_retrigger = ioapic_retrigger_irq,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
};
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
@@ -3303,27 +3451,6 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
return ret;
}
-int io_apic_setup_irq_pin_once(unsigned int irq, int node,
- struct io_apic_irq_attr *attr)
-{
- unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
- int ret;
- struct IO_APIC_route_entry orig_entry;
-
- /* Avoid redundant programming */
- if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n", mpc_ioapic_id(ioapic_idx), pin);
- orig_entry = ioapic_read_entry(attr->ioapic, pin);
- if (attr->trigger == orig_entry.trigger && attr->polarity == orig_entry.polarity)
- return 0;
- return -EBUSY;
- }
- ret = io_apic_setup_irq_pin(irq, node, attr);
- if (!ret)
- set_bit(pin, ioapics[ioapic_idx].pin_programmed);
- return ret;
-}
-
static int __init io_apic_get_redir_entries(int ioapic)
{
union IO_APIC_reg_01 reg_01;
@@ -3340,20 +3467,13 @@ static int __init io_apic_get_redir_entries(int ioapic)
return reg_01.bits.entries + 1;
}
-static void __init probe_nr_irqs_gsi(void)
-{
- int nr;
-
- nr = gsi_top + NR_IRQS_LEGACY;
- if (nr > nr_irqs_gsi)
- nr_irqs_gsi = nr;
-
- printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
-}
-
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
- return from < nr_irqs_gsi ? nr_irqs_gsi : from;
+ /*
+ * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
+ * gsi_top if ioapic_dynirq_base hasn't been initialized yet.
+ */
+ return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
}
int __init arch_probe_nr_irqs(void)
@@ -3363,33 +3483,17 @@ int __init arch_probe_nr_irqs(void)
if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
nr_irqs = NR_VECTORS * nr_cpu_ids;
- nr = nr_irqs_gsi + 8 * nr_cpu_ids;
+ nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
/*
* for MSI and HT dyn irq
*/
- nr += nr_irqs_gsi * 16;
+ nr += gsi_top * 16;
#endif
if (nr < nr_irqs)
nr_irqs = nr;
- return NR_IRQS_LEGACY;
-}
-
-int io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
-{
- int node;
-
- if (!IO_APIC_IRQ(irq)) {
- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- irq_attr->ioapic);
- return -EINVAL;
- }
-
- node = dev ? dev_to_node(dev) : cpu_to_node(0);
-
- return io_apic_setup_irq_pin_once(irq, node, irq_attr);
+ return 0;
}
#ifdef CONFIG_X86_32
@@ -3483,9 +3587,8 @@ static u8 __init io_apic_unique_id(u8 id)
DECLARE_BITMAP(used, 256);
bitmap_zero(used, 256);
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i)
__set_bit(mpc_ioapic_id(i), used);
- }
if (!test_bit(id, used))
return id;
return find_first_zero_bit(used, 256);
@@ -3543,14 +3646,13 @@ void __init setup_ioapic_dest(void)
if (skip_ioapic_setup == 1)
return;
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
- for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) {
+ for_each_ioapic_pin(ioapic, pin) {
irq_entry = find_irq_entry(ioapic, pin, mp_INT);
if (irq_entry == -1)
continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
- if ((ioapic > 0) && (irq > 16))
+ irq = pin_2_irq(irq_entry, ioapic, pin, 0);
+ if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
continue;
idata = irq_get_irq_data(irq);
@@ -3573,29 +3675,33 @@ void __init setup_ioapic_dest(void)
static struct resource *ioapic_resources;
-static struct resource * __init ioapic_setup_resources(int nr_ioapics)
+static struct resource * __init ioapic_setup_resources(void)
{
unsigned long n;
struct resource *res;
char *mem;
- int i;
+ int i, num = 0;
- if (nr_ioapics <= 0)
+ for_each_ioapic(i)
+ num++;
+ if (num == 0)
return NULL;
n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
- n *= nr_ioapics;
+ n *= num;
mem = alloc_bootmem(n);
res = (void *)mem;
- mem += sizeof(struct resource) * nr_ioapics;
+ mem += sizeof(struct resource) * num;
- for (i = 0; i < nr_ioapics; i++) {
- res[i].name = mem;
- res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ num = 0;
+ for_each_ioapic(i) {
+ res[num].name = mem;
+ res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
mem += IOAPIC_RESOURCE_NAME_SIZE;
+ num++;
}
ioapic_resources = res;
@@ -3609,8 +3715,8 @@ void __init native_io_apic_init_mappings(void)
struct resource *ioapic_res;
int i;
- ioapic_res = ioapic_setup_resources(nr_ioapics);
- for (i = 0; i < nr_ioapics; i++) {
+ ioapic_res = ioapic_setup_resources();
+ for_each_ioapic(i) {
if (smp_found_config) {
ioapic_phys = mpc_ioapic_addr(i);
#ifdef CONFIG_X86_32
@@ -3641,8 +3747,6 @@ fake_ioapic_page:
ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
ioapic_res++;
}
-
- probe_nr_irqs_gsi();
}
void __init ioapic_insert_resources(void)
@@ -3657,7 +3761,7 @@ void __init ioapic_insert_resources(void)
return;
}
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i) {
insert_resource(&iomem_resource, r);
r++;
}
@@ -3665,16 +3769,15 @@ void __init ioapic_insert_resources(void)
int mp_find_ioapic(u32 gsi)
{
- int i = 0;
+ int i;
if (nr_ioapics == 0)
return -1;
/* Find the IOAPIC that manages this GSI. */
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i) {
struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
- if ((gsi >= gsi_cfg->gsi_base)
- && (gsi <= gsi_cfg->gsi_end))
+ if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end)
return i;
}
@@ -3686,7 +3789,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
{
struct mp_ioapic_gsi *gsi_cfg;
- if (WARN_ON(ioapic == -1))
+ if (WARN_ON(ioapic < 0))
return -1;
gsi_cfg = mp_ioapic_gsi_routing(ioapic);
@@ -3729,7 +3832,8 @@ static __init int bad_ioapic_register(int idx)
return 0;
}
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
+ struct ioapic_domain_cfg *cfg)
{
int idx = 0;
int entries;
@@ -3743,6 +3847,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
ioapics[idx].mp_config.type = MP_IOAPIC;
ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
ioapics[idx].mp_config.apicaddr = address;
+ ioapics[idx].irqdomain = NULL;
+ ioapics[idx].irqdomain_cfg = *cfg;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
@@ -3779,6 +3885,97 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
nr_ioapics++;
}
+int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ int ioapic = (int)(long)domain->host_data;
+ struct mp_pin_info *info = mp_pin_info(ioapic, hwirq);
+ struct io_apic_irq_attr attr;
+
+ /* Get default attribute if not set by caller yet */
+ if (!info->set) {
+ u32 gsi = mp_pin_to_gsi(ioapic, hwirq);
+
+ if (acpi_get_override_irq(gsi, &info->trigger,
+ &info->polarity) < 0) {
+ /*
+ * PCI interrupts are always polarity one level
+ * triggered.
+ */
+ info->trigger = 1;
+ info->polarity = 1;
+ }
+ info->node = NUMA_NO_NODE;
+
+ /*
+ * setup_IO_APIC_irqs() programs all legacy IRQs with default
+ * trigger and polarity attributes. Don't set the flag for that
+ * case so the first legacy IRQ user could reprogram the pin
+ * with real trigger and polarity attributes.
+ */
+ if (virq >= nr_legacy_irqs() || info->count)
+ info->set = 1;
+ }
+ set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger,
+ info->polarity);
+
+ return io_apic_setup_irq_pin(virq, info->node, &attr);
+}
+
+void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq)
+{
+ struct irq_data *data = irq_get_irq_data(virq);
+ struct irq_cfg *cfg = irq_cfg(virq);
+ int ioapic = (int)(long)domain->host_data;
+ int pin = (int)data->hwirq;
+
+ ioapic_mask_entry(ioapic, pin);
+ __remove_pin_from_irq(cfg, ioapic, pin);
+ WARN_ON(cfg->irq_2_pin != NULL);
+ arch_teardown_hwirq(virq);
+}
+
+int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node)
+{
+ int ret = 0;
+ int ioapic, pin;
+ struct mp_pin_info *info;
+
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return -ENODEV;
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+ info = mp_pin_info(ioapic, pin);
+ trigger = trigger ? 1 : 0;
+ polarity = polarity ? 1 : 0;
+
+ mutex_lock(&ioapic_mutex);
+ if (!info->set) {
+ info->trigger = trigger;
+ info->polarity = polarity;
+ info->node = node;
+ info->set = 1;
+ } else if (info->trigger != trigger || info->polarity != polarity) {
+ ret = -EBUSY;
+ }
+ mutex_unlock(&ioapic_mutex);
+
+ return ret;
+}
+
+bool mp_should_keep_irq(struct device *dev)
+{
+ if (dev->power.is_prepared)
+ return true;
+#ifdef CONFIG_PM_RUNTIME
+ if (dev->power.runtime_status == RPM_SUSPENDING)
+ return true;
+#endif
+
+ return false;
+}
+
/* Enable IOAPIC early just for system timer */
void __init pre_init_apic_IRQ0(void)
{
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index cceb352c968c..bda488680dbc 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -88,21 +88,16 @@ static struct apic apic_default = {
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = default_check_apicid_used,
- .check_apicid_present = default_check_apicid_present,
.vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = default_init_apic_ldr,
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = setup_apic_flat_routing,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
- .setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
.phys_pkg_id = default_phys_pkg_id,
- .mps_oem_check = NULL,
.get_apic_id = default_get_apic_id,
.set_apic_id = NULL,
@@ -116,11 +111,7 @@ static struct apic apic_default = {
.send_IPI_all = default_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-
.wait_for_init_deassert = true,
- .smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
@@ -214,29 +205,7 @@ void __init generic_apic_probe(void)
printk(KERN_INFO "Using APIC driver %s\n", apic->name);
}
-/* These functions can switch the APIC even after the initial ->probe() */
-
-int __init
-generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
-{
- struct apic **drv;
-
- for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
- if (!((*drv)->mps_oem_check))
- continue;
- if (!(*drv)->mps_oem_check(mpc, oem, productid))
- continue;
-
- if (!cmdline_apic) {
- apic = *drv;
- printk(KERN_INFO "Switched to APIC driver `%s'.\n",
- apic->name);
- }
- return 1;
- }
- return 0;
-}
-
+/* This function can switch the APIC even after the initial ->probe() */
int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
struct apic **drv;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index e66766bf1641..e658f21681c8 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -42,7 +42,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
* We are to modify mask, so we need an own copy
* and be sure it's manipulated with irq off.
*/
- ipi_mask_ptr = __raw_get_cpu_var(ipi_mask);
+ ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask);
cpumask_copy(ipi_mask_ptr, mask);
/*
@@ -249,21 +249,16 @@ static struct apic apic_x2apic_cluster = {
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
- .check_apicid_present = NULL,
.vector_allocation_domain = cluster_vector_allocation_domain,
.init_apic_ldr = init_x2apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
.phys_pkg_id = x2apic_phys_pkg_id,
- .mps_oem_check = NULL,
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = x2apic_set_apic_id,
@@ -277,10 +272,7 @@ static struct apic apic_x2apic_cluster = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = false,
- .smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL,
.read = native_apic_msr_read,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 6d600ebf6c12..6fae733e9194 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -103,21 +103,16 @@ static struct apic apic_x2apic_phys = {
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
- .check_apicid_present = NULL,
.vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = init_x2apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
.phys_pkg_id = x2apic_phys_pkg_id,
- .mps_oem_check = NULL,
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = x2apic_set_apic_id,
@@ -131,10 +126,7 @@ static struct apic apic_x2apic_phys = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = false,
- .smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL,
.read = native_apic_msr_read,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 293b41df54ef..8e9dcfd630e4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -204,7 +204,6 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
-#ifdef CONFIG_SMP
unsigned long val;
int pnode;
@@ -223,7 +222,6 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
atomic_set(&init_deasserted, 1);
-#endif
return 0;
}
@@ -365,21 +363,16 @@ static struct apic __refdata apic_x2apic_uv_x = {
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
- .check_apicid_present = NULL,
.vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = uv_init_apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
.phys_pkg_id = uv_phys_pkg_id,
- .mps_oem_check = NULL,
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = set_apic_id,
@@ -394,10 +387,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
.send_IPI_self = uv_send_IPI_self,
.wakeup_secondary_cpu = uv_wakeup_secondary,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = false,
- .smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL,
.read = native_apic_msr_read,
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 7fd54f09b011..e27b49d7c922 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -13,10 +13,13 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
obj-y := intel_cacheinfo.o scattered.o topology.o
-obj-y += proc.o capflags.o powerflags.o common.o
+obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
+obj-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+
obj-$(CONFIG_X86_32) += bugs.o
obj-$(CONFIG_X86_64) += bugs_64.o
@@ -36,7 +39,12 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
endif
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_rapl.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o
+
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
+ perf_event_intel_uncore_snb.o \
+ perf_event_intel_uncore_snbep.o \
+ perf_event_intel_uncore_nhmex.o
endif
@@ -48,6 +56,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
+ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
@@ -56,3 +65,4 @@ cpufeature = $(src)/../../include/asm/cpufeature.h
targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
$(call if_changed,mkcapflags)
+endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 60e5497681f5..813d29d00a17 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c)
}
#endif
+ /*
+ * This is only needed to tell the kernel whether to use VMCALL
+ * and VMMCALL. VMMCALL is never executed except under virt, so
+ * we can set it unconditionally.
+ */
+ set_cpu_cap(c, X86_FEATURE_VMMCALL);
+
/* F16h erratum 793, CVE-2013-6885 */
if (c->x86 == 0x16 && c->x86_model <= 0xf)
msr_set_bit(MSR_AMD64_LS_CFG, 15);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 333fd5209336..4b4f78c9ba19 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -148,6 +148,7 @@ static int __init x86_xsave_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
setup_clear_cpu_cap(X86_FEATURE_AVX);
setup_clear_cpu_cap(X86_FEATURE_AVX2);
return 1;
@@ -161,6 +162,13 @@ static int __init x86_xsaveopt_setup(char *s)
}
__setup("noxsaveopt", x86_xsaveopt_setup);
+static int __init x86_xsaves_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+ return 1;
+}
+__setup("noxsaves", x86_xsaves_setup);
+
#ifdef CONFIG_X86_32
static int cachesize_override = -1;
static int disable_x86_serial_nr = 1;
@@ -338,8 +346,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
continue;
printk(KERN_WARNING
- "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
- x86_cap_flags[df->feature], df->level);
+ "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
+ x86_cap_flag(df->feature), df->level);
}
}
@@ -956,6 +964,7 @@ static void vgetcpu_set_mode(void)
vgetcpu_mode = VGETCPU_LSL;
}
+#ifdef CONFIG_IA32_EMULATION
/* May not be __init: called during resume */
static void syscall32_cpu_init(void)
{
@@ -967,7 +976,8 @@ static void syscall32_cpu_init(void)
wrmsrl(MSR_CSTAR, ia32_cstar_target);
}
-#endif
+#endif /* CONFIG_IA32_EMULATION */
+#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_32
void enable_sep_cpu(void)
@@ -1176,7 +1186,7 @@ void syscall_init(void)
/* Flags to clear on syscall */
wrmsrl(MSR_SYSCALL_MASK,
X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
- X86_EFLAGS_IOPL|X86_EFLAGS_AC);
+ X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
}
/*
@@ -1190,9 +1200,9 @@ DEFINE_PER_CPU(int, debug_stack_usage);
int is_debug_stack(unsigned long addr)
{
- return __get_cpu_var(debug_stack_usage) ||
- (addr <= __get_cpu_var(debug_stack_addr) &&
- addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
+ return __this_cpu_read(debug_stack_usage) ||
+ (addr <= __this_cpu_read(debug_stack_addr) &&
+ addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ));
}
NOKPROBE_SYMBOL(is_debug_stack);
@@ -1258,6 +1268,19 @@ static void dbg_restore_debug_regs(void)
#define dbg_restore_debug_regs()
#endif /* ! CONFIG_KGDB */
+static void wait_for_master_cpu(int cpu)
+{
+#ifdef CONFIG_SMP
+ /*
+ * wait for ACK from master CPU before continuing
+ * with AP initialization
+ */
+ WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
+ while (!cpumask_test_cpu(cpu, cpu_callout_mask))
+ cpu_relax();
+#endif
+}
+
/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
@@ -1273,16 +1296,17 @@ void cpu_init(void)
struct task_struct *me;
struct tss_struct *t;
unsigned long v;
- int cpu;
+ int cpu = stack_smp_processor_id();
int i;
+ wait_for_master_cpu(cpu);
+
/*
* Load microcode on this cpu if a valid microcode is available.
* This is early microcode loading procedure.
*/
load_ucode_ap();
- cpu = stack_smp_processor_id();
t = &per_cpu(init_tss, cpu);
oist = &per_cpu(orig_ist, cpu);
@@ -1294,9 +1318,6 @@ void cpu_init(void)
me = current;
- if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
- panic("CPU#%d already initialized!\n", cpu);
-
pr_debug("Initializing CPU#%d\n", cpu);
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
@@ -1373,17 +1394,13 @@ void cpu_init(void)
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct thread_struct *thread = &curr->thread;
- show_ucode_info_early();
+ wait_for_master_cpu(cpu);
- if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
- printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
- for (;;)
- local_irq_enable();
- }
+ show_ucode_info_early();
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
- if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
+ if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de)
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
load_current_idt();
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 74e804ddc5c7..9cc6b6f25f42 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -144,6 +144,21 @@ static void early_init_intel(struct cpuinfo_x86 *c)
setup_clear_cpu_cap(X86_FEATURE_ERMS);
}
}
+
+ /*
+ * Intel Quark Core DevMan_001.pdf section 6.4.11
+ * "The operating system also is required to invalidate (i.e., flush)
+ * the TLB when any changes are made to any of the page table entries.
+ * The operating system must reload CR3 to cause the TLB to be flushed"
+ *
+ * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
+ * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
+ * to be modified
+ */
+ if (c->x86 == 5 && c->x86_model == 9) {
+ pr_info("Disabling PGE capability bit\n");
+ setup_clear_cpu_cap(X86_FEATURE_PGE);
+ }
}
#ifdef CONFIG_X86_32
@@ -198,12 +213,13 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_F00F_BUG
/*
- * All current models of Pentium and Pentium with MMX technology CPUs
+ * All models of Pentium and Pentium with MMX technology CPUs
* have the F0 0F bug, which lets nonprivileged users lock up the
* system. Announce that the fault handler will be checking for it.
+ * The Quark is also family 5, but does not have the same bug.
*/
clear_cpu_bug(c, X86_BUG_F00F);
- if (!paravirt_enabled() && c->x86 == 5) {
+ if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) {
static int f00f_workaround_enabled;
set_cpu_bug(c, X86_BUG_F00F);
@@ -382,6 +398,13 @@ static void init_intel(struct cpuinfo_x86 *c)
}
l2 = init_intel_cacheinfo(c);
+
+ /* Detect legacy cache sizes if init_intel_cacheinfo did not */
+ if (l2 == 0) {
+ cpu_detect_cache_sizes(c);
+ l2 = c->x86_cache_size;
+ }
+
if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
/* Check for version and the number of counters */
@@ -485,6 +508,13 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
*/
if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
size = 256;
+
+ /*
+ * Intel Quark SoC X1000 contains a 4-way set associative
+ * 16K cache with a 16 byte cache line and 256 lines per tag
+ */
+ if ((c->x86 == 5) && (c->x86_model == 9))
+ size = 16;
return size;
}
#endif
@@ -686,7 +716,8 @@ static const struct cpu_dev intel_cpu_dev = {
[3] = "OverDrive PODP5V83",
[4] = "Pentium MMX",
[7] = "Mobile Pentium 75 - 200",
- [8] = "Mobile Pentium MMX"
+ [8] = "Mobile Pentium MMX",
+ [9] = "Quark SoC X1000",
}
},
{ .family = 6, .model_names =
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 9c8f7394c612..c7035073dfc1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -461,7 +461,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
- if (strict_strtoul(buf, 10, &val) < 0)
+ if (kstrtoul(buf, 10, &val) < 0)
return -EINVAL;
err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
@@ -511,7 +511,7 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
return -EINVAL;
- if (strict_strtoul(buf, 16, &val) < 0)
+ if (kstrtoul(buf, 16, &val) < 0)
return -EINVAL;
if (amd_set_subcaches(cpu, val))
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 5ac2d1fb28bc..4cfba4371a71 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -83,7 +83,7 @@ static DEFINE_MUTEX(mce_inject_mutex);
static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
int cpu = smp_processor_id();
- struct mce *m = &__get_cpu_var(injectm);
+ struct mce *m = this_cpu_ptr(&injectm);
if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
return NMI_DONE;
cpumask_clear_cpu(cpu, mce_inject_cpumask);
@@ -97,7 +97,7 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
static void mce_irq_ipi(void *info)
{
int cpu = smp_processor_id();
- struct mce *m = &__get_cpu_var(injectm);
+ struct mce *m = this_cpu_ptr(&injectm);
if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
m->inject_flags & MCJ_EXCEPTION) {
@@ -109,7 +109,7 @@ static void mce_irq_ipi(void *info)
/* Inject mce on current CPU */
static int raise_local(void)
{
- struct mce *m = &__get_cpu_var(injectm);
+ struct mce *m = this_cpu_ptr(&injectm);
int context = MCJ_CTX(m->inject_flags);
int ret = 0;
int cpu = m->extcpu;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 4fc57975acc1..61a9668cebfd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -400,7 +400,7 @@ static u64 mce_rdmsrl(u32 msr)
if (offset < 0)
return 0;
- return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
+ return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
}
if (rdmsrl_safe(msr, &v)) {
@@ -422,7 +422,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
int offset = msr_to_offset(msr);
if (offset >= 0)
- *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
+ *(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
return;
}
wrmsrl(msr, v);
@@ -478,7 +478,7 @@ static DEFINE_PER_CPU(struct mce_ring, mce_ring);
/* Runs with CPU affinity in workqueue */
static int mce_ring_empty(void)
{
- struct mce_ring *r = &__get_cpu_var(mce_ring);
+ struct mce_ring *r = this_cpu_ptr(&mce_ring);
return r->start == r->end;
}
@@ -490,7 +490,7 @@ static int mce_ring_get(unsigned long *pfn)
*pfn = 0;
get_cpu();
- r = &__get_cpu_var(mce_ring);
+ r = this_cpu_ptr(&mce_ring);
if (r->start == r->end)
goto out;
*pfn = r->ring[r->start];
@@ -504,7 +504,7 @@ out:
/* Always runs in MCE context with preempt off */
static int mce_ring_add(unsigned long pfn)
{
- struct mce_ring *r = &__get_cpu_var(mce_ring);
+ struct mce_ring *r = this_cpu_ptr(&mce_ring);
unsigned next;
next = (r->end + 1) % MCE_RING_SIZE;
@@ -526,7 +526,7 @@ int mce_available(struct cpuinfo_x86 *c)
static void mce_schedule_work(void)
{
if (!mce_ring_empty())
- schedule_work(&__get_cpu_var(mce_work));
+ schedule_work(this_cpu_ptr(&mce_work));
}
DEFINE_PER_CPU(struct irq_work, mce_irq_work);
@@ -551,7 +551,7 @@ static void mce_report_event(struct pt_regs *regs)
return;
}
- irq_work_queue(&__get_cpu_var(mce_irq_work));
+ irq_work_queue(this_cpu_ptr(&mce_irq_work));
}
/*
@@ -1045,7 +1045,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
mce_gather_info(&m, regs);
- final = &__get_cpu_var(mces_seen);
+ final = this_cpu_ptr(&mces_seen);
*final = m;
memset(valid_banks, 0, sizeof(valid_banks));
@@ -1278,22 +1278,22 @@ static unsigned long (*mce_adjust_timer)(unsigned long interval) =
static int cmc_error_seen(void)
{
- unsigned long *v = &__get_cpu_var(mce_polled_error);
+ unsigned long *v = this_cpu_ptr(&mce_polled_error);
return test_and_clear_bit(0, v);
}
static void mce_timer_fn(unsigned long data)
{
- struct timer_list *t = &__get_cpu_var(mce_timer);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
unsigned long iv;
int notify;
WARN_ON(smp_processor_id() != data);
- if (mce_available(__this_cpu_ptr(&cpu_info))) {
+ if (mce_available(this_cpu_ptr(&cpu_info))) {
machine_check_poll(MCP_TIMESTAMP,
- &__get_cpu_var(mce_poll_banks));
+ this_cpu_ptr(&mce_poll_banks));
mce_intel_cmci_poll();
}
@@ -1323,7 +1323,7 @@ static void mce_timer_fn(unsigned long data)
*/
void mce_timer_kick(unsigned long interval)
{
- struct timer_list *t = &__get_cpu_var(mce_timer);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
unsigned long when = jiffies + interval;
unsigned long iv = __this_cpu_read(mce_next_interval);
@@ -1659,7 +1659,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
static void __mcheck_cpu_init_timer(void)
{
- struct timer_list *t = &__get_cpu_var(mce_timer);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
unsigned int cpu = smp_processor_id();
setup_timer(t, mce_timer_fn, cpu);
@@ -1702,8 +1702,8 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_timer();
- INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
- init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb);
+ INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
+ init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
}
/*
@@ -1955,7 +1955,7 @@ static struct miscdevice mce_chrdev_device = {
static void __mce_disable_bank(void *arg)
{
int bank = *((int *)arg);
- __clear_bit(bank, __get_cpu_var(mce_poll_banks));
+ __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
cmci_disable_bank(bank);
}
@@ -2065,7 +2065,7 @@ static void mce_syscore_shutdown(void)
static void mce_syscore_resume(void)
{
__mcheck_cpu_init_generic();
- __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
+ __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
}
static struct syscore_ops mce_syscore_ops = {
@@ -2080,7 +2080,7 @@ static struct syscore_ops mce_syscore_ops = {
static void mce_cpu_restart(void *data)
{
- if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
__mcheck_cpu_init_timer();
@@ -2096,14 +2096,14 @@ static void mce_restart(void)
/* Toggle features for corrected errors */
static void mce_disable_cmci(void *data)
{
- if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
cmci_clear();
}
static void mce_enable_ce(void *all)
{
- if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
cmci_reenable();
cmci_recheck();
@@ -2136,7 +2136,7 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
{
u64 new;
- if (strict_strtoull(buf, 0, &new) < 0)
+ if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
attr_to_bank(attr)->ctl = new;
@@ -2174,7 +2174,7 @@ static ssize_t set_ignore_ce(struct device *s,
{
u64 new;
- if (strict_strtoull(buf, 0, &new) < 0)
+ if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
if (mca_cfg.ignore_ce ^ !!new) {
@@ -2198,7 +2198,7 @@ static ssize_t set_cmci_disabled(struct device *s,
{
u64 new;
- if (strict_strtoull(buf, 0, &new) < 0)
+ if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
if (mca_cfg.cmci_disabled ^ !!new) {
@@ -2336,7 +2336,7 @@ static void mce_disable_cpu(void *h)
unsigned long action = *(unsigned long *)h;
int i;
- if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
if (!(action & CPU_TASKS_FROZEN))
@@ -2354,7 +2354,7 @@ static void mce_reenable_cpu(void *h)
unsigned long action = *(unsigned long *)h;
int i;
- if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
if (!(action & CPU_TASKS_FROZEN))
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 603df4f74640..5d4999f95aec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -310,7 +310,7 @@ static void amd_threshold_interrupt(void)
* event.
*/
machine_check_poll(MCP_TIMESTAMP,
- &__get_cpu_var(mce_poll_banks));
+ this_cpu_ptr(&mce_poll_banks));
if (high & MASK_OVERFLOW_HI) {
rdmsrl(address, m.misc);
@@ -353,7 +353,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
if (!b->interrupt_capable)
return -EINVAL;
- if (strict_strtoul(buf, 0, &new) < 0)
+ if (kstrtoul(buf, 0, &new) < 0)
return -EINVAL;
b->interrupt_enable = !!new;
@@ -372,7 +372,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
struct thresh_restart tr;
unsigned long new;
- if (strict_strtoul(buf, 0, &new) < 0)
+ if (kstrtoul(buf, 0, &new) < 0)
return -EINVAL;
if (new > THRESHOLD_MAX)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 9a316b21df8b..b3c97bafc123 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -42,7 +42,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
-static DEFINE_SPINLOCK(cmci_discover_lock);
+static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
#define CMCI_THRESHOLD 1
#define CMCI_POLL_INTERVAL (30 * HZ)
@@ -86,7 +86,7 @@ void mce_intel_cmci_poll(void)
{
if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
return;
- machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+ machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}
void mce_intel_hcpu_update(unsigned long cpu)
@@ -144,14 +144,14 @@ static void cmci_storm_disable_banks(void)
int bank;
u64 val;
- spin_lock_irqsave(&cmci_discover_lock, flags);
- owned = __get_cpu_var(mce_banks_owned);
+ raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ owned = this_cpu_ptr(mce_banks_owned);
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
}
- spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static bool cmci_storm_detect(void)
@@ -195,7 +195,7 @@ static void intel_threshold_interrupt(void)
{
if (cmci_storm_detect())
return;
- machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+ machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
mce_notify_irq();
}
@@ -206,12 +206,12 @@ static void intel_threshold_interrupt(void)
*/
static void cmci_discover(int banks)
{
- unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
+ unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
unsigned long flags;
int i;
int bios_wrong_thresh = 0;
- spin_lock_irqsave(&cmci_discover_lock, flags);
+ raw_spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
int bios_zero_thresh = 0;
@@ -228,7 +228,7 @@ static void cmci_discover(int banks)
/* Already owned by someone else? */
if (val & MCI_CTL2_CMCI_EN) {
clear_bit(i, owned);
- __clear_bit(i, __get_cpu_var(mce_poll_banks));
+ __clear_bit(i, this_cpu_ptr(mce_poll_banks));
continue;
}
@@ -252,7 +252,7 @@ static void cmci_discover(int banks)
/* Did the enable bit stick? -- the bank supports CMCI */
if (val & MCI_CTL2_CMCI_EN) {
set_bit(i, owned);
- __clear_bit(i, __get_cpu_var(mce_poll_banks));
+ __clear_bit(i, this_cpu_ptr(mce_poll_banks));
/*
* We are able to set thresholds for some banks that
* had a threshold of 0. This means the BIOS has not
@@ -263,10 +263,10 @@ static void cmci_discover(int banks)
(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
bios_wrong_thresh = 1;
} else {
- WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
+ WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
}
}
- spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
pr_info_once(
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
@@ -284,10 +284,10 @@ void cmci_recheck(void)
unsigned long flags;
int banks;
- if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
+ if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
return;
local_irq_save(flags);
- machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+ machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
local_irq_restore(flags);
}
@@ -296,12 +296,12 @@ static void __cmci_disable_bank(int bank)
{
u64 val;
- if (!test_bit(bank, __get_cpu_var(mce_banks_owned)))
+ if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
return;
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
- __clear_bit(bank, __get_cpu_var(mce_banks_owned));
+ __clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}
/*
@@ -316,10 +316,10 @@ void cmci_clear(void)
if (!cmci_supported(&banks))
return;
- spin_lock_irqsave(&cmci_discover_lock, flags);
+ raw_spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++)
__cmci_disable_bank(i);
- spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static void cmci_rediscover_work_func(void *arg)
@@ -360,9 +360,9 @@ void cmci_disable_bank(int bank)
if (!cmci_supported(&banks))
return;
- spin_lock_irqsave(&cmci_discover_lock, flags);
+ raw_spin_lock_irqsave(&cmci_discover_lock, flags);
__cmci_disable_bank(bank);
- spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static void intel_init_cmci(void)
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 36a1bb6d1ee0..1af51b1586d7 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -498,8 +498,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
- printk(KERN_DEBUG
- "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+ if (system_state == SYSTEM_BOOTING)
+ printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
return;
}
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
index 617a9e284245..7aa1acc79789 100644
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ b/arch/x86/kernel/cpu/microcode/amd_early.c
@@ -27,7 +27,7 @@ static u32 ucode_new_rev;
u8 amd_ucode_patch[PATCH_MAX_SIZE];
static u16 this_equiv_id;
-struct cpio_data ucode_cpio;
+static struct cpio_data ucode_cpio;
/*
* Microcode patch container file is prepended to the initrd in cpio format.
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index a276fa75d9b5..c6826d1e8082 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -127,7 +127,7 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
return get_matching_microcode(csig, cpf, mc_intel, crev);
}
-int apply_microcode(int cpu)
+static int apply_microcode_intel(int cpu)
{
struct microcode_intel *mc_intel;
struct ucode_cpu_info *uci;
@@ -314,7 +314,7 @@ static struct microcode_ops microcode_intel_ops = {
.request_microcode_user = request_microcode_user,
.request_microcode_fw = request_microcode_fw,
.collect_cpu_info = collect_cpu_info,
- .apply_microcode = apply_microcode,
+ .apply_microcode = apply_microcode_intel,
.microcode_fini_cpu = microcode_fini_cpu,
};
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
index 18f739129e72..b88343f7a3b3 100644
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -28,8 +28,8 @@
#include <asm/tlbflush.h>
#include <asm/setup.h>
-unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
-struct mc_saved_data {
+static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+static struct mc_saved_data {
unsigned int mc_saved_count;
struct microcode_intel **mc_saved;
} mc_saved_data;
@@ -415,7 +415,7 @@ static void __ref show_saved_mc(void)
struct ucode_cpu_info uci;
if (mc_saved_data.mc_saved_count == 0) {
- pr_debug("no micorcode data saved.\n");
+ pr_debug("no microcode data saved.\n");
return;
}
pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
@@ -506,7 +506,7 @@ int save_mc_for_early(u8 *mc)
if (mc_saved && mc_saved_count)
memcpy(mc_saved_tmp, mc_saved,
- mc_saved_count * sizeof(struct mirocode_intel *));
+ mc_saved_count * sizeof(struct microcode_intel *));
/*
* Save the microcode patch mc in mc_save_tmp structure if it's a newer
* version.
@@ -526,7 +526,7 @@ int save_mc_for_early(u8 *mc)
show_saved_mc();
/*
- * Free old saved microcod data.
+ * Free old saved microcode data.
*/
if (mc_saved) {
for (i = 0; i < mc_saved_count_init; i++)
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index f961de9964c7..ea5f363a1948 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -707,7 +707,7 @@ void __init mtrr_bp_init(void)
} else {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
- if (cpu_has_k6_mtrr) {
+ if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) {
/* Pre-Athlon (K6) AMD CPU MTRRs */
mtrr_if = mtrr_ops[X86_VENDOR_AMD];
size_or_mask = SIZE_OR_MASK_BITS(32);
@@ -715,14 +715,14 @@ void __init mtrr_bp_init(void)
}
break;
case X86_VENDOR_CENTAUR:
- if (cpu_has_centaur_mcr) {
+ if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) {
mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CYRIX:
- if (cpu_has_cyrix_arr) {
+ if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) {
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2879ecdaac43..143e5f5dc855 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -243,7 +243,9 @@ static bool check_hw_exists(void)
msr_fail:
printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
- printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new);
+ printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+ boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
+ reg, val_new);
return false;
}
@@ -387,7 +389,7 @@ int x86_pmu_hw_config(struct perf_event *event)
precise++;
/* Support for IP fixup */
- if (x86_pmu.lbr_nr)
+ if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
precise++;
}
@@ -487,7 +489,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
void x86_pmu_disable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -505,7 +507,7 @@ void x86_pmu_disable_all(void)
static void x86_pmu_disable(struct pmu *pmu)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (!x86_pmu_initialized())
return;
@@ -522,7 +524,7 @@ static void x86_pmu_disable(struct pmu *pmu)
void x86_pmu_enable_all(int added)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -869,7 +871,7 @@ static void x86_pmu_start(struct perf_event *event, int flags);
static void x86_pmu_enable(struct pmu *pmu)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
struct hw_perf_event *hwc;
int i, added = cpuc->n_added;
@@ -1020,7 +1022,7 @@ void x86_pmu_enable_event(struct perf_event *event)
*/
static int x86_pmu_add(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc;
int assign[X86_PMC_IDX_MAX];
int n, n0, ret;
@@ -1071,7 +1073,7 @@ out:
static void x86_pmu_start(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx = event->hw.idx;
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
@@ -1150,7 +1152,7 @@ void perf_event_print_debug(void)
void x86_pmu_stop(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
@@ -1172,7 +1174,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
static void x86_pmu_del(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int i;
/*
@@ -1227,7 +1229,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
- cpuc = &__get_cpu_var(cpu_hw_events);
+ cpuc = this_cpu_ptr(&cpu_hw_events);
/*
* Some chipsets need to unmask the LVTPC in a particular spot
@@ -1636,7 +1638,7 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
*/
static int x86_pmu_commit_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int assign[X86_PMC_IDX_MAX];
int n, ret;
@@ -1995,7 +1997,7 @@ static unsigned long get_segment_base(unsigned int segment)
if (idx > GDT_ENTRIES)
return 0;
- desc = __this_cpu_ptr(&gdt_page.gdt[0]);
+ desc = raw_cpu_ptr(gdt_page.gdt);
}
return get_desc_base(desc + idx);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8ade93111e03..fc5eb390b368 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -67,8 +67,10 @@ struct event_constraint {
*/
#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style datala, store */
#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -252,18 +254,52 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
#define INTEL_PLD_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
#define INTEL_PST_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
+ EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
+
+/* Check flags and event code, and set the HSW store flag */
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
+/* Check flags and event code, and set the HSW load flag */
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
+/* Check flags and event code/umask, and set the HSW store flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+/* Check flags and event code/umask, and set the HSW load flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
+/* Check flags and event code/umask, and set the HSW N/A flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ INTEL_ARCH_EVENT_MASK|INTEL_ARCH_EVENT_MASK, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
+
+
/*
* We define the end marker as having a weight of -1
* to enable blacklisting of events using a counter bitmask
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index beeb7cc07044..28926311aac1 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -699,7 +699,7 @@ __init int amd_pmu_init(void)
void amd_pmu_enable_virt(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
cpuc->perf_ctr_virt_mask = 0;
@@ -711,7 +711,7 @@ EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
void amd_pmu_disable_virt(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
/*
* We only mask out the Host-only bit so that host-only counting works
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2502d0d9d246..944bf019b74f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1045,7 +1045,7 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
static void intel_pmu_disable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -1058,7 +1058,7 @@ static void intel_pmu_disable_all(void)
static void intel_pmu_enable_all(int added)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
intel_pmu_pebs_enable_all();
intel_pmu_lbr_enable_all();
@@ -1092,7 +1092,7 @@ static void intel_pmu_enable_all(int added)
*/
static void intel_pmu_nhm_workaround(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
static const unsigned long nhm_magic[4] = {
0x4300B5,
0x4300D2,
@@ -1191,7 +1191,7 @@ static inline bool event_is_checkpointed(struct perf_event *event)
static void intel_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
intel_pmu_disable_bts();
@@ -1255,7 +1255,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
static void intel_pmu_enable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
if (!__this_cpu_read(cpu_hw_events.enabled))
@@ -1349,7 +1349,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
u64 status;
int handled;
- cpuc = &__get_cpu_var(cpu_hw_events);
+ cpuc = this_cpu_ptr(&cpu_hw_events);
/*
* No known reason to not always do late ACK,
@@ -1781,7 +1781,7 @@ EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
@@ -1802,7 +1802,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
int idx;
@@ -1836,7 +1836,7 @@ static void core_pmu_enable_event(struct perf_event *event)
static void core_pmu_enable_all(int added)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -2367,15 +2367,15 @@ __init int intel_pmu_init(void)
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_model) {
- case 14: /* 65 nm core solo/duo, "Yonah" */
+ case 14: /* 65nm Core "Yonah" */
pr_cont("Core events, ");
break;
- case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+ case 15: /* 65nm Core2 "Merom" */
x86_add_quirk(intel_clovertown_quirk);
- case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
- case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
- case 29: /* six-core 45 nm xeon "Dunnington" */
+ case 22: /* 65nm Core2 "Merom-L" */
+ case 23: /* 45nm Core2 "Penryn" */
+ case 29: /* 45nm Core2 "Dunnington (MP) */
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2386,9 +2386,9 @@ __init int intel_pmu_init(void)
pr_cont("Core2 events, ");
break;
- case 26: /* 45 nm nehalem, "Bloomfield" */
- case 30: /* 45 nm nehalem, "Lynnfield" */
- case 46: /* 45 nm nehalem-ex, "Beckton" */
+ case 30: /* 45nm Nehalem */
+ case 26: /* 45nm Nehalem-EP */
+ case 46: /* 45nm Nehalem-EX */
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -2415,11 +2415,11 @@ __init int intel_pmu_init(void)
pr_cont("Nehalem events, ");
break;
- case 28: /* Atom */
- case 38: /* Lincroft */
- case 39: /* Penwell */
- case 53: /* Cloverview */
- case 54: /* Cedarview */
+ case 28: /* 45nm Atom "Pineview" */
+ case 38: /* 45nm Atom "Lincroft" */
+ case 39: /* 32nm Atom "Penwell" */
+ case 53: /* 32nm Atom "Cloverview" */
+ case 54: /* 32nm Atom "Cedarview" */
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2430,8 +2430,8 @@ __init int intel_pmu_init(void)
pr_cont("Atom events, ");
break;
- case 55: /* Atom 22nm "Silvermont" */
- case 77: /* Avoton "Silvermont" */
+ case 55: /* 22nm Atom "Silvermont" */
+ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -2446,9 +2446,9 @@ __init int intel_pmu_init(void)
pr_cont("Silvermont events, ");
break;
- case 37: /* 32 nm nehalem, "Clarkdale" */
- case 44: /* 32 nm nehalem, "Gulftown" */
- case 47: /* 32 nm Xeon E7 */
+ case 37: /* 32nm Westmere */
+ case 44: /* 32nm Westmere-EP */
+ case 47: /* 32nm Westmere-EX */
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -2474,8 +2474,8 @@ __init int intel_pmu_init(void)
pr_cont("Westmere events, ");
break;
- case 42: /* SandyBridge */
- case 45: /* SandyBridge, "Romely-EP" */
+ case 42: /* 32nm SandyBridge */
+ case 45: /* 32nm SandyBridge-E/EN/EP */
x86_add_quirk(intel_sandybridge_quirk);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2506,8 +2506,9 @@ __init int intel_pmu_init(void)
pr_cont("SandyBridge events, ");
break;
- case 58: /* IvyBridge */
- case 62: /* IvyBridge EP */
+
+ case 58: /* 22nm IvyBridge */
+ case 62: /* 22nm IvyBridge-EP/EX */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
/* dTLB-load-misses on IVB is different than SNB */
@@ -2539,11 +2540,10 @@ __init int intel_pmu_init(void)
break;
- case 60: /* Haswell Client */
- case 70:
- case 71:
- case 63:
- case 69:
+ case 60: /* 22nm Haswell Core */
+ case 63: /* 22nm Haswell Server */
+ case 69: /* 22nm Haswell ULT */
+ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -2552,7 +2552,7 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_hsw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
- x86_pmu.extra_regs = intel_snb_extra_regs;
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
/* all extra regs are per-cpu when HT is on */
x86_pmu.er_flags |= ERF_HAS_RSP_1;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 696ade311ded..46211bcc813e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -108,14 +108,16 @@ static u64 precise_store_data(u64 status)
return val;
}
-static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
+static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
union perf_mem_data_src dse;
- u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
- dse.val = 0;
- dse.mem_op = PERF_MEM_OP_STORE;
- dse.mem_lvl = PERF_MEM_LVL_NA;
+ dse.val = PERF_MEM_NA;
+
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+ dse.mem_op = PERF_MEM_OP_STORE;
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
+ dse.mem_op = PERF_MEM_OP_LOAD;
/*
* L1 info only valid for following events:
@@ -125,15 +127,12 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
* MEM_UOPS_RETIRED.SPLIT_STORES
* MEM_UOPS_RETIRED.ALL_STORES
*/
- if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
- return dse.mem_lvl;
-
- if (status & 1)
- dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
- else
- dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
-
- /* Nothing else supported. Sorry. */
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
+ if (status & 1)
+ dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ else
+ dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+ }
return dse.val;
}
@@ -475,7 +474,7 @@ void intel_pmu_enable_bts(u64 config)
void intel_pmu_disable_bts(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
unsigned long debugctlmsr;
if (!cpuc->ds)
@@ -492,7 +491,7 @@ void intel_pmu_disable_bts(void)
int intel_pmu_drain_bts_buffer(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
struct bts_record {
u64 from;
@@ -569,28 +568,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
};
struct event_constraint intel_slm_pebs_event_constraints[] = {
- INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
- INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
- INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
- INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
- INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
- INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
- INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
- INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
- INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
};
@@ -626,68 +607,44 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
- INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
EVENT_CONSTRAINT_END
};
struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
- INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
EVENT_CONSTRAINT_END
};
struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
- INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
- INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
- INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
- /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
- /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
- INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
- /* MEM_UOPS_RETIRED.SPLIT_STORES */
- INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
- INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
- INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
- INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
- INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
- /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
- INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
- /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
- INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
- /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
- INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
- /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
- INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
- INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
-
+ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
EVENT_CONSTRAINT_END
};
@@ -712,7 +669,7 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
void intel_pmu_pebs_enable(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
@@ -727,7 +684,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
void intel_pmu_pebs_disable(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -745,7 +702,7 @@ void intel_pmu_pebs_disable(struct perf_event *event)
void intel_pmu_pebs_enable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (cpuc->pebs_enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -753,7 +710,7 @@ void intel_pmu_pebs_enable_all(void)
void intel_pmu_pebs_disable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (cpuc->pebs_enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
@@ -761,7 +718,7 @@ void intel_pmu_pebs_disable_all(void)
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
unsigned long from = cpuc->lbr_entries[0].from;
unsigned long old_to, to = cpuc->lbr_entries[0].to;
unsigned long ip = regs->ip;
@@ -864,51 +821,53 @@ static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
static void __intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs, void *__pebs)
{
+#define PERF_X86_EVENT_PEBS_HSW_PREC \
+ (PERF_X86_EVENT_PEBS_ST_HSW | \
+ PERF_X86_EVENT_PEBS_LD_HSW | \
+ PERF_X86_EVENT_PEBS_NA_HSW)
/*
* We cast to the biggest pebs_record but are careful not to
* unconditionally access the 'extra' entries.
*/
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct pebs_record_hsw *pebs = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
u64 sample_type;
- int fll, fst;
+ int fll, fst, dsrc;
+ int fl = event->hw.flags;
if (!intel_pmu_save_and_restart(event))
return;
- fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
- fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
- PERF_X86_EVENT_PEBS_ST_HSW);
+ sample_type = event->attr.sample_type;
+ dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
+
+ fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
+ fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
perf_sample_data_init(&data, 0, event->hw.last_period);
data.period = event->hw.last_period;
- sample_type = event->attr.sample_type;
/*
- * if PEBS-LL or PreciseStore
+ * Use latency for weight (only avail with PEBS-LL)
*/
- if (fll || fst) {
- /*
- * Use latency for weight (only avail with PEBS-LL)
- */
- if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
- data.weight = pebs->lat;
-
- /*
- * data.data_src encodes the data source
- */
- if (sample_type & PERF_SAMPLE_DATA_SRC) {
- if (fll)
- data.data_src.val = load_latency_data(pebs->dse);
- else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
- data.data_src.val =
- precise_store_data_hsw(event, pebs->dse);
- else
- data.data_src.val = precise_store_data(pebs->dse);
- }
+ if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+ data.weight = pebs->lat;
+
+ /*
+ * data.data_src encodes the data source
+ */
+ if (dsrc) {
+ u64 val = PERF_MEM_NA;
+ if (fll)
+ val = load_latency_data(pebs->dse);
+ else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
+ val = precise_datala_hsw(event, pebs->dse);
+ else if (fst)
+ val = precise_store_data(pebs->dse);
+ data.data_src.val = val;
}
/*
@@ -935,16 +894,16 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
else
regs.flags &= ~PERF_EFLAGS_EXACT;
- if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
+ if ((sample_type & PERF_SAMPLE_ADDR) &&
x86_pmu.intel_cap.pebs_format >= 1)
data.addr = pebs->dla;
if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
- if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+ if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
data.weight = intel_hsw_weight(pebs);
- if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION)
+ if (sample_type & PERF_SAMPLE_TRANSACTION)
data.txn = intel_hsw_transaction(pebs);
}
@@ -957,7 +916,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
struct perf_event *event = cpuc->events[0]; /* PMC0 only */
struct pebs_record_core *at, *top;
@@ -998,7 +957,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
struct perf_event *event = NULL;
void *at, *top;
@@ -1055,7 +1014,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
* BTS, PEBS probe and setup
*/
-void intel_ds_init(void)
+void __init intel_ds_init(void)
{
/*
* No support for 32bit formats
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 9dd2459a4c73..45fa730a5283 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -133,7 +133,7 @@ static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
static void __intel_pmu_lbr_enable(void)
{
u64 debugctl;
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (cpuc->lbr_sel)
wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
@@ -183,7 +183,7 @@ void intel_pmu_lbr_reset(void)
void intel_pmu_lbr_enable(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (!x86_pmu.lbr_nr)
return;
@@ -203,7 +203,7 @@ void intel_pmu_lbr_enable(struct perf_event *event)
void intel_pmu_lbr_disable(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (!x86_pmu.lbr_nr)
return;
@@ -220,7 +220,7 @@ void intel_pmu_lbr_disable(struct perf_event *event)
void intel_pmu_lbr_enable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (cpuc->lbr_users)
__intel_pmu_lbr_enable();
@@ -228,7 +228,7 @@ void intel_pmu_lbr_enable_all(void)
void intel_pmu_lbr_disable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (cpuc->lbr_users)
__intel_pmu_lbr_disable();
@@ -332,7 +332,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
void intel_pmu_lbr_read(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (!cpuc->lbr_users)
return;
@@ -697,7 +697,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
};
/* core */
-void intel_pmu_lbr_init_core(void)
+void __init intel_pmu_lbr_init_core(void)
{
x86_pmu.lbr_nr = 4;
x86_pmu.lbr_tos = MSR_LBR_TOS;
@@ -712,7 +712,7 @@ void intel_pmu_lbr_init_core(void)
}
/* nehalem/westmere */
-void intel_pmu_lbr_init_nhm(void)
+void __init intel_pmu_lbr_init_nhm(void)
{
x86_pmu.lbr_nr = 16;
x86_pmu.lbr_tos = MSR_LBR_TOS;
@@ -733,7 +733,7 @@ void intel_pmu_lbr_init_nhm(void)
}
/* sandy bridge */
-void intel_pmu_lbr_init_snb(void)
+void __init intel_pmu_lbr_init_snb(void)
{
x86_pmu.lbr_nr = 16;
x86_pmu.lbr_tos = MSR_LBR_TOS;
@@ -753,7 +753,7 @@ void intel_pmu_lbr_init_snb(void)
}
/* atom */
-void intel_pmu_lbr_init_atom(void)
+void __init intel_pmu_lbr_init_atom(void)
{
/*
* only models starting at stepping 10 seems
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 619f7699487a..d64f275fe274 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -135,7 +135,7 @@ static inline u64 rapl_scale(u64 v)
* or use ldexp(count, -32).
* Watts = Joules/Time delta
*/
- return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit);
+ return v << (32 - __this_cpu_read(rapl_pmu->hw_unit));
}
static u64 rapl_event_update(struct perf_event *event)
@@ -187,7 +187,7 @@ static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
{
- struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+ struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
struct perf_event *event;
unsigned long flags;
@@ -234,7 +234,7 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
static void rapl_pmu_event_start(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+ struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
unsigned long flags;
spin_lock_irqsave(&pmu->lock, flags);
@@ -244,7 +244,7 @@ static void rapl_pmu_event_start(struct perf_event *event, int mode)
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+ struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
@@ -278,7 +278,7 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
static int rapl_pmu_event_add(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu);
+ struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
@@ -696,7 +696,7 @@ static int __init rapl_pmu_init(void)
return -1;
}
- pmu = __get_cpu_var(rapl_pmu);
+ pmu = __this_cpu_read(rapl_pmu);
pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
" API unit is 2^-32 Joules,"
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index cfc6f9dfcd90..9762dbd9f3f7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -1,83 +1,39 @@
#include "perf_event_intel_uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
-static struct intel_uncore_type **msr_uncores = empty_uncore;
-static struct intel_uncore_type **pci_uncores = empty_uncore;
-/* pci bus to socket mapping */
-static int pcibus_to_physid[256] = { [0 ... 255] = -1, };
+struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
+struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
-static struct pci_dev *extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+static bool pcidrv_registered;
+struct pci_driver *uncore_pci_driver;
+/* pci bus to socket mapping */
+int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, };
+struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
static DEFINE_RAW_SPINLOCK(uncore_box_lock);
-
/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;
/* constraint for the fixed counter */
-static struct event_constraint constraint_fixed =
+static struct event_constraint uncore_constraint_fixed =
EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
-static struct event_constraint constraint_empty =
+struct event_constraint uncore_constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);
-#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \
- ((1ULL << (n)) - 1)))
-
-DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
-DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
-DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
-DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
-DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
-DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
-DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
-DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
-DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
-DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
-DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
-DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
-DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
-DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
-DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
-DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
-DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
-DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
-DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
-
-static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
-static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
-static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
-static void uncore_pmu_event_read(struct perf_event *event);
-
-static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+ssize_t uncore_event_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uncore_event_desc *event =
+ container_of(attr, struct uncore_event_desc, attr);
+ return sprintf(buf, "%s", event->config);
+}
+
+struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}
-static struct intel_uncore_box *
-uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
+struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
struct intel_uncore_box *box;
@@ -86,6 +42,9 @@ uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
return box;
raw_spin_lock(&uncore_box_lock);
+ /* Recheck in lock to handle races. */
+ if (*per_cpu_ptr(pmu->box, cpu))
+ goto out;
list_for_each_entry(box, &pmu->box_list, list) {
if (box->phys_id == topology_physical_package_id(cpu)) {
atomic_inc(&box->refcnt);
@@ -93,12 +52,13 @@ uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
break;
}
}
+out:
raw_spin_unlock(&uncore_box_lock);
return *per_cpu_ptr(pmu->box, cpu);
}
-static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
/*
* perf core schedules event on the basis of cpu, uncore events are
@@ -107,7 +67,7 @@ static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}
-static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
u64 count;
@@ -119,7 +79,7 @@ static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_eve
/*
* generic get constraint function for shared match/mask registers.
*/
-static struct event_constraint *
+struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
struct intel_uncore_extra_reg *er;
@@ -154,10 +114,10 @@ uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
return NULL;
}
- return &constraint_empty;
+ return &uncore_constraint_empty;
}
-static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
struct intel_uncore_extra_reg *er;
struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
@@ -178,7 +138,7 @@ static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_even
reg1->alloc = 0;
}
-static u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
+u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
struct intel_uncore_extra_reg *er;
unsigned long flags;
@@ -193,2936 +153,6 @@ static u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
return config;
}
-/* Sandy Bridge-EP uncore support */
-static struct intel_uncore_type snbep_uncore_cbox;
-static struct intel_uncore_type snbep_uncore_pcu;
-
-static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
- int box_ctl = uncore_pci_box_ctl(box);
- u32 config = 0;
-
- if (!pci_read_config_dword(pdev, box_ctl, &config)) {
- config |= SNBEP_PMON_BOX_CTL_FRZ;
- pci_write_config_dword(pdev, box_ctl, config);
- }
-}
-
-static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
- int box_ctl = uncore_pci_box_ctl(box);
- u32 config = 0;
-
- if (!pci_read_config_dword(pdev, box_ctl, &config)) {
- config &= ~SNBEP_PMON_BOX_CTL_FRZ;
- pci_write_config_dword(pdev, box_ctl, config);
- }
-}
-
-static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
-
- pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
-
- pci_write_config_dword(pdev, hwc->config_base, hwc->config);
-}
-
-static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
- u64 count = 0;
-
- pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
- pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
-
- return count;
-}
-
-static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
-
- pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT);
-}
-
-static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
- u64 config;
- unsigned msr;
-
- msr = uncore_msr_box_ctl(box);
- if (msr) {
- rdmsrl(msr, config);
- config |= SNBEP_PMON_BOX_CTL_FRZ;
- wrmsrl(msr, config);
- }
-}
-
-static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
- u64 config;
- unsigned msr;
-
- msr = uncore_msr_box_ctl(box);
- if (msr) {
- rdmsrl(msr, config);
- config &= ~SNBEP_PMON_BOX_CTL_FRZ;
- wrmsrl(msr, config);
- }
-}
-
-static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
- if (reg1->idx != EXTRA_REG_NONE)
- wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0));
-
- wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- wrmsrl(hwc->config_base, hwc->config);
-}
-
-static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
-{
- unsigned msr = uncore_msr_box_ctl(box);
-
- if (msr)
- wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
-}
-
-static struct attribute *snbep_uncore_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- NULL,
-};
-
-static struct attribute *snbep_uncore_ubox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh5.attr,
- NULL,
-};
-
-static struct attribute *snbep_uncore_cbox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_tid_en.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- &format_attr_filter_tid.attr,
- &format_attr_filter_nid.attr,
- &format_attr_filter_state.attr,
- &format_attr_filter_opc.attr,
- NULL,
-};
-
-static struct attribute *snbep_uncore_pcu_formats_attr[] = {
- &format_attr_event_ext.attr,
- &format_attr_occ_sel.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh5.attr,
- &format_attr_occ_invert.attr,
- &format_attr_occ_edge.attr,
- &format_attr_filter_band0.attr,
- &format_attr_filter_band1.attr,
- &format_attr_filter_band2.attr,
- &format_attr_filter_band3.attr,
- NULL,
-};
-
-static struct attribute *snbep_uncore_qpi_formats_attr[] = {
- &format_attr_event_ext.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- &format_attr_match_rds.attr,
- &format_attr_match_rnid30.attr,
- &format_attr_match_rnid4.attr,
- &format_attr_match_dnid.attr,
- &format_attr_match_mc.attr,
- &format_attr_match_opc.attr,
- &format_attr_match_vnw.attr,
- &format_attr_match0.attr,
- &format_attr_match1.attr,
- &format_attr_mask_rds.attr,
- &format_attr_mask_rnid30.attr,
- &format_attr_mask_rnid4.attr,
- &format_attr_mask_dnid.attr,
- &format_attr_mask_mc.attr,
- &format_attr_mask_opc.attr,
- &format_attr_mask_vnw.attr,
- &format_attr_mask0.attr,
- &format_attr_mask1.attr,
- NULL,
-};
-
-static struct uncore_event_desc snbep_uncore_imc_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
- INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
- INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
- { /* end: all zeroes */ },
-};
-
-static struct uncore_event_desc snbep_uncore_qpi_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"),
- INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
- INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"),
- INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"),
- { /* end: all zeroes */ },
-};
-
-static struct attribute_group snbep_uncore_format_group = {
- .name = "format",
- .attrs = snbep_uncore_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_ubox_format_group = {
- .name = "format",
- .attrs = snbep_uncore_ubox_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_cbox_format_group = {
- .name = "format",
- .attrs = snbep_uncore_cbox_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_pcu_format_group = {
- .name = "format",
- .attrs = snbep_uncore_pcu_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_qpi_format_group = {
- .name = "format",
- .attrs = snbep_uncore_qpi_formats_attr,
-};
-
-#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
- .init_box = snbep_uncore_msr_init_box, \
- .disable_box = snbep_uncore_msr_disable_box, \
- .enable_box = snbep_uncore_msr_enable_box, \
- .disable_event = snbep_uncore_msr_disable_event, \
- .enable_event = snbep_uncore_msr_enable_event, \
- .read_counter = uncore_msr_read_counter
-
-static struct intel_uncore_ops snbep_uncore_msr_ops = {
- SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-};
-
-#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \
- .init_box = snbep_uncore_pci_init_box, \
- .disable_box = snbep_uncore_pci_disable_box, \
- .enable_box = snbep_uncore_pci_enable_box, \
- .disable_event = snbep_uncore_pci_disable_event, \
- .read_counter = snbep_uncore_pci_read_counter
-
-static struct intel_uncore_ops snbep_uncore_pci_ops = {
- SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
- .enable_event = snbep_uncore_pci_enable_event, \
-};
-
-static struct event_constraint snbep_uncore_cbox_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
- UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
- UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
- UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
- EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
- UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
- EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
- EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2a, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
- EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type snbep_uncore_ubox = {
- .name = "ubox",
- .num_counters = 2,
- .num_boxes = 1,
- .perf_ctr_bits = 44,
- .fixed_ctr_bits = 48,
- .perf_ctr = SNBEP_U_MSR_PMON_CTR0,
- .event_ctl = SNBEP_U_MSR_PMON_CTL0,
- .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
- .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
- .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
- .ops = &snbep_uncore_msr_ops,
- .format_group = &snbep_uncore_ubox_format_group,
-};
-
-static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
- SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
- SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2),
- EVENT_EXTRA_END
-};
-
-static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct intel_uncore_extra_reg *er = &box->shared_regs[0];
- int i;
-
- if (uncore_box_is_fake(box))
- return;
-
- for (i = 0; i < 5; i++) {
- if (reg1->alloc & (0x1 << i))
- atomic_sub(1 << (i * 6), &er->ref);
- }
- reg1->alloc = 0;
-}
-
-static struct event_constraint *
-__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event,
- u64 (*cbox_filter_mask)(int fields))
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct intel_uncore_extra_reg *er = &box->shared_regs[0];
- int i, alloc = 0;
- unsigned long flags;
- u64 mask;
-
- if (reg1->idx == EXTRA_REG_NONE)
- return NULL;
-
- raw_spin_lock_irqsave(&er->lock, flags);
- for (i = 0; i < 5; i++) {
- if (!(reg1->idx & (0x1 << i)))
- continue;
- if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
- continue;
-
- mask = cbox_filter_mask(0x1 << i);
- if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) ||
- !((reg1->config ^ er->config) & mask)) {
- atomic_add(1 << (i * 6), &er->ref);
- er->config &= ~mask;
- er->config |= reg1->config & mask;
- alloc |= (0x1 << i);
- } else {
- break;
- }
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
- if (i < 5)
- goto fail;
-
- if (!uncore_box_is_fake(box))
- reg1->alloc |= alloc;
-
- return NULL;
-fail:
- for (; i >= 0; i--) {
- if (alloc & (0x1 << i))
- atomic_sub(1 << (i * 6), &er->ref);
- }
- return &constraint_empty;
-}
-
-static u64 snbep_cbox_filter_mask(int fields)
-{
- u64 mask = 0;
-
- if (fields & 0x1)
- mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID;
- if (fields & 0x2)
- mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID;
- if (fields & 0x4)
- mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
- if (fields & 0x8)
- mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
-
- return mask;
-}
-
-static struct event_constraint *
-snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask);
-}
-
-static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct extra_reg *er;
- int idx = 0;
-
- for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) {
- if (er->event != (event->hw.config & er->config_mask))
- continue;
- idx |= er->idx;
- }
-
- if (idx) {
- reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
- SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
- reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx);
- reg1->idx = idx;
- }
- return 0;
-}
-
-static struct intel_uncore_ops snbep_uncore_cbox_ops = {
- SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
- .hw_config = snbep_cbox_hw_config,
- .get_constraint = snbep_cbox_get_constraint,
- .put_constraint = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type snbep_uncore_cbox = {
- .name = "cbox",
- .num_counters = 4,
- .num_boxes = 8,
- .perf_ctr_bits = 44,
- .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
- .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
- .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
- .msr_offset = SNBEP_CBO_MSR_OFFSET,
- .num_shared_regs = 1,
- .constraints = snbep_uncore_cbox_constraints,
- .ops = &snbep_uncore_cbox_ops,
- .format_group = &snbep_uncore_cbox_format_group,
-};
-
-static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- u64 config = reg1->config;
-
- if (new_idx > reg1->idx)
- config <<= 8 * (new_idx - reg1->idx);
- else
- config >>= 8 * (reg1->idx - new_idx);
-
- if (modify) {
- hwc->config += new_idx - reg1->idx;
- reg1->config = config;
- reg1->idx = new_idx;
- }
- return config;
-}
-
-static struct event_constraint *
-snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct intel_uncore_extra_reg *er = &box->shared_regs[0];
- unsigned long flags;
- int idx = reg1->idx;
- u64 mask, config1 = reg1->config;
- bool ok = false;
-
- if (reg1->idx == EXTRA_REG_NONE ||
- (!uncore_box_is_fake(box) && reg1->alloc))
- return NULL;
-again:
- mask = 0xffULL << (idx * 8);
- raw_spin_lock_irqsave(&er->lock, flags);
- if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
- !((config1 ^ er->config) & mask)) {
- atomic_add(1 << (idx * 8), &er->ref);
- er->config &= ~mask;
- er->config |= config1 & mask;
- ok = true;
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- if (!ok) {
- idx = (idx + 1) % 4;
- if (idx != reg1->idx) {
- config1 = snbep_pcu_alter_er(event, idx, false);
- goto again;
- }
- return &constraint_empty;
- }
-
- if (!uncore_box_is_fake(box)) {
- if (idx != reg1->idx)
- snbep_pcu_alter_er(event, idx, true);
- reg1->alloc = 1;
- }
- return NULL;
-}
-
-static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct intel_uncore_extra_reg *er = &box->shared_regs[0];
-
- if (uncore_box_is_fake(box) || !reg1->alloc)
- return;
-
- atomic_sub(1 << (reg1->idx * 8), &er->ref);
- reg1->alloc = 0;
-}
-
-static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
-
- if (ev_sel >= 0xb && ev_sel <= 0xe) {
- reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
- reg1->idx = ev_sel - 0xb;
- reg1->config = event->attr.config1 & (0xff << reg1->idx);
- }
- return 0;
-}
-
-static struct intel_uncore_ops snbep_uncore_pcu_ops = {
- SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
- .hw_config = snbep_pcu_hw_config,
- .get_constraint = snbep_pcu_get_constraint,
- .put_constraint = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type snbep_uncore_pcu = {
- .name = "pcu",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
- .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
- .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &snbep_uncore_pcu_ops,
- .format_group = &snbep_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *snbep_msr_uncores[] = {
- &snbep_uncore_ubox,
- &snbep_uncore_cbox,
- &snbep_uncore_pcu,
- NULL,
-};
-
-enum {
- SNBEP_PCI_QPI_PORT0_FILTER,
- SNBEP_PCI_QPI_PORT1_FILTER,
-};
-
-static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
- if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
- reg1->idx = 0;
- reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
- reg1->config = event->attr.config1;
- reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
- reg2->config = event->attr.config2;
- }
- return 0;
-}
-
-static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
- if (reg1->idx != EXTRA_REG_NONE) {
- int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
- struct pci_dev *filter_pdev = extra_pci_dev[box->phys_id][idx];
- WARN_ON_ONCE(!filter_pdev);
- if (filter_pdev) {
- pci_write_config_dword(filter_pdev, reg1->reg,
- (u32)reg1->config);
- pci_write_config_dword(filter_pdev, reg1->reg + 4,
- (u32)(reg1->config >> 32));
- pci_write_config_dword(filter_pdev, reg2->reg,
- (u32)reg2->config);
- pci_write_config_dword(filter_pdev, reg2->reg + 4,
- (u32)(reg2->config >> 32));
- }
- }
-
- pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops snbep_uncore_qpi_ops = {
- SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
- .enable_event = snbep_qpi_enable_event,
- .hw_config = snbep_qpi_hw_config,
- .get_constraint = uncore_get_constraint,
- .put_constraint = uncore_put_constraint,
-};
-
-#define SNBEP_UNCORE_PCI_COMMON_INIT() \
- .perf_ctr = SNBEP_PCI_PMON_CTR0, \
- .event_ctl = SNBEP_PCI_PMON_CTL0, \
- .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \
- .ops = &snbep_uncore_pci_ops, \
- .format_group = &snbep_uncore_format_group
-
-static struct intel_uncore_type snbep_uncore_ha = {
- .name = "ha",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_imc = {
- .name = "imc",
- .num_counters = 4,
- .num_boxes = 4,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
- .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
- .event_descs = snbep_uncore_imc_events,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_qpi = {
- .name = "qpi",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCI_PMON_CTR0,
- .event_ctl = SNBEP_PCI_PMON_CTL0,
- .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &snbep_uncore_qpi_ops,
- .event_descs = snbep_uncore_qpi_events,
- .format_group = &snbep_uncore_qpi_format_group,
-};
-
-
-static struct intel_uncore_type snbep_uncore_r2pcie = {
- .name = "r2pcie",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 44,
- .constraints = snbep_uncore_r2pcie_constraints,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_r3qpi = {
- .name = "r3qpi",
- .num_counters = 3,
- .num_boxes = 2,
- .perf_ctr_bits = 44,
- .constraints = snbep_uncore_r3qpi_constraints,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
- SNBEP_PCI_UNCORE_HA,
- SNBEP_PCI_UNCORE_IMC,
- SNBEP_PCI_UNCORE_QPI,
- SNBEP_PCI_UNCORE_R2PCIE,
- SNBEP_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *snbep_pci_uncores[] = {
- [SNBEP_PCI_UNCORE_HA] = &snbep_uncore_ha,
- [SNBEP_PCI_UNCORE_IMC] = &snbep_uncore_imc,
- [SNBEP_PCI_UNCORE_QPI] = &snbep_uncore_qpi,
- [SNBEP_PCI_UNCORE_R2PCIE] = &snbep_uncore_r2pcie,
- [SNBEP_PCI_UNCORE_R3QPI] = &snbep_uncore_r3qpi,
- NULL,
-};
-
-static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = {
- { /* Home Agent */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
- },
- { /* MC Channel 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
- },
- { /* MC Channel 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
- },
- { /* MC Channel 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
- },
- { /* MC Channel 3 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
- },
- { /* QPI Port 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
- },
- { /* QPI Port 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
- },
- { /* R2PCIe */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
- },
- { /* R3QPI Link 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
- },
- { /* R3QPI Link 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
- },
- { /* QPI Port 0 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- SNBEP_PCI_QPI_PORT0_FILTER),
- },
- { /* QPI Port 0 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- SNBEP_PCI_QPI_PORT1_FILTER),
- },
- { /* end: all zeroes */ }
-};
-
-static struct pci_driver snbep_uncore_pci_driver = {
- .name = "snbep_uncore",
- .id_table = snbep_uncore_pci_ids,
-};
-
-/*
- * build pci bus to socket mapping
- */
-static int snbep_pci2phy_map_init(int devid)
-{
- struct pci_dev *ubox_dev = NULL;
- int i, bus, nodeid;
- int err = 0;
- u32 config = 0;
-
- while (1) {
- /* find the UBOX device */
- ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev);
- if (!ubox_dev)
- break;
- bus = ubox_dev->bus->number;
- /* get the Node ID of the local register */
- err = pci_read_config_dword(ubox_dev, 0x40, &config);
- if (err)
- break;
- nodeid = config;
- /* get the Node ID mapping */
- err = pci_read_config_dword(ubox_dev, 0x54, &config);
- if (err)
- break;
- /*
- * every three bits in the Node ID mapping register maps
- * to a particular node.
- */
- for (i = 0; i < 8; i++) {
- if (nodeid == ((config >> (3 * i)) & 0x7)) {
- pcibus_to_physid[bus] = i;
- break;
- }
- }
- }
-
- if (!err) {
- /*
- * For PCI bus with no UBOX device, find the next bus
- * that has UBOX device and use its mapping.
- */
- i = -1;
- for (bus = 255; bus >= 0; bus--) {
- if (pcibus_to_physid[bus] >= 0)
- i = pcibus_to_physid[bus];
- else
- pcibus_to_physid[bus] = i;
- }
- }
-
- if (ubox_dev)
- pci_dev_put(ubox_dev);
-
- return err ? pcibios_err_to_errno(err) : 0;
-}
-/* end of Sandy Bridge-EP uncore support */
-
-/* IvyTown uncore support */
-static void ivt_uncore_msr_init_box(struct intel_uncore_box *box)
-{
- unsigned msr = uncore_msr_box_ctl(box);
- if (msr)
- wrmsrl(msr, IVT_PMON_BOX_CTL_INT);
-}
-
-static void ivt_uncore_pci_init_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
-
- pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVT_PMON_BOX_CTL_INT);
-}
-
-#define IVT_UNCORE_MSR_OPS_COMMON_INIT() \
- .init_box = ivt_uncore_msr_init_box, \
- .disable_box = snbep_uncore_msr_disable_box, \
- .enable_box = snbep_uncore_msr_enable_box, \
- .disable_event = snbep_uncore_msr_disable_event, \
- .enable_event = snbep_uncore_msr_enable_event, \
- .read_counter = uncore_msr_read_counter
-
-static struct intel_uncore_ops ivt_uncore_msr_ops = {
- IVT_UNCORE_MSR_OPS_COMMON_INIT(),
-};
-
-static struct intel_uncore_ops ivt_uncore_pci_ops = {
- .init_box = ivt_uncore_pci_init_box,
- .disable_box = snbep_uncore_pci_disable_box,
- .enable_box = snbep_uncore_pci_enable_box,
- .disable_event = snbep_uncore_pci_disable_event,
- .enable_event = snbep_uncore_pci_enable_event,
- .read_counter = snbep_uncore_pci_read_counter,
-};
-
-#define IVT_UNCORE_PCI_COMMON_INIT() \
- .perf_ctr = SNBEP_PCI_PMON_CTR0, \
- .event_ctl = SNBEP_PCI_PMON_CTL0, \
- .event_mask = IVT_PMON_RAW_EVENT_MASK, \
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \
- .ops = &ivt_uncore_pci_ops, \
- .format_group = &ivt_uncore_format_group
-
-static struct attribute *ivt_uncore_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- NULL,
-};
-
-static struct attribute *ivt_uncore_ubox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh5.attr,
- NULL,
-};
-
-static struct attribute *ivt_uncore_cbox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_tid_en.attr,
- &format_attr_thresh8.attr,
- &format_attr_filter_tid.attr,
- &format_attr_filter_link.attr,
- &format_attr_filter_state2.attr,
- &format_attr_filter_nid2.attr,
- &format_attr_filter_opc2.attr,
- NULL,
-};
-
-static struct attribute *ivt_uncore_pcu_formats_attr[] = {
- &format_attr_event_ext.attr,
- &format_attr_occ_sel.attr,
- &format_attr_edge.attr,
- &format_attr_thresh5.attr,
- &format_attr_occ_invert.attr,
- &format_attr_occ_edge.attr,
- &format_attr_filter_band0.attr,
- &format_attr_filter_band1.attr,
- &format_attr_filter_band2.attr,
- &format_attr_filter_band3.attr,
- NULL,
-};
-
-static struct attribute *ivt_uncore_qpi_formats_attr[] = {
- &format_attr_event_ext.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_thresh8.attr,
- &format_attr_match_rds.attr,
- &format_attr_match_rnid30.attr,
- &format_attr_match_rnid4.attr,
- &format_attr_match_dnid.attr,
- &format_attr_match_mc.attr,
- &format_attr_match_opc.attr,
- &format_attr_match_vnw.attr,
- &format_attr_match0.attr,
- &format_attr_match1.attr,
- &format_attr_mask_rds.attr,
- &format_attr_mask_rnid30.attr,
- &format_attr_mask_rnid4.attr,
- &format_attr_mask_dnid.attr,
- &format_attr_mask_mc.attr,
- &format_attr_mask_opc.attr,
- &format_attr_mask_vnw.attr,
- &format_attr_mask0.attr,
- &format_attr_mask1.attr,
- NULL,
-};
-
-static struct attribute_group ivt_uncore_format_group = {
- .name = "format",
- .attrs = ivt_uncore_formats_attr,
-};
-
-static struct attribute_group ivt_uncore_ubox_format_group = {
- .name = "format",
- .attrs = ivt_uncore_ubox_formats_attr,
-};
-
-static struct attribute_group ivt_uncore_cbox_format_group = {
- .name = "format",
- .attrs = ivt_uncore_cbox_formats_attr,
-};
-
-static struct attribute_group ivt_uncore_pcu_format_group = {
- .name = "format",
- .attrs = ivt_uncore_pcu_formats_attr,
-};
-
-static struct attribute_group ivt_uncore_qpi_format_group = {
- .name = "format",
- .attrs = ivt_uncore_qpi_formats_attr,
-};
-
-static struct intel_uncore_type ivt_uncore_ubox = {
- .name = "ubox",
- .num_counters = 2,
- .num_boxes = 1,
- .perf_ctr_bits = 44,
- .fixed_ctr_bits = 48,
- .perf_ctr = SNBEP_U_MSR_PMON_CTR0,
- .event_ctl = SNBEP_U_MSR_PMON_CTL0,
- .event_mask = IVT_U_MSR_PMON_RAW_EVENT_MASK,
- .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
- .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
- .ops = &ivt_uncore_msr_ops,
- .format_group = &ivt_uncore_ubox_format_group,
-};
-
-static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
- SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
- SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
- SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
-
- SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
- SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
- EVENT_EXTRA_END
-};
-
-static u64 ivt_cbox_filter_mask(int fields)
-{
- u64 mask = 0;
-
- if (fields & 0x1)
- mask |= IVT_CB0_MSR_PMON_BOX_FILTER_TID;
- if (fields & 0x2)
- mask |= IVT_CB0_MSR_PMON_BOX_FILTER_LINK;
- if (fields & 0x4)
- mask |= IVT_CB0_MSR_PMON_BOX_FILTER_STATE;
- if (fields & 0x8)
- mask |= IVT_CB0_MSR_PMON_BOX_FILTER_NID;
- if (fields & 0x10)
- mask |= IVT_CB0_MSR_PMON_BOX_FILTER_OPC;
-
- return mask;
-}
-
-static struct event_constraint *
-ivt_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- return __snbep_cbox_get_constraint(box, event, ivt_cbox_filter_mask);
-}
-
-static int ivt_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct extra_reg *er;
- int idx = 0;
-
- for (er = ivt_uncore_cbox_extra_regs; er->msr; er++) {
- if (er->event != (event->hw.config & er->config_mask))
- continue;
- idx |= er->idx;
- }
-
- if (idx) {
- reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
- SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
- reg1->config = event->attr.config1 & ivt_cbox_filter_mask(idx);
- reg1->idx = idx;
- }
- return 0;
-}
-
-static void ivt_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
- if (reg1->idx != EXTRA_REG_NONE) {
- u64 filter = uncore_shared_reg_config(box, 0);
- wrmsrl(reg1->reg, filter & 0xffffffff);
- wrmsrl(reg1->reg + 6, filter >> 32);
- }
-
- wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops ivt_uncore_cbox_ops = {
- .init_box = ivt_uncore_msr_init_box,
- .disable_box = snbep_uncore_msr_disable_box,
- .enable_box = snbep_uncore_msr_enable_box,
- .disable_event = snbep_uncore_msr_disable_event,
- .enable_event = ivt_cbox_enable_event,
- .read_counter = uncore_msr_read_counter,
- .hw_config = ivt_cbox_hw_config,
- .get_constraint = ivt_cbox_get_constraint,
- .put_constraint = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type ivt_uncore_cbox = {
- .name = "cbox",
- .num_counters = 4,
- .num_boxes = 15,
- .perf_ctr_bits = 44,
- .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
- .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
- .event_mask = IVT_CBO_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
- .msr_offset = SNBEP_CBO_MSR_OFFSET,
- .num_shared_regs = 1,
- .constraints = snbep_uncore_cbox_constraints,
- .ops = &ivt_uncore_cbox_ops,
- .format_group = &ivt_uncore_cbox_format_group,
-};
-
-static struct intel_uncore_ops ivt_uncore_pcu_ops = {
- IVT_UNCORE_MSR_OPS_COMMON_INIT(),
- .hw_config = snbep_pcu_hw_config,
- .get_constraint = snbep_pcu_get_constraint,
- .put_constraint = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type ivt_uncore_pcu = {
- .name = "pcu",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
- .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
- .event_mask = IVT_PCU_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &ivt_uncore_pcu_ops,
- .format_group = &ivt_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *ivt_msr_uncores[] = {
- &ivt_uncore_ubox,
- &ivt_uncore_cbox,
- &ivt_uncore_pcu,
- NULL,
-};
-
-static struct intel_uncore_type ivt_uncore_ha = {
- .name = "ha",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- IVT_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type ivt_uncore_imc = {
- .name = "imc",
- .num_counters = 4,
- .num_boxes = 8,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
- .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
- IVT_UNCORE_PCI_COMMON_INIT(),
-};
-
-/* registers in IRP boxes are not properly aligned */
-static unsigned ivt_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
-static unsigned ivt_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
-
-static void ivt_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
-
- pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx],
- hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void ivt_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
-
- pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], hwc->config);
-}
-
-static u64 ivt_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
- u64 count = 0;
-
- pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
- pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
-
- return count;
-}
-
-static struct intel_uncore_ops ivt_uncore_irp_ops = {
- .init_box = ivt_uncore_pci_init_box,
- .disable_box = snbep_uncore_pci_disable_box,
- .enable_box = snbep_uncore_pci_enable_box,
- .disable_event = ivt_uncore_irp_disable_event,
- .enable_event = ivt_uncore_irp_enable_event,
- .read_counter = ivt_uncore_irp_read_counter,
-};
-
-static struct intel_uncore_type ivt_uncore_irp = {
- .name = "irp",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .event_mask = IVT_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .ops = &ivt_uncore_irp_ops,
- .format_group = &ivt_uncore_format_group,
-};
-
-static struct intel_uncore_ops ivt_uncore_qpi_ops = {
- .init_box = ivt_uncore_pci_init_box,
- .disable_box = snbep_uncore_pci_disable_box,
- .enable_box = snbep_uncore_pci_enable_box,
- .disable_event = snbep_uncore_pci_disable_event,
- .enable_event = snbep_qpi_enable_event,
- .read_counter = snbep_uncore_pci_read_counter,
- .hw_config = snbep_qpi_hw_config,
- .get_constraint = uncore_get_constraint,
- .put_constraint = uncore_put_constraint,
-};
-
-static struct intel_uncore_type ivt_uncore_qpi = {
- .name = "qpi",
- .num_counters = 4,
- .num_boxes = 3,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCI_PMON_CTR0,
- .event_ctl = SNBEP_PCI_PMON_CTL0,
- .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &ivt_uncore_qpi_ops,
- .format_group = &ivt_uncore_qpi_format_group,
-};
-
-static struct intel_uncore_type ivt_uncore_r2pcie = {
- .name = "r2pcie",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 44,
- .constraints = snbep_uncore_r2pcie_constraints,
- IVT_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type ivt_uncore_r3qpi = {
- .name = "r3qpi",
- .num_counters = 3,
- .num_boxes = 2,
- .perf_ctr_bits = 44,
- .constraints = snbep_uncore_r3qpi_constraints,
- IVT_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
- IVT_PCI_UNCORE_HA,
- IVT_PCI_UNCORE_IMC,
- IVT_PCI_UNCORE_IRP,
- IVT_PCI_UNCORE_QPI,
- IVT_PCI_UNCORE_R2PCIE,
- IVT_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *ivt_pci_uncores[] = {
- [IVT_PCI_UNCORE_HA] = &ivt_uncore_ha,
- [IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc,
- [IVT_PCI_UNCORE_IRP] = &ivt_uncore_irp,
- [IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi,
- [IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie,
- [IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi,
- NULL,
-};
-
-static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = {
- { /* Home Agent 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0),
- },
- { /* Home Agent 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 1),
- },
- { /* MC0 Channel 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 0),
- },
- { /* MC0 Channel 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 1),
- },
- { /* MC0 Channel 3 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 2),
- },
- { /* MC0 Channel 4 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 3),
- },
- { /* MC1 Channel 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 4),
- },
- { /* MC1 Channel 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 5),
- },
- { /* MC1 Channel 3 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 6),
- },
- { /* MC1 Channel 4 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7),
- },
- { /* IRP */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IRP, 0),
- },
- { /* QPI0 Port 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0),
- },
- { /* QPI0 Port 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 1),
- },
- { /* QPI1 Port 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 2),
- },
- { /* R2PCIe */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R2PCIE, 0),
- },
- { /* R3QPI0 Link 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 0),
- },
- { /* R3QPI0 Link 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 1),
- },
- { /* R3QPI1 Link 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
- .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2),
- },
- { /* QPI Port 0 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- SNBEP_PCI_QPI_PORT0_FILTER),
- },
- { /* QPI Port 0 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- SNBEP_PCI_QPI_PORT1_FILTER),
- },
- { /* end: all zeroes */ }
-};
-
-static struct pci_driver ivt_uncore_pci_driver = {
- .name = "ivt_uncore",
- .id_table = ivt_uncore_pci_ids,
-};
-/* end of IvyTown uncore support */
-
-/* Sandy Bridge uncore support */
-static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (hwc->idx < UNCORE_PMC_IDX_FIXED)
- wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
- else
- wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
-}
-
-static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- wrmsrl(event->hw.config_base, 0);
-}
-
-static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
-{
- if (box->pmu->pmu_idx == 0) {
- wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
- SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
- }
-}
-
-static struct uncore_event_desc snb_uncore_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
- { /* end: all zeroes */ },
-};
-
-static struct attribute *snb_uncore_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_cmask5.attr,
- NULL,
-};
-
-static struct attribute_group snb_uncore_format_group = {
- .name = "format",
- .attrs = snb_uncore_formats_attr,
-};
-
-static struct intel_uncore_ops snb_uncore_msr_ops = {
- .init_box = snb_uncore_msr_init_box,
- .disable_event = snb_uncore_msr_disable_event,
- .enable_event = snb_uncore_msr_enable_event,
- .read_counter = uncore_msr_read_counter,
-};
-
-static struct event_constraint snb_uncore_cbox_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
- EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type snb_uncore_cbox = {
- .name = "cbox",
- .num_counters = 2,
- .num_boxes = 4,
- .perf_ctr_bits = 44,
- .fixed_ctr_bits = 48,
- .perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
- .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
- .fixed_ctr = SNB_UNC_FIXED_CTR,
- .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
- .single_fixed = 1,
- .event_mask = SNB_UNC_RAW_EVENT_MASK,
- .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
- .constraints = snb_uncore_cbox_constraints,
- .ops = &snb_uncore_msr_ops,
- .format_group = &snb_uncore_format_group,
- .event_descs = snb_uncore_events,
-};
-
-static struct intel_uncore_type *snb_msr_uncores[] = {
- &snb_uncore_cbox,
- NULL,
-};
-
-enum {
- SNB_PCI_UNCORE_IMC,
-};
-
-static struct uncore_event_desc snb_uncore_imc_events[] = {
- INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"),
- INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
- INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
-
- INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
- INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
- INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
-
- { /* end: all zeroes */ },
-};
-
-#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff
-#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48
-
-/* page size multiple covering all config regs */
-#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000
-
-#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1
-#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050
-#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2
-#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
-#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
-
-static struct attribute *snb_uncore_imc_formats_attr[] = {
- &format_attr_event.attr,
- NULL,
-};
-
-static struct attribute_group snb_uncore_imc_format_group = {
- .name = "format",
- .attrs = snb_uncore_imc_formats_attr,
-};
-
-static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
- int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
- resource_size_t addr;
- u32 pci_dword;
-
- pci_read_config_dword(pdev, where, &pci_dword);
- addr = pci_dword;
-
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
- pci_read_config_dword(pdev, where + 4, &pci_dword);
- addr |= ((resource_size_t)pci_dword << 32);
-#endif
-
- addr &= ~(PAGE_SIZE - 1);
-
- box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
- box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
-}
-
-static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
-{}
-
-static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
-{}
-
-static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{}
-
-static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{}
-
-static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
-}
-
-/*
- * custom event_init() function because we define our own fixed, free
- * running counters, so we do not want to conflict with generic uncore
- * logic. Also simplifies processing
- */
-static int snb_uncore_imc_event_init(struct perf_event *event)
-{
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box;
- struct hw_perf_event *hwc = &event->hw;
- u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
- int idx, base;
-
- if (event->attr.type != event->pmu->type)
- return -ENOENT;
-
- pmu = uncore_event_to_pmu(event);
- /* no device found for this pmu */
- if (pmu->func_id < 0)
- return -ENOENT;
-
- /* Sampling not supported yet */
- if (hwc->sample_period)
- return -EINVAL;
-
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
- return -EINVAL;
-
- /*
- * Place all uncore events for a particular physical package
- * onto a single cpu
- */
- if (event->cpu < 0)
- return -EINVAL;
-
- /* check only supported bits are set */
- if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
- return -EINVAL;
-
- box = uncore_pmu_to_box(pmu, event->cpu);
- if (!box || box->cpu < 0)
- return -EINVAL;
-
- event->cpu = box->cpu;
-
- event->hw.idx = -1;
- event->hw.last_tag = ~0ULL;
- event->hw.extra_reg.idx = EXTRA_REG_NONE;
- event->hw.branch_reg.idx = EXTRA_REG_NONE;
- /*
- * check event is known (whitelist, determines counter)
- */
- switch (cfg) {
- case SNB_UNCORE_PCI_IMC_DATA_READS:
- base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
- idx = UNCORE_PMC_IDX_FIXED;
- break;
- case SNB_UNCORE_PCI_IMC_DATA_WRITES:
- base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
- idx = UNCORE_PMC_IDX_FIXED + 1;
- break;
- default:
- return -EINVAL;
- }
-
- /* must be done before validate_group */
- event->hw.event_base = base;
- event->hw.config = cfg;
- event->hw.idx = idx;
-
- /* no group validation needed, we have free running counters */
-
- return 0;
-}
-
-static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- return 0;
-}
-
-static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- u64 count;
-
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
- event->hw.state = 0;
- box->n_active++;
-
- list_add_tail(&event->active_entry, &box->active_list);
-
- count = snb_uncore_imc_read_counter(box, event);
- local64_set(&event->hw.prev_count, count);
-
- if (box->n_active == 1)
- uncore_pmu_start_hrtimer(box);
-}
-
-static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- struct hw_perf_event *hwc = &event->hw;
-
- if (!(hwc->state & PERF_HES_STOPPED)) {
- box->n_active--;
-
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
-
- list_del(&event->active_entry);
-
- if (box->n_active == 0)
- uncore_pmu_cancel_hrtimer(box);
- }
-
- if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- /*
- * Drain the remaining delta count out of a event
- * that we are disabling:
- */
- uncore_perf_event_update(box, event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-}
-
-static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- struct hw_perf_event *hwc = &event->hw;
-
- if (!box)
- return -ENODEV;
-
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
- if (!(flags & PERF_EF_START))
- hwc->state |= PERF_HES_ARCH;
-
- snb_uncore_imc_event_start(event, 0);
-
- box->n_events++;
-
- return 0;
-}
-
-static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- int i;
-
- snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-
- for (i = 0; i < box->n_events; i++) {
- if (event == box->event_list[i]) {
- --box->n_events;
- break;
- }
- }
-}
-
-static int snb_pci2phy_map_init(int devid)
-{
- struct pci_dev *dev = NULL;
- int bus;
-
- dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
- if (!dev)
- return -ENOTTY;
-
- bus = dev->bus->number;
-
- pcibus_to_physid[bus] = 0;
-
- pci_dev_put(dev);
-
- return 0;
-}
-
-static struct pmu snb_uncore_imc_pmu = {
- .task_ctx_nr = perf_invalid_context,
- .event_init = snb_uncore_imc_event_init,
- .add = snb_uncore_imc_event_add,
- .del = snb_uncore_imc_event_del,
- .start = snb_uncore_imc_event_start,
- .stop = snb_uncore_imc_event_stop,
- .read = uncore_pmu_event_read,
-};
-
-static struct intel_uncore_ops snb_uncore_imc_ops = {
- .init_box = snb_uncore_imc_init_box,
- .enable_box = snb_uncore_imc_enable_box,
- .disable_box = snb_uncore_imc_disable_box,
- .disable_event = snb_uncore_imc_disable_event,
- .enable_event = snb_uncore_imc_enable_event,
- .hw_config = snb_uncore_imc_hw_config,
- .read_counter = snb_uncore_imc_read_counter,
-};
-
-static struct intel_uncore_type snb_uncore_imc = {
- .name = "imc",
- .num_counters = 2,
- .num_boxes = 1,
- .fixed_ctr_bits = 32,
- .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE,
- .event_descs = snb_uncore_imc_events,
- .format_group = &snb_uncore_imc_format_group,
- .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
- .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK,
- .ops = &snb_uncore_imc_ops,
- .pmu = &snb_uncore_imc_pmu,
-};
-
-static struct intel_uncore_type *snb_pci_uncores[] = {
- [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc,
- NULL,
-};
-
-static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* end: all zeroes */ },
-};
-
-static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* end: all zeroes */ },
-};
-
-static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* end: all zeroes */ },
-};
-
-static struct pci_driver snb_uncore_pci_driver = {
- .name = "snb_uncore",
- .id_table = snb_uncore_pci_ids,
-};
-
-static struct pci_driver ivb_uncore_pci_driver = {
- .name = "ivb_uncore",
- .id_table = ivb_uncore_pci_ids,
-};
-
-static struct pci_driver hsw_uncore_pci_driver = {
- .name = "hsw_uncore",
- .id_table = hsw_uncore_pci_ids,
-};
-
-/* end of Sandy Bridge uncore support */
-
-/* Nehalem uncore support */
-static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
- wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
-}
-
-static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
- wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
-}
-
-static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (hwc->idx < UNCORE_PMC_IDX_FIXED)
- wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
- else
- wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
-}
-
-static struct attribute *nhm_uncore_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_cmask8.attr,
- NULL,
-};
-
-static struct attribute_group nhm_uncore_format_group = {
- .name = "format",
- .attrs = nhm_uncore_formats_attr,
-};
-
-static struct uncore_event_desc nhm_uncore_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
- INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"),
- INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"),
- { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhm_uncore_msr_ops = {
- .disable_box = nhm_uncore_msr_disable_box,
- .enable_box = nhm_uncore_msr_enable_box,
- .disable_event = snb_uncore_msr_disable_event,
- .enable_event = nhm_uncore_msr_enable_event,
- .read_counter = uncore_msr_read_counter,
-};
-
-static struct intel_uncore_type nhm_uncore = {
- .name = "",
- .num_counters = 8,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .event_ctl = NHM_UNC_PERFEVTSEL0,
- .perf_ctr = NHM_UNC_UNCORE_PMC0,
- .fixed_ctr = NHM_UNC_FIXED_CTR,
- .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL,
- .event_mask = NHM_UNC_RAW_EVENT_MASK,
- .event_descs = nhm_uncore_events,
- .ops = &nhm_uncore_msr_ops,
- .format_group = &nhm_uncore_format_group,
-};
-
-static struct intel_uncore_type *nhm_msr_uncores[] = {
- &nhm_uncore,
- NULL,
-};
-/* end of Nehalem uncore support */
-
-/* Nehalem-EX uncore support */
-DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
-DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
-DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
-
-static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
-{
- wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
-}
-
-static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
- unsigned msr = uncore_msr_box_ctl(box);
- u64 config;
-
- if (msr) {
- rdmsrl(msr, config);
- config &= ~((1ULL << uncore_num_counters(box)) - 1);
- /* WBox has a fixed counter */
- if (uncore_msr_fixed_ctl(box))
- config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
- wrmsrl(msr, config);
- }
-}
-
-static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
- unsigned msr = uncore_msr_box_ctl(box);
- u64 config;
-
- if (msr) {
- rdmsrl(msr, config);
- config |= (1ULL << uncore_num_counters(box)) - 1;
- /* WBox has a fixed counter */
- if (uncore_msr_fixed_ctl(box))
- config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
- wrmsrl(msr, config);
- }
-}
-
-static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- wrmsrl(event->hw.config_base, 0);
-}
-
-static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
- wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
- else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
- else
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
-}
-
-#define NHMEX_UNCORE_OPS_COMMON_INIT() \
- .init_box = nhmex_uncore_msr_init_box, \
- .disable_box = nhmex_uncore_msr_disable_box, \
- .enable_box = nhmex_uncore_msr_enable_box, \
- .disable_event = nhmex_uncore_msr_disable_event, \
- .read_counter = uncore_msr_read_counter
-
-static struct intel_uncore_ops nhmex_uncore_ops = {
- NHMEX_UNCORE_OPS_COMMON_INIT(),
- .enable_event = nhmex_uncore_msr_enable_event,
-};
-
-static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_edge.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_ubox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_ubox_formats_attr,
-};
-
-static struct intel_uncore_type nhmex_uncore_ubox = {
- .name = "ubox",
- .num_counters = 1,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_U_MSR_PMON_EV_SEL,
- .perf_ctr = NHMEX_U_MSR_PMON_CTR,
- .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL,
- .ops = &nhmex_uncore_ops,
- .format_group = &nhmex_uncore_ubox_format_group
-};
-
-static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_cbox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_cbox_formats_attr,
-};
-
-/* msr offset for each instance of cbox */
-static unsigned nhmex_cbox_msr_offsets[] = {
- 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
-};
-
-static struct intel_uncore_type nhmex_uncore_cbox = {
- .name = "cbox",
- .num_counters = 6,
- .num_boxes = 10,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0,
- .perf_ctr = NHMEX_C0_MSR_PMON_CTR0,
- .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
- .msr_offsets = nhmex_cbox_msr_offsets,
- .pair_ctr_ctl = 1,
- .ops = &nhmex_uncore_ops,
- .format_group = &nhmex_uncore_cbox_format_group
-};
-
-static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
- { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_type nhmex_uncore_wbox = {
- .name = "wbox",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_W_MSR_PMON_CNT0,
- .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0,
- .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR,
- .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL,
- .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_W_MSR_GLOBAL_CTL,
- .pair_ctr_ctl = 1,
- .event_descs = nhmex_uncore_wbox_events,
- .ops = &nhmex_uncore_ops,
- .format_group = &nhmex_uncore_cbox_format_group
-};
-
-static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- int ctr, ev_sel;
-
- ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
- NHMEX_B_PMON_CTR_SHIFT;
- ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
- NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
-
- /* events that do not use the match/mask registers */
- if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
- (ctr == 2 && ev_sel != 0x4) || ctr == 3)
- return 0;
-
- if (box->pmu->pmu_idx == 0)
- reg1->reg = NHMEX_B0_MSR_MATCH;
- else
- reg1->reg = NHMEX_B1_MSR_MATCH;
- reg1->idx = 0;
- reg1->config = event->attr.config1;
- reg2->config = event->attr.config2;
- return 0;
-}
-
-static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
- if (reg1->idx != EXTRA_REG_NONE) {
- wrmsrl(reg1->reg, reg1->config);
- wrmsrl(reg1->reg + 1, reg2->config);
- }
- wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
- (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
-}
-
-/*
- * The Bbox has 4 counters, but each counter monitors different events.
- * Use bits 6-7 in the event config to select counter.
- */
-static struct event_constraint nhmex_uncore_bbox_constraints[] = {
- EVENT_CONSTRAINT(0 , 1, 0xc0),
- EVENT_CONSTRAINT(0x40, 2, 0xc0),
- EVENT_CONSTRAINT(0x80, 4, 0xc0),
- EVENT_CONSTRAINT(0xc0, 8, 0xc0),
- EVENT_CONSTRAINT_END,
-};
-
-static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
- &format_attr_event5.attr,
- &format_attr_counter.attr,
- &format_attr_match.attr,
- &format_attr_mask.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_bbox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_bbox_formats_attr,
-};
-
-static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
- NHMEX_UNCORE_OPS_COMMON_INIT(),
- .enable_event = nhmex_bbox_msr_enable_event,
- .hw_config = nhmex_bbox_hw_config,
- .get_constraint = uncore_get_constraint,
- .put_constraint = uncore_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_bbox = {
- .name = "bbox",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_B0_MSR_PMON_CTL0,
- .perf_ctr = NHMEX_B0_MSR_PMON_CTR0,
- .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL,
- .msr_offset = NHMEX_B_MSR_OFFSET,
- .pair_ctr_ctl = 1,
- .num_shared_regs = 1,
- .constraints = nhmex_uncore_bbox_constraints,
- .ops = &nhmex_uncore_bbox_ops,
- .format_group = &nhmex_uncore_bbox_format_group
-};
-
-static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
- /* only TO_R_PROG_EV event uses the match/mask register */
- if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) !=
- NHMEX_S_EVENT_TO_R_PROG_EV)
- return 0;
-
- if (box->pmu->pmu_idx == 0)
- reg1->reg = NHMEX_S0_MSR_MM_CFG;
- else
- reg1->reg = NHMEX_S1_MSR_MM_CFG;
- reg1->idx = 0;
- reg1->config = event->attr.config1;
- reg2->config = event->attr.config2;
- return 0;
-}
-
-static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
- if (reg1->idx != EXTRA_REG_NONE) {
- wrmsrl(reg1->reg, 0);
- wrmsrl(reg1->reg + 1, reg1->config);
- wrmsrl(reg1->reg + 2, reg2->config);
- wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
- }
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
-}
-
-static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- &format_attr_match.attr,
- &format_attr_mask.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_sbox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_sbox_formats_attr,
-};
-
-static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
- NHMEX_UNCORE_OPS_COMMON_INIT(),
- .enable_event = nhmex_sbox_msr_enable_event,
- .hw_config = nhmex_sbox_hw_config,
- .get_constraint = uncore_get_constraint,
- .put_constraint = uncore_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_sbox = {
- .name = "sbox",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_S0_MSR_PMON_CTL0,
- .perf_ctr = NHMEX_S0_MSR_PMON_CTR0,
- .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL,
- .msr_offset = NHMEX_S_MSR_OFFSET,
- .pair_ctr_ctl = 1,
- .num_shared_regs = 1,
- .ops = &nhmex_uncore_sbox_ops,
- .format_group = &nhmex_uncore_sbox_format_group
-};
-
-enum {
- EXTRA_REG_NHMEX_M_FILTER,
- EXTRA_REG_NHMEX_M_DSP,
- EXTRA_REG_NHMEX_M_ISS,
- EXTRA_REG_NHMEX_M_MAP,
- EXTRA_REG_NHMEX_M_MSC_THR,
- EXTRA_REG_NHMEX_M_PGT,
- EXTRA_REG_NHMEX_M_PLD,
- EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
-};
-
-static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
- MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
- MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
- MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
- MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
- /* event 0xa uses two extra registers */
- MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
- MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
- MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
- /* events 0xd ~ 0x10 use the same extra register */
- MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
- MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
- MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
- MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
- MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
- MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
- MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
- MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
- MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
- EVENT_EXTRA_END
-};
-
-/* Nehalem-EX or Westmere-EX ? */
-static bool uncore_nhmex;
-
-static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
-{
- struct intel_uncore_extra_reg *er;
- unsigned long flags;
- bool ret = false;
- u64 mask;
-
- if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
- er = &box->shared_regs[idx];
- raw_spin_lock_irqsave(&er->lock, flags);
- if (!atomic_read(&er->ref) || er->config == config) {
- atomic_inc(&er->ref);
- er->config = config;
- ret = true;
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- return ret;
- }
- /*
- * The ZDP_CTL_FVC MSR has 4 fields which are used to control
- * events 0xd ~ 0x10. Besides these 4 fields, there are additional
- * fields which are shared.
- */
- idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- if (WARN_ON_ONCE(idx >= 4))
- return false;
-
- /* mask of the shared fields */
- if (uncore_nhmex)
- mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
- else
- mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK;
- er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
-
- raw_spin_lock_irqsave(&er->lock, flags);
- /* add mask of the non-shared field if it's in use */
- if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) {
- if (uncore_nhmex)
- mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- else
- mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- }
-
- if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
- atomic_add(1 << (idx * 8), &er->ref);
- if (uncore_nhmex)
- mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
- NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- else
- mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK |
- WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- er->config &= ~mask;
- er->config |= (config & mask);
- ret = true;
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- return ret;
-}
-
-static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
-{
- struct intel_uncore_extra_reg *er;
-
- if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
- er = &box->shared_regs[idx];
- atomic_dec(&er->ref);
- return;
- }
-
- idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
- atomic_sub(1 << (idx * 8), &er->ref);
-}
-
-static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
- u64 config = reg1->config;
-
- /* get the non-shared control bits and shift them */
- idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- if (uncore_nhmex)
- config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- else
- config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- if (new_idx > orig_idx) {
- idx = new_idx - orig_idx;
- config <<= 3 * idx;
- } else {
- idx = orig_idx - new_idx;
- config >>= 3 * idx;
- }
-
- /* add the shared control bits back */
- if (uncore_nhmex)
- config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
- else
- config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
- config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
- if (modify) {
- /* adjust the main event selector */
- if (new_idx > orig_idx)
- hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
- else
- hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
- reg1->config = config;
- reg1->idx = ~0xff | new_idx;
- }
- return config;
-}
-
-static struct event_constraint *
-nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
- int i, idx[2], alloc = 0;
- u64 config1 = reg1->config;
-
- idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
- idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
-again:
- for (i = 0; i < 2; i++) {
- if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
- idx[i] = 0xff;
-
- if (idx[i] == 0xff)
- continue;
-
- if (!nhmex_mbox_get_shared_reg(box, idx[i],
- __BITS_VALUE(config1, i, 32)))
- goto fail;
- alloc |= (0x1 << i);
- }
-
- /* for the match/mask registers */
- if (reg2->idx != EXTRA_REG_NONE &&
- (uncore_box_is_fake(box) || !reg2->alloc) &&
- !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
- goto fail;
-
- /*
- * If it's a fake box -- as per validate_{group,event}() we
- * shouldn't touch event state and we can avoid doing so
- * since both will only call get_event_constraints() once
- * on each event, this avoids the need for reg->alloc.
- */
- if (!uncore_box_is_fake(box)) {
- if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
- nhmex_mbox_alter_er(event, idx[0], true);
- reg1->alloc |= alloc;
- if (reg2->idx != EXTRA_REG_NONE)
- reg2->alloc = 1;
- }
- return NULL;
-fail:
- if (idx[0] != 0xff && !(alloc & 0x1) &&
- idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
- /*
- * events 0xd ~ 0x10 are functional identical, but are
- * controlled by different fields in the ZDP_CTL_FVC
- * register. If we failed to take one field, try the
- * rest 3 choices.
- */
- BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
- idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- idx[0] = (idx[0] + 1) % 4;
- idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
- config1 = nhmex_mbox_alter_er(event, idx[0], false);
- goto again;
- }
- }
-
- if (alloc & 0x1)
- nhmex_mbox_put_shared_reg(box, idx[0]);
- if (alloc & 0x2)
- nhmex_mbox_put_shared_reg(box, idx[1]);
- return &constraint_empty;
-}
-
-static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-
- if (uncore_box_is_fake(box))
- return;
-
- if (reg1->alloc & 0x1)
- nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
- if (reg1->alloc & 0x2)
- nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
- reg1->alloc = 0;
-
- if (reg2->alloc) {
- nhmex_mbox_put_shared_reg(box, reg2->idx);
- reg2->alloc = 0;
- }
-}
-
-static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
-{
- if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
- return er->idx;
- return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
-}
-
-static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct intel_uncore_type *type = box->pmu->type;
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
- struct extra_reg *er;
- unsigned msr;
- int reg_idx = 0;
- /*
- * The mbox events may require 2 extra MSRs at the most. But only
- * the lower 32 bits in these MSRs are significant, so we can use
- * config1 to pass two MSRs' config.
- */
- for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
- if (er->event != (event->hw.config & er->config_mask))
- continue;
- if (event->attr.config1 & ~er->valid_mask)
- return -EINVAL;
-
- msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
- if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
- return -EINVAL;
-
- /* always use the 32~63 bits to pass the PLD config */
- if (er->idx == EXTRA_REG_NHMEX_M_PLD)
- reg_idx = 1;
- else if (WARN_ON_ONCE(reg_idx > 0))
- return -EINVAL;
-
- reg1->idx &= ~(0xff << (reg_idx * 8));
- reg1->reg &= ~(0xffff << (reg_idx * 16));
- reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
- reg1->reg |= msr << (reg_idx * 16);
- reg1->config = event->attr.config1;
- reg_idx++;
- }
- /*
- * The mbox only provides ability to perform address matching
- * for the PLD events.
- */
- if (reg_idx == 2) {
- reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
- if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
- reg2->config = event->attr.config2;
- else
- reg2->config = ~0ULL;
- if (box->pmu->pmu_idx == 0)
- reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
- else
- reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
- }
- return 0;
-}
-
-static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
-{
- struct intel_uncore_extra_reg *er;
- unsigned long flags;
- u64 config;
-
- if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
- return box->shared_regs[idx].config;
-
- er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
- raw_spin_lock_irqsave(&er->lock, flags);
- config = er->config;
- raw_spin_unlock_irqrestore(&er->lock, flags);
- return config;
-}
-
-static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- int idx;
-
- idx = __BITS_VALUE(reg1->idx, 0, 8);
- if (idx != 0xff)
- wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
- nhmex_mbox_shared_reg_config(box, idx));
- idx = __BITS_VALUE(reg1->idx, 1, 8);
- if (idx != 0xff)
- wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
- nhmex_mbox_shared_reg_config(box, idx));
-
- if (reg2->idx != EXTRA_REG_NONE) {
- wrmsrl(reg2->reg, 0);
- if (reg2->config != ~0ULL) {
- wrmsrl(reg2->reg + 1,
- reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
- wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
- (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
- wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
- }
- }
-
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
-}
-
-DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3");
-DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5");
-DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6");
-DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7");
-DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13");
-DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21");
-DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63");
-DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33");
-DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61");
-DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63");
-
-static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
- &format_attr_count_mode.attr,
- &format_attr_storage_mode.attr,
- &format_attr_wrap_mode.attr,
- &format_attr_flag_mode.attr,
- &format_attr_inc_sel.attr,
- &format_attr_set_flag_sel.attr,
- &format_attr_filter_cfg_en.attr,
- &format_attr_filter_match.attr,
- &format_attr_filter_mask.attr,
- &format_attr_dsp.attr,
- &format_attr_thr.attr,
- &format_attr_fvc.attr,
- &format_attr_pgt.attr,
- &format_attr_map.attr,
- &format_attr_iss.attr,
- &format_attr_pld.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_mbox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_mbox_formats_attr,
-};
-
-static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
- INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
- INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
- { /* end: all zeroes */ },
-};
-
-static struct uncore_event_desc wsmex_uncore_mbox_events[] = {
- INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"),
- INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"),
- { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
- NHMEX_UNCORE_OPS_COMMON_INIT(),
- .enable_event = nhmex_mbox_msr_enable_event,
- .hw_config = nhmex_mbox_hw_config,
- .get_constraint = nhmex_mbox_get_constraint,
- .put_constraint = nhmex_mbox_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_mbox = {
- .name = "mbox",
- .num_counters = 6,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_M0_MSR_PMU_CTL0,
- .perf_ctr = NHMEX_M0_MSR_PMU_CNT0,
- .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL,
- .msr_offset = NHMEX_M_MSR_OFFSET,
- .pair_ctr_ctl = 1,
- .num_shared_regs = 8,
- .event_descs = nhmex_uncore_mbox_events,
- .ops = &nhmex_uncore_mbox_ops,
- .format_group = &nhmex_uncore_mbox_format_group,
-};
-
-static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
- /* adjust the main event selector and extra register index */
- if (reg1->idx % 2) {
- reg1->idx--;
- hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
- } else {
- reg1->idx++;
- hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
- }
-
- /* adjust extra register config */
- switch (reg1->idx % 6) {
- case 2:
- /* shift the 8~15 bits to the 0~7 bits */
- reg1->config >>= 8;
- break;
- case 3:
- /* shift the 0~7 bits to the 8~15 bits */
- reg1->config <<= 8;
- break;
- };
-}
-
-/*
- * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7.
- * An event set consists of 6 events, the 3rd and 4th events in
- * an event set use the same extra register. So an event set uses
- * 5 extra registers.
- */
-static struct event_constraint *
-nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- struct intel_uncore_extra_reg *er;
- unsigned long flags;
- int idx, er_idx;
- u64 config1;
- bool ok = false;
-
- if (!uncore_box_is_fake(box) && reg1->alloc)
- return NULL;
-
- idx = reg1->idx % 6;
- config1 = reg1->config;
-again:
- er_idx = idx;
- /* the 3rd and 4th events use the same extra register */
- if (er_idx > 2)
- er_idx--;
- er_idx += (reg1->idx / 6) * 5;
-
- er = &box->shared_regs[er_idx];
- raw_spin_lock_irqsave(&er->lock, flags);
- if (idx < 2) {
- if (!atomic_read(&er->ref) || er->config == reg1->config) {
- atomic_inc(&er->ref);
- er->config = reg1->config;
- ok = true;
- }
- } else if (idx == 2 || idx == 3) {
- /*
- * these two events use different fields in a extra register,
- * the 0~7 bits and the 8~15 bits respectively.
- */
- u64 mask = 0xff << ((idx - 2) * 8);
- if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
- !((er->config ^ config1) & mask)) {
- atomic_add(1 << ((idx - 2) * 8), &er->ref);
- er->config &= ~mask;
- er->config |= config1 & mask;
- ok = true;
- }
- } else {
- if (!atomic_read(&er->ref) ||
- (er->config == (hwc->config >> 32) &&
- er->config1 == reg1->config &&
- er->config2 == reg2->config)) {
- atomic_inc(&er->ref);
- er->config = (hwc->config >> 32);
- er->config1 = reg1->config;
- er->config2 = reg2->config;
- ok = true;
- }
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- if (!ok) {
- /*
- * The Rbox events are always in pairs. The paired
- * events are functional identical, but use different
- * extra registers. If we failed to take an extra
- * register, try the alternative.
- */
- idx ^= 1;
- if (idx != reg1->idx % 6) {
- if (idx == 2)
- config1 >>= 8;
- else if (idx == 3)
- config1 <<= 8;
- goto again;
- }
- } else {
- if (!uncore_box_is_fake(box)) {
- if (idx != reg1->idx % 6)
- nhmex_rbox_alter_er(box, event);
- reg1->alloc = 1;
- }
- return NULL;
- }
- return &constraint_empty;
-}
-
-static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct intel_uncore_extra_reg *er;
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- int idx, er_idx;
-
- if (uncore_box_is_fake(box) || !reg1->alloc)
- return;
-
- idx = reg1->idx % 6;
- er_idx = idx;
- if (er_idx > 2)
- er_idx--;
- er_idx += (reg1->idx / 6) * 5;
-
- er = &box->shared_regs[er_idx];
- if (idx == 2 || idx == 3)
- atomic_sub(1 << ((idx - 2) * 8), &er->ref);
- else
- atomic_dec(&er->ref);
-
- reg1->alloc = 0;
-}
-
-static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
- int idx;
-
- idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
- NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
- if (idx >= 0x18)
- return -EINVAL;
-
- reg1->idx = idx;
- reg1->config = event->attr.config1;
-
- switch (idx % 6) {
- case 4:
- case 5:
- hwc->config |= event->attr.config & (~0ULL << 32);
- reg2->config = event->attr.config2;
- break;
- };
- return 0;
-}
-
-static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- int idx, port;
-
- idx = reg1->idx;
- port = idx / 6 + box->pmu->pmu_idx * 4;
-
- switch (idx % 6) {
- case 0:
- wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
- break;
- case 1:
- wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
- break;
- case 2:
- case 3:
- wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
- uncore_shared_reg_config(box, 2 + (idx / 6) * 5));
- break;
- case 4:
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
- hwc->config >> 32);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
- break;
- case 5:
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
- hwc->config >> 32);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
- break;
- };
-
- wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
- (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
-}
-
-DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63");
-DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
-DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
-
-static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
- &format_attr_event5.attr,
- &format_attr_xbr_mm_cfg.attr,
- &format_attr_xbr_match.attr,
- &format_attr_xbr_mask.attr,
- &format_attr_qlx_cfg.attr,
- &format_attr_iperf_cfg.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_rbox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_rbox_formats_attr,
-};
-
-static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
- INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"),
- INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"),
- INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"),
- INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"),
- INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"),
- INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"),
- { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
- NHMEX_UNCORE_OPS_COMMON_INIT(),
- .enable_event = nhmex_rbox_msr_enable_event,
- .hw_config = nhmex_rbox_hw_config,
- .get_constraint = nhmex_rbox_get_constraint,
- .put_constraint = nhmex_rbox_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_rbox = {
- .name = "rbox",
- .num_counters = 8,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_R_MSR_PMON_CTL0,
- .perf_ctr = NHMEX_R_MSR_PMON_CNT0,
- .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_R_MSR_GLOBAL_CTL,
- .msr_offset = NHMEX_R_MSR_OFFSET,
- .pair_ctr_ctl = 1,
- .num_shared_regs = 20,
- .event_descs = nhmex_uncore_rbox_events,
- .ops = &nhmex_uncore_rbox_ops,
- .format_group = &nhmex_uncore_rbox_format_group
-};
-
-static struct intel_uncore_type *nhmex_msr_uncores[] = {
- &nhmex_uncore_ubox,
- &nhmex_uncore_cbox,
- &nhmex_uncore_bbox,
- &nhmex_uncore_sbox,
- &nhmex_uncore_mbox,
- &nhmex_uncore_rbox,
- &nhmex_uncore_wbox,
- NULL,
-};
-/* end of Nehalem-EX uncore support */
-
static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
{
struct hw_perf_event *hwc = &event->hw;
@@ -3140,7 +170,7 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_eve
hwc->event_base = uncore_perf_ctr(box, hwc->idx);
}
-static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
+void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
u64 prev_count, new_count, delta;
int shift;
@@ -3201,14 +231,14 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
-static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
+void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
__hrtimer_start_range_ns(&box->hrtimer,
ns_to_ktime(box->hrtimer_duration), 0,
HRTIMER_MODE_REL_PINNED, 0);
}
-static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
+void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
hrtimer_cancel(&box->hrtimer);
}
@@ -3291,7 +321,7 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve
}
if (event->attr.config == UNCORE_FIXED_EVENT)
- return &constraint_fixed;
+ return &uncore_constraint_fixed;
if (type->constraints) {
for_each_event_constraint(c, type->constraints) {
@@ -3496,7 +526,7 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags)
event->hw.last_tag = ~0ULL;
}
-static void uncore_pmu_event_read(struct perf_event *event)
+void uncore_pmu_event_read(struct perf_event *event)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
uncore_perf_event_update(box, event);
@@ -3635,7 +665,7 @@ static struct attribute_group uncore_pmu_attr_group = {
.attrs = uncore_pmu_attrs,
};
-static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
+static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
int ret;
@@ -3758,9 +788,6 @@ fail:
return ret;
}
-static struct pci_driver *uncore_pci_driver;
-static bool pcidrv_registered;
-
/*
* add a pci uncore device
*/
@@ -3770,18 +797,20 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
struct intel_uncore_box *box;
struct intel_uncore_type *type;
int phys_id;
+ bool first_box = false;
- phys_id = pcibus_to_physid[pdev->bus->number];
+ phys_id = uncore_pcibus_to_physid[pdev->bus->number];
if (phys_id < 0)
return -ENODEV;
if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
- extra_pci_dev[phys_id][UNCORE_PCI_DEV_IDX(id->driver_data)] = pdev;
+ int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
+ uncore_extra_pci_dev[phys_id][idx] = pdev;
pci_set_drvdata(pdev, NULL);
return 0;
}
- type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+ type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
box = uncore_alloc_box(type, NUMA_NO_NODE);
if (!box)
return -ENOMEM;
@@ -3803,9 +832,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
pci_set_drvdata(pdev, box);
raw_spin_lock(&uncore_box_lock);
+ if (list_empty(&pmu->box_list))
+ first_box = true;
list_add_tail(&box->list, &pmu->box_list);
raw_spin_unlock(&uncore_box_lock);
+ if (first_box)
+ uncore_pmu_register(pmu);
return 0;
}
@@ -3813,13 +846,14 @@ static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box = pci_get_drvdata(pdev);
struct intel_uncore_pmu *pmu;
- int i, cpu, phys_id = pcibus_to_physid[pdev->bus->number];
+ int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+ bool last_box = false;
box = pci_get_drvdata(pdev);
if (!box) {
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
- if (extra_pci_dev[phys_id][i] == pdev) {
- extra_pci_dev[phys_id][i] = NULL;
+ if (uncore_extra_pci_dev[phys_id][i] == pdev) {
+ uncore_extra_pci_dev[phys_id][i] = NULL;
break;
}
}
@@ -3835,6 +869,8 @@ static void uncore_pci_remove(struct pci_dev *pdev)
raw_spin_lock(&uncore_box_lock);
list_del(&box->list);
+ if (list_empty(&pmu->box_list))
+ last_box = true;
raw_spin_unlock(&uncore_box_lock);
for_each_possible_cpu(cpu) {
@@ -3846,6 +882,9 @@ static void uncore_pci_remove(struct pci_dev *pdev)
WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
kfree(box);
+
+ if (last_box)
+ perf_pmu_unregister(&pmu->pmu);
}
static int __init uncore_pci_init(void)
@@ -3854,46 +893,32 @@ static int __init uncore_pci_init(void)
switch (boot_cpu_data.x86_model) {
case 45: /* Sandy Bridge-EP */
- ret = snbep_pci2phy_map_init(0x3ce0);
- if (ret)
- return ret;
- pci_uncores = snbep_pci_uncores;
- uncore_pci_driver = &snbep_uncore_pci_driver;
+ ret = snbep_uncore_pci_init();
break;
- case 62: /* IvyTown */
- ret = snbep_pci2phy_map_init(0x0e1e);
- if (ret)
- return ret;
- pci_uncores = ivt_pci_uncores;
- uncore_pci_driver = &ivt_uncore_pci_driver;
+ case 62: /* Ivy Bridge-EP */
+ ret = ivbep_uncore_pci_init();
+ break;
+ case 63: /* Haswell-EP */
+ ret = hswep_uncore_pci_init();
break;
case 42: /* Sandy Bridge */
- ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC);
- if (ret)
- return ret;
- pci_uncores = snb_pci_uncores;
- uncore_pci_driver = &snb_uncore_pci_driver;
+ ret = snb_uncore_pci_init();
break;
case 58: /* Ivy Bridge */
- ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC);
- if (ret)
- return ret;
- pci_uncores = snb_pci_uncores;
- uncore_pci_driver = &ivb_uncore_pci_driver;
+ ret = ivb_uncore_pci_init();
break;
case 60: /* Haswell */
case 69: /* Haswell Celeron */
- ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC);
- if (ret)
- return ret;
- pci_uncores = snb_pci_uncores;
- uncore_pci_driver = &hsw_uncore_pci_driver;
+ ret = hsw_uncore_pci_init();
break;
default:
return 0;
}
- ret = uncore_types_init(pci_uncores);
+ if (ret)
+ return ret;
+
+ ret = uncore_types_init(uncore_pci_uncores);
if (ret)
return ret;
@@ -3904,7 +929,7 @@ static int __init uncore_pci_init(void)
if (ret == 0)
pcidrv_registered = true;
else
- uncore_types_exit(pci_uncores);
+ uncore_types_exit(uncore_pci_uncores);
return ret;
}
@@ -3914,7 +939,7 @@ static void __init uncore_pci_exit(void)
if (pcidrv_registered) {
pcidrv_registered = false;
pci_unregister_driver(uncore_pci_driver);
- uncore_types_exit(pci_uncores);
+ uncore_types_exit(uncore_pci_uncores);
}
}
@@ -3940,8 +965,8 @@ static void uncore_cpu_dying(int cpu)
struct intel_uncore_box *box;
int i, j;
- for (i = 0; msr_uncores[i]; i++) {
- type = msr_uncores[i];
+ for (i = 0; uncore_msr_uncores[i]; i++) {
+ type = uncore_msr_uncores[i];
for (j = 0; j < type->num_boxes; j++) {
pmu = &type->pmus[j];
box = *per_cpu_ptr(pmu->box, cpu);
@@ -3961,8 +986,8 @@ static int uncore_cpu_starting(int cpu)
phys_id = topology_physical_package_id(cpu);
- for (i = 0; msr_uncores[i]; i++) {
- type = msr_uncores[i];
+ for (i = 0; uncore_msr_uncores[i]; i++) {
+ type = uncore_msr_uncores[i];
for (j = 0; j < type->num_boxes; j++) {
pmu = &type->pmus[j];
box = *per_cpu_ptr(pmu->box, cpu);
@@ -4002,8 +1027,8 @@ static int uncore_cpu_prepare(int cpu, int phys_id)
struct intel_uncore_box *box;
int i, j;
- for (i = 0; msr_uncores[i]; i++) {
- type = msr_uncores[i];
+ for (i = 0; uncore_msr_uncores[i]; i++) {
+ type = uncore_msr_uncores[i];
for (j = 0; j < type->num_boxes; j++) {
pmu = &type->pmus[j];
if (pmu->func_id < 0)
@@ -4083,8 +1108,8 @@ static void uncore_event_exit_cpu(int cpu)
if (target >= 0)
cpumask_set_cpu(target, &uncore_cpu_mask);
- uncore_change_context(msr_uncores, cpu, target);
- uncore_change_context(pci_uncores, cpu, target);
+ uncore_change_context(uncore_msr_uncores, cpu, target);
+ uncore_change_context(uncore_pci_uncores, cpu, target);
}
static void uncore_event_init_cpu(int cpu)
@@ -4099,8 +1124,8 @@ static void uncore_event_init_cpu(int cpu)
cpumask_set_cpu(cpu, &uncore_cpu_mask);
- uncore_change_context(msr_uncores, -1, cpu);
- uncore_change_context(pci_uncores, -1, cpu);
+ uncore_change_context(uncore_msr_uncores, -1, cpu);
+ uncore_change_context(uncore_pci_uncores, -1, cpu);
}
static int uncore_cpu_notifier(struct notifier_block *self,
@@ -4160,47 +1185,37 @@ static void __init uncore_cpu_setup(void *dummy)
static int __init uncore_cpu_init(void)
{
- int ret, max_cores;
+ int ret;
- max_cores = boot_cpu_data.x86_max_cores;
switch (boot_cpu_data.x86_model) {
case 26: /* Nehalem */
case 30:
case 37: /* Westmere */
case 44:
- msr_uncores = nhm_msr_uncores;
+ nhm_uncore_cpu_init();
break;
case 42: /* Sandy Bridge */
case 58: /* Ivy Bridge */
- if (snb_uncore_cbox.num_boxes > max_cores)
- snb_uncore_cbox.num_boxes = max_cores;
- msr_uncores = snb_msr_uncores;
+ snb_uncore_cpu_init();
break;
case 45: /* Sandy Bridge-EP */
- if (snbep_uncore_cbox.num_boxes > max_cores)
- snbep_uncore_cbox.num_boxes = max_cores;
- msr_uncores = snbep_msr_uncores;
+ snbep_uncore_cpu_init();
break;
case 46: /* Nehalem-EX */
- uncore_nhmex = true;
case 47: /* Westmere-EX aka. Xeon E7 */
- if (!uncore_nhmex)
- nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
- if (nhmex_uncore_cbox.num_boxes > max_cores)
- nhmex_uncore_cbox.num_boxes = max_cores;
- msr_uncores = nhmex_msr_uncores;
+ nhmex_uncore_cpu_init();
break;
- case 62: /* IvyTown */
- if (ivt_uncore_cbox.num_boxes > max_cores)
- ivt_uncore_cbox.num_boxes = max_cores;
- msr_uncores = ivt_msr_uncores;
+ case 62: /* Ivy Bridge-EP */
+ ivbep_uncore_cpu_init();
+ break;
+ case 63: /* Haswell-EP */
+ hswep_uncore_cpu_init();
break;
-
default:
return 0;
}
- ret = uncore_types_init(msr_uncores);
+ ret = uncore_types_init(uncore_msr_uncores);
if (ret)
return ret;
@@ -4213,16 +1228,8 @@ static int __init uncore_pmus_register(void)
struct intel_uncore_type *type;
int i, j;
- for (i = 0; msr_uncores[i]; i++) {
- type = msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- uncore_pmu_register(pmu);
- }
- }
-
- for (i = 0; pci_uncores[i]; i++) {
- type = pci_uncores[i];
+ for (i = 0; uncore_msr_uncores[i]; i++) {
+ type = uncore_msr_uncores[i];
for (j = 0; j < type->num_boxes; j++) {
pmu = &type->pmus[j];
uncore_pmu_register(pmu);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 90236f0c94a9..18eb78bbdd10 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -24,395 +24,6 @@
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
-/* SNB event control */
-#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
-#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
-#define SNB_UNC_CTL_EDGE_DET (1 << 18)
-#define SNB_UNC_CTL_EN (1 << 22)
-#define SNB_UNC_CTL_INVERT (1 << 23)
-#define SNB_UNC_CTL_CMASK_MASK 0x1f000000
-#define NHM_UNC_CTL_CMASK_MASK 0xff000000
-#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0)
-
-#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
- SNB_UNC_CTL_UMASK_MASK | \
- SNB_UNC_CTL_EDGE_DET | \
- SNB_UNC_CTL_INVERT | \
- SNB_UNC_CTL_CMASK_MASK)
-
-#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
- SNB_UNC_CTL_UMASK_MASK | \
- SNB_UNC_CTL_EDGE_DET | \
- SNB_UNC_CTL_INVERT | \
- NHM_UNC_CTL_CMASK_MASK)
-
-/* SNB global control register */
-#define SNB_UNC_PERF_GLOBAL_CTL 0x391
-#define SNB_UNC_FIXED_CTR_CTRL 0x394
-#define SNB_UNC_FIXED_CTR 0x395
-
-/* SNB uncore global control */
-#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1)
-#define SNB_UNC_GLOBAL_CTL_EN (1 << 29)
-
-/* SNB Cbo register */
-#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700
-#define SNB_UNC_CBO_0_PER_CTR0 0x706
-#define SNB_UNC_CBO_MSR_OFFSET 0x10
-
-/* NHM global control register */
-#define NHM_UNC_PERF_GLOBAL_CTL 0x391
-#define NHM_UNC_FIXED_CTR 0x394
-#define NHM_UNC_FIXED_CTR_CTRL 0x395
-
-/* NHM uncore global control */
-#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1)
-#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32)
-
-/* NHM uncore register */
-#define NHM_UNC_PERFEVTSEL0 0x3c0
-#define NHM_UNC_UNCORE_PMC0 0x3b0
-
-/* SNB-EP Box level control */
-#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0)
-#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1)
-#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8)
-#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16)
-#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \
- SNBEP_PMON_BOX_CTL_RST_CTRS | \
- SNBEP_PMON_BOX_CTL_FRZ_EN)
-/* SNB-EP event control */
-#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff
-#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00
-#define SNBEP_PMON_CTL_RST (1 << 17)
-#define SNBEP_PMON_CTL_EDGE_DET (1 << 18)
-#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21)
-#define SNBEP_PMON_CTL_EN (1 << 22)
-#define SNBEP_PMON_CTL_INVERT (1 << 23)
-#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000
-#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_UMASK_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PMON_CTL_INVERT | \
- SNBEP_PMON_CTL_TRESH_MASK)
-
-/* SNB-EP Ubox event control */
-#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000
-#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \
- (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_UMASK_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PMON_CTL_INVERT | \
- SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
-
-#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19)
-#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
- SNBEP_CBO_PMON_CTL_TID_EN)
-
-/* SNB-EP PCU event control */
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000
-#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30)
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31)
-#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \
- (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PMON_CTL_EV_SEL_EXT | \
- SNBEP_PMON_CTL_INVERT | \
- SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-
-#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \
- (SNBEP_PMON_RAW_EVENT_MASK | \
- SNBEP_PMON_CTL_EV_SEL_EXT)
-
-/* SNB-EP pci control register */
-#define SNBEP_PCI_PMON_BOX_CTL 0xf4
-#define SNBEP_PCI_PMON_CTL0 0xd8
-/* SNB-EP pci counter register */
-#define SNBEP_PCI_PMON_CTR0 0xa0
-
-/* SNB-EP home agent register */
-#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40
-#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44
-#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48
-/* SNB-EP memory controller register */
-#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0
-#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0
-/* SNB-EP QPI register */
-#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228
-#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c
-#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238
-#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c
-
-/* SNB-EP Ubox register */
-#define SNBEP_U_MSR_PMON_CTR0 0xc16
-#define SNBEP_U_MSR_PMON_CTL0 0xc10
-
-#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08
-#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09
-
-/* SNB-EP Cbo register */
-#define SNBEP_C0_MSR_PMON_CTR0 0xd16
-#define SNBEP_C0_MSR_PMON_CTL0 0xd10
-#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04
-#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14
-#define SNBEP_CBO_MSR_OFFSET 0x20
-
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID 0x1f
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID 0x3fc00
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE 0x7c0000
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC 0xff800000
-
-#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) { \
- .event = (e), \
- .msr = SNBEP_C0_MSR_PMON_BOX_FILTER, \
- .config_mask = (m), \
- .idx = (i) \
-}
-
-/* SNB-EP PCU register */
-#define SNBEP_PCU_MSR_PMON_CTR0 0xc36
-#define SNBEP_PCU_MSR_PMON_CTL0 0xc30
-#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24
-#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34
-#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff
-#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc
-#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd
-
-/* IVT event control */
-#define IVT_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \
- SNBEP_PMON_BOX_CTL_RST_CTRS)
-#define IVT_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_UMASK_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PMON_CTL_TRESH_MASK)
-/* IVT Ubox */
-#define IVT_U_MSR_PMON_GLOBAL_CTL 0xc00
-#define IVT_U_PMON_GLOBAL_FRZ_ALL (1 << 31)
-#define IVT_U_PMON_GLOBAL_UNFRZ_ALL (1 << 29)
-
-#define IVT_U_MSR_PMON_RAW_EVENT_MASK \
- (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_UMASK_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
-/* IVT Cbo */
-#define IVT_CBO_MSR_PMON_RAW_EVENT_MASK (IVT_PMON_RAW_EVENT_MASK | \
- SNBEP_CBO_PMON_CTL_TID_EN)
-
-#define IVT_CB0_MSR_PMON_BOX_FILTER_TID (0x1fULL << 0)
-#define IVT_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 5)
-#define IVT_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17)
-#define IVT_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32)
-#define IVT_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52)
-#define IVT_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61)
-#define IVT_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62)
-#define IVT_CB0_MSR_PMON_BOX_FILTER_IOSC (0x1ULL << 63)
-
-/* IVT home agent */
-#define IVT_HA_PCI_PMON_CTL_Q_OCC_RST (1 << 16)
-#define IVT_HA_PCI_PMON_RAW_EVENT_MASK \
- (IVT_PMON_RAW_EVENT_MASK | \
- IVT_HA_PCI_PMON_CTL_Q_OCC_RST)
-/* IVT PCU */
-#define IVT_PCU_MSR_PMON_RAW_EVENT_MASK \
- (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_EV_SEL_EXT | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-/* IVT QPI */
-#define IVT_QPI_PCI_PMON_RAW_EVENT_MASK \
- (IVT_PMON_RAW_EVENT_MASK | \
- SNBEP_PMON_CTL_EV_SEL_EXT)
-
-/* NHM-EX event control */
-#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff
-#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00
-#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0)
-#define NHMEX_PMON_CTL_EDGE_DET (1 << 18)
-#define NHMEX_PMON_CTL_PMI_EN (1 << 20)
-#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22)
-#define NHMEX_PMON_CTL_INVERT (1 << 23)
-#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000
-#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \
- NHMEX_PMON_CTL_UMASK_MASK | \
- NHMEX_PMON_CTL_EDGE_DET | \
- NHMEX_PMON_CTL_INVERT | \
- NHMEX_PMON_CTL_TRESH_MASK)
-
-/* NHM-EX Ubox */
-#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00
-#define NHMEX_U_MSR_PMON_CTR 0xc11
-#define NHMEX_U_MSR_PMON_EV_SEL 0xc10
-
-#define NHMEX_U_PMON_GLOBAL_EN (1 << 0)
-#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e
-#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28)
-#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29)
-#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31)
-
-#define NHMEX_U_PMON_RAW_EVENT_MASK \
- (NHMEX_PMON_CTL_EV_SEL_MASK | \
- NHMEX_PMON_CTL_EDGE_DET)
-
-/* NHM-EX Cbox */
-#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00
-#define NHMEX_C0_MSR_PMON_CTR0 0xd11
-#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10
-#define NHMEX_C_MSR_OFFSET 0x20
-
-/* NHM-EX Bbox */
-#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20
-#define NHMEX_B0_MSR_PMON_CTR0 0xc31
-#define NHMEX_B0_MSR_PMON_CTL0 0xc30
-#define NHMEX_B_MSR_OFFSET 0x40
-#define NHMEX_B0_MSR_MATCH 0xe45
-#define NHMEX_B0_MSR_MASK 0xe46
-#define NHMEX_B1_MSR_MATCH 0xe4d
-#define NHMEX_B1_MSR_MASK 0xe4e
-
-#define NHMEX_B_PMON_CTL_EN (1 << 0)
-#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1
-#define NHMEX_B_PMON_CTL_EV_SEL_MASK \
- (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
-#define NHMEX_B_PMON_CTR_SHIFT 6
-#define NHMEX_B_PMON_CTR_MASK \
- (0x3 << NHMEX_B_PMON_CTR_SHIFT)
-#define NHMEX_B_PMON_RAW_EVENT_MASK \
- (NHMEX_B_PMON_CTL_EV_SEL_MASK | \
- NHMEX_B_PMON_CTR_MASK)
-
-/* NHM-EX Sbox */
-#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40
-#define NHMEX_S0_MSR_PMON_CTR0 0xc51
-#define NHMEX_S0_MSR_PMON_CTL0 0xc50
-#define NHMEX_S_MSR_OFFSET 0x80
-#define NHMEX_S0_MSR_MM_CFG 0xe48
-#define NHMEX_S0_MSR_MATCH 0xe49
-#define NHMEX_S0_MSR_MASK 0xe4a
-#define NHMEX_S1_MSR_MM_CFG 0xe58
-#define NHMEX_S1_MSR_MATCH 0xe59
-#define NHMEX_S1_MSR_MASK 0xe5a
-
-#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63)
-#define NHMEX_S_EVENT_TO_R_PROG_EV 0
-
-/* NHM-EX Mbox */
-#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0
-#define NHMEX_M0_MSR_PMU_DSP 0xca5
-#define NHMEX_M0_MSR_PMU_ISS 0xca6
-#define NHMEX_M0_MSR_PMU_MAP 0xca7
-#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8
-#define NHMEX_M0_MSR_PMU_PGT 0xca9
-#define NHMEX_M0_MSR_PMU_PLD 0xcaa
-#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab
-#define NHMEX_M0_MSR_PMU_CTL0 0xcb0
-#define NHMEX_M0_MSR_PMU_CNT0 0xcb1
-#define NHMEX_M_MSR_OFFSET 0x40
-#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54
-#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c
-
-#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63)
-#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL
-#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL
-#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34
-
-#define NHMEX_M_PMON_CTL_EN (1 << 0)
-#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1)
-#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2
-#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \
- (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
-#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4
-#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \
- (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
-#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6)
-#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7)
-#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9
-#define NHMEX_M_PMON_CTL_INC_SEL_MASK \
- (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
-#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19
-#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \
- (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
-#define NHMEX_M_PMON_RAW_EVENT_MASK \
- (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \
- NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \
- NHMEX_M_PMON_CTL_WRAP_MODE | \
- NHMEX_M_PMON_CTL_FLAG_MODE | \
- NHMEX_M_PMON_CTL_INC_SEL_MASK | \
- NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
-
-#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
-#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
-
-#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
-#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
-
-/*
- * use the 9~13 bits to select event If the 7th bit is not set,
- * otherwise use the 19~21 bits to select event.
- */
-#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
-#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
- NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
- NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
- NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_INC_SEL_EXTAR_REG(c, r) \
- EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
- MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
-#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
- EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
- MBOX_SET_FLAG_SEL_MASK, \
- (u64)-1, NHMEX_M_##r)
-
-/* NHM-EX Rbox */
-#define NHMEX_R_MSR_GLOBAL_CTL 0xe00
-#define NHMEX_R_MSR_PMON_CTL0 0xe10
-#define NHMEX_R_MSR_PMON_CNT0 0xe11
-#define NHMEX_R_MSR_OFFSET 0x20
-
-#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \
- ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
-#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n))
-#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n))
-#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \
- (((n) < 4 ? 0 : 0x10) + (n) * 4)
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \
- (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \
- (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \
- (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \
- (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \
- (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \
- (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
-
-#define NHMEX_R_PMON_CTL_EN (1 << 0)
-#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1
-#define NHMEX_R_PMON_CTL_EV_SEL_MASK \
- (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
-#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6)
-#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK
-
-/* NHM-EX Wbox */
-#define NHMEX_W_MSR_GLOBAL_CTL 0xc80
-#define NHMEX_W_MSR_PMON_CNT0 0xc90
-#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91
-#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394
-#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395
-
-#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31)
-
struct intel_uncore_ops;
struct intel_uncore_pmu;
struct intel_uncore_box;
@@ -505,6 +116,9 @@ struct uncore_event_desc {
const char *config;
};
+ssize_t uncore_event_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf);
+
#define INTEL_UNCORE_EVENT_DESC(_name, _config) \
{ \
.attr = __ATTR(_name, 0444, uncore_event_show, NULL), \
@@ -522,15 +136,6 @@ static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
static struct kobj_attribute format_attr_##_var = \
__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
-
-static ssize_t uncore_event_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- struct uncore_event_desc *event =
- container_of(attr, struct uncore_event_desc, attr);
- return sprintf(buf, "%s", event->config);
-}
-
static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
{
return box->pmu->type->box_ctl;
@@ -694,3 +299,41 @@ static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
{
return (box->phys_id < 0);
}
+
+struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event);
+struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
+struct intel_uncore_box *uncore_event_to_box(struct perf_event *event);
+u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
+void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_event_read(struct perf_event *event);
+void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
+struct event_constraint *
+uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
+void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
+u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
+
+extern struct intel_uncore_type **uncore_msr_uncores;
+extern struct intel_uncore_type **uncore_pci_uncores;
+extern struct pci_driver *uncore_pci_driver;
+extern int uncore_pcibus_to_physid[256];
+extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+extern struct event_constraint uncore_constraint_empty;
+
+/* perf_event_intel_uncore_snb.c */
+int snb_uncore_pci_init(void);
+int ivb_uncore_pci_init(void);
+int hsw_uncore_pci_init(void);
+void snb_uncore_cpu_init(void);
+void nhm_uncore_cpu_init(void);
+
+/* perf_event_intel_uncore_snbep.c */
+int snbep_uncore_pci_init(void);
+void snbep_uncore_cpu_init(void);
+int ivbep_uncore_pci_init(void);
+void ivbep_uncore_cpu_init(void);
+int hswep_uncore_pci_init(void);
+void hswep_uncore_cpu_init(void);
+
+/* perf_event_intel_uncore_nhmex.c */
+void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
new file mode 100644
index 000000000000..2749965afed0
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
@@ -0,0 +1,1221 @@
+/* Nehalem-EX/Westmere-EX uncore support */
+#include "perf_event_intel_uncore.h"
+
+/* NHM-EX event control */
+#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff
+#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00
+#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0)
+#define NHMEX_PMON_CTL_EDGE_DET (1 << 18)
+#define NHMEX_PMON_CTL_PMI_EN (1 << 20)
+#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22)
+#define NHMEX_PMON_CTL_INVERT (1 << 23)
+#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000
+#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_PMON_CTL_UMASK_MASK | \
+ NHMEX_PMON_CTL_EDGE_DET | \
+ NHMEX_PMON_CTL_INVERT | \
+ NHMEX_PMON_CTL_TRESH_MASK)
+
+/* NHM-EX Ubox */
+#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00
+#define NHMEX_U_MSR_PMON_CTR 0xc11
+#define NHMEX_U_MSR_PMON_EV_SEL 0xc10
+
+#define NHMEX_U_PMON_GLOBAL_EN (1 << 0)
+#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e
+#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28)
+#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29)
+#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31)
+
+#define NHMEX_U_PMON_RAW_EVENT_MASK \
+ (NHMEX_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_PMON_CTL_EDGE_DET)
+
+/* NHM-EX Cbox */
+#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00
+#define NHMEX_C0_MSR_PMON_CTR0 0xd11
+#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10
+#define NHMEX_C_MSR_OFFSET 0x20
+
+/* NHM-EX Bbox */
+#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20
+#define NHMEX_B0_MSR_PMON_CTR0 0xc31
+#define NHMEX_B0_MSR_PMON_CTL0 0xc30
+#define NHMEX_B_MSR_OFFSET 0x40
+#define NHMEX_B0_MSR_MATCH 0xe45
+#define NHMEX_B0_MSR_MASK 0xe46
+#define NHMEX_B1_MSR_MATCH 0xe4d
+#define NHMEX_B1_MSR_MASK 0xe4e
+
+#define NHMEX_B_PMON_CTL_EN (1 << 0)
+#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1
+#define NHMEX_B_PMON_CTL_EV_SEL_MASK \
+ (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_B_PMON_CTR_SHIFT 6
+#define NHMEX_B_PMON_CTR_MASK \
+ (0x3 << NHMEX_B_PMON_CTR_SHIFT)
+#define NHMEX_B_PMON_RAW_EVENT_MASK \
+ (NHMEX_B_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_B_PMON_CTR_MASK)
+
+/* NHM-EX Sbox */
+#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40
+#define NHMEX_S0_MSR_PMON_CTR0 0xc51
+#define NHMEX_S0_MSR_PMON_CTL0 0xc50
+#define NHMEX_S_MSR_OFFSET 0x80
+#define NHMEX_S0_MSR_MM_CFG 0xe48
+#define NHMEX_S0_MSR_MATCH 0xe49
+#define NHMEX_S0_MSR_MASK 0xe4a
+#define NHMEX_S1_MSR_MM_CFG 0xe58
+#define NHMEX_S1_MSR_MATCH 0xe59
+#define NHMEX_S1_MSR_MASK 0xe5a
+
+#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63)
+#define NHMEX_S_EVENT_TO_R_PROG_EV 0
+
+/* NHM-EX Mbox */
+#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0
+#define NHMEX_M0_MSR_PMU_DSP 0xca5
+#define NHMEX_M0_MSR_PMU_ISS 0xca6
+#define NHMEX_M0_MSR_PMU_MAP 0xca7
+#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8
+#define NHMEX_M0_MSR_PMU_PGT 0xca9
+#define NHMEX_M0_MSR_PMU_PLD 0xcaa
+#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab
+#define NHMEX_M0_MSR_PMU_CTL0 0xcb0
+#define NHMEX_M0_MSR_PMU_CNT0 0xcb1
+#define NHMEX_M_MSR_OFFSET 0x40
+#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54
+#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c
+
+#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63)
+#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34
+
+#define NHMEX_M_PMON_CTL_EN (1 << 0)
+#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1)
+#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2
+#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \
+ (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \
+ (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6)
+#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7)
+#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9
+#define NHMEX_M_PMON_CTL_INC_SEL_MASK \
+ (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \
+ (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
+#define NHMEX_M_PMON_RAW_EVENT_MASK \
+ (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \
+ NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \
+ NHMEX_M_PMON_CTL_WRAP_MODE | \
+ NHMEX_M_PMON_CTL_FLAG_MODE | \
+ NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+ NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
+
+#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
+
+#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
+
+/*
+ * use the 9~13 bits to select event If the 7th bit is not set,
+ * otherwise use the 19~21 bits to select event.
+ */
+#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_EXTAR_REG(c, r) \
+ EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+ MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
+#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
+ EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+ MBOX_SET_FLAG_SEL_MASK, \
+ (u64)-1, NHMEX_M_##r)
+
+/* NHM-EX Rbox */
+#define NHMEX_R_MSR_GLOBAL_CTL 0xe00
+#define NHMEX_R_MSR_PMON_CTL0 0xe10
+#define NHMEX_R_MSR_PMON_CNT0 0xe11
+#define NHMEX_R_MSR_OFFSET 0x20
+
+#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \
+ ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n))
+#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \
+ (((n) < 4 ? 0 : 0x10) + (n) * 4)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \
+ (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \
+ (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
+
+#define NHMEX_R_PMON_CTL_EN (1 << 0)
+#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1
+#define NHMEX_R_PMON_CTL_EV_SEL_MASK \
+ (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6)
+#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK
+
+/* NHM-EX Wbox */
+#define NHMEX_W_MSR_GLOBAL_CTL 0xc80
+#define NHMEX_W_MSR_PMON_CNT0 0xc90
+#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91
+#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394
+#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395
+
+#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31)
+
+#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \
+ ((1ULL << (n)) - 1)))
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
+DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
+
+static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
+}
+
+static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+ u64 config;
+
+ if (msr) {
+ rdmsrl(msr, config);
+ config &= ~((1ULL << uncore_num_counters(box)) - 1);
+ /* WBox has a fixed counter */
+ if (uncore_msr_fixed_ctl(box))
+ config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
+ wrmsrl(msr, config);
+ }
+}
+
+static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+ u64 config;
+
+ if (msr) {
+ rdmsrl(msr, config);
+ config |= (1ULL << uncore_num_counters(box)) - 1;
+ /* WBox has a fixed counter */
+ if (uncore_msr_fixed_ctl(box))
+ config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
+ wrmsrl(msr, config);
+ }
+}
+
+static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ wrmsrl(event->hw.config_base, 0);
+}
+
+static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
+ else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+ else
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+#define NHMEX_UNCORE_OPS_COMMON_INIT() \
+ .init_box = nhmex_uncore_msr_init_box, \
+ .disable_box = nhmex_uncore_msr_disable_box, \
+ .enable_box = nhmex_uncore_msr_enable_box, \
+ .disable_event = nhmex_uncore_msr_disable_event, \
+ .read_counter = uncore_msr_read_counter
+
+static struct intel_uncore_ops nhmex_uncore_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_uncore_msr_enable_event,
+};
+
+static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_edge.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type nhmex_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 1,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_U_MSR_PMON_EV_SEL,
+ .perf_ctr = NHMEX_U_MSR_PMON_CTR,
+ .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_ubox_format_group
+};
+
+static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_cbox_formats_attr,
+};
+
+/* msr offset for each instance of cbox */
+static unsigned nhmex_cbox_msr_offsets[] = {
+ 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
+};
+
+static struct intel_uncore_type nhmex_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 6,
+ .num_boxes = 10,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0,
+ .perf_ctr = NHMEX_C0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
+ .msr_offsets = nhmex_cbox_msr_offsets,
+ .pair_ctr_ctl = 1,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_cbox_format_group
+};
+
+static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type nhmex_uncore_wbox = {
+ .name = "wbox",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_W_MSR_PMON_CNT0,
+ .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0,
+ .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR,
+ .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_W_MSR_GLOBAL_CTL,
+ .pair_ctr_ctl = 1,
+ .event_descs = nhmex_uncore_wbox_events,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_cbox_format_group
+};
+
+static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int ctr, ev_sel;
+
+ ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
+ NHMEX_B_PMON_CTR_SHIFT;
+ ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
+ NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
+
+ /* events that do not use the match/mask registers */
+ if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
+ (ctr == 2 && ev_sel != 0x4) || ctr == 3)
+ return 0;
+
+ if (box->pmu->pmu_idx == 0)
+ reg1->reg = NHMEX_B0_MSR_MATCH;
+ else
+ reg1->reg = NHMEX_B1_MSR_MATCH;
+ reg1->idx = 0;
+ reg1->config = event->attr.config1;
+ reg2->config = event->attr.config2;
+ return 0;
+}
+
+static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg1->reg, reg1->config);
+ wrmsrl(reg1->reg + 1, reg2->config);
+ }
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+ (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
+}
+
+/*
+ * The Bbox has 4 counters, but each counter monitors different events.
+ * Use bits 6-7 in the event config to select counter.
+ */
+static struct event_constraint nhmex_uncore_bbox_constraints[] = {
+ EVENT_CONSTRAINT(0 , 1, 0xc0),
+ EVENT_CONSTRAINT(0x40, 2, 0xc0),
+ EVENT_CONSTRAINT(0x80, 4, 0xc0),
+ EVENT_CONSTRAINT(0xc0, 8, 0xc0),
+ EVENT_CONSTRAINT_END,
+};
+
+static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
+ &format_attr_event5.attr,
+ &format_attr_counter.attr,
+ &format_attr_match.attr,
+ &format_attr_mask.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_bbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_bbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_bbox_msr_enable_event,
+ .hw_config = nhmex_bbox_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_bbox = {
+ .name = "bbox",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_B0_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_B0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL,
+ .msr_offset = NHMEX_B_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 1,
+ .constraints = nhmex_uncore_bbox_constraints,
+ .ops = &nhmex_uncore_bbox_ops,
+ .format_group = &nhmex_uncore_bbox_format_group
+};
+
+static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ /* only TO_R_PROG_EV event uses the match/mask register */
+ if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) !=
+ NHMEX_S_EVENT_TO_R_PROG_EV)
+ return 0;
+
+ if (box->pmu->pmu_idx == 0)
+ reg1->reg = NHMEX_S0_MSR_MM_CFG;
+ else
+ reg1->reg = NHMEX_S1_MSR_MM_CFG;
+ reg1->idx = 0;
+ reg1->config = event->attr.config1;
+ reg2->config = event->attr.config2;
+ return 0;
+}
+
+static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg1->reg, 0);
+ wrmsrl(reg1->reg + 1, reg1->config);
+ wrmsrl(reg1->reg + 2, reg2->config);
+ wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
+ }
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+}
+
+static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_match.attr,
+ &format_attr_mask.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_sbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_sbox_msr_enable_event,
+ .hw_config = nhmex_sbox_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_S0_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_S0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL,
+ .msr_offset = NHMEX_S_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 1,
+ .ops = &nhmex_uncore_sbox_ops,
+ .format_group = &nhmex_uncore_sbox_format_group
+};
+
+enum {
+ EXTRA_REG_NHMEX_M_FILTER,
+ EXTRA_REG_NHMEX_M_DSP,
+ EXTRA_REG_NHMEX_M_ISS,
+ EXTRA_REG_NHMEX_M_MAP,
+ EXTRA_REG_NHMEX_M_MSC_THR,
+ EXTRA_REG_NHMEX_M_PGT,
+ EXTRA_REG_NHMEX_M_PLD,
+ EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
+};
+
+static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
+ MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
+ MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
+ MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
+ MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
+ /* event 0xa uses two extra registers */
+ MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
+ MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
+ MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
+ /* events 0xd ~ 0x10 use the same extra register */
+ MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
+ EVENT_EXTRA_END
+};
+
+/* Nehalem-EX or Westmere-EX ? */
+static bool uncore_nhmex;
+
+static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ bool ret = false;
+ u64 mask;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ er = &box->shared_regs[idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (!atomic_read(&er->ref) || er->config == config) {
+ atomic_inc(&er->ref);
+ er->config = config;
+ ret = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return ret;
+ }
+ /*
+ * The ZDP_CTL_FVC MSR has 4 fields which are used to control
+ * events 0xd ~ 0x10. Besides these 4 fields, there are additional
+ * fields which are shared.
+ */
+ idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ if (WARN_ON_ONCE(idx >= 4))
+ return false;
+
+ /* mask of the shared fields */
+ if (uncore_nhmex)
+ mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
+ else
+ mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK;
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+
+ raw_spin_lock_irqsave(&er->lock, flags);
+ /* add mask of the non-shared field if it's in use */
+ if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) {
+ if (uncore_nhmex)
+ mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ }
+
+ if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
+ atomic_add(1 << (idx * 8), &er->ref);
+ if (uncore_nhmex)
+ mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
+ NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK |
+ WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ er->config &= ~mask;
+ er->config |= (config & mask);
+ ret = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return ret;
+}
+
+static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ er = &box->shared_regs[idx];
+ atomic_dec(&er->ref);
+ return;
+ }
+
+ idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+ atomic_sub(1 << (idx * 8), &er->ref);
+}
+
+static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+ u64 config = reg1->config;
+
+ /* get the non-shared control bits and shift them */
+ idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ if (uncore_nhmex)
+ config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ if (new_idx > orig_idx) {
+ idx = new_idx - orig_idx;
+ config <<= 3 * idx;
+ } else {
+ idx = orig_idx - new_idx;
+ config >>= 3 * idx;
+ }
+
+ /* add the shared control bits back */
+ if (uncore_nhmex)
+ config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+ else
+ config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+ config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+ if (modify) {
+ /* adjust the main event selector */
+ if (new_idx > orig_idx)
+ hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+ else
+ hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+ reg1->config = config;
+ reg1->idx = ~0xff | new_idx;
+ }
+ return config;
+}
+
+static struct event_constraint *
+nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ int i, idx[2], alloc = 0;
+ u64 config1 = reg1->config;
+
+ idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
+ idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
+again:
+ for (i = 0; i < 2; i++) {
+ if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+ idx[i] = 0xff;
+
+ if (idx[i] == 0xff)
+ continue;
+
+ if (!nhmex_mbox_get_shared_reg(box, idx[i],
+ __BITS_VALUE(config1, i, 32)))
+ goto fail;
+ alloc |= (0x1 << i);
+ }
+
+ /* for the match/mask registers */
+ if (reg2->idx != EXTRA_REG_NONE &&
+ (uncore_box_is_fake(box) || !reg2->alloc) &&
+ !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
+ goto fail;
+
+ /*
+ * If it's a fake box -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ */
+ if (!uncore_box_is_fake(box)) {
+ if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
+ nhmex_mbox_alter_er(event, idx[0], true);
+ reg1->alloc |= alloc;
+ if (reg2->idx != EXTRA_REG_NONE)
+ reg2->alloc = 1;
+ }
+ return NULL;
+fail:
+ if (idx[0] != 0xff && !(alloc & 0x1) &&
+ idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ /*
+ * events 0xd ~ 0x10 are functional identical, but are
+ * controlled by different fields in the ZDP_CTL_FVC
+ * register. If we failed to take one field, try the
+ * rest 3 choices.
+ */
+ BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
+ idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ idx[0] = (idx[0] + 1) % 4;
+ idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
+ config1 = nhmex_mbox_alter_er(event, idx[0], false);
+ goto again;
+ }
+ }
+
+ if (alloc & 0x1)
+ nhmex_mbox_put_shared_reg(box, idx[0]);
+ if (alloc & 0x2)
+ nhmex_mbox_put_shared_reg(box, idx[1]);
+ return &uncore_constraint_empty;
+}
+
+static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+
+ if (uncore_box_is_fake(box))
+ return;
+
+ if (reg1->alloc & 0x1)
+ nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
+ if (reg1->alloc & 0x2)
+ nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
+ reg1->alloc = 0;
+
+ if (reg2->alloc) {
+ nhmex_mbox_put_shared_reg(box, reg2->idx);
+ reg2->alloc = 0;
+ }
+}
+
+static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
+{
+ if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+ return er->idx;
+ return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
+}
+
+static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_type *type = box->pmu->type;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ struct extra_reg *er;
+ unsigned msr;
+ int reg_idx = 0;
+ /*
+ * The mbox events may require 2 extra MSRs at the most. But only
+ * the lower 32 bits in these MSRs are significant, so we can use
+ * config1 to pass two MSRs' config.
+ */
+ for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ if (event->attr.config1 & ~er->valid_mask)
+ return -EINVAL;
+
+ msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
+ if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
+ return -EINVAL;
+
+ /* always use the 32~63 bits to pass the PLD config */
+ if (er->idx == EXTRA_REG_NHMEX_M_PLD)
+ reg_idx = 1;
+ else if (WARN_ON_ONCE(reg_idx > 0))
+ return -EINVAL;
+
+ reg1->idx &= ~(0xff << (reg_idx * 8));
+ reg1->reg &= ~(0xffff << (reg_idx * 16));
+ reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
+ reg1->reg |= msr << (reg_idx * 16);
+ reg1->config = event->attr.config1;
+ reg_idx++;
+ }
+ /*
+ * The mbox only provides ability to perform address matching
+ * for the PLD events.
+ */
+ if (reg_idx == 2) {
+ reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
+ if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
+ reg2->config = event->attr.config2;
+ else
+ reg2->config = ~0ULL;
+ if (box->pmu->pmu_idx == 0)
+ reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
+ else
+ reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
+ }
+ return 0;
+}
+
+static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ u64 config;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+ return box->shared_regs[idx].config;
+
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ config = er->config;
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+ return config;
+}
+
+static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int idx;
+
+ idx = __BITS_VALUE(reg1->idx, 0, 8);
+ if (idx != 0xff)
+ wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
+ nhmex_mbox_shared_reg_config(box, idx));
+ idx = __BITS_VALUE(reg1->idx, 1, 8);
+ if (idx != 0xff)
+ wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
+ nhmex_mbox_shared_reg_config(box, idx));
+
+ if (reg2->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg2->reg, 0);
+ if (reg2->config != ~0ULL) {
+ wrmsrl(reg2->reg + 1,
+ reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
+ wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
+ (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
+ wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
+ }
+ }
+
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3");
+DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5");
+DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6");
+DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7");
+DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13");
+DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61");
+DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63");
+
+static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
+ &format_attr_count_mode.attr,
+ &format_attr_storage_mode.attr,
+ &format_attr_wrap_mode.attr,
+ &format_attr_flag_mode.attr,
+ &format_attr_inc_sel.attr,
+ &format_attr_set_flag_sel.attr,
+ &format_attr_filter_cfg_en.attr,
+ &format_attr_filter_match.attr,
+ &format_attr_filter_mask.attr,
+ &format_attr_dsp.attr,
+ &format_attr_thr.attr,
+ &format_attr_fvc.attr,
+ &format_attr_pgt.attr,
+ &format_attr_map.attr,
+ &format_attr_iss.attr,
+ &format_attr_pld.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_mbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_mbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
+ { /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc wsmex_uncore_mbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"),
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_mbox_msr_enable_event,
+ .hw_config = nhmex_mbox_hw_config,
+ .get_constraint = nhmex_mbox_get_constraint,
+ .put_constraint = nhmex_mbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_mbox = {
+ .name = "mbox",
+ .num_counters = 6,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_M0_MSR_PMU_CTL0,
+ .perf_ctr = NHMEX_M0_MSR_PMU_CNT0,
+ .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL,
+ .msr_offset = NHMEX_M_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 8,
+ .event_descs = nhmex_uncore_mbox_events,
+ .ops = &nhmex_uncore_mbox_ops,
+ .format_group = &nhmex_uncore_mbox_format_group,
+};
+
+static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ /* adjust the main event selector and extra register index */
+ if (reg1->idx % 2) {
+ reg1->idx--;
+ hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ } else {
+ reg1->idx++;
+ hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ }
+
+ /* adjust extra register config */
+ switch (reg1->idx % 6) {
+ case 2:
+ /* shift the 8~15 bits to the 0~7 bits */
+ reg1->config >>= 8;
+ break;
+ case 3:
+ /* shift the 0~7 bits to the 8~15 bits */
+ reg1->config <<= 8;
+ break;
+ }
+}
+
+/*
+ * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7.
+ * An event set consists of 6 events, the 3rd and 4th events in
+ * an event set use the same extra register. So an event set uses
+ * 5 extra registers.
+ */
+static struct event_constraint *
+nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ int idx, er_idx;
+ u64 config1;
+ bool ok = false;
+
+ if (!uncore_box_is_fake(box) && reg1->alloc)
+ return NULL;
+
+ idx = reg1->idx % 6;
+ config1 = reg1->config;
+again:
+ er_idx = idx;
+ /* the 3rd and 4th events use the same extra register */
+ if (er_idx > 2)
+ er_idx--;
+ er_idx += (reg1->idx / 6) * 5;
+
+ er = &box->shared_regs[er_idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (idx < 2) {
+ if (!atomic_read(&er->ref) || er->config == reg1->config) {
+ atomic_inc(&er->ref);
+ er->config = reg1->config;
+ ok = true;
+ }
+ } else if (idx == 2 || idx == 3) {
+ /*
+ * these two events use different fields in a extra register,
+ * the 0~7 bits and the 8~15 bits respectively.
+ */
+ u64 mask = 0xff << ((idx - 2) * 8);
+ if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
+ !((er->config ^ config1) & mask)) {
+ atomic_add(1 << ((idx - 2) * 8), &er->ref);
+ er->config &= ~mask;
+ er->config |= config1 & mask;
+ ok = true;
+ }
+ } else {
+ if (!atomic_read(&er->ref) ||
+ (er->config == (hwc->config >> 32) &&
+ er->config1 == reg1->config &&
+ er->config2 == reg2->config)) {
+ atomic_inc(&er->ref);
+ er->config = (hwc->config >> 32);
+ er->config1 = reg1->config;
+ er->config2 = reg2->config;
+ ok = true;
+ }
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ if (!ok) {
+ /*
+ * The Rbox events are always in pairs. The paired
+ * events are functional identical, but use different
+ * extra registers. If we failed to take an extra
+ * register, try the alternative.
+ */
+ idx ^= 1;
+ if (idx != reg1->idx % 6) {
+ if (idx == 2)
+ config1 >>= 8;
+ else if (idx == 3)
+ config1 <<= 8;
+ goto again;
+ }
+ } else {
+ if (!uncore_box_is_fake(box)) {
+ if (idx != reg1->idx % 6)
+ nhmex_rbox_alter_er(box, event);
+ reg1->alloc = 1;
+ }
+ return NULL;
+ }
+ return &uncore_constraint_empty;
+}
+
+static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ int idx, er_idx;
+
+ if (uncore_box_is_fake(box) || !reg1->alloc)
+ return;
+
+ idx = reg1->idx % 6;
+ er_idx = idx;
+ if (er_idx > 2)
+ er_idx--;
+ er_idx += (reg1->idx / 6) * 5;
+
+ er = &box->shared_regs[er_idx];
+ if (idx == 2 || idx == 3)
+ atomic_sub(1 << ((idx - 2) * 8), &er->ref);
+ else
+ atomic_dec(&er->ref);
+
+ reg1->alloc = 0;
+}
+
+static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ int idx;
+
+ idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
+ NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ if (idx >= 0x18)
+ return -EINVAL;
+
+ reg1->idx = idx;
+ reg1->config = event->attr.config1;
+
+ switch (idx % 6) {
+ case 4:
+ case 5:
+ hwc->config |= event->attr.config & (~0ULL << 32);
+ reg2->config = event->attr.config2;
+ break;
+ }
+ return 0;
+}
+
+static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int idx, port;
+
+ idx = reg1->idx;
+ port = idx / 6 + box->pmu->pmu_idx * 4;
+
+ switch (idx % 6) {
+ case 0:
+ wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
+ break;
+ case 1:
+ wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
+ break;
+ case 2:
+ case 3:
+ wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
+ uncore_shared_reg_config(box, 2 + (idx / 6) * 5));
+ break;
+ case 4:
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
+ hwc->config >> 32);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
+ break;
+ case 5:
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
+ hwc->config >> 32);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
+ break;
+ }
+
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+ (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
+DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
+
+static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
+ &format_attr_event5.attr,
+ &format_attr_xbr_mm_cfg.attr,
+ &format_attr_xbr_match.attr,
+ &format_attr_xbr_mask.attr,
+ &format_attr_qlx_cfg.attr,
+ &format_attr_iperf_cfg.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_rbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_rbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_rbox_msr_enable_event,
+ .hw_config = nhmex_rbox_hw_config,
+ .get_constraint = nhmex_rbox_get_constraint,
+ .put_constraint = nhmex_rbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_rbox = {
+ .name = "rbox",
+ .num_counters = 8,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_R_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_R_MSR_PMON_CNT0,
+ .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_R_MSR_GLOBAL_CTL,
+ .msr_offset = NHMEX_R_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 20,
+ .event_descs = nhmex_uncore_rbox_events,
+ .ops = &nhmex_uncore_rbox_ops,
+ .format_group = &nhmex_uncore_rbox_format_group
+};
+
+static struct intel_uncore_type *nhmex_msr_uncores[] = {
+ &nhmex_uncore_ubox,
+ &nhmex_uncore_cbox,
+ &nhmex_uncore_bbox,
+ &nhmex_uncore_sbox,
+ &nhmex_uncore_mbox,
+ &nhmex_uncore_rbox,
+ &nhmex_uncore_wbox,
+ NULL,
+};
+
+void nhmex_uncore_cpu_init(void)
+{
+ if (boot_cpu_data.x86_model == 46)
+ uncore_nhmex = true;
+ else
+ nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
+ if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ uncore_msr_uncores = nhmex_msr_uncores;
+}
+/* end of Nehalem-EX uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
new file mode 100644
index 000000000000..3001015b755c
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -0,0 +1,636 @@
+/* Nehalem/SandBridge/Haswell uncore support */
+#include "perf_event_intel_uncore.h"
+
+/* SNB event control */
+#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
+#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
+#define SNB_UNC_CTL_EDGE_DET (1 << 18)
+#define SNB_UNC_CTL_EN (1 << 22)
+#define SNB_UNC_CTL_INVERT (1 << 23)
+#define SNB_UNC_CTL_CMASK_MASK 0x1f000000
+#define NHM_UNC_CTL_CMASK_MASK 0xff000000
+#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0)
+
+#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ SNB_UNC_CTL_UMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET | \
+ SNB_UNC_CTL_INVERT | \
+ SNB_UNC_CTL_CMASK_MASK)
+
+#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ SNB_UNC_CTL_UMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET | \
+ SNB_UNC_CTL_INVERT | \
+ NHM_UNC_CTL_CMASK_MASK)
+
+/* SNB global control register */
+#define SNB_UNC_PERF_GLOBAL_CTL 0x391
+#define SNB_UNC_FIXED_CTR_CTRL 0x394
+#define SNB_UNC_FIXED_CTR 0x395
+
+/* SNB uncore global control */
+#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1)
+#define SNB_UNC_GLOBAL_CTL_EN (1 << 29)
+
+/* SNB Cbo register */
+#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700
+#define SNB_UNC_CBO_0_PER_CTR0 0x706
+#define SNB_UNC_CBO_MSR_OFFSET 0x10
+
+/* NHM global control register */
+#define NHM_UNC_PERF_GLOBAL_CTL 0x391
+#define NHM_UNC_FIXED_CTR 0x394
+#define NHM_UNC_FIXED_CTR_CTRL 0x395
+
+/* NHM uncore global control */
+#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1)
+#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32)
+
+/* NHM uncore register */
+#define NHM_UNC_PERFEVTSEL0 0x3c0
+#define NHM_UNC_UNCORE_PMC0 0x3b0
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
+
+/* Sandy Bridge uncore support */
+static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+ wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+ else
+ wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
+}
+
+static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ wrmsrl(event->hw.config_base, 0);
+}
+
+static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0) {
+ wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+ }
+}
+
+static struct uncore_event_desc snb_uncore_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+ { /* end: all zeroes */ },
+};
+
+static struct attribute *snb_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask5.attr,
+ NULL,
+};
+
+static struct attribute_group snb_uncore_format_group = {
+ .name = "format",
+ .attrs = snb_uncore_formats_attr,
+};
+
+static struct intel_uncore_ops snb_uncore_msr_ops = {
+ .init_box = snb_uncore_msr_init_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = snb_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct event_constraint snb_uncore_cbox_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snb_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 2,
+ .num_boxes = 4,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
+ .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
+ .fixed_ctr = SNB_UNC_FIXED_CTR,
+ .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
+ .single_fixed = 1,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
+ .constraints = snb_uncore_cbox_constraints,
+ .ops = &snb_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+ .event_descs = snb_uncore_events,
+};
+
+static struct intel_uncore_type *snb_msr_uncores[] = {
+ &snb_uncore_cbox,
+ NULL,
+};
+
+void snb_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = snb_msr_uncores;
+ if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+}
+
+enum {
+ SNB_PCI_UNCORE_IMC,
+};
+
+static struct uncore_event_desc snb_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"),
+ INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
+
+ INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
+ INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
+
+ { /* end: all zeroes */ },
+};
+
+#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff
+#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48
+
+/* page size multiple covering all config regs */
+#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000
+
+#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1
+#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050
+#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2
+#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
+#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
+
+static struct attribute *snb_uncore_imc_formats_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group snb_uncore_imc_format_group = {
+ .name = "format",
+ .attrs = snb_uncore_imc_formats_attr,
+};
+
+static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
+ resource_size_t addr;
+ u32 pci_dword;
+
+ pci_read_config_dword(pdev, where, &pci_dword);
+ addr = pci_dword;
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ pci_read_config_dword(pdev, where + 4, &pci_dword);
+ addr |= ((resource_size_t)pci_dword << 32);
+#endif
+
+ addr &= ~(PAGE_SIZE - 1);
+
+ box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
+ box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
+}
+
+static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
+{}
+
+static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
+{}
+
+static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{}
+
+static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{}
+
+static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
+}
+
+/*
+ * custom event_init() function because we define our own fixed, free
+ * running counters, so we do not want to conflict with generic uncore
+ * logic. Also simplifies processing
+ */
+static int snb_uncore_imc_event_init(struct perf_event *event)
+{
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
+ int idx, base;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ pmu = uncore_event_to_pmu(event);
+ /* no device found for this pmu */
+ if (pmu->func_id < 0)
+ return -ENOENT;
+
+ /* Sampling not supported yet */
+ if (hwc->sample_period)
+ return -EINVAL;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ event->attr.sample_period) /* no sampling */
+ return -EINVAL;
+
+ /*
+ * Place all uncore events for a particular physical package
+ * onto a single cpu
+ */
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ /* check only supported bits are set */
+ if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
+ return -EINVAL;
+
+ box = uncore_pmu_to_box(pmu, event->cpu);
+ if (!box || box->cpu < 0)
+ return -EINVAL;
+
+ event->cpu = box->cpu;
+
+ event->hw.idx = -1;
+ event->hw.last_tag = ~0ULL;
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ event->hw.branch_reg.idx = EXTRA_REG_NONE;
+ /*
+ * check event is known (whitelist, determines counter)
+ */
+ switch (cfg) {
+ case SNB_UNCORE_PCI_IMC_DATA_READS:
+ base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
+ idx = UNCORE_PMC_IDX_FIXED;
+ break;
+ case SNB_UNCORE_PCI_IMC_DATA_WRITES:
+ base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
+ idx = UNCORE_PMC_IDX_FIXED + 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* must be done before validate_group */
+ event->hw.event_base = base;
+ event->hw.config = cfg;
+ event->hw.idx = idx;
+
+ /* no group validation needed, we have free running counters */
+
+ return 0;
+}
+
+static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return 0;
+}
+
+static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ u64 count;
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ event->hw.state = 0;
+ box->n_active++;
+
+ list_add_tail(&event->active_entry, &box->active_list);
+
+ count = snb_uncore_imc_read_counter(box, event);
+ local64_set(&event->hw.prev_count, count);
+
+ if (box->n_active == 1)
+ uncore_pmu_start_hrtimer(box);
+}
+
+static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ box->n_active--;
+
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ list_del(&event->active_entry);
+
+ if (box->n_active == 0)
+ uncore_pmu_cancel_hrtimer(box);
+ }
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ /*
+ * Drain the remaining delta count out of a event
+ * that we are disabling:
+ */
+ uncore_perf_event_update(box, event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!box)
+ return -ENODEV;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (!(flags & PERF_EF_START))
+ hwc->state |= PERF_HES_ARCH;
+
+ snb_uncore_imc_event_start(event, 0);
+
+ box->n_events++;
+
+ return 0;
+}
+
+static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ int i;
+
+ snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
+
+ for (i = 0; i < box->n_events; i++) {
+ if (event == box->event_list[i]) {
+ --box->n_events;
+ break;
+ }
+ }
+}
+
+static int snb_pci2phy_map_init(int devid)
+{
+ struct pci_dev *dev = NULL;
+ int bus;
+
+ dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
+ if (!dev)
+ return -ENOTTY;
+
+ bus = dev->bus->number;
+
+ uncore_pcibus_to_physid[bus] = 0;
+
+ pci_dev_put(dev);
+
+ return 0;
+}
+
+static struct pmu snb_uncore_imc_pmu = {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = snb_uncore_imc_event_init,
+ .add = snb_uncore_imc_event_add,
+ .del = snb_uncore_imc_event_del,
+ .start = snb_uncore_imc_event_start,
+ .stop = snb_uncore_imc_event_stop,
+ .read = uncore_pmu_event_read,
+};
+
+static struct intel_uncore_ops snb_uncore_imc_ops = {
+ .init_box = snb_uncore_imc_init_box,
+ .enable_box = snb_uncore_imc_enable_box,
+ .disable_box = snb_uncore_imc_disable_box,
+ .disable_event = snb_uncore_imc_disable_event,
+ .enable_event = snb_uncore_imc_enable_event,
+ .hw_config = snb_uncore_imc_hw_config,
+ .read_counter = snb_uncore_imc_read_counter,
+};
+
+static struct intel_uncore_type snb_uncore_imc = {
+ .name = "imc",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .fixed_ctr_bits = 32,
+ .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE,
+ .event_descs = snb_uncore_imc_events,
+ .format_group = &snb_uncore_imc_format_group,
+ .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
+ .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK,
+ .ops = &snb_uncore_imc_ops,
+ .pmu = &snb_uncore_imc_pmu,
+};
+
+static struct intel_uncore_type *snb_pci_uncores[] = {
+ [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc,
+ NULL,
+};
+
+static const struct pci_device_id snb_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id ivb_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id hsw_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
+static struct pci_driver snb_uncore_pci_driver = {
+ .name = "snb_uncore",
+ .id_table = snb_uncore_pci_ids,
+};
+
+static struct pci_driver ivb_uncore_pci_driver = {
+ .name = "ivb_uncore",
+ .id_table = ivb_uncore_pci_ids,
+};
+
+static struct pci_driver hsw_uncore_pci_driver = {
+ .name = "hsw_uncore",
+ .id_table = hsw_uncore_pci_ids,
+};
+
+struct imc_uncore_pci_dev {
+ __u32 pci_id;
+ struct pci_driver *driver;
+};
+#define IMC_DEV(a, d) \
+ { .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) }
+
+static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
+ IMC_DEV(SNB_IMC, &snb_uncore_pci_driver),
+ IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */
+ IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
+ IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
+ { /* end marker */ }
+};
+
+
+#define for_each_imc_pci_id(x, t) \
+ for (x = (t); (x)->pci_id; x++)
+
+static struct pci_driver *imc_uncore_find_dev(void)
+{
+ const struct imc_uncore_pci_dev *p;
+ int ret;
+
+ for_each_imc_pci_id(p, desktop_imc_pci_ids) {
+ ret = snb_pci2phy_map_init(p->pci_id);
+ if (ret == 0)
+ return p->driver;
+ }
+ return NULL;
+}
+
+static int imc_uncore_pci_init(void)
+{
+ struct pci_driver *imc_drv = imc_uncore_find_dev();
+
+ if (!imc_drv)
+ return -ENODEV;
+
+ uncore_pci_uncores = snb_pci_uncores;
+ uncore_pci_driver = imc_drv;
+
+ return 0;
+}
+
+int snb_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+
+int ivb_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+int hsw_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+
+/* end of Sandy Bridge uncore support */
+
+/* Nehalem uncore support */
+static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
+}
+
+static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+ wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+ else
+ wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
+}
+
+static struct attribute *nhm_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask8.attr,
+ NULL,
+};
+
+static struct attribute_group nhm_uncore_format_group = {
+ .name = "format",
+ .attrs = nhm_uncore_formats_attr,
+};
+
+static struct uncore_event_desc nhm_uncore_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"),
+ INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhm_uncore_msr_ops = {
+ .disable_box = nhm_uncore_msr_disable_box,
+ .enable_box = nhm_uncore_msr_enable_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = nhm_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type nhm_uncore = {
+ .name = "",
+ .num_counters = 8,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .event_ctl = NHM_UNC_PERFEVTSEL0,
+ .perf_ctr = NHM_UNC_UNCORE_PMC0,
+ .fixed_ctr = NHM_UNC_FIXED_CTR,
+ .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL,
+ .event_mask = NHM_UNC_RAW_EVENT_MASK,
+ .event_descs = nhm_uncore_events,
+ .ops = &nhm_uncore_msr_ops,
+ .format_group = &nhm_uncore_format_group,
+};
+
+static struct intel_uncore_type *nhm_msr_uncores[] = {
+ &nhm_uncore,
+ NULL,
+};
+
+void nhm_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = nhm_msr_uncores;
+}
+
+/* end of Nehalem uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
new file mode 100644
index 000000000000..adf138eac85c
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -0,0 +1,2258 @@
+/* SandyBridge-EP/IvyTown uncore support */
+#include "perf_event_intel_uncore.h"
+
+
+/* SNB-EP Box level control */
+#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0)
+#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1)
+#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8)
+#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16)
+#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \
+ SNBEP_PMON_BOX_CTL_RST_CTRS | \
+ SNBEP_PMON_BOX_CTL_FRZ_EN)
+/* SNB-EP event control */
+#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff
+#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00
+#define SNBEP_PMON_CTL_RST (1 << 17)
+#define SNBEP_PMON_CTL_EDGE_DET (1 << 18)
+#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21)
+#define SNBEP_PMON_CTL_EN (1 << 22)
+#define SNBEP_PMON_CTL_INVERT (1 << 23)
+#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000
+#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_INVERT | \
+ SNBEP_PMON_CTL_TRESH_MASK)
+
+/* SNB-EP Ubox event control */
+#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000
+#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_INVERT | \
+ SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+
+#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19)
+#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+
+/* SNB-EP PCU event control */
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000
+#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30)
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31)
+#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_EV_SEL_EXT | \
+ SNBEP_PMON_CTL_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+
+#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_PMON_CTL_EV_SEL_EXT)
+
+/* SNB-EP pci control register */
+#define SNBEP_PCI_PMON_BOX_CTL 0xf4
+#define SNBEP_PCI_PMON_CTL0 0xd8
+/* SNB-EP pci counter register */
+#define SNBEP_PCI_PMON_CTR0 0xa0
+
+/* SNB-EP home agent register */
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44
+#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48
+/* SNB-EP memory controller register */
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0
+/* SNB-EP QPI register */
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c
+
+/* SNB-EP Ubox register */
+#define SNBEP_U_MSR_PMON_CTR0 0xc16
+#define SNBEP_U_MSR_PMON_CTL0 0xc10
+
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09
+
+/* SNB-EP Cbo register */
+#define SNBEP_C0_MSR_PMON_CTR0 0xd16
+#define SNBEP_C0_MSR_PMON_CTL0 0xd10
+#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04
+#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14
+#define SNBEP_CBO_MSR_OFFSET 0x20
+
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID 0x1f
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID 0x3fc00
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE 0x7c0000
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC 0xff800000
+
+#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) { \
+ .event = (e), \
+ .msr = SNBEP_C0_MSR_PMON_BOX_FILTER, \
+ .config_mask = (m), \
+ .idx = (i) \
+}
+
+/* SNB-EP PCU register */
+#define SNBEP_PCU_MSR_PMON_CTR0 0xc36
+#define SNBEP_PCU_MSR_PMON_CTL0 0xc30
+#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff
+#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc
+#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd
+
+/* IVBEP event control */
+#define IVBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \
+ SNBEP_PMON_BOX_CTL_RST_CTRS)
+#define IVBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_TRESH_MASK)
+/* IVBEP Ubox */
+#define IVBEP_U_MSR_PMON_GLOBAL_CTL 0xc00
+#define IVBEP_U_PMON_GLOBAL_FRZ_ALL (1 << 31)
+#define IVBEP_U_PMON_GLOBAL_UNFRZ_ALL (1 << 29)
+
+#define IVBEP_U_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+/* IVBEP Cbo */
+#define IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK (IVBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_TID (0x1fULL << 0)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 5)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63)
+
+/* IVBEP home agent */
+#define IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST (1 << 16)
+#define IVBEP_HA_PCI_PMON_RAW_EVENT_MASK \
+ (IVBEP_PMON_RAW_EVENT_MASK | \
+ IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST)
+/* IVBEP PCU */
+#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_EV_SEL_EXT | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+/* IVBEP QPI */
+#define IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK \
+ (IVBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_PMON_CTL_EV_SEL_EXT)
+
+#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \
+ ((1ULL << (n)) - 1)))
+
+/* Haswell-EP Ubox */
+#define HSWEP_U_MSR_PMON_CTR0 0x705
+#define HSWEP_U_MSR_PMON_CTL0 0x709
+#define HSWEP_U_MSR_PMON_FILTER 0x707
+
+#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL 0x703
+#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTR 0x704
+
+#define HSWEP_U_MSR_PMON_BOX_FILTER_TID (0x1 << 0)
+#define HSWEP_U_MSR_PMON_BOX_FILTER_CID (0x1fULL << 1)
+#define HSWEP_U_MSR_PMON_BOX_FILTER_MASK \
+ (HSWEP_U_MSR_PMON_BOX_FILTER_TID | \
+ HSWEP_U_MSR_PMON_BOX_FILTER_CID)
+
+/* Haswell-EP CBo */
+#define HSWEP_C0_MSR_PMON_CTR0 0xe08
+#define HSWEP_C0_MSR_PMON_CTL0 0xe01
+#define HSWEP_C0_MSR_PMON_BOX_CTL 0xe00
+#define HSWEP_C0_MSR_PMON_BOX_FILTER0 0xe05
+#define HSWEP_CBO_MSR_OFFSET 0x10
+
+
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_TID (0x3fULL << 0)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 6)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x7fULL << 17)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63)
+
+
+/* Haswell-EP Sbox */
+#define HSWEP_S0_MSR_PMON_CTR0 0x726
+#define HSWEP_S0_MSR_PMON_CTL0 0x721
+#define HSWEP_S0_MSR_PMON_BOX_CTL 0x720
+#define HSWEP_SBOX_MSR_OFFSET 0xa
+#define HSWEP_S_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+
+/* Haswell-EP PCU */
+#define HSWEP_PCU_MSR_PMON_CTR0 0x717
+#define HSWEP_PCU_MSR_PMON_CTL0 0x711
+#define HSWEP_PCU_MSR_PMON_BOX_CTL 0x710
+#define HSWEP_PCU_MSR_PMON_BOX_FILTER 0x715
+
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
+DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
+DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
+DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
+DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
+DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
+
+static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+ u32 config = 0;
+
+ if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+ config |= SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+ }
+}
+
+static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+ u32 config = 0;
+
+ if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+ config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+ }
+}
+
+static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config);
+}
+
+static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 count = 0;
+
+ pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
+ pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
+
+ return count;
+}
+
+static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+
+ pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ u64 config;
+ unsigned msr;
+
+ msr = uncore_msr_box_ctl(box);
+ if (msr) {
+ rdmsrl(msr, config);
+ config |= SNBEP_PMON_BOX_CTL_FRZ;
+ wrmsrl(msr, config);
+ }
+}
+
+static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ u64 config;
+ unsigned msr;
+
+ msr = uncore_msr_box_ctl(box);
+ if (msr) {
+ rdmsrl(msr, config);
+ config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+ wrmsrl(msr, config);
+ }
+}
+
+static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE)
+ wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0));
+
+ wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ wrmsrl(hwc->config_base, hwc->config);
+}
+
+static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+
+ if (msr)
+ wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static struct attribute *snbep_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid.attr,
+ &format_attr_filter_nid.attr,
+ &format_attr_filter_state.attr,
+ &format_attr_filter_opc.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_pcu_formats_attr[] = {
+ &format_attr_event_ext.attr,
+ &format_attr_occ_sel.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ &format_attr_occ_invert.attr,
+ &format_attr_occ_edge.attr,
+ &format_attr_filter_band0.attr,
+ &format_attr_filter_band1.attr,
+ &format_attr_filter_band2.attr,
+ &format_attr_filter_band3.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_qpi_formats_attr[] = {
+ &format_attr_event_ext.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_match_rds.attr,
+ &format_attr_match_rnid30.attr,
+ &format_attr_match_rnid4.attr,
+ &format_attr_match_dnid.attr,
+ &format_attr_match_mc.attr,
+ &format_attr_match_opc.attr,
+ &format_attr_match_vnw.attr,
+ &format_attr_match0.attr,
+ &format_attr_match1.attr,
+ &format_attr_mask_rds.attr,
+ &format_attr_mask_rnid30.attr,
+ &format_attr_mask_rnid4.attr,
+ &format_attr_mask_dnid.attr,
+ &format_attr_mask_mc.attr,
+ &format_attr_mask_opc.attr,
+ &format_attr_mask_vnw.attr,
+ &format_attr_mask0.attr,
+ &format_attr_mask1.attr,
+ NULL,
+};
+
+static struct uncore_event_desc snbep_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+ { /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc snbep_uncore_qpi_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"),
+ INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
+ INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"),
+ INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"),
+ { /* end: all zeroes */ },
+};
+
+static struct attribute_group snbep_uncore_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_cbox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_pcu_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_pcu_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_qpi_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_qpi_formats_attr,
+};
+
+#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
+ .init_box = snbep_uncore_msr_init_box, \
+ .disable_box = snbep_uncore_msr_disable_box, \
+ .enable_box = snbep_uncore_msr_enable_box, \
+ .disable_event = snbep_uncore_msr_disable_event, \
+ .enable_event = snbep_uncore_msr_enable_event, \
+ .read_counter = uncore_msr_read_counter
+
+static struct intel_uncore_ops snbep_uncore_msr_ops = {
+ SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+};
+
+#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \
+ .init_box = snbep_uncore_pci_init_box, \
+ .disable_box = snbep_uncore_pci_disable_box, \
+ .enable_box = snbep_uncore_pci_enable_box, \
+ .disable_event = snbep_uncore_pci_disable_event, \
+ .read_counter = snbep_uncore_pci_read_counter
+
+static struct intel_uncore_ops snbep_uncore_pci_ops = {
+ SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+ .enable_event = snbep_uncore_pci_enable_event, \
+};
+
+static struct event_constraint snbep_uncore_cbox_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
+ EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2a, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snbep_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNBEP_U_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_U_MSR_PMON_CTL0,
+ .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &snbep_uncore_ubox_format_group,
+};
+
+static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
+ SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+ SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2),
+ EVENT_EXTRA_END
+};
+
+static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+ int i;
+
+ if (uncore_box_is_fake(box))
+ return;
+
+ for (i = 0; i < 5; i++) {
+ if (reg1->alloc & (0x1 << i))
+ atomic_sub(1 << (i * 6), &er->ref);
+ }
+ reg1->alloc = 0;
+}
+
+static struct event_constraint *
+__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event,
+ u64 (*cbox_filter_mask)(int fields))
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+ int i, alloc = 0;
+ unsigned long flags;
+ u64 mask;
+
+ if (reg1->idx == EXTRA_REG_NONE)
+ return NULL;
+
+ raw_spin_lock_irqsave(&er->lock, flags);
+ for (i = 0; i < 5; i++) {
+ if (!(reg1->idx & (0x1 << i)))
+ continue;
+ if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+ continue;
+
+ mask = cbox_filter_mask(0x1 << i);
+ if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) ||
+ !((reg1->config ^ er->config) & mask)) {
+ atomic_add(1 << (i * 6), &er->ref);
+ er->config &= ~mask;
+ er->config |= reg1->config & mask;
+ alloc |= (0x1 << i);
+ } else {
+ break;
+ }
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+ if (i < 5)
+ goto fail;
+
+ if (!uncore_box_is_fake(box))
+ reg1->alloc |= alloc;
+
+ return NULL;
+fail:
+ for (; i >= 0; i--) {
+ if (alloc & (0x1 << i))
+ atomic_sub(1 << (i * 6), &er->ref);
+ }
+ return &uncore_constraint_empty;
+}
+
+static u64 snbep_cbox_filter_mask(int fields)
+{
+ u64 mask = 0;
+
+ if (fields & 0x1)
+ mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID;
+ if (fields & 0x2)
+ mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID;
+ if (fields & 0x4)
+ mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+ if (fields & 0x8)
+ mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+
+ return mask;
+}
+
+static struct event_constraint *
+snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask);
+}
+
+static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
+
+ for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ idx |= er->idx;
+ }
+
+ if (idx) {
+ reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+ SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx);
+ reg1->idx = idx;
+ }
+ return 0;
+}
+
+static struct intel_uncore_ops snbep_uncore_cbox_ops = {
+ SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = snbep_cbox_hw_config,
+ .get_constraint = snbep_cbox_get_constraint,
+ .put_constraint = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type snbep_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 44,
+ .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
+ .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = SNBEP_CBO_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = snbep_uncore_cbox_constraints,
+ .ops = &snbep_uncore_cbox_ops,
+ .format_group = &snbep_uncore_cbox_format_group,
+};
+
+static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ u64 config = reg1->config;
+
+ if (new_idx > reg1->idx)
+ config <<= 8 * (new_idx - reg1->idx);
+ else
+ config >>= 8 * (reg1->idx - new_idx);
+
+ if (modify) {
+ hwc->config += new_idx - reg1->idx;
+ reg1->config = config;
+ reg1->idx = new_idx;
+ }
+ return config;
+}
+
+static struct event_constraint *
+snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+ unsigned long flags;
+ int idx = reg1->idx;
+ u64 mask, config1 = reg1->config;
+ bool ok = false;
+
+ if (reg1->idx == EXTRA_REG_NONE ||
+ (!uncore_box_is_fake(box) && reg1->alloc))
+ return NULL;
+again:
+ mask = 0xffULL << (idx * 8);
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
+ !((config1 ^ er->config) & mask)) {
+ atomic_add(1 << (idx * 8), &er->ref);
+ er->config &= ~mask;
+ er->config |= config1 & mask;
+ ok = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ if (!ok) {
+ idx = (idx + 1) % 4;
+ if (idx != reg1->idx) {
+ config1 = snbep_pcu_alter_er(event, idx, false);
+ goto again;
+ }
+ return &uncore_constraint_empty;
+ }
+
+ if (!uncore_box_is_fake(box)) {
+ if (idx != reg1->idx)
+ snbep_pcu_alter_er(event, idx, true);
+ reg1->alloc = 1;
+ }
+ return NULL;
+}
+
+static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+
+ if (uncore_box_is_fake(box) || !reg1->alloc)
+ return;
+
+ atomic_sub(1 << (reg1->idx * 8), &er->ref);
+ reg1->alloc = 0;
+}
+
+static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
+
+ if (ev_sel >= 0xb && ev_sel <= 0xe) {
+ reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
+ reg1->idx = ev_sel - 0xb;
+ reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8));
+ }
+ return 0;
+}
+
+static struct intel_uncore_ops snbep_uncore_pcu_ops = {
+ SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = snbep_pcu_hw_config,
+ .get_constraint = snbep_pcu_get_constraint,
+ .put_constraint = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type snbep_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_pcu_ops,
+ .format_group = &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *snbep_msr_uncores[] = {
+ &snbep_uncore_ubox,
+ &snbep_uncore_cbox,
+ &snbep_uncore_pcu,
+ NULL,
+};
+
+void snbep_uncore_cpu_init(void)
+{
+ if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ uncore_msr_uncores = snbep_msr_uncores;
+}
+
+enum {
+ SNBEP_PCI_QPI_PORT0_FILTER,
+ SNBEP_PCI_QPI_PORT1_FILTER,
+};
+
+static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
+ reg1->idx = 0;
+ reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
+ reg1->config = event->attr.config1;
+ reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
+ reg2->config = event->attr.config2;
+ }
+ return 0;
+}
+
+static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
+ struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx];
+ if (filter_pdev) {
+ pci_write_config_dword(filter_pdev, reg1->reg,
+ (u32)reg1->config);
+ pci_write_config_dword(filter_pdev, reg1->reg + 4,
+ (u32)(reg1->config >> 32));
+ pci_write_config_dword(filter_pdev, reg2->reg,
+ (u32)reg2->config);
+ pci_write_config_dword(filter_pdev, reg2->reg + 4,
+ (u32)(reg2->config >> 32));
+ }
+ }
+
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops snbep_uncore_qpi_ops = {
+ SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+ .enable_event = snbep_qpi_enable_event,
+ .hw_config = snbep_qpi_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+#define SNBEP_UNCORE_PCI_COMMON_INIT() \
+ .perf_ctr = SNBEP_PCI_PMON_CTR0, \
+ .event_ctl = SNBEP_PCI_PMON_CTL0, \
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \
+ .ops = &snbep_uncore_pci_ops, \
+ .format_group = &snbep_uncore_format_group
+
+static struct intel_uncore_type snbep_uncore_ha = {
+ .name = "ha",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_imc = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+ .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+ .event_descs = snbep_uncore_imc_events,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_qpi = {
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_qpi_ops,
+ .event_descs = snbep_uncore_qpi_events,
+ .format_group = &snbep_uncore_qpi_format_group,
+};
+
+
+static struct intel_uncore_type snbep_uncore_r2pcie = {
+ .name = "r2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .constraints = snbep_uncore_r2pcie_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_r3qpi = {
+ .name = "r3qpi",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .perf_ctr_bits = 44,
+ .constraints = snbep_uncore_r3qpi_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+ SNBEP_PCI_UNCORE_HA,
+ SNBEP_PCI_UNCORE_IMC,
+ SNBEP_PCI_UNCORE_QPI,
+ SNBEP_PCI_UNCORE_R2PCIE,
+ SNBEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *snbep_pci_uncores[] = {
+ [SNBEP_PCI_UNCORE_HA] = &snbep_uncore_ha,
+ [SNBEP_PCI_UNCORE_IMC] = &snbep_uncore_imc,
+ [SNBEP_PCI_UNCORE_QPI] = &snbep_uncore_qpi,
+ [SNBEP_PCI_UNCORE_R2PCIE] = &snbep_uncore_r2pcie,
+ [SNBEP_PCI_UNCORE_R3QPI] = &snbep_uncore_r3qpi,
+ NULL,
+};
+
+static const struct pci_device_id snbep_uncore_pci_ids[] = {
+ { /* Home Agent */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
+ },
+ { /* MC Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
+ },
+ { /* MC Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
+ },
+ { /* MC Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
+ },
+ { /* MC Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
+ },
+ { /* QPI Port 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
+ },
+ { /* QPI Port 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
+ },
+ { /* R2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
+ },
+ { /* R3QPI Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
+ },
+ { /* R3QPI Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver snbep_uncore_pci_driver = {
+ .name = "snbep_uncore",
+ .id_table = snbep_uncore_pci_ids,
+};
+
+/*
+ * build pci bus to socket mapping
+ */
+static int snbep_pci2phy_map_init(int devid)
+{
+ struct pci_dev *ubox_dev = NULL;
+ int i, bus, nodeid;
+ int err = 0;
+ u32 config = 0;
+
+ while (1) {
+ /* find the UBOX device */
+ ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev);
+ if (!ubox_dev)
+ break;
+ bus = ubox_dev->bus->number;
+ /* get the Node ID of the local register */
+ err = pci_read_config_dword(ubox_dev, 0x40, &config);
+ if (err)
+ break;
+ nodeid = config;
+ /* get the Node ID mapping */
+ err = pci_read_config_dword(ubox_dev, 0x54, &config);
+ if (err)
+ break;
+ /*
+ * every three bits in the Node ID mapping register maps
+ * to a particular node.
+ */
+ for (i = 0; i < 8; i++) {
+ if (nodeid == ((config >> (3 * i)) & 0x7)) {
+ uncore_pcibus_to_physid[bus] = i;
+ break;
+ }
+ }
+ }
+
+ if (!err) {
+ /*
+ * For PCI bus with no UBOX device, find the next bus
+ * that has UBOX device and use its mapping.
+ */
+ i = -1;
+ for (bus = 255; bus >= 0; bus--) {
+ if (uncore_pcibus_to_physid[bus] >= 0)
+ i = uncore_pcibus_to_physid[bus];
+ else
+ uncore_pcibus_to_physid[bus] = i;
+ }
+ }
+
+ if (ubox_dev)
+ pci_dev_put(ubox_dev);
+
+ return err ? pcibios_err_to_errno(err) : 0;
+}
+
+int snbep_uncore_pci_init(void)
+{
+ int ret = snbep_pci2phy_map_init(0x3ce0);
+ if (ret)
+ return ret;
+ uncore_pci_uncores = snbep_pci_uncores;
+ uncore_pci_driver = &snbep_uncore_pci_driver;
+ return 0;
+}
+/* end of Sandy Bridge-EP uncore support */
+
+/* IvyTown uncore support */
+static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+ if (msr)
+ wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT);
+}
+
+static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+
+ pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
+}
+
+#define IVBEP_UNCORE_MSR_OPS_COMMON_INIT() \
+ .init_box = ivbep_uncore_msr_init_box, \
+ .disable_box = snbep_uncore_msr_disable_box, \
+ .enable_box = snbep_uncore_msr_enable_box, \
+ .disable_event = snbep_uncore_msr_disable_event, \
+ .enable_event = snbep_uncore_msr_enable_event, \
+ .read_counter = uncore_msr_read_counter
+
+static struct intel_uncore_ops ivbep_uncore_msr_ops = {
+ IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+};
+
+static struct intel_uncore_ops ivbep_uncore_pci_ops = {
+ .init_box = ivbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snbep_uncore_pci_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+};
+
+#define IVBEP_UNCORE_PCI_COMMON_INIT() \
+ .perf_ctr = SNBEP_PCI_PMON_CTR0, \
+ .event_ctl = SNBEP_PCI_PMON_CTL0, \
+ .event_mask = IVBEP_PMON_RAW_EVENT_MASK, \
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \
+ .ops = &ivbep_uncore_pci_ops, \
+ .format_group = &ivbep_uncore_format_group
+
+static struct attribute *ivbep_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute *ivbep_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ NULL,
+};
+
+static struct attribute *ivbep_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid.attr,
+ &format_attr_filter_link.attr,
+ &format_attr_filter_state2.attr,
+ &format_attr_filter_nid2.attr,
+ &format_attr_filter_opc2.attr,
+ &format_attr_filter_nc.attr,
+ &format_attr_filter_c6.attr,
+ &format_attr_filter_isoc.attr,
+ NULL,
+};
+
+static struct attribute *ivbep_uncore_pcu_formats_attr[] = {
+ &format_attr_event_ext.attr,
+ &format_attr_occ_sel.attr,
+ &format_attr_edge.attr,
+ &format_attr_thresh5.attr,
+ &format_attr_occ_invert.attr,
+ &format_attr_occ_edge.attr,
+ &format_attr_filter_band0.attr,
+ &format_attr_filter_band1.attr,
+ &format_attr_filter_band2.attr,
+ &format_attr_filter_band3.attr,
+ NULL,
+};
+
+static struct attribute *ivbep_uncore_qpi_formats_attr[] = {
+ &format_attr_event_ext.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_match_rds.attr,
+ &format_attr_match_rnid30.attr,
+ &format_attr_match_rnid4.attr,
+ &format_attr_match_dnid.attr,
+ &format_attr_match_mc.attr,
+ &format_attr_match_opc.attr,
+ &format_attr_match_vnw.attr,
+ &format_attr_match0.attr,
+ &format_attr_match1.attr,
+ &format_attr_mask_rds.attr,
+ &format_attr_mask_rnid30.attr,
+ &format_attr_mask_rnid4.attr,
+ &format_attr_mask_dnid.attr,
+ &format_attr_mask_mc.attr,
+ &format_attr_mask_opc.attr,
+ &format_attr_mask_vnw.attr,
+ &format_attr_mask0.attr,
+ &format_attr_mask1.attr,
+ NULL,
+};
+
+static struct attribute_group ivbep_uncore_format_group = {
+ .name = "format",
+ .attrs = ivbep_uncore_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = ivbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = ivbep_uncore_cbox_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_pcu_format_group = {
+ .name = "format",
+ .attrs = ivbep_uncore_pcu_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_qpi_format_group = {
+ .name = "format",
+ .attrs = ivbep_uncore_qpi_formats_attr,
+};
+
+static struct intel_uncore_type ivbep_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNBEP_U_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_U_MSR_PMON_CTL0,
+ .event_mask = IVBEP_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &ivbep_uncore_ubox_format_group,
+};
+
+static struct extra_reg ivbep_uncore_cbox_extra_regs[] = {
+ SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+ SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
+ EVENT_EXTRA_END
+};
+
+static u64 ivbep_cbox_filter_mask(int fields)
+{
+ u64 mask = 0;
+
+ if (fields & 0x1)
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_TID;
+ if (fields & 0x2)
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK;
+ if (fields & 0x4)
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+ if (fields & 0x8)
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NID;
+ if (fields & 0x10) {
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC;
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6;
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
+ }
+
+ return mask;
+}
+
+static struct event_constraint *
+ivbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return __snbep_cbox_get_constraint(box, event, ivbep_cbox_filter_mask);
+}
+
+static int ivbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
+
+ for (er = ivbep_uncore_cbox_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ idx |= er->idx;
+ }
+
+ if (idx) {
+ reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+ SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & ivbep_cbox_filter_mask(idx);
+ reg1->idx = idx;
+ }
+ return 0;
+}
+
+static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ u64 filter = uncore_shared_reg_config(box, 0);
+ wrmsrl(reg1->reg, filter & 0xffffffff);
+ wrmsrl(reg1->reg + 6, filter >> 32);
+ }
+
+ wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops ivbep_uncore_cbox_ops = {
+ .init_box = ivbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = ivbep_cbox_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = ivbep_cbox_hw_config,
+ .get_constraint = ivbep_cbox_get_constraint,
+ .put_constraint = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 15,
+ .perf_ctr_bits = 44,
+ .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
+ .event_mask = IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = SNBEP_CBO_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = snbep_uncore_cbox_constraints,
+ .ops = &ivbep_uncore_cbox_ops,
+ .format_group = &ivbep_uncore_cbox_format_group,
+};
+
+static struct intel_uncore_ops ivbep_uncore_pcu_ops = {
+ IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = snbep_pcu_hw_config,
+ .get_constraint = snbep_pcu_get_constraint,
+ .put_constraint = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
+ .event_mask = IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &ivbep_uncore_pcu_ops,
+ .format_group = &ivbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *ivbep_msr_uncores[] = {
+ &ivbep_uncore_ubox,
+ &ivbep_uncore_cbox,
+ &ivbep_uncore_pcu,
+ NULL,
+};
+
+void ivbep_uncore_cpu_init(void)
+{
+ if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ uncore_msr_uncores = ivbep_msr_uncores;
+}
+
+static struct intel_uncore_type ivbep_uncore_ha = {
+ .name = "ha",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type ivbep_uncore_imc = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+ .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+ .event_descs = snbep_uncore_imc_events,
+ IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+/* registers in IRP boxes are not properly aligned */
+static unsigned ivbep_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
+static unsigned ivbep_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
+
+static void ivbep_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx],
+ hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void ivbep_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], hwc->config);
+}
+
+static u64 ivbep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 count = 0;
+
+ pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+ pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+ return count;
+}
+
+static struct intel_uncore_ops ivbep_uncore_irp_ops = {
+ .init_box = ivbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = ivbep_uncore_irp_disable_event,
+ .enable_event = ivbep_uncore_irp_enable_event,
+ .read_counter = ivbep_uncore_irp_read_counter,
+};
+
+static struct intel_uncore_type ivbep_uncore_irp = {
+ .name = "irp",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_mask = IVBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &ivbep_uncore_irp_ops,
+ .format_group = &ivbep_uncore_format_group,
+};
+
+static struct intel_uncore_ops ivbep_uncore_qpi_ops = {
+ .init_box = ivbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snbep_qpi_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+ .hw_config = snbep_qpi_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_qpi = {
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &ivbep_uncore_qpi_ops,
+ .format_group = &ivbep_uncore_qpi_format_group,
+};
+
+static struct intel_uncore_type ivbep_uncore_r2pcie = {
+ .name = "r2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .constraints = snbep_uncore_r2pcie_constraints,
+ IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type ivbep_uncore_r3qpi = {
+ .name = "r3qpi",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .perf_ctr_bits = 44,
+ .constraints = snbep_uncore_r3qpi_constraints,
+ IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+ IVBEP_PCI_UNCORE_HA,
+ IVBEP_PCI_UNCORE_IMC,
+ IVBEP_PCI_UNCORE_IRP,
+ IVBEP_PCI_UNCORE_QPI,
+ IVBEP_PCI_UNCORE_R2PCIE,
+ IVBEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *ivbep_pci_uncores[] = {
+ [IVBEP_PCI_UNCORE_HA] = &ivbep_uncore_ha,
+ [IVBEP_PCI_UNCORE_IMC] = &ivbep_uncore_imc,
+ [IVBEP_PCI_UNCORE_IRP] = &ivbep_uncore_irp,
+ [IVBEP_PCI_UNCORE_QPI] = &ivbep_uncore_qpi,
+ [IVBEP_PCI_UNCORE_R2PCIE] = &ivbep_uncore_r2pcie,
+ [IVBEP_PCI_UNCORE_R3QPI] = &ivbep_uncore_r3qpi,
+ NULL,
+};
+
+static const struct pci_device_id ivbep_uncore_pci_ids[] = {
+ { /* Home Agent 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 0),
+ },
+ { /* Home Agent 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 1),
+ },
+ { /* MC0 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 0),
+ },
+ { /* MC0 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 1),
+ },
+ { /* MC0 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 2),
+ },
+ { /* MC0 Channel 4 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 3),
+ },
+ { /* MC1 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 4),
+ },
+ { /* MC1 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5),
+ },
+ { /* MC1 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 6),
+ },
+ { /* MC1 Channel 4 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 7),
+ },
+ { /* IRP */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IRP, 0),
+ },
+ { /* QPI0 Port 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 0),
+ },
+ { /* QPI0 Port 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 1),
+ },
+ { /* QPI1 Port 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 2),
+ },
+ { /* R2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R2PCIE, 0),
+ },
+ { /* R3QPI0 Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 0),
+ },
+ { /* R3QPI0 Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* R3QPI1 Link 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 2),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver ivbep_uncore_pci_driver = {
+ .name = "ivbep_uncore",
+ .id_table = ivbep_uncore_pci_ids,
+};
+
+int ivbep_uncore_pci_init(void)
+{
+ int ret = snbep_pci2phy_map_init(0x0e1e);
+ if (ret)
+ return ret;
+ uncore_pci_uncores = ivbep_pci_uncores;
+ uncore_pci_driver = &ivbep_uncore_pci_driver;
+ return 0;
+}
+/* end of IvyTown uncore support */
+
+/* Haswell-EP uncore support */
+static struct attribute *hswep_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ &format_attr_filter_tid2.attr,
+ &format_attr_filter_cid.attr,
+ NULL,
+};
+
+static struct attribute_group hswep_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = hswep_uncore_ubox_formats_attr,
+};
+
+static int hswep_ubox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ reg1->reg = HSWEP_U_MSR_PMON_FILTER;
+ reg1->config = event->attr.config1 & HSWEP_U_MSR_PMON_BOX_FILTER_MASK;
+ reg1->idx = 0;
+ return 0;
+}
+
+static struct intel_uncore_ops hswep_uncore_ubox_ops = {
+ SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = hswep_ubox_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_U_MSR_PMON_CTL0,
+ .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .num_shared_regs = 1,
+ .ops = &hswep_uncore_ubox_ops,
+ .format_group = &hswep_uncore_ubox_format_group,
+};
+
+static struct attribute *hswep_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid3.attr,
+ &format_attr_filter_link2.attr,
+ &format_attr_filter_state3.attr,
+ &format_attr_filter_nid2.attr,
+ &format_attr_filter_opc2.attr,
+ &format_attr_filter_nc.attr,
+ &format_attr_filter_c6.attr,
+ &format_attr_filter_isoc.attr,
+ NULL,
+};
+
+static struct attribute_group hswep_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = hswep_uncore_cbox_formats_attr,
+};
+
+static struct event_constraint hswep_uncore_cbox_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x09, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg hswep_uncore_cbox_extra_regs[] = {
+ SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+ SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4028, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4032, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4029, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4033, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x402A, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x12),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
+ EVENT_EXTRA_END
+};
+
+static u64 hswep_cbox_filter_mask(int fields)
+{
+ u64 mask = 0;
+ if (fields & 0x1)
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_TID;
+ if (fields & 0x2)
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK;
+ if (fields & 0x4)
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+ if (fields & 0x8)
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NID;
+ if (fields & 0x10) {
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NC;
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_C6;
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
+ }
+ return mask;
+}
+
+static struct event_constraint *
+hswep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return __snbep_cbox_get_constraint(box, event, hswep_cbox_filter_mask);
+}
+
+static int hswep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
+
+ for (er = hswep_uncore_cbox_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ idx |= er->idx;
+ }
+
+ if (idx) {
+ reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+ HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & hswep_cbox_filter_mask(idx);
+ reg1->idx = idx;
+ }
+ return 0;
+}
+
+static void hswep_cbox_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ u64 filter = uncore_shared_reg_config(box, 0);
+ wrmsrl(reg1->reg, filter & 0xffffffff);
+ wrmsrl(reg1->reg + 1, filter >> 32);
+ }
+
+ wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops hswep_uncore_cbox_ops = {
+ .init_box = snbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = hswep_cbox_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = hswep_cbox_hw_config,
+ .get_constraint = hswep_cbox_get_constraint,
+ .put_constraint = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 18,
+ .perf_ctr_bits = 44,
+ .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
+ .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_CBO_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = hswep_uncore_cbox_constraints,
+ .ops = &hswep_uncore_cbox_ops,
+ .format_group = &hswep_uncore_cbox_format_group,
+};
+
+static struct attribute *hswep_uncore_sbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group hswep_uncore_sbox_format_group = {
+ .name = "format",
+ .attrs = hswep_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_type hswep_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 44,
+ .event_ctl = HSWEP_S0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_S0_MSR_PMON_CTR0,
+ .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_SBOX_MSR_OFFSET,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &hswep_uncore_sbox_format_group,
+};
+
+static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
+
+ if (ev_sel >= 0xb && ev_sel <= 0xe) {
+ reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER;
+ reg1->idx = ev_sel - 0xb;
+ reg1->config = event->attr.config1 & (0xff << reg1->idx);
+ }
+ return 0;
+}
+
+static struct intel_uncore_ops hswep_uncore_pcu_ops = {
+ SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = hswep_pcu_hw_config,
+ .get_constraint = snbep_pcu_get_constraint,
+ .put_constraint = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_PCU_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &hswep_uncore_pcu_ops,
+ .format_group = &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *hswep_msr_uncores[] = {
+ &hswep_uncore_ubox,
+ &hswep_uncore_cbox,
+ &hswep_uncore_sbox,
+ &hswep_uncore_pcu,
+ NULL,
+};
+
+void hswep_uncore_cpu_init(void)
+{
+ if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ uncore_msr_uncores = hswep_msr_uncores;
+}
+
+static struct intel_uncore_type hswep_uncore_ha = {
+ .name = "ha",
+ .num_counters = 5,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct uncore_event_desc hswep_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type hswep_uncore_imc = {
+ .name = "imc",
+ .num_counters = 5,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+ .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+ .event_descs = hswep_uncore_imc_events,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_ops hswep_uncore_irp_ops = {
+ .init_box = snbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = ivbep_uncore_irp_disable_event,
+ .enable_event = ivbep_uncore_irp_enable_event,
+ .read_counter = ivbep_uncore_irp_read_counter,
+};
+
+static struct intel_uncore_type hswep_uncore_irp = {
+ .name = "irp",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &hswep_uncore_irp_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type hswep_uncore_qpi = {
+ .name = "qpi",
+ .num_counters = 5,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_qpi_ops,
+ .format_group = &snbep_uncore_qpi_format_group,
+};
+
+static struct event_constraint hswep_uncore_r2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x24, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x27, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2a, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type hswep_uncore_r2pcie = {
+ .name = "r2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .constraints = hswep_uncore_r2pcie_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x01, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type hswep_uncore_r3qpi = {
+ .name = "r3qpi",
+ .num_counters = 4,
+ .num_boxes = 3,
+ .perf_ctr_bits = 44,
+ .constraints = hswep_uncore_r3qpi_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+ HSWEP_PCI_UNCORE_HA,
+ HSWEP_PCI_UNCORE_IMC,
+ HSWEP_PCI_UNCORE_IRP,
+ HSWEP_PCI_UNCORE_QPI,
+ HSWEP_PCI_UNCORE_R2PCIE,
+ HSWEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *hswep_pci_uncores[] = {
+ [HSWEP_PCI_UNCORE_HA] = &hswep_uncore_ha,
+ [HSWEP_PCI_UNCORE_IMC] = &hswep_uncore_imc,
+ [HSWEP_PCI_UNCORE_IRP] = &hswep_uncore_irp,
+ [HSWEP_PCI_UNCORE_QPI] = &hswep_uncore_qpi,
+ [HSWEP_PCI_UNCORE_R2PCIE] = &hswep_uncore_r2pcie,
+ [HSWEP_PCI_UNCORE_R3QPI] = &hswep_uncore_r3qpi,
+ NULL,
+};
+
+static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = {
+ { /* Home Agent 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
+ },
+ { /* Home Agent 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f38),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 1),
+ },
+ { /* MC0 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb0),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 0),
+ },
+ { /* MC0 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb1),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 1),
+ },
+ { /* MC0 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb4),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 2),
+ },
+ { /* MC0 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb5),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 3),
+ },
+ { /* MC1 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd0),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 4),
+ },
+ { /* MC1 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd1),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 5),
+ },
+ { /* MC1 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd4),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 6),
+ },
+ { /* MC1 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd5),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 7),
+ },
+ { /* IRP */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f39),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IRP, 0),
+ },
+ { /* QPI0 Port 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f32),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 0),
+ },
+ { /* QPI0 Port 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f33),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 1),
+ },
+ { /* QPI1 Port 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3a),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 2),
+ },
+ { /* R2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f34),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R2PCIE, 0),
+ },
+ { /* R3QPI0 Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f36),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 0),
+ },
+ { /* R3QPI0 Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f37),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* R3QPI1 Link 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3e),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 2),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
+ },
+ { /* QPI Port 1 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver hswep_uncore_pci_driver = {
+ .name = "hswep_uncore",
+ .id_table = hswep_uncore_pci_ids,
+};
+
+int hswep_uncore_pci_init(void)
+{
+ int ret = snbep_pci2phy_map_init(0x2f1e);
+ if (ret)
+ return ret;
+ uncore_pci_uncores = hswep_pci_uncores;
+ uncore_pci_driver = &hswep_uncore_pci_driver;
+ return 0;
+}
+/* end of Haswell-EP uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
index 838fa8772c62..5b0c232d1ee6 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -217,7 +217,7 @@ static int knc_pmu_handle_irq(struct pt_regs *regs)
int bit, loops;
u64 status;
- cpuc = &__get_cpu_var(cpu_hw_events);
+ cpuc = this_cpu_ptr(&cpu_hw_events);
knc_pmu_disable_all();
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 5d466b7d8609..f2e56783af3d 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -915,7 +915,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
static void p4_pmu_disable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -984,7 +984,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
static void p4_pmu_enable_all(int added)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1004,7 +1004,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
- cpuc = &__get_cpu_var(cpu_hw_events);
+ cpuc = this_cpu_ptr(&cpu_hw_events);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
int overflow;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 507de8066594..f5ab56d14287 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -4,9 +4,14 @@
* Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
*
* Copyright (C) IBM Corporation, 2004. All rights reserved.
+ * Copyright (C) Red Hat Inc., 2014. All rights reserved.
+ * Authors:
+ * Vivek Goyal <vgoyal@redhat.com>
*
*/
+#define pr_fmt(fmt) "kexec: " fmt
+
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/smp.h>
@@ -16,6 +21,7 @@
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -28,6 +34,45 @@
#include <asm/reboot.h>
#include <asm/virtext.h>
+/* Alignment required for elf header segment */
+#define ELF_CORE_HEADER_ALIGN 4096
+
+/* This primarily represents number of split ranges due to exclusion */
+#define CRASH_MAX_RANGES 16
+
+struct crash_mem_range {
+ u64 start, end;
+};
+
+struct crash_mem {
+ unsigned int nr_ranges;
+ struct crash_mem_range ranges[CRASH_MAX_RANGES];
+};
+
+/* Misc data about ram ranges needed to prepare elf headers */
+struct crash_elf_data {
+ struct kimage *image;
+ /*
+ * Total number of ram ranges we have after various adjustments for
+ * GART, crash reserved region etc.
+ */
+ unsigned int max_nr_ranges;
+ unsigned long gart_start, gart_end;
+
+ /* Pointer to elf header */
+ void *ehdr;
+ /* Pointer to next phdr */
+ void *bufp;
+ struct crash_mem mem;
+};
+
+/* Used while preparing memory map entries for second kernel */
+struct crash_memmap_data {
+ struct boot_params *params;
+ /* Type of memory */
+ unsigned int type;
+};
+
int in_crash_kexec;
/*
@@ -39,6 +84,7 @@ int in_crash_kexec;
*/
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+unsigned long crash_zero_bytes;
static inline void cpu_crash_vmclear_loaded_vmcss(void)
{
@@ -135,3 +181,520 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#endif
crash_save_cpu(regs, safe_smp_processor_id());
}
+
+#ifdef CONFIG_KEXEC_FILE
+static int get_nr_ram_ranges_callback(unsigned long start_pfn,
+ unsigned long nr_pfn, void *arg)
+{
+ int *nr_ranges = arg;
+
+ (*nr_ranges)++;
+ return 0;
+}
+
+static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
+{
+ struct crash_elf_data *ced = arg;
+
+ ced->gart_start = start;
+ ced->gart_end = end;
+
+ /* Not expecting more than 1 gart aperture */
+ return 1;
+}
+
+
+/* Gather all the required information to prepare elf headers for ram regions */
+static void fill_up_crash_elf_data(struct crash_elf_data *ced,
+ struct kimage *image)
+{
+ unsigned int nr_ranges = 0;
+
+ ced->image = image;
+
+ walk_system_ram_range(0, -1, &nr_ranges,
+ get_nr_ram_ranges_callback);
+
+ ced->max_nr_ranges = nr_ranges;
+
+ /*
+ * We don't create ELF headers for GART aperture as an attempt
+ * to dump this memory in second kernel leads to hang/crash.
+ * If gart aperture is present, one needs to exclude that region
+ * and that could lead to need of extra phdr.
+ */
+ walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
+ ced, get_gart_ranges_callback);
+
+ /*
+ * If we have gart region, excluding that could potentially split
+ * a memory range, resulting in extra header. Account for that.
+ */
+ if (ced->gart_end)
+ ced->max_nr_ranges++;
+
+ /* Exclusion of crash region could split memory ranges */
+ ced->max_nr_ranges++;
+
+ /* If crashk_low_res is not 0, another range split possible */
+ if (crashk_low_res.end)
+ ced->max_nr_ranges++;
+}
+
+static int exclude_mem_range(struct crash_mem *mem,
+ unsigned long long mstart, unsigned long long mend)
+{
+ int i, j;
+ unsigned long long start, end;
+ struct crash_mem_range temp_range = {0, 0};
+
+ for (i = 0; i < mem->nr_ranges; i++) {
+ start = mem->ranges[i].start;
+ end = mem->ranges[i].end;
+
+ if (mstart > end || mend < start)
+ continue;
+
+ /* Truncate any area outside of range */
+ if (mstart < start)
+ mstart = start;
+ if (mend > end)
+ mend = end;
+
+ /* Found completely overlapping range */
+ if (mstart == start && mend == end) {
+ mem->ranges[i].start = 0;
+ mem->ranges[i].end = 0;
+ if (i < mem->nr_ranges - 1) {
+ /* Shift rest of the ranges to left */
+ for (j = i; j < mem->nr_ranges - 1; j++) {
+ mem->ranges[j].start =
+ mem->ranges[j+1].start;
+ mem->ranges[j].end =
+ mem->ranges[j+1].end;
+ }
+ }
+ mem->nr_ranges--;
+ return 0;
+ }
+
+ if (mstart > start && mend < end) {
+ /* Split original range */
+ mem->ranges[i].end = mstart - 1;
+ temp_range.start = mend + 1;
+ temp_range.end = end;
+ } else if (mstart != start)
+ mem->ranges[i].end = mstart - 1;
+ else
+ mem->ranges[i].start = mend + 1;
+ break;
+ }
+
+ /* If a split happend, add the split to array */
+ if (!temp_range.end)
+ return 0;
+
+ /* Split happened */
+ if (i == CRASH_MAX_RANGES - 1) {
+ pr_err("Too many crash ranges after split\n");
+ return -ENOMEM;
+ }
+
+ /* Location where new range should go */
+ j = i + 1;
+ if (j < mem->nr_ranges) {
+ /* Move over all ranges one slot towards the end */
+ for (i = mem->nr_ranges - 1; i >= j; i--)
+ mem->ranges[i + 1] = mem->ranges[i];
+ }
+
+ mem->ranges[j].start = temp_range.start;
+ mem->ranges[j].end = temp_range.end;
+ mem->nr_ranges++;
+ return 0;
+}
+
+/*
+ * Look for any unwanted ranges between mstart, mend and remove them. This
+ * might lead to split and split ranges are put in ced->mem.ranges[] array
+ */
+static int elf_header_exclude_ranges(struct crash_elf_data *ced,
+ unsigned long long mstart, unsigned long long mend)
+{
+ struct crash_mem *cmem = &ced->mem;
+ int ret = 0;
+
+ memset(cmem->ranges, 0, sizeof(cmem->ranges));
+
+ cmem->ranges[0].start = mstart;
+ cmem->ranges[0].end = mend;
+ cmem->nr_ranges = 1;
+
+ /* Exclude crashkernel region */
+ ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ if (ret)
+ return ret;
+
+ if (crashk_low_res.end) {
+ ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+ if (ret)
+ return ret;
+ }
+
+ /* Exclude GART region */
+ if (ced->gart_end) {
+ ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
+{
+ struct crash_elf_data *ced = arg;
+ Elf64_Ehdr *ehdr;
+ Elf64_Phdr *phdr;
+ unsigned long mstart, mend;
+ struct kimage *image = ced->image;
+ struct crash_mem *cmem;
+ int ret, i;
+
+ ehdr = ced->ehdr;
+
+ /* Exclude unwanted mem ranges */
+ ret = elf_header_exclude_ranges(ced, start, end);
+ if (ret)
+ return ret;
+
+ /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */
+ cmem = &ced->mem;
+
+ for (i = 0; i < cmem->nr_ranges; i++) {
+ mstart = cmem->ranges[i].start;
+ mend = cmem->ranges[i].end;
+
+ phdr = ced->bufp;
+ ced->bufp += sizeof(Elf64_Phdr);
+
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_offset = mstart;
+
+ /*
+ * If a range matches backup region, adjust offset to backup
+ * segment.
+ */
+ if (mstart == image->arch.backup_src_start &&
+ (mend - mstart + 1) == image->arch.backup_src_sz)
+ phdr->p_offset = image->arch.backup_load_addr;
+
+ phdr->p_paddr = mstart;
+ phdr->p_vaddr = (unsigned long long) __va(mstart);
+ phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
+ phdr->p_align = 0;
+ ehdr->e_phnum++;
+ pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
+ phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
+ ehdr->e_phnum, phdr->p_offset);
+ }
+
+ return ret;
+}
+
+static int prepare_elf64_headers(struct crash_elf_data *ced,
+ void **addr, unsigned long *sz)
+{
+ Elf64_Ehdr *ehdr;
+ Elf64_Phdr *phdr;
+ unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
+ unsigned char *buf, *bufp;
+ unsigned int cpu;
+ unsigned long long notes_addr;
+ int ret;
+
+ /* extra phdr for vmcoreinfo elf note */
+ nr_phdr = nr_cpus + 1;
+ nr_phdr += ced->max_nr_ranges;
+
+ /*
+ * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
+ * area on x86_64 (ffffffff80000000 - ffffffffa0000000).
+ * I think this is required by tools like gdb. So same physical
+ * memory will be mapped in two elf headers. One will contain kernel
+ * text virtual addresses and other will have __va(physical) addresses.
+ */
+
+ nr_phdr++;
+ elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
+ elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
+
+ buf = vzalloc(elf_sz);
+ if (!buf)
+ return -ENOMEM;
+
+ bufp = buf;
+ ehdr = (Elf64_Ehdr *)bufp;
+ bufp += sizeof(Elf64_Ehdr);
+ memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+ ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+ ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
+ ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+ ehdr->e_ident[EI_OSABI] = ELF_OSABI;
+ memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+ ehdr->e_type = ET_CORE;
+ ehdr->e_machine = ELF_ARCH;
+ ehdr->e_version = EV_CURRENT;
+ ehdr->e_phoff = sizeof(Elf64_Ehdr);
+ ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+ ehdr->e_phentsize = sizeof(Elf64_Phdr);
+
+ /* Prepare one phdr of type PT_NOTE for each present cpu */
+ for_each_present_cpu(cpu) {
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = PT_NOTE;
+ notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
+ phdr->p_offset = phdr->p_paddr = notes_addr;
+ phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
+ (ehdr->e_phnum)++;
+ }
+
+ /* Prepare one PT_NOTE header for vmcoreinfo */
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = PT_NOTE;
+ phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
+ phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+ (ehdr->e_phnum)++;
+
+#ifdef CONFIG_X86_64
+ /* Prepare PT_LOAD type program header for kernel text region */
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_vaddr = (Elf64_Addr)_text;
+ phdr->p_filesz = phdr->p_memsz = _end - _text;
+ phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
+ (ehdr->e_phnum)++;
+#endif
+
+ /* Prepare PT_LOAD headers for system ram chunks. */
+ ced->ehdr = ehdr;
+ ced->bufp = bufp;
+ ret = walk_system_ram_res(0, -1, ced,
+ prepare_elf64_ram_headers_callback);
+ if (ret < 0)
+ return ret;
+
+ *addr = buf;
+ *sz = elf_sz;
+ return 0;
+}
+
+/* Prepare elf headers. Return addr and size */
+static int prepare_elf_headers(struct kimage *image, void **addr,
+ unsigned long *sz)
+{
+ struct crash_elf_data *ced;
+ int ret;
+
+ ced = kzalloc(sizeof(*ced), GFP_KERNEL);
+ if (!ced)
+ return -ENOMEM;
+
+ fill_up_crash_elf_data(ced, image);
+
+ /* By default prepare 64bit headers */
+ ret = prepare_elf64_headers(ced, addr, sz);
+ kfree(ced);
+ return ret;
+}
+
+static int add_e820_entry(struct boot_params *params, struct e820entry *entry)
+{
+ unsigned int nr_e820_entries;
+
+ nr_e820_entries = params->e820_entries;
+ if (nr_e820_entries >= E820MAX)
+ return 1;
+
+ memcpy(&params->e820_map[nr_e820_entries], entry,
+ sizeof(struct e820entry));
+ params->e820_entries++;
+ return 0;
+}
+
+static int memmap_entry_callback(u64 start, u64 end, void *arg)
+{
+ struct crash_memmap_data *cmd = arg;
+ struct boot_params *params = cmd->params;
+ struct e820entry ei;
+
+ ei.addr = start;
+ ei.size = end - start + 1;
+ ei.type = cmd->type;
+ add_e820_entry(params, &ei);
+
+ return 0;
+}
+
+static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
+ unsigned long long mstart,
+ unsigned long long mend)
+{
+ unsigned long start, end;
+ int ret = 0;
+
+ cmem->ranges[0].start = mstart;
+ cmem->ranges[0].end = mend;
+ cmem->nr_ranges = 1;
+
+ /* Exclude Backup region */
+ start = image->arch.backup_load_addr;
+ end = start + image->arch.backup_src_sz - 1;
+ ret = exclude_mem_range(cmem, start, end);
+ if (ret)
+ return ret;
+
+ /* Exclude elf header region */
+ start = image->arch.elf_load_addr;
+ end = start + image->arch.elf_headers_sz - 1;
+ return exclude_mem_range(cmem, start, end);
+}
+
+/* Prepare memory map for crash dump kernel */
+int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
+{
+ int i, ret = 0;
+ unsigned long flags;
+ struct e820entry ei;
+ struct crash_memmap_data cmd;
+ struct crash_mem *cmem;
+
+ cmem = vzalloc(sizeof(struct crash_mem));
+ if (!cmem)
+ return -ENOMEM;
+
+ memset(&cmd, 0, sizeof(struct crash_memmap_data));
+ cmd.params = params;
+
+ /* Add first 640K segment */
+ ei.addr = image->arch.backup_src_start;
+ ei.size = image->arch.backup_src_sz;
+ ei.type = E820_RAM;
+ add_e820_entry(params, &ei);
+
+ /* Add ACPI tables */
+ cmd.type = E820_ACPI;
+ flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
+ memmap_entry_callback);
+
+ /* Add ACPI Non-volatile Storage */
+ cmd.type = E820_NVS;
+ walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
+ memmap_entry_callback);
+
+ /* Add crashk_low_res region */
+ if (crashk_low_res.end) {
+ ei.addr = crashk_low_res.start;
+ ei.size = crashk_low_res.end - crashk_low_res.start + 1;
+ ei.type = E820_RAM;
+ add_e820_entry(params, &ei);
+ }
+
+ /* Exclude some ranges from crashk_res and add rest to memmap */
+ ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
+ crashk_res.end);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < cmem->nr_ranges; i++) {
+ ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1;
+
+ /* If entry is less than a page, skip it */
+ if (ei.size < PAGE_SIZE)
+ continue;
+ ei.addr = cmem->ranges[i].start;
+ ei.type = E820_RAM;
+ add_e820_entry(params, &ei);
+ }
+
+out:
+ vfree(cmem);
+ return ret;
+}
+
+static int determine_backup_region(u64 start, u64 end, void *arg)
+{
+ struct kimage *image = arg;
+
+ image->arch.backup_src_start = start;
+ image->arch.backup_src_sz = end - start + 1;
+
+ /* Expecting only one range for backup region */
+ return 1;
+}
+
+int crash_load_segments(struct kimage *image)
+{
+ unsigned long src_start, src_sz, elf_sz;
+ void *elf_addr;
+ int ret;
+
+ /*
+ * Determine and load a segment for backup area. First 640K RAM
+ * region is backup source
+ */
+
+ ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
+ image, determine_backup_region);
+
+ /* Zero or postive return values are ok */
+ if (ret < 0)
+ return ret;
+
+ src_start = image->arch.backup_src_start;
+ src_sz = image->arch.backup_src_sz;
+
+ /* Add backup segment. */
+ if (src_sz) {
+ /*
+ * Ideally there is no source for backup segment. This is
+ * copied in purgatory after crash. Just add a zero filled
+ * segment for now to make sure checksum logic works fine.
+ */
+ ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
+ sizeof(crash_zero_bytes), src_sz,
+ PAGE_SIZE, 0, -1, 0,
+ &image->arch.backup_load_addr);
+ if (ret)
+ return ret;
+ pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
+ image->arch.backup_load_addr, src_start, src_sz);
+ }
+
+ /* Prepare elf headers and add a segment */
+ ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
+ if (ret)
+ return ret;
+
+ image->arch.elf_headers = elf_addr;
+ image->arch.elf_headers_sz = elf_sz;
+
+ ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
+ ELF_CORE_HEADER_ALIGN, 0, -1, 0,
+ &image->arch.elf_load_addr);
+ if (ret) {
+ vfree((void *)image->arch.elf_headers);
+ return ret;
+ }
+ pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->arch.elf_load_addr, elf_sz, elf_sz);
+
+ return ret;
+}
+#endif /* CONFIG_KEXEC_FILE */
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 7db54b5d5f86..3d3503351242 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -21,6 +21,7 @@
#include <asm/apic.h>
#include <asm/pci_x86.h>
#include <asm/setup.h>
+#include <asm/i8259.h>
__initdata u64 initial_dtb;
char __initdata cmd_line[COMMAND_LINE_SIZE];
@@ -165,82 +166,6 @@ static void __init dtb_lapic_setup(void)
#ifdef CONFIG_X86_IO_APIC
static unsigned int ioapic_id;
-static void __init dtb_add_ioapic(struct device_node *dn)
-{
- struct resource r;
- int ret;
-
- ret = of_address_to_resource(dn, 0, &r);
- if (ret) {
- printk(KERN_ERR "Can't obtain address from node %s.\n",
- dn->full_name);
- return;
- }
- mp_register_ioapic(++ioapic_id, r.start, gsi_top);
-}
-
-static void __init dtb_ioapic_setup(void)
-{
- struct device_node *dn;
-
- for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic")
- dtb_add_ioapic(dn);
-
- if (nr_ioapics) {
- of_ioapic = 1;
- return;
- }
- printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
-}
-#else
-static void __init dtb_ioapic_setup(void) {}
-#endif
-
-static void __init dtb_apic_setup(void)
-{
- dtb_lapic_setup();
- dtb_ioapic_setup();
-}
-
-#ifdef CONFIG_OF_FLATTREE
-static void __init x86_flattree_get_config(void)
-{
- u32 size, map_len;
- void *dt;
-
- if (!initial_dtb)
- return;
-
- map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
-
- initial_boot_params = dt = early_memremap(initial_dtb, map_len);
- size = of_get_flat_dt_size();
- if (map_len < size) {
- early_iounmap(dt, map_len);
- initial_boot_params = dt = early_memremap(initial_dtb, size);
- map_len = size;
- }
-
- unflatten_and_copy_device_tree();
- early_iounmap(dt, map_len);
-}
-#else
-static inline void x86_flattree_get_config(void) { }
-#endif
-
-void __init x86_dtb_init(void)
-{
- x86_flattree_get_config();
-
- if (!of_have_populated_dt())
- return;
-
- dtb_setup_hpet();
- dtb_apic_setup();
-}
-
-#ifdef CONFIG_X86_IO_APIC
-
struct of_ioapic_type {
u32 out_type;
u32 trigger;
@@ -276,10 +201,8 @@ static int ioapic_xlate(struct irq_domain *domain,
const u32 *intspec, u32 intsize,
irq_hw_number_t *out_hwirq, u32 *out_type)
{
- struct io_apic_irq_attr attr;
struct of_ioapic_type *it;
- u32 line, idx;
- int rc;
+ u32 line, idx, gsi;
if (WARN_ON(intsize < 2))
return -EINVAL;
@@ -291,13 +214,10 @@ static int ioapic_xlate(struct irq_domain *domain,
it = &of_ioapic_type[intspec[1]];
- idx = (u32) domain->host_data;
- set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
-
- rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line),
- cpu_to_node(0), &attr);
- if (rc)
- return rc;
+ idx = (u32)(long)domain->host_data;
+ gsi = mp_pin_to_gsi(idx, line);
+ if (mp_set_gsi_attr(gsi, it->trigger, it->polarity, cpu_to_node(0)))
+ return -EBUSY;
*out_hwirq = line;
*out_type = it->out_type;
@@ -305,81 +225,86 @@ static int ioapic_xlate(struct irq_domain *domain,
}
const struct irq_domain_ops ioapic_irq_domain_ops = {
+ .map = mp_irqdomain_map,
+ .unmap = mp_irqdomain_unmap,
.xlate = ioapic_xlate,
};
-static void dt_add_ioapic_domain(unsigned int ioapic_num,
- struct device_node *np)
+static void __init dtb_add_ioapic(struct device_node *dn)
{
- struct irq_domain *id;
- struct mp_ioapic_gsi *gsi_cfg;
+ struct resource r;
int ret;
- int num;
-
- gsi_cfg = mp_ioapic_gsi_routing(ioapic_num);
- num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
-
- id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops,
- (void *)ioapic_num);
- BUG_ON(!id);
- if (gsi_cfg->gsi_base == 0) {
- /*
- * The first NR_IRQS_LEGACY irq descs are allocated in
- * early_irq_init() and need just a mapping. The
- * remaining irqs need both. All of them are preallocated
- * and assigned so we can keep the 1:1 mapping which the ioapic
- * is having.
- */
- irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
-
- if (num > NR_IRQS_LEGACY) {
- ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
- NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
- if (ret)
- pr_err("Error creating mapping for the "
- "remaining IRQs: %d\n", ret);
- }
- irq_set_default_host(id);
- } else {
- ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
- if (ret)
- pr_err("Error creating IRQ mapping: %d\n", ret);
+ struct ioapic_domain_cfg cfg = {
+ .type = IOAPIC_DOMAIN_DYNAMIC,
+ .ops = &ioapic_irq_domain_ops,
+ .dev = dn,
+ };
+
+ ret = of_address_to_resource(dn, 0, &r);
+ if (ret) {
+ printk(KERN_ERR "Can't obtain address from node %s.\n",
+ dn->full_name);
+ return;
}
+ mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg);
}
-static void __init ioapic_add_ofnode(struct device_node *np)
+static void __init dtb_ioapic_setup(void)
{
- struct resource r;
- int i, ret;
+ struct device_node *dn;
- ret = of_address_to_resource(np, 0, &r);
- if (ret) {
- printk(KERN_ERR "Failed to obtain address for %s\n",
- np->full_name);
+ for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic")
+ dtb_add_ioapic(dn);
+
+ if (nr_ioapics) {
+ of_ioapic = 1;
return;
}
+ printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
+}
+#else
+static void __init dtb_ioapic_setup(void) {}
+#endif
- for (i = 0; i < nr_ioapics; i++) {
- if (r.start == mpc_ioapic_addr(i)) {
- dt_add_ioapic_domain(i, np);
- return;
- }
- }
- printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name);
+static void __init dtb_apic_setup(void)
+{
+ dtb_lapic_setup();
+ dtb_ioapic_setup();
}
-void __init x86_add_irq_domains(void)
+#ifdef CONFIG_OF_FLATTREE
+static void __init x86_flattree_get_config(void)
{
- struct device_node *dp;
+ u32 size, map_len;
+ void *dt;
- if (!of_have_populated_dt())
+ if (!initial_dtb)
return;
- for_each_node_with_property(dp, "interrupt-controller") {
- if (of_device_is_compatible(dp, "intel,ce4100-ioapic"))
- ioapic_add_ofnode(dp);
+ map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
+
+ initial_boot_params = dt = early_memremap(initial_dtb, map_len);
+ size = of_get_flat_dt_size();
+ if (map_len < size) {
+ early_iounmap(dt, map_len);
+ initial_boot_params = dt = early_memremap(initial_dtb, size);
+ map_len = size;
}
+
+ unflatten_and_copy_device_tree();
+ early_iounmap(dt, map_len);
}
#else
-void __init x86_add_irq_domains(void) { }
+static inline void x86_flattree_get_config(void) { }
#endif
+
+void __init x86_dtb_init(void)
+{
+ x86_flattree_get_config();
+
+ if (!of_have_populated_dt())
+ return;
+
+ dtb_setup_hpet();
+ dtb_apic_setup();
+}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 988c00a1f60d..49f886481615 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -682,15 +682,14 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len)
* hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
*
* This function requires the e820 map to be sorted and without any
- * overlapping entries and assumes the first e820 area to be RAM.
+ * overlapping entries.
*/
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
{
int i;
- unsigned long pfn;
+ unsigned long pfn = 0;
- pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
- for (i = 1; i < e820.nr_map; i++) {
+ for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
if (pfn < PFN_UP(ei->addr))
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 47c410d99f5d..344b63f18d14 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -447,16 +447,14 @@ sysenter_exit:
sysenter_audit:
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
jnz syscall_trace_entry
- addl $4,%esp
- CFI_ADJUST_CFA_OFFSET -4
- /* %esi already in 8(%esp) 6th arg: 4th syscall arg */
- /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */
- /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */
- movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
- movl %eax,%edx /* 2nd arg: syscall number */
- movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
+ /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
+ movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */
+ /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */
+ pushl_cfi PT_ESI(%esp) /* a3: 5th arg */
+ pushl_cfi PT_EDX+4(%esp) /* a2: 4th arg */
call __audit_syscall_entry
- pushl_cfi %ebx
+ popl_cfi %ecx /* get that remapped edx off the stack */
+ popl_cfi %ecx /* get that remapped esi off the stack */
movl PT_EAX(%esp),%eax /* reload syscall number */
jmp sysenter_do_call
@@ -683,7 +681,7 @@ END(syscall_badsys)
sysenter_badsys:
movl $-ENOSYS,%eax
jmp sysenter_after_call
-END(syscall_badsys)
+END(sysenter_badsys)
CFI_ENDPROC
.macro FIXUP_ESPFIX_STACK
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2fac1343a90b..df088bb03fb3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -404,8 +404,8 @@ GLOBAL(system_call_after_swapgs)
* and short:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
- SAVE_ARGS 8,0
- movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
+ SAVE_ARGS 8, 0, rax_enosys=1
+ movq_cfi rax,(ORIG_RAX-ARGOFFSET)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
@@ -417,7 +417,7 @@ system_call_fastpath:
andl $__SYSCALL_MASK,%eax
cmpl $__NR_syscall_max,%eax
#endif
- ja badsys
+ ja ret_from_sys_call /* and return regs->ax */
movq %r10,%rcx
call *sys_call_table(,%rax,8) # XXX: rip relative
movq %rax,RAX-ARGOFFSET(%rsp)
@@ -476,28 +476,8 @@ sysret_signal:
FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
jmp int_check_syscall_exit_work
-badsys:
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
- jmp ret_from_sys_call
-
#ifdef CONFIG_AUDITSYSCALL
/*
- * Fast path for syscall audit without full syscall trace.
- * We just call __audit_syscall_entry() directly, and then
- * jump back to the normal fast path.
- */
-auditsys:
- movq %r10,%r9 /* 6th arg: 4th syscall arg */
- movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
- movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
- movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
- movq %rax,%rsi /* 2nd arg: syscall number */
- movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
- call __audit_syscall_entry
- LOAD_ARGS 0 /* reload call-clobbered registers */
- jmp system_call_fastpath
-
- /*
* Return fast path for syscall audit. Call __audit_syscall_exit()
* directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
* masked off.
@@ -514,18 +494,25 @@ sysret_audit:
/* Do syscall tracing */
tracesys:
-#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- jz auditsys
-#endif
+ leaq -REST_SKIP(%rsp), %rdi
+ movq $AUDIT_ARCH_X86_64, %rsi
+ call syscall_trace_enter_phase1
+ test %rax, %rax
+ jnz tracesys_phase2 /* if needed, run the slow path */
+ LOAD_ARGS 0 /* else restore clobbered regs */
+ jmp system_call_fastpath /* and return to the fast path */
+
+tracesys_phase2:
SAVE_REST
- movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
FIXUP_TOP_OF_STACK %rdi
- movq %rsp,%rdi
- call syscall_trace_enter
+ movq %rsp, %rdi
+ movq $AUDIT_ARCH_X86_64, %rsi
+ movq %rax,%rdx
+ call syscall_trace_enter_phase2
+
/*
* Reload arg registers from stack in case ptrace changed them.
- * We don't reload %rax because syscall_trace_enter() returned
+ * We don't reload %rax because syscall_trace_entry_phase2() returned
* the value it wants us to use in the table lookup.
*/
LOAD_ARGS ARGOFFSET, 1
@@ -536,7 +523,7 @@ tracesys:
andl $__SYSCALL_MASK,%eax
cmpl $__NR_syscall_max,%eax
#endif
- ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
+ ja int_ret_from_sys_call /* RAX(%rsp) is already set */
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 5f9cf20cdb68..3d5fb509bdeb 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -108,7 +108,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
int i;
for (i = 0; i < HBP_NUM; i++) {
- struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+ struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);
if (!*slot) {
*slot = bp;
@@ -122,7 +122,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
set_debugreg(info->address, i);
__this_cpu_write(cpu_debugreg[i], info->address);
- dr7 = &__get_cpu_var(cpu_dr7);
+ dr7 = this_cpu_ptr(&cpu_dr7);
*dr7 |= encode_dr7(i, info->len, info->type);
set_debugreg(*dr7, 7);
@@ -146,7 +146,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
int i;
for (i = 0; i < HBP_NUM; i++) {
- struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+ struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);
if (*slot == bp) {
*slot = NULL;
@@ -157,7 +157,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
return;
- dr7 = &__get_cpu_var(cpu_dr7);
+ dr7 = this_cpu_ptr(&cpu_dr7);
*dr7 &= ~__encode_dr7(i, info->len, info->type);
set_debugreg(*dr7, 7);
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index d5dd80814419..a9a4229f6161 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -375,7 +375,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
/*
* These bits must be zero.
*/
- xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
+ memset(xsave_hdr->reserved, 0, 48);
return ret;
}
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 8af817105e29..e7cc5370cd2f 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -111,8 +111,7 @@ static void make_8259A_irq(unsigned int irq)
{
disable_irq_nosync(irq);
io_apic_irqs &= ~(1<<irq);
- irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
- i8259A_chip.name);
+ irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
enable_irq(irq);
}
diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c
index d30acdc1229d..82f8d02f0df2 100644
--- a/arch/x86/kernel/iosf_mbi.c
+++ b/arch/x86/kernel/iosf_mbi.c
@@ -22,10 +22,13 @@
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/capability.h>
#include <asm/iosf_mbi.h>
#define PCI_DEVICE_ID_BAYTRAIL 0x0F00
+#define PCI_DEVICE_ID_BRASWELL 0x2280
#define PCI_DEVICE_ID_QUARK_X1000 0x0958
static DEFINE_SPINLOCK(iosf_mbi_lock);
@@ -187,6 +190,89 @@ bool iosf_mbi_available(void)
}
EXPORT_SYMBOL(iosf_mbi_available);
+#ifdef CONFIG_IOSF_MBI_DEBUG
+static u32 dbg_mdr;
+static u32 dbg_mcr;
+static u32 dbg_mcrx;
+
+static int mcr_get(void *data, u64 *val)
+{
+ *val = *(u32 *)data;
+ return 0;
+}
+
+static int mcr_set(void *data, u64 val)
+{
+ u8 command = ((u32)val & 0xFF000000) >> 24,
+ port = ((u32)val & 0x00FF0000) >> 16,
+ offset = ((u32)val & 0x0000FF00) >> 8;
+ int err;
+
+ *(u32 *)data = val;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EACCES;
+
+ if (command & 1u)
+ err = iosf_mbi_write(port,
+ command,
+ dbg_mcrx | offset,
+ dbg_mdr);
+ else
+ err = iosf_mbi_read(port,
+ command,
+ dbg_mcrx | offset,
+ &dbg_mdr);
+
+ return err;
+}
+DEFINE_SIMPLE_ATTRIBUTE(iosf_mcr_fops, mcr_get, mcr_set , "%llx\n");
+
+static struct dentry *iosf_dbg;
+
+static void iosf_sideband_debug_init(void)
+{
+ struct dentry *d;
+
+ iosf_dbg = debugfs_create_dir("iosf_sb", NULL);
+ if (IS_ERR_OR_NULL(iosf_dbg))
+ return;
+
+ /* mdr */
+ d = debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr);
+ if (IS_ERR_OR_NULL(d))
+ goto cleanup;
+
+ /* mcrx */
+ debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx);
+ if (IS_ERR_OR_NULL(d))
+ goto cleanup;
+
+ /* mcr - initiates mailbox tranaction */
+ debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops);
+ if (IS_ERR_OR_NULL(d))
+ goto cleanup;
+
+ return;
+
+cleanup:
+ debugfs_remove_recursive(d);
+}
+
+static void iosf_debugfs_init(void)
+{
+ iosf_sideband_debug_init();
+}
+
+static void iosf_debugfs_remove(void)
+{
+ debugfs_remove_recursive(iosf_dbg);
+}
+#else
+static inline void iosf_debugfs_init(void) { }
+static inline void iosf_debugfs_remove(void) { }
+#endif /* CONFIG_IOSF_MBI_DEBUG */
+
static int iosf_mbi_probe(struct pci_dev *pdev,
const struct pci_device_id *unused)
{
@@ -202,8 +288,9 @@ static int iosf_mbi_probe(struct pci_dev *pdev,
return 0;
}
-static DEFINE_PCI_DEVICE_TABLE(iosf_mbi_pci_ids) = {
+static const struct pci_device_id iosf_mbi_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BAYTRAIL) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BRASWELL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_QUARK_X1000) },
{ 0, },
};
@@ -217,11 +304,15 @@ static struct pci_driver iosf_mbi_pci_driver = {
static int __init iosf_mbi_init(void)
{
+ iosf_debugfs_init();
+
return pci_register_driver(&iosf_mbi_pci_driver);
}
static void __exit iosf_mbi_exit(void)
{
+ iosf_debugfs_remove();
+
pci_unregister_driver(&iosf_mbi_pci_driver);
if (mbi_pdev) {
pci_dev_put(mbi_pdev);
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 4d1c746892eb..e4b503d5558c 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -52,13 +52,13 @@ static inline void stack_overflow_check(struct pt_regs *regs)
regs->sp <= curbase + THREAD_SIZE)
return;
- irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) +
+ irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) +
STACK_TOP_MARGIN;
- irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr);
+ irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr);
if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
return;
- oist = &__get_cpu_var(orig_ist);
+ oist = this_cpu_ptr(&orig_ist);
estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 1de84e3ab4e0..15d741ddfeeb 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -41,7 +41,7 @@ __visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
void arch_irq_work_raise(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
- if (!cpu_has_apic)
+ if (!arch_irq_work_has_interrupt())
return;
apic->send_IPI_self(IRQ_WORK_VECTOR);
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7f50156542fb..4de73ee78361 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -70,7 +70,6 @@ int vector_used_by_percpu_irq(unsigned int vector)
void __init init_ISA_irqs(void)
{
struct irq_chip *chip = legacy_pic->chip;
- const char *name = chip->name;
int i;
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
@@ -78,8 +77,8 @@ void __init init_ISA_irqs(void)
#endif
legacy_pic->init(0);
- for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
- irq_set_chip_and_handler_name(i, chip, handle_level_irq, name);
+ for (i = 0; i < nr_legacy_irqs(); i++)
+ irq_set_chip_and_handler(i, chip, handle_level_irq);
}
void __init init_IRQ(void)
@@ -87,12 +86,6 @@ void __init init_IRQ(void)
int i;
/*
- * We probably need a better place for this, but it works for
- * now ...
- */
- x86_add_irq_domains();
-
- /*
* On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
@@ -100,7 +93,7 @@ void __init init_IRQ(void)
* then this vector space can be freed and re-used dynamically as the
* irq's migrate etc.
*/
- for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
+ for (i = 0; i < nr_legacy_irqs(); i++)
per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
x86_init.irqs.intr_init();
@@ -121,7 +114,7 @@ void setup_vector_irq(int cpu)
* legacy PIC, for the new cpu that is coming online, setup the static
* legacy vector to irq mapping:
*/
- for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++)
+ for (irq = 0; irq < nr_legacy_irqs(); irq++)
per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
#endif
@@ -209,7 +202,7 @@ void __init native_init_IRQ(void)
set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
}
- if (!acpi_ioapic && !of_ioapic)
+ if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
setup_irq(2, &irq2);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
new file mode 100644
index 000000000000..ca05f86481aa
--- /dev/null
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -0,0 +1,554 @@
+/*
+ * Kexec bzImage loader
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ * Authors:
+ * Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#define pr_fmt(fmt) "kexec-bzImage64: " fmt
+
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/efi.h>
+#include <linux/verify_pefile.h>
+#include <keys/system_keyring.h>
+
+#include <asm/bootparam.h>
+#include <asm/setup.h>
+#include <asm/crash.h>
+#include <asm/efi.h>
+#include <asm/kexec-bzimage64.h>
+
+#define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */
+
+/*
+ * Defines lowest physical address for various segments. Not sure where
+ * exactly these limits came from. Current bzimage64 loader in kexec-tools
+ * uses these so I am retaining it. It can be changed over time as we gain
+ * more insight.
+ */
+#define MIN_PURGATORY_ADDR 0x3000
+#define MIN_BOOTPARAM_ADDR 0x3000
+#define MIN_KERNEL_LOAD_ADDR 0x100000
+#define MIN_INITRD_LOAD_ADDR 0x1000000
+
+/*
+ * This is a place holder for all boot loader specific data structure which
+ * gets allocated in one call but gets freed much later during cleanup
+ * time. Right now there is only one field but it can grow as need be.
+ */
+struct bzimage64_data {
+ /*
+ * Temporary buffer to hold bootparams buffer. This should be
+ * freed once the bootparam segment has been loaded.
+ */
+ void *bootparams_buf;
+};
+
+static int setup_initrd(struct boot_params *params,
+ unsigned long initrd_load_addr, unsigned long initrd_len)
+{
+ params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL;
+ params->hdr.ramdisk_size = initrd_len & 0xffffffffUL;
+
+ params->ext_ramdisk_image = initrd_load_addr >> 32;
+ params->ext_ramdisk_size = initrd_len >> 32;
+
+ return 0;
+}
+
+static int setup_cmdline(struct kimage *image, struct boot_params *params,
+ unsigned long bootparams_load_addr,
+ unsigned long cmdline_offset, char *cmdline,
+ unsigned long cmdline_len)
+{
+ char *cmdline_ptr = ((char *)params) + cmdline_offset;
+ unsigned long cmdline_ptr_phys, len;
+ uint32_t cmdline_low_32, cmdline_ext_32;
+
+ memcpy(cmdline_ptr, cmdline, cmdline_len);
+ if (image->type == KEXEC_TYPE_CRASH) {
+ len = sprintf(cmdline_ptr + cmdline_len - 1,
+ " elfcorehdr=0x%lx", image->arch.elf_load_addr);
+ cmdline_len += len;
+ }
+ cmdline_ptr[cmdline_len - 1] = '\0';
+
+ pr_debug("Final command line is: %s\n", cmdline_ptr);
+ cmdline_ptr_phys = bootparams_load_addr + cmdline_offset;
+ cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL;
+ cmdline_ext_32 = cmdline_ptr_phys >> 32;
+
+ params->hdr.cmd_line_ptr = cmdline_low_32;
+ if (cmdline_ext_32)
+ params->ext_cmd_line_ptr = cmdline_ext_32;
+
+ return 0;
+}
+
+static int setup_e820_entries(struct boot_params *params)
+{
+ unsigned int nr_e820_entries;
+
+ nr_e820_entries = e820_saved.nr_map;
+
+ /* TODO: Pass entries more than E820MAX in bootparams setup data */
+ if (nr_e820_entries > E820MAX)
+ nr_e820_entries = E820MAX;
+
+ params->e820_entries = nr_e820_entries;
+ memcpy(&params->e820_map, &e820_saved.map,
+ nr_e820_entries * sizeof(struct e820entry));
+
+ return 0;
+}
+
+#ifdef CONFIG_EFI
+static int setup_efi_info_memmap(struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int efi_map_offset,
+ unsigned int efi_map_sz)
+{
+ void *efi_map = (void *)params + efi_map_offset;
+ unsigned long efi_map_phys_addr = params_load_addr + efi_map_offset;
+ struct efi_info *ei = &params->efi_info;
+
+ if (!efi_map_sz)
+ return 0;
+
+ efi_runtime_map_copy(efi_map, efi_map_sz);
+
+ ei->efi_memmap = efi_map_phys_addr & 0xffffffff;
+ ei->efi_memmap_hi = efi_map_phys_addr >> 32;
+ ei->efi_memmap_size = efi_map_sz;
+
+ return 0;
+}
+
+static int
+prepare_add_efi_setup_data(struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int efi_setup_data_offset)
+{
+ unsigned long setup_data_phys;
+ struct setup_data *sd = (void *)params + efi_setup_data_offset;
+ struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data);
+
+ esd->fw_vendor = efi.fw_vendor;
+ esd->runtime = efi.runtime;
+ esd->tables = efi.config_table;
+ esd->smbios = efi.smbios;
+
+ sd->type = SETUP_EFI;
+ sd->len = sizeof(struct efi_setup_data);
+
+ /* Add setup data */
+ setup_data_phys = params_load_addr + efi_setup_data_offset;
+ sd->next = params->hdr.setup_data;
+ params->hdr.setup_data = setup_data_phys;
+
+ return 0;
+}
+
+static int
+setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
+ unsigned int efi_map_offset, unsigned int efi_map_sz,
+ unsigned int efi_setup_data_offset)
+{
+ struct efi_info *current_ei = &boot_params.efi_info;
+ struct efi_info *ei = &params->efi_info;
+
+ if (!current_ei->efi_memmap_size)
+ return 0;
+
+ /*
+ * If 1:1 mapping is not enabled, second kernel can not setup EFI
+ * and use EFI run time services. User space will have to pass
+ * acpi_rsdp=<addr> on kernel command line to make second kernel boot
+ * without efi.
+ */
+ if (efi_enabled(EFI_OLD_MEMMAP))
+ return 0;
+
+ ei->efi_loader_signature = current_ei->efi_loader_signature;
+ ei->efi_systab = current_ei->efi_systab;
+ ei->efi_systab_hi = current_ei->efi_systab_hi;
+
+ ei->efi_memdesc_version = current_ei->efi_memdesc_version;
+ ei->efi_memdesc_size = efi_get_runtime_map_desc_size();
+
+ setup_efi_info_memmap(params, params_load_addr, efi_map_offset,
+ efi_map_sz);
+ prepare_add_efi_setup_data(params, params_load_addr,
+ efi_setup_data_offset);
+ return 0;
+}
+#endif /* CONFIG_EFI */
+
+static int
+setup_boot_parameters(struct kimage *image, struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int efi_map_offset, unsigned int efi_map_sz,
+ unsigned int efi_setup_data_offset)
+{
+ unsigned int nr_e820_entries;
+ unsigned long long mem_k, start, end;
+ int i, ret = 0;
+
+ /* Get subarch from existing bootparams */
+ params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
+
+ /* Copying screen_info will do? */
+ memcpy(&params->screen_info, &boot_params.screen_info,
+ sizeof(struct screen_info));
+
+ /* Fill in memsize later */
+ params->screen_info.ext_mem_k = 0;
+ params->alt_mem_k = 0;
+
+ /* Default APM info */
+ memset(&params->apm_bios_info, 0, sizeof(params->apm_bios_info));
+
+ /* Default drive info */
+ memset(&params->hd0_info, 0, sizeof(params->hd0_info));
+ memset(&params->hd1_info, 0, sizeof(params->hd1_info));
+
+ /* Default sysdesc table */
+ params->sys_desc_table.length = 0;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = crash_setup_memmap_entries(image, params);
+ if (ret)
+ return ret;
+ } else
+ setup_e820_entries(params);
+
+ nr_e820_entries = params->e820_entries;
+
+ for (i = 0; i < nr_e820_entries; i++) {
+ if (params->e820_map[i].type != E820_RAM)
+ continue;
+ start = params->e820_map[i].addr;
+ end = params->e820_map[i].addr + params->e820_map[i].size - 1;
+
+ if ((start <= 0x100000) && end > 0x100000) {
+ mem_k = (end >> 10) - (0x100000 >> 10);
+ params->screen_info.ext_mem_k = mem_k;
+ params->alt_mem_k = mem_k;
+ if (mem_k > 0xfc00)
+ params->screen_info.ext_mem_k = 0xfc00; /* 64M*/
+ if (mem_k > 0xffffffff)
+ params->alt_mem_k = 0xffffffff;
+ }
+ }
+
+#ifdef CONFIG_EFI
+ /* Setup EFI state */
+ setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz,
+ efi_setup_data_offset);
+#endif
+
+ /* Setup EDD info */
+ memcpy(params->eddbuf, boot_params.eddbuf,
+ EDDMAXNR * sizeof(struct edd_info));
+ params->eddbuf_entries = boot_params.eddbuf_entries;
+
+ memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer,
+ EDD_MBR_SIG_MAX * sizeof(unsigned int));
+
+ return ret;
+}
+
+static int bzImage64_probe(const char *buf, unsigned long len)
+{
+ int ret = -ENOEXEC;
+ struct setup_header *header;
+
+ /* kernel should be atleast two sectors long */
+ if (len < 2 * 512) {
+ pr_err("File is too short to be a bzImage\n");
+ return ret;
+ }
+
+ header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr));
+ if (memcmp((char *)&header->header, "HdrS", 4) != 0) {
+ pr_err("Not a bzImage\n");
+ return ret;
+ }
+
+ if (header->boot_flag != 0xAA55) {
+ pr_err("No x86 boot sector present\n");
+ return ret;
+ }
+
+ if (header->version < 0x020C) {
+ pr_err("Must be at least protocol version 2.12\n");
+ return ret;
+ }
+
+ if (!(header->loadflags & LOADED_HIGH)) {
+ pr_err("zImage not a bzImage\n");
+ return ret;
+ }
+
+ if (!(header->xloadflags & XLF_KERNEL_64)) {
+ pr_err("Not a bzImage64. XLF_KERNEL_64 is not set.\n");
+ return ret;
+ }
+
+ if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) {
+ pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n");
+ return ret;
+ }
+
+ /*
+ * Can't handle 32bit EFI as it does not allow loading kernel
+ * above 4G. This should be handled by 32bit bzImage loader
+ */
+ if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) {
+ pr_debug("EFI is 32 bit. Can't load kernel above 4G.\n");
+ return ret;
+ }
+
+ /* I've got a bzImage */
+ pr_debug("It's a relocatable bzImage64\n");
+ ret = 0;
+
+ return ret;
+}
+
+static void *bzImage64_load(struct kimage *image, char *kernel,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+
+ struct setup_header *header;
+ int setup_sects, kern16_size, ret = 0;
+ unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
+ struct boot_params *params;
+ unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
+ unsigned long purgatory_load_addr;
+ unsigned long kernel_bufsz, kernel_memsz, kernel_align;
+ char *kernel_buf;
+ struct bzimage64_data *ldata;
+ struct kexec_entry64_regs regs64;
+ void *stack;
+ unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
+ unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
+
+ header = (struct setup_header *)(kernel + setup_hdr_offset);
+ setup_sects = header->setup_sects;
+ if (setup_sects == 0)
+ setup_sects = 4;
+
+ kern16_size = (setup_sects + 1) * 512;
+ if (kernel_len < kern16_size) {
+ pr_err("bzImage truncated\n");
+ return ERR_PTR(-ENOEXEC);
+ }
+
+ if (cmdline_len > header->cmdline_size) {
+ pr_err("Kernel command line too long\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /*
+ * In case of crash dump, we will append elfcorehdr=<addr> to
+ * command line. Make sure it does not overflow
+ */
+ if (cmdline_len + MAX_ELFCOREHDR_STR_LEN > header->cmdline_size) {
+ pr_debug("Appending elfcorehdr=<addr> to command line exceeds maximum allowed length\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Allocate and load backup region */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = crash_load_segments(image);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ /*
+ * Load purgatory. For 64bit entry point, purgatory code can be
+ * anywhere.
+ */
+ ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1,
+ &purgatory_load_addr);
+ if (ret) {
+ pr_err("Loading purgatory failed\n");
+ return ERR_PTR(ret);
+ }
+
+ pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+
+
+ /*
+ * Load Bootparams and cmdline and space for efi stuff.
+ *
+ * Allocate memory together for multiple data structures so
+ * that they all can go in single area/segment and we don't
+ * have to create separate segment for each. Keeps things
+ * little bit simple
+ */
+ efi_map_sz = efi_get_runtime_map_size();
+ efi_map_sz = ALIGN(efi_map_sz, 16);
+ params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
+ MAX_ELFCOREHDR_STR_LEN;
+ params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
+ params_misc_sz = params_cmdline_sz + efi_map_sz +
+ sizeof(struct setup_data) +
+ sizeof(struct efi_setup_data);
+
+ params = kzalloc(params_misc_sz, GFP_KERNEL);
+ if (!params)
+ return ERR_PTR(-ENOMEM);
+ efi_map_offset = params_cmdline_sz;
+ efi_setup_data_offset = efi_map_offset + efi_map_sz;
+
+ /* Copy setup header onto bootparams. Documentation/x86/boot.txt */
+ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
+
+ /* Is there a limit on setup header size? */
+ memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
+
+ ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
+ params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
+ ULONG_MAX, 1, &bootparam_load_addr);
+ if (ret)
+ goto out_free_params;
+ pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ bootparam_load_addr, params_misc_sz, params_misc_sz);
+
+ /* Load kernel */
+ kernel_buf = kernel + kern16_size;
+ kernel_bufsz = kernel_len - kern16_size;
+ kernel_memsz = PAGE_ALIGN(header->init_size);
+ kernel_align = header->kernel_alignment;
+
+ ret = kexec_add_buffer(image, kernel_buf,
+ kernel_bufsz, kernel_memsz, kernel_align,
+ MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
+ &kernel_load_addr);
+ if (ret)
+ goto out_free_params;
+
+ pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kernel_load_addr, kernel_memsz, kernel_memsz);
+
+ /* Load initrd high */
+ if (initrd) {
+ ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
+ PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
+ ULONG_MAX, 1, &initrd_load_addr);
+ if (ret)
+ goto out_free_params;
+
+ pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ initrd_load_addr, initrd_len, initrd_len);
+
+ setup_initrd(params, initrd_load_addr, initrd_len);
+ }
+
+ setup_cmdline(image, params, bootparam_load_addr,
+ sizeof(struct boot_params), cmdline, cmdline_len);
+
+ /* bootloader info. Do we need a separate ID for kexec kernel loader? */
+ params->hdr.type_of_loader = 0x0D << 4;
+ params->hdr.loadflags = 0;
+
+ /* Setup purgatory regs for entry */
+ ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
+ sizeof(regs64), 1);
+ if (ret)
+ goto out_free_params;
+
+ regs64.rbx = 0; /* Bootstrap Processor */
+ regs64.rsi = bootparam_load_addr;
+ regs64.rip = kernel_load_addr + 0x200;
+ stack = kexec_purgatory_get_symbol_addr(image, "stack_end");
+ if (IS_ERR(stack)) {
+ pr_err("Could not find address of symbol stack_end\n");
+ ret = -EINVAL;
+ goto out_free_params;
+ }
+
+ regs64.rsp = (unsigned long)stack;
+ ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
+ sizeof(regs64), 0);
+ if (ret)
+ goto out_free_params;
+
+ ret = setup_boot_parameters(image, params, bootparam_load_addr,
+ efi_map_offset, efi_map_sz,
+ efi_setup_data_offset);
+ if (ret)
+ goto out_free_params;
+
+ /* Allocate loader specific data */
+ ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL);
+ if (!ldata) {
+ ret = -ENOMEM;
+ goto out_free_params;
+ }
+
+ /*
+ * Store pointer to params so that it could be freed after loading
+ * params segment has been loaded and contents have been copied
+ * somewhere else.
+ */
+ ldata->bootparams_buf = params;
+ return ldata;
+
+out_free_params:
+ kfree(params);
+ return ERR_PTR(ret);
+}
+
+/* This cleanup function is called after various segments have been loaded */
+static int bzImage64_cleanup(void *loader_data)
+{
+ struct bzimage64_data *ldata = loader_data;
+
+ if (!ldata)
+ return 0;
+
+ kfree(ldata->bootparams_buf);
+ ldata->bootparams_buf = NULL;
+
+ return 0;
+}
+
+#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
+static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
+{
+ bool trusted;
+ int ret;
+
+ ret = verify_pefile_signature(kernel, kernel_len,
+ system_trusted_keyring, &trusted);
+ if (ret < 0)
+ return ret;
+ if (!trusted)
+ return -EKEYREJECTED;
+ return 0;
+}
+#endif
+
+struct kexec_file_ops kexec_bzImage64_ops = {
+ .probe = bzImage64_probe,
+ .load = bzImage64_load,
+ .cleanup = bzImage64_cleanup,
+#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
+ .verify_sig = bzImage64_verify_sig,
+#endif
+};
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index f304773285ae..f1314d0bcf0a 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -338,8 +338,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
* a relative jump.
*/
rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
- if (abs(rel) > 0x7fffffff)
+ if (abs(rel) > 0x7fffffff) {
+ __arch_remove_optimized_kprobe(op, 0);
return -ERANGE;
+ }
buf = (u8 *)op->optinsn.insn;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 3dd8e2c4d74a..f6945bef2cd1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -35,6 +35,7 @@
#include <linux/slab.h>
#include <linux/kprobes.h>
#include <linux/debugfs.h>
+#include <linux/nmi.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -243,9 +244,9 @@ u32 kvm_read_and_reset_pf_reason(void)
{
u32 reason = 0;
- if (__get_cpu_var(apf_reason).enabled) {
- reason = __get_cpu_var(apf_reason).reason;
- __get_cpu_var(apf_reason).reason = 0;
+ if (__this_cpu_read(apf_reason.enabled)) {
+ reason = __this_cpu_read(apf_reason.reason);
+ __this_cpu_write(apf_reason.reason, 0);
}
return reason;
@@ -318,7 +319,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
* there's no need for lock or memory barriers.
* An optimization barrier is implied in apic write.
*/
- if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi)))
+ if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi)))
return;
apic_write(APIC_EOI, APIC_EOI_ACK);
}
@@ -329,13 +330,13 @@ void kvm_guest_cpu_init(void)
return;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
- u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason));
+ u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
#ifdef CONFIG_PREEMPT
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
#endif
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
- __get_cpu_var(apf_reason).enabled = 1;
+ __this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
smp_processor_id());
}
@@ -344,8 +345,8 @@ void kvm_guest_cpu_init(void)
unsigned long pa;
/* Size alignment is implied but just to make it explicit. */
BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
- __get_cpu_var(kvm_apic_eoi) = 0;
- pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi))
+ __this_cpu_write(kvm_apic_eoi, 0);
+ pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi))
| KVM_MSR_ENABLED;
wrmsrl(MSR_KVM_PV_EOI_EN, pa);
}
@@ -356,11 +357,11 @@ void kvm_guest_cpu_init(void)
static void kvm_pv_disable_apf(void)
{
- if (!__get_cpu_var(apf_reason).enabled)
+ if (!__this_cpu_read(apf_reason.enabled))
return;
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
- __get_cpu_var(apf_reason).enabled = 0;
+ __this_cpu_write(apf_reason.enabled, 0);
printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
smp_processor_id());
@@ -499,6 +500,13 @@ void __init kvm_guest_init(void)
#else
kvm_guest_cpu_init();
#endif
+
+ /*
+ * Hard lockup detection is enabled by default. Disable it, as guests
+ * can get false positives too easily, for example if the host is
+ * overcommitted.
+ */
+ watchdog_enable_hardlockup_detector(false);
}
static noinline uint32_t __kvm_cpuid_base(void)
@@ -716,7 +724,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
if (in_nmi())
return;
- w = &__get_cpu_var(klock_waiting);
+ w = this_cpu_ptr(&klock_waiting);
cpu = smp_processor_id();
start = spin_time_start();
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 1667b1de8d5d..72e8e310258d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -247,7 +247,8 @@ void machine_kexec(struct kimage *image)
/* now call it */
image->start = relocate_kernel_ptr((unsigned long)image->head,
(unsigned long)page_list,
- image->start, cpu_has_pae,
+ image->start,
+ boot_cpu_has(X86_FEATURE_PAE),
image->preserve_context);
#ifdef CONFIG_KEXEC_JUMP
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 679cef0791cd..485981059a40 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -6,6 +6,8 @@
* Version 2. See the file COPYING for more details.
*/
+#define pr_fmt(fmt) "kexec: " fmt
+
#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/string.h>
@@ -21,6 +23,13 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/debugreg.h>
+#include <asm/kexec-bzimage64.h>
+
+#ifdef CONFIG_KEXEC_FILE
+static struct kexec_file_ops *kexec_file_loaders[] = {
+ &kexec_bzImage64_ops,
+};
+#endif
static void free_transition_pgtable(struct kimage *image)
{
@@ -171,6 +180,45 @@ static void load_segments(void)
);
}
+#ifdef CONFIG_KEXEC_FILE
+/* Update purgatory as needed after various image segments have been prepared */
+static int arch_update_purgatory(struct kimage *image)
+{
+ int ret = 0;
+
+ if (!image->file_mode)
+ return 0;
+
+ /* Setup copying of backup region */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+ &image->arch.backup_load_addr,
+ sizeof(image->arch.backup_load_addr), 0);
+ if (ret)
+ return ret;
+
+ ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+ &image->arch.backup_src_start,
+ sizeof(image->arch.backup_src_start), 0);
+ if (ret)
+ return ret;
+
+ ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+ &image->arch.backup_src_sz,
+ sizeof(image->arch.backup_src_sz), 0);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+#else /* !CONFIG_KEXEC_FILE */
+static inline int arch_update_purgatory(struct kimage *image)
+{
+ return 0;
+}
+#endif /* CONFIG_KEXEC_FILE */
+
int machine_kexec_prepare(struct kimage *image)
{
unsigned long start_pgtable;
@@ -184,6 +232,11 @@ int machine_kexec_prepare(struct kimage *image)
if (result)
return result;
+ /* update purgatory as needed */
+ result = arch_update_purgatory(image);
+ if (result)
+ return result;
+
return 0;
}
@@ -283,3 +336,200 @@ void arch_crash_save_vmcoreinfo(void)
(unsigned long)&_text - __START_KERNEL);
}
+/* arch-dependent functionality related to kexec file-based syscall */
+
+#ifdef CONFIG_KEXEC_FILE
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ int i, ret = -ENOEXEC;
+ struct kexec_file_ops *fops;
+
+ for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
+ fops = kexec_file_loaders[i];
+ if (!fops || !fops->probe)
+ continue;
+
+ ret = fops->probe(buf, buf_len);
+ if (!ret) {
+ image->fops = fops;
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image)
+{
+ vfree(image->arch.elf_headers);
+ image->arch.elf_headers = NULL;
+
+ if (!image->fops || !image->fops->load)
+ return ERR_PTR(-ENOEXEC);
+
+ return image->fops->load(image, image->kernel_buf,
+ image->kernel_buf_len, image->initrd_buf,
+ image->initrd_buf_len, image->cmdline_buf,
+ image->cmdline_buf_len);
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ if (!image->fops || !image->fops->cleanup)
+ return 0;
+
+ return image->fops->cleanup(image->image_loader_data);
+}
+
+int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel,
+ unsigned long kernel_len)
+{
+ if (!image->fops || !image->fops->verify_sig) {
+ pr_debug("kernel loader does not support signature verification.");
+ return -EKEYREJECTED;
+ }
+
+ return image->fops->verify_sig(kernel, kernel_len);
+}
+
+/*
+ * Apply purgatory relocations.
+ *
+ * ehdr: Pointer to elf headers
+ * sechdrs: Pointer to section headers.
+ * relsec: section index of SHT_RELA section.
+ *
+ * TODO: Some of the code belongs to generic code. Move that in kexec.c.
+ */
+int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
+ Elf64_Shdr *sechdrs, unsigned int relsec)
+{
+ unsigned int i;
+ Elf64_Rela *rel;
+ Elf64_Sym *sym;
+ void *location;
+ Elf64_Shdr *section, *symtabsec;
+ unsigned long address, sec_base, value;
+ const char *strtab, *name, *shstrtab;
+
+ /*
+ * ->sh_offset has been modified to keep the pointer to section
+ * contents in memory
+ */
+ rel = (void *)sechdrs[relsec].sh_offset;
+
+ /* Section to which relocations apply */
+ section = &sechdrs[sechdrs[relsec].sh_info];
+
+ pr_debug("Applying relocate section %u to %u\n", relsec,
+ sechdrs[relsec].sh_info);
+
+ /* Associated symbol table */
+ symtabsec = &sechdrs[sechdrs[relsec].sh_link];
+
+ /* String table */
+ if (symtabsec->sh_link >= ehdr->e_shnum) {
+ /* Invalid strtab section number */
+ pr_err("Invalid string table section index %d\n",
+ symtabsec->sh_link);
+ return -ENOEXEC;
+ }
+
+ strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset;
+
+ /* section header string table */
+ shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+
+ /*
+ * rel[i].r_offset contains byte offset from beginning
+ * of section to the storage unit affected.
+ *
+ * This is location to update (->sh_offset). This is temporary
+ * buffer where section is currently loaded. This will finally
+ * be loaded to a different address later, pointed to by
+ * ->sh_addr. kexec takes care of moving it
+ * (kexec_load_segment()).
+ */
+ location = (void *)(section->sh_offset + rel[i].r_offset);
+
+ /* Final address of the location */
+ address = section->sh_addr + rel[i].r_offset;
+
+ /*
+ * rel[i].r_info contains information about symbol table index
+ * w.r.t which relocation must be made and type of relocation
+ * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get
+ * these respectively.
+ */
+ sym = (Elf64_Sym *)symtabsec->sh_offset +
+ ELF64_R_SYM(rel[i].r_info);
+
+ if (sym->st_name)
+ name = strtab + sym->st_name;
+ else
+ name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+ pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n",
+ name, sym->st_info, sym->st_shndx, sym->st_value,
+ sym->st_size);
+
+ if (sym->st_shndx == SHN_UNDEF) {
+ pr_err("Undefined symbol: %s\n", name);
+ return -ENOEXEC;
+ }
+
+ if (sym->st_shndx == SHN_COMMON) {
+ pr_err("symbol '%s' in common section\n", name);
+ return -ENOEXEC;
+ }
+
+ if (sym->st_shndx == SHN_ABS)
+ sec_base = 0;
+ else if (sym->st_shndx >= ehdr->e_shnum) {
+ pr_err("Invalid section %d for symbol %s\n",
+ sym->st_shndx, name);
+ return -ENOEXEC;
+ } else
+ sec_base = sechdrs[sym->st_shndx].sh_addr;
+
+ value = sym->st_value;
+ value += sec_base;
+ value += rel[i].r_addend;
+
+ switch (ELF64_R_TYPE(rel[i].r_info)) {
+ case R_X86_64_NONE:
+ break;
+ case R_X86_64_64:
+ *(u64 *)location = value;
+ break;
+ case R_X86_64_32:
+ *(u32 *)location = value;
+ if (value != *(u32 *)location)
+ goto overflow;
+ break;
+ case R_X86_64_32S:
+ *(s32 *)location = value;
+ if ((s64)value != *(s32 *)location)
+ goto overflow;
+ break;
+ case R_X86_64_PC32:
+ value -= (u64)address;
+ *(u32 *)location = value;
+ break;
+ default:
+ pr_err("Unknown rela relocation: %llu\n",
+ ELF64_R_TYPE(rel[i].r_info));
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+
+overflow:
+ pr_err("Overflow in relocation type %d value 0x%lx\n",
+ (int)ELF64_R_TYPE(rel[i].r_info), value);
+ return -ENOEXEC;
+}
+#endif /* CONFIG_KEXEC_FILE */
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index d2b56489d70f..2d2a237f2c73 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/pci.h>
+#include <linux/irqdomain.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
@@ -67,7 +68,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
boot_cpu_physical_apicid = m->apicid;
}
- printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu);
+ pr_info("Processor #%d%s\n", m->apicid, bootup_cpu);
generic_processor_info(apicid, m->apicver);
}
@@ -87,9 +88,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
#if MAX_MP_BUSSES < 256
if (m->busid >= MAX_MP_BUSSES) {
- printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
- " is too large, max. supported is %d\n",
- m->busid, str, MAX_MP_BUSSES - 1);
+ pr_warn("MP table busid value (%d) for bustype %s is too large, max. supported is %d\n",
+ m->busid, str, MAX_MP_BUSSES - 1);
return;
}
#endif
@@ -110,19 +110,29 @@ static void __init MP_bus_info(struct mpc_bus *m)
mp_bus_id_to_type[m->busid] = MP_BUS_EISA;
#endif
} else
- printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+ pr_warn("Unknown bustype %s - ignoring\n", str);
}
+static struct irq_domain_ops mp_ioapic_irqdomain_ops = {
+ .map = mp_irqdomain_map,
+ .unmap = mp_irqdomain_unmap,
+};
+
static void __init MP_ioapic_info(struct mpc_ioapic *m)
{
+ struct ioapic_domain_cfg cfg = {
+ .type = IOAPIC_DOMAIN_LEGACY,
+ .ops = &mp_ioapic_irqdomain_ops,
+ };
+
if (m->flags & MPC_APIC_USABLE)
- mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
+ mp_register_ioapic(m->apicid, m->apicaddr, gsi_top, &cfg);
}
static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
{
- apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ apic_printk(APIC_VERBOSE,
+ "Int: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC INT %02x\n",
mp_irq->irqtype, mp_irq->irqflag & 3,
(mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
@@ -135,8 +145,8 @@ static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
{
- apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+ apic_printk(APIC_VERBOSE,
+ "Lint: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC LINT %02x\n",
m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid,
m->srcbusirq, m->destapic, m->destapiclint);
}
@@ -148,34 +158,33 @@ static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
{
if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) {
- printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
+ pr_err("MPTABLE: bad signature [%c%c%c%c]!\n",
mpc->signature[0], mpc->signature[1],
mpc->signature[2], mpc->signature[3]);
return 0;
}
if (mpf_checksum((unsigned char *)mpc, mpc->length)) {
- printk(KERN_ERR "MPTABLE: checksum error!\n");
+ pr_err("MPTABLE: checksum error!\n");
return 0;
}
if (mpc->spec != 0x01 && mpc->spec != 0x04) {
- printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
- mpc->spec);
+ pr_err("MPTABLE: bad table version (%d)!!\n", mpc->spec);
return 0;
}
if (!mpc->lapic) {
- printk(KERN_ERR "MPTABLE: null local APIC address!\n");
+ pr_err("MPTABLE: null local APIC address!\n");
return 0;
}
memcpy(oem, mpc->oem, 8);
oem[8] = 0;
- printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem);
+ pr_info("MPTABLE: OEM ID: %s\n", oem);
memcpy(str, mpc->productid, 12);
str[12] = 0;
- printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
+ pr_info("MPTABLE: Product ID: %s\n", str);
- printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic);
+ pr_info("MPTABLE: APIC at: 0x%X\n", mpc->lapic);
return 1;
}
@@ -188,8 +197,8 @@ static void skip_entry(unsigned char **ptr, int *count, int size)
static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
{
- printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"
- "type %x\n", *mpt);
+ pr_err("Your mptable is wrong, contact your HW vendor!\n");
+ pr_cont("type %x\n", *mpt);
print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
1, mpc, mpc->length, 1);
}
@@ -207,9 +216,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
if (!smp_check_mpc(mpc, oem, str))
return 0;
-#ifdef CONFIG_X86_32
- generic_mps_oem_check(mpc, oem, str);
-#endif
/* Initialize the lapic mapping */
if (!acpi_lapic)
register_lapic_address(mpc->lapic);
@@ -259,7 +265,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
}
if (!num_processors)
- printk(KERN_ERR "MPTABLE: no processors registered!\n");
+ pr_err("MPTABLE: no processors registered!\n");
return num_processors;
}
@@ -295,16 +301,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
* If it does, we assume it's valid.
*/
if (mpc_default_type == 5) {
- printk(KERN_INFO "ISA/PCI bus type with no IRQ information... "
- "falling back to ELCR\n");
+ pr_info("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) ||
ELCR_trigger(13))
- printk(KERN_ERR "ELCR contains invalid data... "
- "not using ELCR\n");
+ pr_err("ELCR contains invalid data... not using ELCR\n");
else {
- printk(KERN_INFO
- "Using ELCR to identify PCI interrupts\n");
+ pr_info("Using ELCR to identify PCI interrupts\n");
ELCR_fallback = 1;
}
}
@@ -353,7 +356,7 @@ static void __init construct_ioapic_table(int mpc_default_type)
bus.busid = 0;
switch (mpc_default_type) {
default:
- printk(KERN_ERR "???\nUnknown standard configuration %d\n",
+ pr_err("???\nUnknown standard configuration %d\n",
mpc_default_type);
/* fall through */
case 1:
@@ -462,8 +465,8 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 0;
#endif
- printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"
- "... disabling SMP support. (tell your hw vendor)\n");
+ pr_err("BIOS bug, MP table errors detected!...\n");
+ pr_cont("... disabling SMP support. (tell your hw vendor)\n");
early_iounmap(mpc, size);
return -1;
}
@@ -481,8 +484,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
if (!mp_irq_entries) {
struct mpc_bus bus;
- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
- "using default mptable. (tell your hw vendor)\n");
+ pr_err("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
bus.type = MP_BUS;
bus.busid = 0;
@@ -516,14 +518,14 @@ void __init default_get_smp_config(unsigned int early)
if (acpi_lapic && acpi_ioapic)
return;
- printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
- mpf->specification);
+ pr_info("Intel MultiProcessor Specification v1.%d\n",
+ mpf->specification);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
if (mpf->feature2 & (1 << 7)) {
- printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
+ pr_info(" IMCR and PIC compatibility mode.\n");
pic_mode = 1;
} else {
- printk(KERN_INFO " Virtual Wire compatibility mode.\n");
+ pr_info(" Virtual Wire compatibility mode.\n");
pic_mode = 0;
}
#endif
@@ -539,8 +541,7 @@ void __init default_get_smp_config(unsigned int early)
return;
}
- printk(KERN_INFO "Default MP configuration #%d\n",
- mpf->feature1);
+ pr_info("Default MP configuration #%d\n", mpf->feature1);
construct_default_ISA_mptable(mpf->feature1);
} else if (mpf->physptr) {
@@ -550,7 +551,7 @@ void __init default_get_smp_config(unsigned int early)
BUG();
if (!early)
- printk(KERN_INFO "Processors: %d\n", num_processors);
+ pr_info("Processors: %d\n", num_processors);
/*
* Only use the first configuration found.
*/
@@ -583,10 +584,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
#endif
mpf_found = mpf;
- printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
- (unsigned long long) virt_to_phys(mpf),
- (unsigned long long) virt_to_phys(mpf) +
- sizeof(*mpf) - 1, mpf);
+ pr_info("found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
+ (unsigned long long) virt_to_phys(mpf),
+ (unsigned long long) virt_to_phys(mpf) +
+ sizeof(*mpf) - 1, mpf);
mem = virt_to_phys(mpf);
memblock_reserve(mem, sizeof(*mpf));
@@ -735,7 +736,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
int nr_m_spare = 0;
unsigned char *mpt = ((unsigned char *)mpc) + count;
- printk(KERN_INFO "mpc_length %x\n", mpc->length);
+ pr_info("mpc_length %x\n", mpc->length);
while (count < mpc->length) {
switch (*mpt) {
case MP_PROCESSOR:
@@ -862,13 +863,13 @@ static int __init update_mp_table(void)
if (!smp_check_mpc(mpc, oem, str))
return 0;
- printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf));
- printk(KERN_INFO "physptr: %x\n", mpf->physptr);
+ pr_info("mpf: %llx\n", (u64)virt_to_phys(mpf));
+ pr_info("physptr: %x\n", mpf->physptr);
if (mpc_new_phys && mpc->length > mpc_new_length) {
mpc_new_phys = 0;
- printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
- mpc_new_length);
+ pr_info("mpc_new_length is %ld, please use alloc_mptable=8k\n",
+ mpc_new_length);
}
if (!mpc_new_phys) {
@@ -879,10 +880,10 @@ static int __init update_mp_table(void)
mpc->checksum = 0xff;
new = mpf_checksum((unsigned char *)mpc, mpc->length);
if (old == new) {
- printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
+ pr_info("mpc is readonly, please try alloc_mptable instead\n");
return 0;
}
- printk(KERN_INFO "use in-position replacing\n");
+ pr_info("use in-position replacing\n");
} else {
mpf->physptr = mpc_new_phys;
mpc_new = phys_to_virt(mpc_new_phys);
@@ -892,7 +893,7 @@ static int __init update_mp_table(void)
if (mpc_new_phys - mpf->physptr) {
struct mpf_intel *mpf_new;
/* steal 16 bytes from [0, 1k) */
- printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
+ pr_info("mpf new: %x\n", 0x400 - 16);
mpf_new = phys_to_virt(0x400 - 16);
memcpy(mpf_new, mpf, 16);
mpf = mpf_new;
@@ -900,7 +901,7 @@ static int __init update_mp_table(void)
}
mpf->checksum = 0;
mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
- printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
+ pr_info("physptr new: %x\n", mpf->physptr);
}
/*
diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c
index 0c424a67985d..0ee5025e0fa4 100644
--- a/arch/x86/kernel/pmc_atom.c
+++ b/arch/x86/kernel/pmc_atom.c
@@ -235,6 +235,11 @@ err:
pmc_dbgfs_unregister(pmc);
return -ENODEV;
}
+#else
+static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev)
+{
+ return 0;
+}
#endif /* CONFIG_DEBUG_FS */
static int pmc_setup_dev(struct pci_dev *pdev)
@@ -262,14 +267,12 @@ static int pmc_setup_dev(struct pci_dev *pdev)
/* PMC hardware registers setup */
pmc_hw_reg_setup(pmc);
-#ifdef CONFIG_DEBUG_FS
ret = pmc_dbgfs_register(pmc, pdev);
if (ret) {
iounmap(pmc->regmap);
- return ret;
}
-#endif /* CONFIG_DEBUG_FS */
- return 0;
+
+ return ret;
}
/*
diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S
deleted file mode 100644
index ca7f0d58a87d..000000000000
--- a/arch/x86/kernel/preempt.S
+++ /dev/null
@@ -1,25 +0,0 @@
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/asm.h>
-#include <asm/calling.h>
-
-ENTRY(___preempt_schedule)
- CFI_STARTPROC
- SAVE_ALL
- call preempt_schedule
- RESTORE_ALL
- ret
- CFI_ENDPROC
-
-#ifdef CONFIG_CONTEXT_TRACKING
-
-ENTRY(___preempt_schedule_context)
- CFI_STARTPROC
- SAVE_ALL
- call preempt_schedule_context
- RESTORE_ALL
- ret
- CFI_ENDPROC
-
-#endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4505e2a950d8..e127ddaa2d5a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -64,14 +64,16 @@ EXPORT_SYMBOL_GPL(task_xstate_cachep);
*/
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- int ret;
-
*dst = *src;
- if (fpu_allocated(&src->thread.fpu)) {
- memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
- ret = fpu_alloc(&dst->thread.fpu);
- if (ret)
- return ret;
+
+ dst->thread.fpu_counter = 0;
+ dst->thread.fpu.has_fpu = 0;
+ dst->thread.fpu.last_cpu = ~0;
+ dst->thread.fpu.state = NULL;
+ if (tsk_used_math(src)) {
+ int err = fpu_alloc(&dst->thread.fpu);
+ if (err)
+ return err;
fpu_copy(dst, src);
}
return 0;
@@ -93,6 +95,7 @@ void arch_task_cache_init(void)
kmem_cache_create("task_xstate", xstate_size,
__alignof__(union thread_xstate),
SLAB_PANIC | SLAB_NOTRACK, NULL);
+ setup_xstate_comp();
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7bc86bbe7485..8f3ebfe710d0 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -138,6 +138,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.sp = (unsigned long) childregs;
p->thread.sp0 = (unsigned long) (childregs+1);
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
@@ -152,9 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
- p->thread.fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
- memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
return 0;
}
*childregs = *current_pt_regs();
@@ -165,13 +164,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.ip = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(current_pt_regs());
- p->thread.fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
tsk = current;
err = -ENOMEM;
- memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ca5b02d405c3..3ed4a68d4013 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -163,7 +163,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.sp = (unsigned long) childregs;
p->thread.usersp = me->thread.usersp;
set_tsk_thread_flag(p, TIF_FORK);
- p->thread.fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
@@ -193,8 +192,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs->sp = sp;
err = -ENOMEM;
- memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 678c0ada3b3c..749b0e423419 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1441,24 +1441,126 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
force_sig_info(SIGTRAP, &info, tsk);
}
-
-#ifdef CONFIG_X86_32
-# define IS_IA32 1
-#elif defined CONFIG_IA32_EMULATION
-# define IS_IA32 is_compat_task()
-#else
-# define IS_IA32 0
+static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
+{
+#ifdef CONFIG_X86_64
+ if (arch == AUDIT_ARCH_X86_64) {
+ audit_syscall_entry(regs->orig_ax, regs->di,
+ regs->si, regs->dx, regs->r10);
+ } else
#endif
+ {
+ audit_syscall_entry(regs->orig_ax, regs->bx,
+ regs->cx, regs->dx, regs->si);
+ }
+}
/*
- * We must return the syscall number to actually look up in the table.
- * This can be -1L to skip running any syscall at all.
+ * We can return 0 to resume the syscall or anything else to go to phase
+ * 2. If we resume the syscall, we need to put something appropriate in
+ * regs->orig_ax.
+ *
+ * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
+ * are fully functional.
+ *
+ * For phase 2's benefit, our return value is:
+ * 0: resume the syscall
+ * 1: go to phase 2; no seccomp phase 2 needed
+ * anything else: go to phase 2; pass return value to seccomp
*/
-long syscall_trace_enter(struct pt_regs *regs)
+unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
+{
+ unsigned long ret = 0;
+ u32 work;
+
+ BUG_ON(regs != task_pt_regs(current));
+
+ work = ACCESS_ONCE(current_thread_info()->flags) &
+ _TIF_WORK_SYSCALL_ENTRY;
+
+ /*
+ * If TIF_NOHZ is set, we are required to call user_exit() before
+ * doing anything that could touch RCU.
+ */
+ if (work & _TIF_NOHZ) {
+ user_exit();
+ work &= ~TIF_NOHZ;
+ }
+
+#ifdef CONFIG_SECCOMP
+ /*
+ * Do seccomp first -- it should minimize exposure of other
+ * code, and keeping seccomp fast is probably more valuable
+ * than the rest of this.
+ */
+ if (work & _TIF_SECCOMP) {
+ struct seccomp_data sd;
+
+ sd.arch = arch;
+ sd.nr = regs->orig_ax;
+ sd.instruction_pointer = regs->ip;
+#ifdef CONFIG_X86_64
+ if (arch == AUDIT_ARCH_X86_64) {
+ sd.args[0] = regs->di;
+ sd.args[1] = regs->si;
+ sd.args[2] = regs->dx;
+ sd.args[3] = regs->r10;
+ sd.args[4] = regs->r8;
+ sd.args[5] = regs->r9;
+ } else
+#endif
+ {
+ sd.args[0] = regs->bx;
+ sd.args[1] = regs->cx;
+ sd.args[2] = regs->dx;
+ sd.args[3] = regs->si;
+ sd.args[4] = regs->di;
+ sd.args[5] = regs->bp;
+ }
+
+ BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
+ BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
+
+ ret = seccomp_phase1(&sd);
+ if (ret == SECCOMP_PHASE1_SKIP) {
+ regs->orig_ax = -1;
+ ret = 0;
+ } else if (ret != SECCOMP_PHASE1_OK) {
+ return ret; /* Go directly to phase 2 */
+ }
+
+ work &= ~_TIF_SECCOMP;
+ }
+#endif
+
+ /* Do our best to finish without phase 2. */
+ if (work == 0)
+ return ret; /* seccomp and/or nohz only (ret == 0 here) */
+
+#ifdef CONFIG_AUDITSYSCALL
+ if (work == _TIF_SYSCALL_AUDIT) {
+ /*
+ * If there is no more work to be done except auditing,
+ * then audit in phase 1. Phase 2 always audits, so, if
+ * we audit here, then we can't go on to phase 2.
+ */
+ do_audit_syscall_entry(regs, arch);
+ return 0;
+ }
+#endif
+
+ return 1; /* Something is enabled that we can't handle in phase 1 */
+}
+
+/* Returns the syscall nr to run (which should match regs->orig_ax). */
+long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
+ unsigned long phase1_result)
{
long ret = 0;
+ u32 work = ACCESS_ONCE(current_thread_info()->flags) &
+ _TIF_WORK_SYSCALL_ENTRY;
- user_exit();
+ BUG_ON(regs != task_pt_regs(current));
/*
* If we stepped into a sysenter/syscall insn, it trapped in
@@ -1467,17 +1569,21 @@ long syscall_trace_enter(struct pt_regs *regs)
* do_debug() and we need to set it again to restore the user
* state. If we entered on the slow path, TF was already set.
*/
- if (test_thread_flag(TIF_SINGLESTEP))
+ if (work & _TIF_SINGLESTEP)
regs->flags |= X86_EFLAGS_TF;
- /* do the secure computing check first */
- if (secure_computing(regs->orig_ax)) {
+#ifdef CONFIG_SECCOMP
+ /*
+ * Call seccomp_phase2 before running the other hooks so that
+ * they can see any changes made by a seccomp tracer.
+ */
+ if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
/* seccomp failures shouldn't expose any additional code. */
- ret = -1L;
- goto out;
+ return -1;
}
+#endif
- if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+ if (unlikely(work & _TIF_SYSCALL_EMU))
ret = -1L;
if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
@@ -1487,23 +1593,22 @@ long syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->orig_ax);
- if (IS_IA32)
- audit_syscall_entry(AUDIT_ARCH_I386,
- regs->orig_ax,
- regs->bx, regs->cx,
- regs->dx, regs->si);
-#ifdef CONFIG_X86_64
- else
- audit_syscall_entry(AUDIT_ARCH_X86_64,
- regs->orig_ax,
- regs->di, regs->si,
- regs->dx, regs->r10);
-#endif
+ do_audit_syscall_entry(regs, arch);
-out:
return ret ?: regs->orig_ax;
}
+long syscall_trace_enter(struct pt_regs *regs)
+{
+ u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+ unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
+
+ if (phase1_result == 0)
+ return regs->orig_ax;
+ else
+ return syscall_trace_enter_phase2(regs, arch, phase1_result);
+}
+
void syscall_trace_leave(struct pt_regs *regs)
{
bool step;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index ff898bbf579d..176a0f99d4da 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -498,6 +498,24 @@ void force_hpet_resume(void)
}
/*
+ * According to the datasheet e6xx systems have the HPET hardwired to
+ * 0xfed00000
+ */
+static void e6xx_force_enable_hpet(struct pci_dev *dev)
+{
+ if (hpet_address || force_hpet_address)
+ return;
+
+ force_hpet_address = 0xFED00000;
+ force_hpet_resume_type = NONE_FORCE_HPET_RESUME;
+ dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at "
+ "0x%lx\n", force_hpet_address);
+ return;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU,
+ e6xx_force_enable_hpet);
+
+/*
* HPET MSI on some boards (ATI SB700/SB800) has side effect on
* floppy DMA. Disable HPET MSI on such platforms.
* See erratum #27 (Misinterpreted MSI Requests May Result in
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 41ead8d3bc0b..ab08aa2276fb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -879,6 +879,15 @@ void __init setup_arch(char **cmdline_p)
KERNEL_PGD_PTRS);
load_cr3(swapper_pg_dir);
+ /*
+ * Note: Quark X1000 CPUs advertise PGE incorrectly and require
+ * a cr3 based tlb flush, so the following __flush_tlb_all()
+ * will not flush anything because the cpu quirk which clears
+ * X86_FEATURE_PGE has not been invoked yet. Though due to the
+ * load_cr3() above the TLB has been flushed already. The
+ * quirk is invoked before subsequent calls to __flush_tlb_all()
+ * so proper operation is guaranteed.
+ */
__flush_tlb_all();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
@@ -1119,7 +1128,6 @@ void __init setup_arch(char **cmdline_p)
setup_real_mode();
memblock_set_current_limit(get_max_mapped());
- dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
/*
* NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -1150,6 +1158,7 @@ void __init setup_arch(char **cmdline_p)
early_acpi_boot_init();
initmem_init();
+ dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
/*
* Reserve memory for crash kernel after SRAT is parsed so that it
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 2851d63c1202..ed37a768d0fc 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -675,6 +675,11 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* handler too.
*/
regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
+ /*
+ * Ensure the signal handler starts with the new fpu state.
+ */
+ if (used_math())
+ drop_init_fpu(current);
}
signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5492798930ef..4d2128ac70bd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -111,7 +111,6 @@ atomic_t init_deasserted;
static void smp_callin(void)
{
int cpuid, phys_id;
- unsigned long timeout;
/*
* If waken up by an INIT in an 82489DX configuration
@@ -130,37 +129,6 @@ static void smp_callin(void)
* (This works even if the APIC is not enabled.)
*/
phys_id = read_apic_id();
- if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
- panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
- phys_id, cpuid);
- }
- pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
- /*
- * STARTUP IPIs are fragile beasts as they might sometimes
- * trigger some glue motherboard logic. Complete APIC bus
- * silence for 1 second, this overestimates the time the
- * boot CPU is spending to send the up to 2 STARTUP IPIs
- * by a factor of two. This should be enough.
- */
-
- /*
- * Waiting 2s total for startup (udelay is not yet working)
- */
- timeout = jiffies + 2*HZ;
- while (time_before(jiffies, timeout)) {
- /*
- * Has the boot CPU finished it's STARTUP sequence?
- */
- if (cpumask_test_cpu(cpuid, cpu_callout_mask))
- break;
- cpu_relax();
- }
-
- if (!time_before(jiffies, timeout)) {
- panic("%s: CPU%d started up but did not get a callout!\n",
- __func__, cpuid);
- }
/*
* the boot CPU has finished the init stage and is spinning
@@ -168,10 +136,6 @@ static void smp_callin(void)
* CPU, first the APIC. (this is probably redundant on most
* boards)
*/
-
- pr_debug("CALLIN, before setup_local_APIC()\n");
- if (apic->smp_callin_clear_local_apic)
- apic->smp_callin_clear_local_apic();
setup_local_APIC();
end_local_APIC_setup();
@@ -300,11 +264,19 @@ void smp_store_cpu_info(int id)
}
static bool
+topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+ return (cpu_to_node(cpu1) == cpu_to_node(cpu2));
+}
+
+static bool
topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
{
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
- return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
+ return !WARN_ONCE(!topology_same_node(c, o),
"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
"[node: %d != %d]. Ignoring dependency.\n",
cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
@@ -345,17 +317,44 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
-static bool match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+/*
+ * Unlike the other levels, we do not enforce keeping a
+ * multicore group inside a NUMA node. If this happens, we will
+ * discard the MC level of the topology later.
+ */
+static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
- if (c->phys_proc_id == o->phys_proc_id) {
- if (cpu_has(c, X86_FEATURE_AMD_DCM))
- return true;
-
- return topology_sane(c, o, "mc");
- }
+ if (c->phys_proc_id == o->phys_proc_id)
+ return true;
return false;
}
+static struct sched_domain_topology_level numa_inside_package_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+ { NULL, },
+};
+/*
+ * set_sched_topology() sets the topology internal to a CPU. The
+ * NUMA topologies are layered on top of it to build the full
+ * system topology.
+ *
+ * If NUMA nodes are observed to occur within a CPU package, this
+ * function should be called. It forces the sched domain code to
+ * only use the SMT level for the CPU portion of the topology.
+ * This essentially falls back to relying on NUMA information
+ * from the SRAT table to describe the entire system topology
+ * (except for hyperthreads).
+ */
+static void primarily_use_numa_for_topology(void)
+{
+ set_sched_topology(numa_inside_package_topology);
+}
+
void set_cpu_sibling_map(int cpu)
{
bool has_smt = smp_num_siblings > 1;
@@ -392,7 +391,7 @@ void set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);
- if ((i == cpu) || (has_mp && match_mc(c, o))) {
+ if ((i == cpu) || (has_mp && match_die(c, o))) {
link_mask(core, cpu, i);
/*
@@ -414,6 +413,8 @@ void set_cpu_sibling_map(int cpu)
} else if (i != cpu && !c->booted_cores)
c->booted_cores = cpu_data(i).booted_cores;
}
+ if (match_die(c, o) && !topology_same_node(c, o))
+ primarily_use_numa_for_topology();
}
}
@@ -757,8 +758,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
unsigned long start_ip = real_mode_header->trampoline_start;
unsigned long boot_error = 0;
- int timeout;
int cpu0_nmi_registered = 0;
+ unsigned long timeout;
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
@@ -806,6 +807,15 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
}
/*
+ * AP might wait on cpu_callout_mask in cpu_init() with
+ * cpu_initialized_mask set if previous attempt to online
+ * it timed-out. Clear cpu_initialized_mask so that after
+ * INIT/SIPI it could start with a clean state.
+ */
+ cpumask_clear_cpu(cpu, cpu_initialized_mask);
+ smp_mb();
+
+ /*
* Wake up a CPU in difference cases:
* - Use the method in the APIC driver if it's defined
* Otherwise,
@@ -819,53 +829,38 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
if (!boot_error) {
/*
- * allow APs to start initializing.
+ * Wait 10s total for a response from AP
*/
- pr_debug("Before Callout %d\n", cpu);
- cpumask_set_cpu(cpu, cpu_callout_mask);
- pr_debug("After Callout %d\n", cpu);
+ boot_error = -1;
+ timeout = jiffies + 10*HZ;
+ while (time_before(jiffies, timeout)) {
+ if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
+ /*
+ * Tell AP to proceed with initialization
+ */
+ cpumask_set_cpu(cpu, cpu_callout_mask);
+ boot_error = 0;
+ break;
+ }
+ udelay(100);
+ schedule();
+ }
+ }
+ if (!boot_error) {
/*
- * Wait 5s total for a response
+ * Wait till AP completes initial initialization
*/
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpumask_test_cpu(cpu, cpu_callin_mask))
- break; /* It has booted */
- udelay(100);
+ while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
/*
* Allow other tasks to run while we wait for the
* AP to come online. This also gives a chance
* for the MTRR work(triggered by the AP coming online)
* to be completed in the stop machine context.
*/
+ udelay(100);
schedule();
}
-
- if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
- print_cpu_msr(&cpu_data(cpu));
- pr_debug("CPU%d: has booted.\n", cpu);
- } else {
- boot_error = 1;
- if (*trampoline_status == 0xA5A5A5A5)
- /* trampoline started but...? */
- pr_err("CPU%d: Stuck ??\n", cpu);
- else
- /* trampoline code not run */
- pr_err("CPU%d: Not responding\n", cpu);
- if (apic->inquire_remote_apic)
- apic->inquire_remote_apic(apicid);
- }
- }
-
- if (boot_error) {
- /* Try to put things back the way they were before ... */
- numa_remove_cpu(cpu); /* was set by numa_add_cpu */
-
- /* was set by do_boot_cpu() */
- cpumask_clear_cpu(cpu, cpu_callout_mask);
-
- /* was set by cpu_init() */
- cpumask_clear_cpu(cpu, cpu_initialized_mask);
}
/* mark "stuck" area as not stuck */
@@ -1143,10 +1138,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
enable_IO_APIC();
bsp_end_local_APIC_setup();
-
- if (apic->setup_portio_remap)
- apic->setup_portio_remap();
-
smpboot_setup_io_apic();
/*
* Set up local APIC timer on boot CPU.
@@ -1292,6 +1283,9 @@ static void remove_siblinginfo(int cpu)
for_each_cpu(sibling, cpu_sibling_mask(cpu))
cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+ for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
+ cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
+ cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(cpu_sibling_mask(cpu));
cpumask_clear(cpu_core_mask(cpu));
c->phys_proc_id = 0;
@@ -1322,6 +1316,8 @@ void cpu_disable_common(void)
fixup_irqs();
}
+static DEFINE_PER_CPU(struct completion, die_complete);
+
int native_cpu_disable(void)
{
int ret;
@@ -1331,26 +1327,24 @@ int native_cpu_disable(void)
return ret;
clear_local_APIC();
-
+ init_completion(&per_cpu(die_complete, smp_processor_id()));
cpu_disable_common();
+
return 0;
}
void native_cpu_die(unsigned int cpu)
{
/* We don't do anything here: idle task is faking death itself. */
- unsigned int i;
+ wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
- for (i = 0; i < 10; i++) {
- /* They ack this in play_dead by setting CPU_DEAD */
- if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
- if (system_state == SYSTEM_RUNNING)
- pr_info("CPU %u is now offline\n", cpu);
- return;
- }
- msleep(100);
+ /* They ack this in play_dead() by setting CPU_DEAD */
+ if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+ if (system_state == SYSTEM_RUNNING)
+ pr_info("CPU %u is now offline\n", cpu);
+ } else {
+ pr_err("CPU %u didn't die...\n", cpu);
}
- pr_err("CPU %u didn't die...\n", cpu);
}
void play_dead_common(void)
@@ -1362,6 +1356,7 @@ void play_dead_common(void)
mb();
/* Ack it */
__this_cpu_write(cpu_state, CPU_DEAD);
+ complete(&per_cpu(die_complete, smp_processor_id()));
/*
* With physical CPU hotplug, we should halt the cpu
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index bf7ef5ce29df..0fa29609b2c4 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -68,6 +68,8 @@ static struct irqaction irq0 = {
void __init setup_default_timer_irq(void)
{
+ if (!nr_legacy_irqs())
+ return;
setup_irq(0, &irq0);
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 56b0c338061e..b7e50bba3bbb 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -950,7 +950,7 @@ core_initcall(cpufreq_tsc);
static struct clocksource clocksource_tsc;
/*
- * We compare the TSC to the cycle_last value in the clocksource
+ * We used to compare the TSC to the cycle_last value in the clocksource
* structure to avoid a nasty time-warp. This can be observed in a
* very small window right after one CPU updated cycle_last under
* xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
@@ -960,26 +960,23 @@ static struct clocksource clocksource_tsc;
* due to the unsigned delta calculation of the time keeping core
* code, which is necessary to support wrapping clocksources like pm
* timer.
+ *
+ * This sanity check is now done in the core timekeeping code.
+ * checking the result of read_tsc() - cycle_last for being negative.
+ * That works because CLOCKSOURCE_MASK(64) does not mask out any bit.
*/
static cycle_t read_tsc(struct clocksource *cs)
{
- cycle_t ret = (cycle_t)get_cycles();
-
- return ret >= clocksource_tsc.cycle_last ?
- ret : clocksource_tsc.cycle_last;
-}
-
-static void resume_tsc(struct clocksource *cs)
-{
- if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
- clocksource_tsc.cycle_last = 0;
+ return (cycle_t)get_cycles();
}
+/*
+ * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
+ */
static struct clocksource clocksource_tsc = {
.name = "tsc",
.rating = 300,
.read = read_tsc,
- .resume = resume_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY,
@@ -1169,14 +1166,17 @@ void __init tsc_init(void)
x86_init.timers.tsc_pre_init();
- if (!cpu_has_tsc)
+ if (!cpu_has_tsc) {
+ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
+ }
tsc_khz = x86_platform.calibrate_tsc();
cpu_khz = tsc_khz;
if (!tsc_khz) {
mark_tsc_unstable("could not calculate TSC khz");
+ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index b99b9ad8540c..ee22c1d93ae5 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -152,7 +152,7 @@ static void __init detect_vsmp_box(void)
is_vsmp = 1;
}
-int is_vsmp_box(void)
+static int is_vsmp_box(void)
{
if (is_vsmp != -1)
return is_vsmp;
@@ -166,7 +166,7 @@ int is_vsmp_box(void)
static void __init detect_vsmp_box(void)
{
}
-int is_vsmp_box(void)
+static int is_vsmp_box(void)
{
return 0;
}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e1e1e80fc6a6..957779f4eb40 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -216,7 +216,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
*/
regs->orig_ax = syscall_nr;
regs->ax = -ENOSYS;
- tmp = secure_computing(syscall_nr);
+ tmp = secure_computing();
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
warn_bad_vsyscall(KERN_DEBUG, regs,
"seccomp tried to change syscall nr or ip");
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index 9531fbb123ba..c7d791f32b98 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -31,29 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
gtod_write_begin(vdata);
/* copy vsyscall data */
- vdata->vclock_mode = tk->clock->archdata.vclock_mode;
- vdata->cycle_last = tk->clock->cycle_last;
- vdata->mask = tk->clock->mask;
- vdata->mult = tk->mult;
- vdata->shift = tk->shift;
+ vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode;
+ vdata->cycle_last = tk->tkr.cycle_last;
+ vdata->mask = tk->tkr.mask;
+ vdata->mult = tk->tkr.mult;
+ vdata->shift = tk->tkr.shift;
vdata->wall_time_sec = tk->xtime_sec;
- vdata->wall_time_snsec = tk->xtime_nsec;
+ vdata->wall_time_snsec = tk->tkr.xtime_nsec;
vdata->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
- vdata->monotonic_time_snsec = tk->xtime_nsec
+ vdata->monotonic_time_snsec = tk->tkr.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec
- << tk->shift);
+ << tk->tkr.shift);
while (vdata->monotonic_time_snsec >=
- (((u64)NSEC_PER_SEC) << tk->shift)) {
+ (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
vdata->monotonic_time_snsec -=
- ((u64)NSEC_PER_SEC) << tk->shift;
+ ((u64)NSEC_PER_SEC) << tk->tkr.shift;
vdata->monotonic_time_sec++;
}
vdata->wall_time_coarse_sec = tk->xtime_sec;
- vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift);
+ vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
+ tk->tkr.shift);
vdata->monotonic_time_coarse_sec =
vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a4b451c6addf..4c540c4719d8 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -8,6 +8,7 @@
#include <linux/bootmem.h>
#include <linux/compat.h>
+#include <linux/cpu.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/sigframe.h>
@@ -24,7 +25,9 @@ u64 pcntxt_mask;
struct xsave_struct *init_xstate_buf;
static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
-static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
+static unsigned int *xstate_offsets, *xstate_sizes;
+static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8];
+static unsigned int xstate_features;
/*
* If a processor implementation discern that a processor state component is
@@ -268,8 +271,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
return -1;
- drop_init_fpu(tsk); /* trigger finit */
-
return 0;
}
@@ -283,7 +284,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
if (use_xsave()) {
/* These bits must be zero. */
- xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
+ memset(xsave_hdr->reserved, 0, 48);
/*
* Init the state that is not present in the memory
@@ -399,8 +400,11 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
set_used_math();
}
- if (use_eager_fpu())
+ if (use_eager_fpu()) {
+ preempt_disable();
math_state_restore();
+ preempt_enable();
+ }
return err;
} else {
@@ -479,6 +483,52 @@ static void __init setup_xstate_features(void)
}
/*
+ * This function sets up offsets and sizes of all extended states in
+ * xsave area. This supports both standard format and compacted format
+ * of the xsave aread.
+ *
+ * Input: void
+ * Output: void
+ */
+void setup_xstate_comp(void)
+{
+ unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8];
+ int i;
+
+ /*
+ * The FP xstates and SSE xstates are legacy states. They are always
+ * in the fixed offsets in the xsave area in either compacted form
+ * or standard form.
+ */
+ xstate_comp_offsets[0] = 0;
+ xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space);
+
+ if (!cpu_has_xsaves) {
+ for (i = 2; i < xstate_features; i++) {
+ if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
+ xstate_comp_offsets[i] = xstate_offsets[i];
+ xstate_comp_sizes[i] = xstate_sizes[i];
+ }
+ }
+ return;
+ }
+
+ xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+ for (i = 2; i < xstate_features; i++) {
+ if (test_bit(i, (unsigned long *)&pcntxt_mask))
+ xstate_comp_sizes[i] = xstate_sizes[i];
+ else
+ xstate_comp_sizes[i] = 0;
+
+ if (i > 2)
+ xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
+ + xstate_comp_sizes[i-1];
+
+ }
+}
+
+/*
* setup the xstate image representing the init state
*/
static void __init setup_init_fpu_buf(void)
@@ -496,15 +546,21 @@ static void __init setup_init_fpu_buf(void)
setup_xstate_features();
+ if (cpu_has_xsaves) {
+ init_xstate_buf->xsave_hdr.xcomp_bv =
+ (u64)1 << 63 | pcntxt_mask;
+ init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask;
+ }
+
/*
* Init all the features state with header_bv being 0x0
*/
- xrstor_state(init_xstate_buf, -1);
+ xrstor_state_booting(init_xstate_buf, -1);
/*
* Dump the init state again. This is to identify the init state
* of any feature which is not represented by all zero's.
*/
- xsave_state(init_xstate_buf, -1);
+ xsave_state_booting(init_xstate_buf, -1);
}
static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
@@ -520,6 +576,30 @@ static int __init eager_fpu_setup(char *s)
}
__setup("eagerfpu=", eager_fpu_setup);
+
+/*
+ * Calculate total size of enabled xstates in XCR0/pcntxt_mask.
+ */
+static void __init init_xstate_size(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ int i;
+
+ if (!cpu_has_xsaves) {
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ xstate_size = ebx;
+ return;
+ }
+
+ xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ for (i = 2; i < 64; i++) {
+ if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
+ cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
+ xstate_size += eax;
+ }
+ }
+}
+
/*
* Enable and initialize the xsave feature.
*/
@@ -551,8 +631,7 @@ static void __init xstate_enable_boot_cpu(void)
/*
* Recompute the context size for enabled features
*/
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- xstate_size = ebx;
+ init_xstate_size();
update_regset_xstate_info(xstate_size, pcntxt_mask);
prepare_fx_sw_frame();
@@ -572,8 +651,9 @@ static void __init xstate_enable_boot_cpu(void)
}
}
- pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
- pcntxt_mask, xstate_size);
+ pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n",
+ pcntxt_mask, xstate_size,
+ cpu_has_xsaves ? "compacted form" : "standard form");
}
/*
@@ -635,3 +715,26 @@ void eager_fpu_init(void)
else
fxrstor_checking(&init_xstate_buf->i387);
}
+
+/*
+ * Given the xsave area and a state inside, this function returns the
+ * address of the state.
+ *
+ * This is the API that is called to get xstate address in either
+ * standard format or compacted format of xsave area.
+ *
+ * Inputs:
+ * xsave: base address of the xsave area;
+ * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE,
+ * etc.)
+ * Output:
+ * address of the state in the xsave area.
+ */
+void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
+{
+ int feature = fls64(xstate) - 1;
+ if (!test_bit(feature, (unsigned long *)&pcntxt_mask))
+ return NULL;
+
+ return (void *)xsave + xstate_comp_offsets[feature];
+}