diff options
Diffstat (limited to 'arch/x86/kernel')
75 files changed, 7372 insertions, 4971 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bde3993624f1..8f1e77440b2b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -39,8 +39,6 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o -obj-$(CONFIG_PREEMPT) += preempt.o - obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o @@ -71,6 +69,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o +obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index a531f6564ed0..a142e77693e1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -31,6 +31,7 @@ #include <linux/module.h> #include <linux/dmi.h> #include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/slab.h> #include <linux/bootmem.h> #include <linux/ioport.h> @@ -43,6 +44,7 @@ #include <asm/io.h> #include <asm/mpspec.h> #include <asm/smp.h> +#include <asm/i8259.h> #include "sleep.h" /* To include x86_acpi_suspend_lowlevel */ static int __initdata acpi_force = 0; @@ -93,44 +95,7 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; -static unsigned int gsi_to_irq(unsigned int gsi) -{ - unsigned int irq = gsi + NR_IRQS_LEGACY; - unsigned int i; - - for (i = 0; i < NR_IRQS_LEGACY; i++) { - if (isa_irq_to_gsi[i] == gsi) { - return i; - } - } - - /* Provide an identity mapping of gsi == irq - * except on truly weird platforms that have - * non isa irqs in the first 16 gsis. 
- */ - if (gsi >= NR_IRQS_LEGACY) - irq = gsi; - else - irq = gsi_top + gsi; - - return irq; -} - -static u32 irq_to_gsi(int irq) -{ - unsigned int gsi; - - if (irq < NR_IRQS_LEGACY) - gsi = isa_irq_to_gsi[irq]; - else if (irq < gsi_top) - gsi = irq; - else if (irq < (gsi_top + NR_IRQS_LEGACY)) - gsi = irq - gsi_top; - else - gsi = 0xffffffff; - - return gsi; -} +#define ACPI_INVALID_GSI INT_MIN /* * This is just a simple wrapper around early_ioremap(), @@ -341,11 +306,145 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #endif /*CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC +#define MP_ISA_BUS 0 + +static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, + u32 gsi) +{ + int ioapic; + int pin; + struct mpc_intsrc mp_irq; + + /* + * Convert 'gsi' to 'ioapic.pin'. + */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return; + pin = mp_find_ioapic_pin(ioapic, gsi); + + /* + * TBD: This check is for faulty timer entries, where the override + * erroneously sets the trigger to level, resulting in a HUGE + * increase of timer interrupts! + */ + if ((bus_irq == 0) && (trigger == 3)) + trigger = 1; + + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; /* IRQ */ + mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ + mp_irq.dstirq = pin; /* INTIN# */ + + mp_save_irq(&mp_irq); + + /* + * Reset default identity mapping if gsi is also an legacy IRQ, + * otherwise there will be more than one entry with the same GSI + * and acpi_isa_irq_to_gsi() may give wrong result. 
+ */ + if (gsi < nr_legacy_irqs() && isa_irq_to_gsi[gsi] == gsi) + isa_irq_to_gsi[gsi] = ACPI_INVALID_GSI; + isa_irq_to_gsi[bus_irq] = gsi; +} + +static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, + int polarity) +{ +#ifdef CONFIG_X86_MPPARSE + struct mpc_intsrc mp_irq; + struct pci_dev *pdev; + unsigned char number; + unsigned int devfn; + int ioapic; + u8 pin; + + if (!acpi_ioapic) + return 0; + if (!dev || !dev_is_pci(dev)) + return 0; + + pdev = to_pci_dev(dev); + number = pdev->bus->number; + devfn = pdev->devfn; + pin = pdev->pin; + /* print the entry should happen on mptable identically */ + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); + mp_irq.srcbus = number; + mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + ioapic = mp_find_ioapic(gsi); + mp_irq.dstapic = mpc_ioapic_id(ioapic); + mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); + + mp_save_irq(&mp_irq); +#endif + return 0; +} + +static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, + int polarity) +{ + int irq, node; + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return gsi; + + /* Don't set up the ACPI SCI because it's already set up */ + if (acpi_gbl_FADT.sci_interrupt == gsi) + return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); + + trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1; + polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1; + node = dev ? 
dev_to_node(dev) : NUMA_NO_NODE; + if (mp_set_gsi_attr(gsi, trigger, polarity, node)) { + pr_warn("Failed to set pin attr for GSI%d\n", gsi); + return -1; + } + + irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); + if (irq < 0) + return irq; + + if (enable_update_mptable) + mp_config_acpi_gsi(dev, gsi, trigger, polarity); + + return irq; +} + +static void mp_unregister_gsi(u32 gsi) +{ + int irq; + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return; + + if (acpi_gbl_FADT.sci_interrupt == gsi) + return; + + irq = mp_map_gsi_to_irq(gsi, 0); + if (irq > 0) + mp_unmap_irq(irq); +} + +static struct irq_domain_ops acpi_irqdomain_ops = { + .map = mp_irqdomain_map, + .unmap = mp_irqdomain_unmap, +}; static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { struct acpi_madt_io_apic *ioapic = NULL; + struct ioapic_domain_cfg cfg = { + .type = IOAPIC_DOMAIN_DYNAMIC, + .ops = &acpi_irqdomain_ops, + }; ioapic = (struct acpi_madt_io_apic *)header; @@ -354,8 +453,12 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) acpi_table_print_madt_entry(header); - mp_register_ioapic(ioapic->id, - ioapic->address, ioapic->global_irq_base); + /* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */ + if (ioapic->global_irq_base < nr_legacy_irqs()) + cfg.type = IOAPIC_DOMAIN_LEGACY; + + mp_register_ioapic(ioapic->id, ioapic->address, ioapic->global_irq_base, + &cfg); return 0; } @@ -378,11 +481,6 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; - /* - * mp_config_acpi_legacy_irqs() already setup IRQs < 16 - * If GSI is < 16, this will update its flags, - * else it will create a new mp_irqs[] entry. 
- */ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); /* @@ -504,25 +602,32 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) outb(new >> 8, 0x4d1); } -int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) +int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) { - *irq = gsi_to_irq(gsi); - -#ifdef CONFIG_X86_IO_APIC - if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) - setup_IO_APIC_irq_extra(gsi); -#endif + int irq; + if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { + *irqp = gsi; + } else { + irq = mp_map_gsi_to_irq(gsi, + IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); + if (irq < 0) + return -1; + *irqp = irq; + } return 0; } EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) { - if (isa_irq >= 16) - return -1; - *gsi = irq_to_gsi(isa_irq); - return 0; + if (isa_irq < nr_legacy_irqs() && + isa_irq_to_gsi[isa_irq] != ACPI_INVALID_GSI) { + *gsi = isa_irq_to_gsi[isa_irq]; + return 0; + } + + return -1; } static int acpi_register_gsi_pic(struct device *dev, u32 gsi, @@ -542,15 +647,25 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi, static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, int trigger, int polarity) { + int irq = gsi; + #ifdef CONFIG_X86_IO_APIC - gsi = mp_register_gsi(dev, gsi, trigger, polarity); + irq = mp_register_gsi(dev, gsi, trigger, polarity); #endif - return gsi; + return irq; +} + +static void acpi_unregister_gsi_ioapic(u32 gsi) +{ +#ifdef CONFIG_X86_IO_APIC + mp_unregister_gsi(gsi); +#endif } int (*__acpi_register_gsi)(struct device *dev, u32 gsi, int trigger, int polarity) = acpi_register_gsi_pic; +void (*__acpi_unregister_gsi)(u32 gsi) = NULL; #ifdef CONFIG_ACPI_SLEEP int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel; @@ -564,32 +679,22 @@ int (*acpi_suspend_lowlevel)(void); */ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) { - unsigned int irq; - unsigned int plat_gsi = gsi; - - plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, 
polarity); - irq = gsi_to_irq(plat_gsi); - - return irq; + return __acpi_register_gsi(dev, gsi, trigger, polarity); } EXPORT_SYMBOL_GPL(acpi_register_gsi); void acpi_unregister_gsi(u32 gsi) { + if (__acpi_unregister_gsi) + __acpi_unregister_gsi(gsi); } EXPORT_SYMBOL_GPL(acpi_unregister_gsi); -void __init acpi_set_irq_model_pic(void) -{ - acpi_irq_model = ACPI_IRQ_MODEL_PIC; - __acpi_register_gsi = acpi_register_gsi_pic; - acpi_ioapic = 0; -} - -void __init acpi_set_irq_model_ioapic(void) +static void __init acpi_set_irq_model_ioapic(void) { acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; __acpi_register_gsi = acpi_register_gsi_ioapic; + __acpi_unregister_gsi = acpi_unregister_gsi_ioapic; acpi_ioapic = 1; } @@ -825,9 +930,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). */ - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, - acpi_parse_lapic_addr_ovr, 0); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); @@ -852,9 +956,8 @@ static int __init acpi_parse_madt_lapic_entries(void) * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). 
*/ - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, - acpi_parse_lapic_addr_ovr, 0); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); @@ -882,11 +985,10 @@ static int __init acpi_parse_madt_lapic_entries(void) return count; } - x2count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI, - acpi_parse_x2apic_nmi, 0); - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0); + x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI, + acpi_parse_x2apic_nmi, 0); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, + acpi_parse_lapic_nmi, 0); if (count < 0 || x2count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); /* TBD: Cleanup to allow fallback to MPS */ @@ -897,44 +999,7 @@ static int __init acpi_parse_madt_lapic_entries(void) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC -#define MP_ISA_BUS 0 - -void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) -{ - int ioapic; - int pin; - struct mpc_intsrc mp_irq; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = mp_find_ioapic_pin(ioapic, gsi); - - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! 
- */ - if ((bus_irq == 0) && (trigger == 3)) - trigger = 1; - - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger << 2) | polarity; - mp_irq.srcbus = MP_ISA_BUS; - mp_irq.srcbusirq = bus_irq; /* IRQ */ - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ - mp_irq.dstirq = pin; /* INTIN# */ - - mp_save_irq(&mp_irq); - - isa_irq_to_gsi[bus_irq] = gsi; -} - -void __init mp_config_acpi_legacy_irqs(void) +static void __init mp_config_acpi_legacy_irqs(void) { int i; struct mpc_intsrc mp_irq; @@ -952,7 +1017,7 @@ void __init mp_config_acpi_legacy_irqs(void) * Use the default configuration for the IRQs 0-15. Unless * overridden by (MADT) interrupt source override entries. */ - for (i = 0; i < 16; i++) { + for (i = 0; i < nr_legacy_irqs(); i++) { int ioapic, pin; unsigned int dstapic; int idx; @@ -1000,84 +1065,6 @@ void __init mp_config_acpi_legacy_irqs(void) } } -static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, - int polarity) -{ -#ifdef CONFIG_X86_MPPARSE - struct mpc_intsrc mp_irq; - struct pci_dev *pdev; - unsigned char number; - unsigned int devfn; - int ioapic; - u8 pin; - - if (!acpi_ioapic) - return 0; - if (!dev || !dev_is_pci(dev)) - return 0; - - pdev = to_pci_dev(dev); - number = pdev->bus->number; - devfn = pdev->devfn; - pin = pdev->pin; - /* print the entry should happen on mptable identically */ - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | - (polarity == ACPI_ACTIVE_HIGH ? 
1 : 3); - mp_irq.srcbus = number; - mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); - ioapic = mp_find_ioapic(gsi); - mp_irq.dstapic = mpc_ioapic_id(ioapic); - mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); - - mp_save_irq(&mp_irq); -#endif - return 0; -} - -int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) -{ - int ioapic; - int ioapic_pin; - struct io_apic_irq_attr irq_attr; - int ret; - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return gsi; - - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } - - ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); - - if (ioapic_pin > MP_MAX_IOAPIC_PIN) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mpc_ioapic_id(ioapic), - ioapic_pin); - return gsi; - } - - if (enable_update_mptable) - mp_config_acpi_gsi(dev, gsi, trigger, polarity); - - set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, - trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 
0 : 1); - ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); - if (ret < 0) - gsi = INT_MIN; - - return gsi; -} - /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error @@ -1107,9 +1094,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) return -ENODEV; } - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic, - MAX_IO_APICS); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic, + MAX_IO_APICS); if (!count) { printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); return -ENODEV; @@ -1118,9 +1104,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) return count; } - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, - nr_irqs); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, + acpi_parse_int_src_ovr, nr_irqs); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); @@ -1139,9 +1124,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) /* Fill in identity legacy mappings where no override */ mp_config_acpi_legacy_irqs(); - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, - nr_irqs); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, + acpi_parse_nmi_src, nr_irqs); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); /* TBD: Cleanup to allow fallback to MPS */ diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index af5b08ab3b71..b708738d016e 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -146,7 +146,7 @@ static inline int is_apbt_capable(void) static int __init apbt_clockevent_register(void) { struct sfi_timer_table_entry *mtmr; - struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev); + struct apbt_dev *adev = this_cpu_ptr(&cpu_apbt_dev); mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); if (mtmr == NULL) { @@ -185,8 +185,6 @@ static void apbt_setup_irq(struct apbt_dev 
*adev) irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); - /* APB timer irqs are set up as mp_irqs, timer is edge type */ - __irq_set_handler(adev->irq, handle_edge_irq, 0, "edge"); } /* Should be called with per cpu */ @@ -200,7 +198,7 @@ void apbt_setup_secondary_clock(void) if (!cpu) return; - adev = &__get_cpu_var(cpu_apbt_dev); + adev = this_cpu_ptr(&cpu_apbt_dev); if (!adev->timer) { adev->timer = dw_apb_clockevent_init(cpu, adev->name, APBT_CLOCKEVENT_RATING, adev_virt_addr(adev), diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ad28db7e6bde..ba6cc041edb1 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -67,7 +67,7 @@ EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); /* * The highest APIC ID seen during enumeration. */ -unsigned int max_physical_apicid; +static unsigned int max_physical_apicid; /* * Bitmask of physically existing CPUs: @@ -561,7 +561,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); */ static void setup_APIC_timer(void) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + struct clock_event_device *levt = this_cpu_ptr(&lapic_events); if (this_cpu_has(X86_FEATURE_ARAT)) { lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; @@ -696,7 +696,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) static int __init calibrate_APIC_clock(void) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + struct clock_event_device *levt = this_cpu_ptr(&lapic_events); void (*real_handler)(struct clock_event_device *dev); unsigned long deltaj; long delta, deltatsc; @@ -1297,7 +1297,7 @@ void setup_local_APIC(void) unsigned int value, queued; int i, j, acked = 0; unsigned long long tsc = 0, ntsc; - long long max_loops = cpu_khz; + long long max_loops = cpu_khz ? 
cpu_khz : 1000000; if (cpu_has_tsc) rdtscll(tsc); @@ -1342,17 +1342,6 @@ void setup_local_APIC(void) /* always use the value from LDR */ early_per_cpu(x86_cpu_to_logical_apicid, cpu) = logical_smp_processor_id(); - - /* - * Some NUMA implementations (NUMAQ) don't initialize apicid to - * node mapping during NUMA init. Now that logical apicid is - * guaranteed to be known, give it another chance. This is already - * a bit too late - percpu allocation has already happened without - * proper NUMA affinity. - */ - if (apic->x86_32_numa_cpu_node) - set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu), - apic->x86_32_numa_cpu_node(cpu)); #endif /* @@ -1394,7 +1383,7 @@ void setup_local_APIC(void) break; } if (queued) { - if (cpu_has_tsc) { + if (cpu_has_tsc && cpu_khz) { rdtscll(ntsc); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else @@ -2053,8 +2042,6 @@ void __init connect_bsp_APIC(void) imcr_pic_to_apic(); } #endif - if (apic->enable_apic_mode) - apic->enable_apic_mode(); } /** @@ -2451,51 +2438,6 @@ static void apic_pm_activate(void) { } #ifdef CONFIG_X86_64 -static int apic_cluster_num(void) -{ - int i, clusters, zeros; - unsigned id; - u16 *bios_cpu_apicid; - DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - - for (i = 0; i < nr_cpu_ids; i++) { - /* are we being called early in kernel startup? */ - if (bios_cpu_apicid) { - id = bios_cpu_apicid[i]; - } else if (i < nr_cpu_ids) { - if (cpu_present(i)) - id = per_cpu(x86_bios_cpu_apicid, i); - else - continue; - } else - break; - - if (id != BAD_APICID) - __set_bit(APIC_CLUSTERID(id), clustermap); - } - - /* Problem: Partially populated chassis may not have CPUs in some of - * the APIC clusters they have been allocated. Only present CPUs have - * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. - * Since clusters are allocated sequentially, count zeros only if - * they are bounded by ones. 
- */ - clusters = 0; - zeros = 0; - for (i = 0; i < NUM_APIC_CLUSTERS; i++) { - if (test_bit(i, clustermap)) { - clusters += 1 + zeros; - zeros = 0; - } else - ++zeros; - } - - return clusters; -} - static int multi_checked; static int multi; @@ -2540,20 +2482,7 @@ static void dmi_check_multi(void) int apic_is_clustered_box(void) { dmi_check_multi(); - if (multi) - return 1; - - if (!is_vsmp_box()) - return 0; - - /* - * ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (apic_cluster_num() > 1) - return 1; - - return 0; + return multi; } #endif diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 7c1b29479513..de918c410eae 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -168,21 +168,16 @@ static struct apic apic_flat = { .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = flat_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, @@ -196,10 +191,7 @@ static struct apic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, @@ -283,7 +275,6 @@ static struct apic apic_physflat = { .disable_esr = 0, .dest_logical = 0, 
.check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = default_vector_allocation_domain, /* not needed, but shouldn't hurt: */ @@ -291,14 +282,10 @@ static struct apic apic_physflat = { .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = flat_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, @@ -312,10 +299,7 @@ static struct apic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 8c7c98249c20..b205cdbdbe6a 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -89,16 +89,6 @@ static const struct cpumask *noop_target_cpus(void) return cpumask_of(0); } -static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - -static unsigned long noop_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, const struct cpumask *mask) { @@ -133,27 +123,21 @@ struct apic apic_noop = { .target_cpus = noop_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = noop_check_apicid_used, - .check_apicid_present = noop_check_apicid_present, + .check_apicid_used = default_check_apicid_used, .vector_allocation_domain = 
noop_vector_allocation_domain, .init_apic_ldr = noop_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = noop_phys_pkg_id, - .mps_oem_check = NULL, - .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, @@ -168,12 +152,7 @@ struct apic apic_noop = { .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, - /* should be safe */ - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = noop_apic_read, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index a5b45df8bc88..4128b5fcb559 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -32,7 +32,7 @@ static int numachip_system __read_mostly; -static const struct apic apic_numachip __read_mostly; +static const struct apic apic_numachip; static unsigned int get_apic_id(unsigned long x) { @@ -217,21 +217,16 @@ static const struct apic apic_numachip __refconst = { .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = numachip_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = get_apic_id, .set_apic_id = 
set_apic_id, @@ -246,10 +241,7 @@ static const struct apic apic_numachip __refconst = { .send_IPI_self = numachip_send_IPI_self, .wakeup_secondary_cpu = numachip_wakeup_secondary, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, /* REMRD not supported */ .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index e4840aa7a255..c4a8d63f8220 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -31,11 +31,6 @@ static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) return 0; } -static unsigned long bigsmp_check_apicid_present(int bit) -{ - return 1; -} - static int bigsmp_early_logical_apicid(int cpu) { /* on bigsmp, logical apicid is the same as physical */ @@ -168,21 +163,16 @@ static struct apic apic_bigsmp = { .disable_esr = 1, .dest_logical = 0, .check_apicid_used = bigsmp_check_apicid_used, - .check_apicid_present = bigsmp_check_apicid_present, .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .setup_apic_routing = bigsmp_setup_apic_routing, - .multi_timer_check = NULL, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, - .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = bigsmp_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, @@ -196,11 +186,7 @@ static struct apic apic_bigsmp = { .send_IPI_all = bigsmp_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = true, - 
.smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 81e08eff05ee..1183d545da1e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -31,6 +31,7 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/syscore_ops.h> +#include <linux/irqdomain.h> #include <linux/msi.h> #include <linux/htirq.h> #include <linux/freezer.h> @@ -62,6 +63,16 @@ #define __apicdebuginit(type) static type __init +#define for_each_ioapic(idx) \ + for ((idx) = 0; (idx) < nr_ioapics; (idx)++) +#define for_each_ioapic_reverse(idx) \ + for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--) +#define for_each_pin(idx, pin) \ + for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++) +#define for_each_ioapic_pin(idx, pin) \ + for_each_ioapic((idx)) \ + for_each_pin((idx), (pin)) + #define for_each_irq_pin(entry, head) \ for (entry = head; entry; entry = entry->next) @@ -73,6 +84,17 @@ int sis_apic_bug = -1; static DEFINE_RAW_SPINLOCK(ioapic_lock); static DEFINE_RAW_SPINLOCK(vector_lock); +static DEFINE_MUTEX(ioapic_mutex); +static unsigned int ioapic_dynirq_base; +static int ioapic_initialized; + +struct mp_pin_info { + int trigger; + int polarity; + int node; + int set; + u32 count; +}; static struct ioapic { /* @@ -87,7 +109,9 @@ static struct ioapic { struct mpc_ioapic mp_config; /* IO APIC gsi routing info */ struct mp_ioapic_gsi gsi_config; - DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); + struct ioapic_domain_cfg irqdomain_cfg; + struct irq_domain *irqdomain; + struct mp_pin_info *pin_info; } ioapics[MAX_IO_APICS]; #define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver @@ -107,6 +131,41 @@ struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) return &ioapics[ioapic_idx].gsi_config; } +static inline int mp_ioapic_pin_count(int ioapic) +{ + struct mp_ioapic_gsi *gsi_cfg = 
mp_ioapic_gsi_routing(ioapic); + + return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; +} + +u32 mp_pin_to_gsi(int ioapic, int pin) +{ + return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin; +} + +/* + * Initialize all legacy IRQs and all pins on the first IOAPIC + * if we have legacy interrupt controller. Kernel boot option "pirq=" + * may rely on non-legacy pins on the first IOAPIC. + */ +static inline int mp_init_irq_at_boot(int ioapic, int irq) +{ + if (!nr_legacy_irqs()) + return 0; + + return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs()); +} + +static inline struct mp_pin_info *mp_pin_info(int ioapic_idx, int pin) +{ + return ioapics[ioapic_idx].pin_info + pin; +} + +static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic) +{ + return ioapics[ioapic].irqdomain; +} + int nr_ioapics; /* The one past the highest gsi number used */ @@ -118,9 +177,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; -/* GSI interrupts */ -static int nr_irqs_gsi = NR_IRQS_LEGACY; - #ifdef CONFIG_EISA int mp_bus_id_to_type[MAX_MP_BUSSES]; #endif @@ -149,8 +205,7 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -static int io_apic_setup_irq_pin(unsigned int irq, int node, - struct io_apic_irq_attr *attr); +static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ void mp_save_irq(struct mpc_intsrc *m) @@ -182,19 +237,15 @@ static struct irq_pin_list *alloc_irq_pin_list(int node) return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); } - -/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. 
*/ -static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; - int __init arch_early_irq_init(void) { struct irq_cfg *cfg; - int count, node, i; + int i, node = cpu_to_node(0); - if (!legacy_pic->nr_legacy_irqs) + if (!nr_legacy_irqs()) io_apic_irqs = ~0UL; - for (i = 0; i < nr_ioapics; i++) { + for_each_ioapic(i) { ioapics[i].saved_registers = kzalloc(sizeof(struct IO_APIC_route_entry) * ioapics[i].nr_registers, GFP_KERNEL); @@ -202,28 +253,20 @@ int __init arch_early_irq_init(void) pr_err("IOAPIC %d: suspend/resume impossible!\n", i); } - cfg = irq_cfgx; - count = ARRAY_SIZE(irq_cfgx); - node = cpu_to_node(0); - - for (i = 0; i < count; i++) { - irq_set_chip_data(i, &cfg[i]); - zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node); - zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); - /* - * For legacy IRQ's, start with assigning irq0 to irq15 to - * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. - */ - if (i < legacy_pic->nr_legacy_irqs) { - cfg[i].vector = IRQ0_VECTOR + i; - cpumask_setall(cfg[i].domain); - } + /* + * For legacy IRQ's, start with assigning irq0 to irq15 to + * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. 
+ */ + for (i = 0; i < nr_legacy_irqs(); i++) { + cfg = alloc_irq_and_cfg_at(i, node); + cfg->vector = IRQ0_VECTOR + i; + cpumask_setall(cfg->domain); } return 0; } -static struct irq_cfg *irq_cfg(unsigned int irq) +static inline struct irq_cfg *irq_cfg(unsigned int irq) { return irq_get_chip_data(irq); } @@ -265,7 +308,7 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) if (res < 0) { if (res != -EEXIST) return NULL; - cfg = irq_get_chip_data(at); + cfg = irq_cfg(at); if (cfg) return cfg; } @@ -425,6 +468,21 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi return 0; } +static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin) +{ + struct irq_pin_list **last, *entry; + + last = &cfg->irq_2_pin; + for_each_irq_pin(entry, cfg->irq_2_pin) + if (entry->apic == apic && entry->pin == pin) { + *last = entry->next; + kfree(entry); + return; + } else { + last = &entry->next; + } +} + static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { if (__add_pin_to_irq_node(cfg, node, apic, pin)) @@ -627,9 +685,8 @@ static void clear_IO_APIC (void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) - clear_IO_APIC_pin(apic, pin); + for_each_ioapic_pin(apic, pin) + clear_IO_APIC_pin(apic, pin); } #ifdef CONFIG_X86_32 @@ -678,13 +735,13 @@ int save_ioapic_entries(void) int apic, pin; int err = 0; - for (apic = 0; apic < nr_ioapics; apic++) { + for_each_ioapic(apic) { if (!ioapics[apic].saved_registers) { err = -ENOMEM; continue; } - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) + for_each_pin(apic, pin) ioapics[apic].saved_registers[pin] = ioapic_read_entry(apic, pin); } @@ -699,11 +756,11 @@ void mask_ioapic_entries(void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) { + for_each_ioapic(apic) { if (!ioapics[apic].saved_registers) continue; - for (pin = 0; pin < ioapics[apic].nr_registers; 
pin++) { + for_each_pin(apic, pin) { struct IO_APIC_route_entry entry; entry = ioapics[apic].saved_registers[pin]; @@ -722,11 +779,11 @@ int restore_ioapic_entries(void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) { + for_each_ioapic(apic) { if (!ioapics[apic].saved_registers) continue; - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) + for_each_pin(apic, pin) ioapic_write_entry(apic, pin, ioapics[apic].saved_registers[pin]); } @@ -785,7 +842,7 @@ static int __init find_isa_irq_apic(int irq, int type) if (i < mp_irq_entries) { int ioapic_idx; - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + for_each_ioapic(ioapic_idx) if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic) return ioapic_idx; } @@ -799,7 +856,7 @@ static int __init find_isa_irq_apic(int irq, int type) */ static int EISA_ELCR(unsigned int irq) { - if (irq < legacy_pic->nr_legacy_irqs) { + if (irq < nr_legacy_irqs()) { unsigned int port = 0x4d0 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } @@ -939,29 +996,106 @@ static int irq_trigger(int idx) return trigger; } -static int pin_2_irq(int idx, int apic, int pin) +static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin) +{ + int irq = -1; + int ioapic = (int)(long)domain->host_data; + int type = ioapics[ioapic].irqdomain_cfg.type; + + switch (type) { + case IOAPIC_DOMAIN_LEGACY: + /* + * Dynamically allocate IRQ number for non-ISA IRQs in the first 16 + * GSIs on some weird platforms. + */ + if (gsi < nr_legacy_irqs()) + irq = irq_create_mapping(domain, pin); + else if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0) + irq = gsi; + break; + case IOAPIC_DOMAIN_STRICT: + if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0) + irq = gsi; + break; + case IOAPIC_DOMAIN_DYNAMIC: + irq = irq_create_mapping(domain, pin); + break; + default: + WARN(1, "ioapic: unknown irqdomain type %d\n", type); + break; + } + + return irq > 0 ? 
irq : -1; +} + +static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, + unsigned int flags) { int irq; - int bus = mp_irqs[idx].srcbus; - struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic); + struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); + struct mp_pin_info *info = mp_pin_info(ioapic, pin); + + if (!domain) + return -1; + + mutex_lock(&ioapic_mutex); /* - * Debugging check, we are in big trouble if this message pops up! + * Don't use irqdomain to manage ISA IRQs because there may be + * multiple IOAPIC pins sharing the same ISA IRQ number and + * irqdomain only supports 1:1 mapping between IOAPIC pin and + * IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are used + * for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H). + * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are + * available, and some BIOSes may use MP Interrupt Source records + * to override IRQ numbers for PIRQs instead of reprogramming + * the interrupt routing logic. Thus there may be multiple pins + * sharing the same legacy IRQ number when ACPI is disabled. 
*/ - if (mp_irqs[idx].dstirq != pin) - pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); - - if (test_bit(bus, mp_bus_not_pci)) { + if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; + if (flags & IOAPIC_MAP_ALLOC) { + if (info->count == 0 && + mp_irqdomain_map(domain, irq, pin) != 0) + irq = -1; + + /* special handling for timer IRQ0 */ + if (irq == 0) + info->count++; + } } else { - u32 gsi = gsi_cfg->gsi_base + pin; + irq = irq_find_mapping(domain, pin); + if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC)) + irq = alloc_irq_from_domain(domain, gsi, pin); + } - if (gsi >= NR_IRQS_LEGACY) - irq = gsi; - else - irq = gsi_top + gsi; + if (flags & IOAPIC_MAP_ALLOC) { + /* special handling for legacy IRQs */ + if (irq < nr_legacy_irqs() && info->count == 1 && + mp_irqdomain_map(domain, irq, pin) != 0) + irq = -1; + + if (irq > 0) + info->count++; + else if (info->count == 0) + info->set = 0; } + mutex_unlock(&ioapic_mutex); + + return irq > 0 ? irq : -1; +} + +static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) +{ + u32 gsi = mp_pin_to_gsi(ioapic, pin); + + /* + * Debugging check, we are in big trouble if this message pops up! + */ + if (mp_irqs[idx].dstirq != pin) + pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); + #ifdef CONFIG_X86_32 /* * PCI IRQ command line redirection. Yes, limits are hardcoded. 
@@ -972,16 +1106,58 @@ static int pin_2_irq(int idx, int apic, int pin) apic_printk(APIC_VERBOSE, KERN_DEBUG "disabling PIRQ%d\n", pin-16); } else { - irq = pirq_entries[pin-16]; + int irq = pirq_entries[pin-16]; apic_printk(APIC_VERBOSE, KERN_DEBUG "using PIRQ%d -> IRQ %d\n", pin-16, irq); + return irq; } } } #endif - return irq; + return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); +} + +int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) +{ + int ioapic, pin, idx; + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return -1; + + pin = mp_find_ioapic_pin(ioapic, gsi); + idx = find_irq_entry(ioapic, pin, mp_INT); + if ((flags & IOAPIC_MAP_CHECK) && idx < 0) + return -1; + + return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); +} + +void mp_unmap_irq(int irq) +{ + struct irq_data *data = irq_get_irq_data(irq); + struct mp_pin_info *info; + int ioapic, pin; + + if (!data || !data->domain) + return; + + ioapic = (int)(long)data->domain->host_data; + pin = (int)data->hwirq; + info = mp_pin_info(ioapic, pin); + + mutex_lock(&ioapic_mutex); + if (--info->count == 0) { + info->set = 0; + if (irq < nr_legacy_irqs() && + ioapics[ioapic].irqdomain_cfg.type == IOAPIC_DOMAIN_LEGACY) + mp_irqdomain_unmap(data->domain, irq); + else + irq_dispose_mapping(irq); + } + mutex_unlock(&ioapic_mutex); } /* @@ -991,7 +1167,7 @@ static int pin_2_irq(int idx, int apic, int pin) int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, struct io_apic_irq_attr *irq_attr) { - int ioapic_idx, i, best_guess = -1; + int irq, i, best_ioapic = -1, best_idx = -1; apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", @@ -1001,44 +1177,56 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } + for (i = 0; i < mp_irq_entries; i++) { int lbus = mp_irqs[i].srcbus; + int ioapic_idx, found = 0; - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + if (bus != lbus || mp_irqs[i].irqtype 
!= mp_INT || + slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f)) + continue; + + for_each_ioapic(ioapic_idx) if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic || - mp_irqs[i].dstapic == MP_APIC_ALL) + mp_irqs[i].dstapic == MP_APIC_ALL) { + found = 1; break; + } + if (!found) + continue; - if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq); + /* Skip ISA IRQs */ + irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0); + if (irq > 0 && !IO_APIC_IRQ(irq)) + continue; - if (!(ioapic_idx || IO_APIC_IRQ(irq))) - continue; + if (pin == (mp_irqs[i].srcbusirq & 3)) { + best_idx = i; + best_ioapic = ioapic_idx; + goto out; + } - if (pin == (mp_irqs[i].srcbusirq & 3)) { - set_io_apic_irq_attr(irq_attr, ioapic_idx, - mp_irqs[i].dstirq, - irq_trigger(i), - irq_polarity(i)); - return irq; - } - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) { - set_io_apic_irq_attr(irq_attr, ioapic_idx, - mp_irqs[i].dstirq, - irq_trigger(i), - irq_polarity(i)); - best_guess = irq; - } + /* + * Use the first all-but-pin matching entry as a + * best-guess fuzzy result for broken mptables. 
+ */ + if (best_idx < 0) { + best_idx = i; + best_ioapic = ioapic_idx; } } - return best_guess; + if (best_idx < 0) + return -1; + +out: + irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, + IOAPIC_MAP_ALLOC); + if (irq > 0) + set_io_apic_irq_attr(irq_attr, best_ioapic, + mp_irqs[best_idx].dstirq, + irq_trigger(best_idx), + irq_polarity(best_idx)); + return irq; } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); @@ -1198,7 +1386,7 @@ void __setup_vector_irq(int cpu) raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) continue; @@ -1227,12 +1415,10 @@ static inline int IO_APIC_irq_trigger(int irq) { int apic, idx, pin; - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } + for_each_ioapic_pin(apic, pin) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0))) + return irq_trigger(idx); } /* * nonexistent IRQs are edge default @@ -1330,95 +1516,29 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, } ioapic_register_intr(irq, cfg, attr->trigger); - if (irq < legacy_pic->nr_legacy_irqs) + if (irq < nr_legacy_irqs()) legacy_pic->mask(irq); ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry); } -static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin) -{ - if (idx != -1) - return false; - - apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", - mpc_ioapic_id(ioapic_idx), pin); - return true; -} - -static void __init __io_apic_setup_irqs(unsigned int ioapic_idx) -{ - int idx, node = cpu_to_node(0); - struct io_apic_irq_attr attr; - unsigned int pin, irq; - - for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) { - idx = find_irq_entry(ioapic_idx, pin, mp_INT); - if 
(io_apic_pin_not_connected(idx, ioapic_idx, pin)) - continue; - - irq = pin_2_irq(idx, ioapic_idx, pin); - - if ((ioapic_idx > 0) && (irq > 16)) - continue; - - /* - * Skip the timer IRQ if there's a quirk handler - * installed and if it returns 1: - */ - if (apic->multi_timer_check && - apic->multi_timer_check(ioapic_idx, irq)) - continue; - - set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), - irq_polarity(idx)); - - io_apic_setup_irq_pin(irq, node, &attr); - } -} - static void __init setup_IO_APIC_irqs(void) { - unsigned int ioapic_idx; + unsigned int ioapic, pin; + int idx; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) - __io_apic_setup_irqs(ioapic_idx); -} - -/* - * for the gsit that is not in first ioapic - * but could not use acpi_register_gsi() - * like some special sci in IBM x3330 - */ -void setup_IO_APIC_irq_extra(u32 gsi) -{ - int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0); - struct io_apic_irq_attr attr; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic_idx = mp_find_ioapic(gsi); - if (ioapic_idx < 0) - return; - - pin = mp_find_ioapic_pin(ioapic_idx, gsi); - idx = find_irq_entry(ioapic_idx, pin, mp_INT); - if (idx == -1) - return; - - irq = pin_2_irq(idx, ioapic_idx, pin); - - /* Only handle the non legacy irqs on secondary ioapics */ - if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY) - return; - - set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), - irq_polarity(idx)); - - io_apic_setup_irq_pin_once(irq, node, &attr); + for_each_ioapic_pin(ioapic, pin) { + idx = find_irq_entry(ioapic, pin, mp_INT); + if (idx < 0) + apic_printk(APIC_VERBOSE, + KERN_DEBUG " apic %d pin %d not connected\n", + mpc_ioapic_id(ioapic), pin); + else + pin_2_irq(idx, ioapic, pin, + ioapic ? 
0 : IOAPIC_MAP_ALLOC); + } } /* @@ -1586,7 +1706,7 @@ __apicdebuginit(void) print_IO_APICs(void) struct irq_chip *chip; printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + for_each_ioapic(ioapic_idx) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", mpc_ioapic_id(ioapic_idx), ioapics[ioapic_idx].nr_registers); @@ -1597,7 +1717,7 @@ __apicdebuginit(void) print_IO_APICs(void) */ printk(KERN_INFO "testing the IO APIC.......................\n"); - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + for_each_ioapic(ioapic_idx) print_IO_APIC(ioapic_idx); printk(KERN_DEBUG "IRQ to pin mappings:\n"); @@ -1608,7 +1728,7 @@ __apicdebuginit(void) print_IO_APICs(void) if (chip != &ioapic_chip) continue; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) continue; entry = cfg->irq_2_pin; @@ -1758,7 +1878,7 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (!legacy_pic->nr_legacy_irqs) + if (!nr_legacy_irqs()) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); @@ -1828,26 +1948,22 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; void __init enable_IO_APIC(void) { int i8259_apic, i8259_pin; - int apic; + int apic, pin; - if (!legacy_pic->nr_legacy_irqs) + if (!nr_legacy_irqs()) return; - for(apic = 0; apic < nr_ioapics; apic++) { - int pin; + for_each_ioapic_pin(apic, pin) { /* See if any of the pins is in ExtINT mode */ - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, pin); + struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin); - /* If the interrupt line is enabled and in ExtInt mode - * I have found the pin where the i8259 is connected. 
- */ - if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { - ioapic_i8259.apic = apic; - ioapic_i8259.pin = pin; - goto found_i8259; - } + /* If the interrupt line is enabled and in ExtInt mode + * I have found the pin where the i8259 is connected. + */ + if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { + ioapic_i8259.apic = apic; + ioapic_i8259.pin = pin; + goto found_i8259; } } found_i8259: @@ -1919,7 +2035,7 @@ void disable_IO_APIC(void) */ clear_IO_APIC(); - if (!legacy_pic->nr_legacy_irqs) + if (!nr_legacy_irqs()) return; x86_io_apic_ops.disable(); @@ -1950,7 +2066,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) /* * Set the IOAPIC ID to the value stored in the MPC table. */ - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { + for_each_ioapic(ioapic_idx) { /* Read the register 0 value */ raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic_idx, 0); @@ -2123,7 +2239,7 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - if (irq < legacy_pic->nr_legacy_irqs) { + if (irq < nr_legacy_irqs()) { legacy_pic->mask(irq); if (legacy_pic->irq_pending(irq)) was_pending = 1; @@ -2225,7 +2341,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); goto unlock; } - __this_cpu_write(vector_irq[vector], -1); + __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); unlock: raw_spin_unlock(&desc->lock); } @@ -2253,7 +2369,7 @@ static void irq_complete_move(struct irq_cfg *cfg) void irq_force_complete_move(int irq) { - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); if (!cfg) return; @@ -2507,6 +2623,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_eoi = ack_apic_level, .irq_set_affinity = native_ioapic_set_affinity, .irq_retrigger = ioapic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static inline void 
init_IO_APIC_traps(void) @@ -2514,26 +2631,15 @@ static inline void init_IO_APIC_traps(void) struct irq_cfg *cfg; unsigned int irq; - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ for_each_active_irq(irq) { - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (irq < legacy_pic->nr_legacy_irqs) + if (irq < nr_legacy_irqs()) legacy_pic->make_irq(irq); else /* Strange. Oh, well.. */ @@ -2649,8 +2755,6 @@ static int __init disable_timer_pin_setup(char *arg) } early_param("disable_timer_pin_1", disable_timer_pin_setup); -int timer_through_8259 __initdata; - /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -2661,7 +2765,7 @@ int timer_through_8259 __initdata; */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_get_chip_data(0); + struct irq_cfg *cfg = irq_cfg(0); int node = cpu_to_node(0); int apic1, pin1, apic2, pin2; unsigned long flags; @@ -2755,7 +2859,6 @@ static inline void __init check_timer(void) legacy_pic->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... 
works.\n"); - timer_through_8259 = 1; goto out; } /* @@ -2827,15 +2930,54 @@ out: */ #define PIC_IRQS (1UL << PIC_CASCADE_IR) +static int mp_irqdomain_create(int ioapic) +{ + size_t size; + int hwirqs = mp_ioapic_pin_count(ioapic); + struct ioapic *ip = &ioapics[ioapic]; + struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; + struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); + + size = sizeof(struct mp_pin_info) * mp_ioapic_pin_count(ioapic); + ip->pin_info = kzalloc(size, GFP_KERNEL); + if (!ip->pin_info) + return -ENOMEM; + + if (cfg->type == IOAPIC_DOMAIN_INVALID) + return 0; + + ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops, + (void *)(long)ioapic); + if(!ip->irqdomain) { + kfree(ip->pin_info); + ip->pin_info = NULL; + return -ENOMEM; + } + + if (cfg->type == IOAPIC_DOMAIN_LEGACY || + cfg->type == IOAPIC_DOMAIN_STRICT) + ioapic_dynirq_base = max(ioapic_dynirq_base, + gsi_cfg->gsi_end + 1); + + if (gsi_cfg->gsi_base == 0) + irq_set_default_host(ip->irqdomain); + + return 0; +} + void __init setup_IO_APIC(void) { + int ioapic; /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ - io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL; + io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); + for_each_ioapic(ioapic) + BUG_ON(mp_irqdomain_create(ioapic)); + /* * Set up IO-APIC IRQ routing. 
*/ @@ -2844,8 +2986,10 @@ void __init setup_IO_APIC(void) sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); - if (legacy_pic->nr_legacy_irqs) + if (nr_legacy_irqs()) check_timer(); + + ioapic_initialized = 1; } /* @@ -2880,7 +3024,7 @@ static void ioapic_resume(void) { int ioapic_idx; - for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--) + for_each_ioapic_reverse(ioapic_idx) resume_ioapic_id(ioapic_idx); restore_ioapic_entries(); @@ -2926,7 +3070,7 @@ int arch_setup_hwirq(unsigned int irq, int node) void arch_teardown_hwirq(unsigned int irq) { - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; free_remapped_irq(irq); @@ -3030,6 +3174,7 @@ static struct irq_chip msi_chip = { .irq_ack = ack_apic_edge, .irq_set_affinity = msi_set_affinity, .irq_retrigger = ioapic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, }; int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, @@ -3053,7 +3198,7 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, if (!irq_offset) write_msi_msg(irq, &msg); - setup_remapped_irq(irq, irq_get_chip_data(irq), chip); + setup_remapped_irq(irq, irq_cfg(irq), chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); @@ -3128,6 +3273,7 @@ static struct irq_chip dmar_msi_type = { .irq_ack = ack_apic_edge, .irq_set_affinity = dmar_msi_set_affinity, .irq_retrigger = ioapic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, }; int arch_setup_dmar_msi(unsigned int irq) @@ -3178,6 +3324,7 @@ static struct irq_chip hpet_msi_type = { .irq_ack = ack_apic_edge, .irq_set_affinity = hpet_msi_set_affinity, .irq_retrigger = ioapic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, }; int default_setup_hpet_msi(unsigned int irq, unsigned int id) @@ -3192,7 +3339,7 @@ int default_setup_hpet_msi(unsigned int irq, unsigned int id) hpet_msi_write(irq_get_handler_data(irq), &msg); irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - setup_remapped_irq(irq, 
irq_get_chip_data(irq), chip); + setup_remapped_irq(irq, irq_cfg(irq), chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); return 0; @@ -3241,6 +3388,7 @@ static struct irq_chip ht_irq_chip = { .irq_ack = ack_apic_edge, .irq_set_affinity = ht_set_affinity, .irq_retrigger = ioapic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, }; int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) @@ -3303,27 +3451,6 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) return ret; } -int io_apic_setup_irq_pin_once(unsigned int irq, int node, - struct io_apic_irq_attr *attr) -{ - unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin; - int ret; - struct IO_APIC_route_entry orig_entry; - - /* Avoid redundant programming */ - if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) { - pr_debug("Pin %d-%d already programmed\n", mpc_ioapic_id(ioapic_idx), pin); - orig_entry = ioapic_read_entry(attr->ioapic, pin); - if (attr->trigger == orig_entry.trigger && attr->polarity == orig_entry.polarity) - return 0; - return -EBUSY; - } - ret = io_apic_setup_irq_pin(irq, node, attr); - if (!ret) - set_bit(pin, ioapics[ioapic_idx].pin_programmed); - return ret; -} - static int __init io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; @@ -3340,20 +3467,13 @@ static int __init io_apic_get_redir_entries(int ioapic) return reg_01.bits.entries + 1; } -static void __init probe_nr_irqs_gsi(void) -{ - int nr; - - nr = gsi_top + NR_IRQS_LEGACY; - if (nr > nr_irqs_gsi) - nr_irqs_gsi = nr; - - printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); -} - unsigned int arch_dynirq_lower_bound(unsigned int from) { - return from < nr_irqs_gsi ? nr_irqs_gsi : from; + /* + * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use + * gsi_top if ioapic_dynirq_base hasn't been initialized yet. + */ + return ioapic_initialized ? 
ioapic_dynirq_base : gsi_top; } int __init arch_probe_nr_irqs(void) @@ -3363,33 +3483,17 @@ int __init arch_probe_nr_irqs(void) if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) nr_irqs = NR_VECTORS * nr_cpu_ids; - nr = nr_irqs_gsi + 8 * nr_cpu_ids; + nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; #if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) /* * for MSI and HT dyn irq */ - nr += nr_irqs_gsi * 16; + nr += gsi_top * 16; #endif if (nr < nr_irqs) nr_irqs = nr; - return NR_IRQS_LEGACY; -} - -int io_apic_set_pci_routing(struct device *dev, int irq, - struct io_apic_irq_attr *irq_attr) -{ - int node; - - if (!IO_APIC_IRQ(irq)) { - apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - irq_attr->ioapic); - return -EINVAL; - } - - node = dev ? dev_to_node(dev) : cpu_to_node(0); - - return io_apic_setup_irq_pin_once(irq, node, irq_attr); + return 0; } #ifdef CONFIG_X86_32 @@ -3483,9 +3587,8 @@ static u8 __init io_apic_unique_id(u8 id) DECLARE_BITMAP(used, 256); bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { + for_each_ioapic(i) __set_bit(mpc_ioapic_id(i), used); - } if (!test_bit(id, used)) return id; return find_first_zero_bit(used, 256); @@ -3543,14 +3646,13 @@ void __init setup_ioapic_dest(void) if (skip_ioapic_setup == 1) return; - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) - for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) { + for_each_ioapic_pin(ioapic, pin) { irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - if ((ioapic > 0) && (irq > 16)) + irq = pin_2_irq(irq_entry, ioapic, pin, 0); + if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq)) continue; idata = irq_get_irq_data(irq); @@ -3573,29 +3675,33 @@ void __init setup_ioapic_dest(void) static struct resource *ioapic_resources; -static struct resource * __init ioapic_setup_resources(int nr_ioapics) +static struct resource * __init ioapic_setup_resources(void) { unsigned long n; struct 
resource *res; char *mem; - int i; + int i, num = 0; - if (nr_ioapics <= 0) + for_each_ioapic(i) + num++; + if (num == 0) return NULL; n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= nr_ioapics; + n *= num; mem = alloc_bootmem(n); res = (void *)mem; - mem += sizeof(struct resource) * nr_ioapics; + mem += sizeof(struct resource) * num; - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + num = 0; + for_each_ioapic(i) { + res[num].name = mem; + res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY; snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; + num++; } ioapic_resources = res; @@ -3609,8 +3715,8 @@ void __init native_io_apic_init_mappings(void) struct resource *ioapic_res; int i; - ioapic_res = ioapic_setup_resources(nr_ioapics); - for (i = 0; i < nr_ioapics; i++) { + ioapic_res = ioapic_setup_resources(); + for_each_ioapic(i) { if (smp_found_config) { ioapic_phys = mpc_ioapic_addr(i); #ifdef CONFIG_X86_32 @@ -3641,8 +3747,6 @@ fake_ioapic_page: ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } - - probe_nr_irqs_gsi(); } void __init ioapic_insert_resources(void) @@ -3657,7 +3761,7 @@ void __init ioapic_insert_resources(void) return; } - for (i = 0; i < nr_ioapics; i++) { + for_each_ioapic(i) { insert_resource(&iomem_resource, r); r++; } @@ -3665,16 +3769,15 @@ void __init ioapic_insert_resources(void) int mp_find_ioapic(u32 gsi) { - int i = 0; + int i; if (nr_ioapics == 0) return -1; /* Find the IOAPIC that manages this GSI. 
*/ - for (i = 0; i < nr_ioapics; i++) { + for_each_ioapic(i) { struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i); - if ((gsi >= gsi_cfg->gsi_base) - && (gsi <= gsi_cfg->gsi_end)) + if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end) return i; } @@ -3686,7 +3789,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi) { struct mp_ioapic_gsi *gsi_cfg; - if (WARN_ON(ioapic == -1)) + if (WARN_ON(ioapic < 0)) return -1; gsi_cfg = mp_ioapic_gsi_routing(ioapic); @@ -3729,7 +3832,8 @@ static __init int bad_ioapic_register(int idx) return 0; } -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, + struct ioapic_domain_cfg *cfg) { int idx = 0; int entries; @@ -3743,6 +3847,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) ioapics[idx].mp_config.type = MP_IOAPIC; ioapics[idx].mp_config.flags = MPC_APIC_USABLE; ioapics[idx].mp_config.apicaddr = address; + ioapics[idx].irqdomain = NULL; + ioapics[idx].irqdomain_cfg = *cfg; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); @@ -3779,6 +3885,97 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + int ioapic = (int)(long)domain->host_data; + struct mp_pin_info *info = mp_pin_info(ioapic, hwirq); + struct io_apic_irq_attr attr; + + /* Get default attribute if not set by caller yet */ + if (!info->set) { + u32 gsi = mp_pin_to_gsi(ioapic, hwirq); + + if (acpi_get_override_irq(gsi, &info->trigger, + &info->polarity) < 0) { + /* + * PCI interrupts are always polarity one level + * triggered. + */ + info->trigger = 1; + info->polarity = 1; + } + info->node = NUMA_NO_NODE; + + /* + * setup_IO_APIC_irqs() programs all legacy IRQs with default + * trigger and polarity attributes. 
Don't set the flag for that + * case so the first legacy IRQ user could reprogram the pin + * with real trigger and polarity attributes. + */ + if (virq >= nr_legacy_irqs() || info->count) + info->set = 1; + } + set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger, + info->polarity); + + return io_apic_setup_irq_pin(virq, info->node, &attr); +} + +void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq) +{ + struct irq_data *data = irq_get_irq_data(virq); + struct irq_cfg *cfg = irq_cfg(virq); + int ioapic = (int)(long)domain->host_data; + int pin = (int)data->hwirq; + + ioapic_mask_entry(ioapic, pin); + __remove_pin_from_irq(cfg, ioapic, pin); + WARN_ON(cfg->irq_2_pin != NULL); + arch_teardown_hwirq(virq); +} + +int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) +{ + int ret = 0; + int ioapic, pin; + struct mp_pin_info *info; + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return -ENODEV; + + pin = mp_find_ioapic_pin(ioapic, gsi); + info = mp_pin_info(ioapic, pin); + trigger = trigger ? 1 : 0; + polarity = polarity ? 
1 : 0; + + mutex_lock(&ioapic_mutex); + if (!info->set) { + info->trigger = trigger; + info->polarity = polarity; + info->node = node; + info->set = 1; + } else if (info->trigger != trigger || info->polarity != polarity) { + ret = -EBUSY; + } + mutex_unlock(&ioapic_mutex); + + return ret; +} + +bool mp_should_keep_irq(struct device *dev) +{ + if (dev->power.is_prepared) + return true; +#ifdef CONFIG_PM_RUNTIME + if (dev->power.runtime_status == RPM_SUSPENDING) + return true; +#endif + + return false; +} + /* Enable IOAPIC early just for system timer */ void __init pre_init_apic_IRQ0(void) { diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index cceb352c968c..bda488680dbc 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -88,21 +88,16 @@ static struct apic apic_default = { .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, - .check_apicid_present = default_check_apicid_present, .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = setup_apic_flat_routing, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = default_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = default_get_apic_id, .set_apic_id = NULL, @@ -116,11 +111,7 @@ static struct apic apic_default = { .send_IPI_all = default_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = true, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, @@ -214,29 +205,7 @@ 
void __init generic_apic_probe(void) printk(KERN_INFO "Using APIC driver %s\n", apic->name); } -/* These functions can switch the APIC even after the initial ->probe() */ - -int __init -generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - struct apic **drv; - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { - if (!((*drv)->mps_oem_check)) - continue; - if (!(*drv)->mps_oem_check(mpc, oem, productid)) - continue; - - if (!cmdline_apic) { - apic = *drv; - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - apic->name); - } - return 1; - } - return 0; -} - +/* This function can switch the APIC even after the initial ->probe() */ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { struct apic **drv; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index e66766bf1641..e658f21681c8 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -42,7 +42,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) * We are to modify mask, so we need an own copy * and be sure it's manipulated with irq off. 
*/ - ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); + ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); cpumask_copy(ipi_mask_ptr, mask); /* @@ -249,21 +249,16 @@ static struct apic apic_x2apic_cluster = { .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = cluster_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = x2apic_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, @@ -277,10 +272,7 @@ static struct apic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6d600ebf6c12..6fae733e9194 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -103,21 +103,16 @@ static struct apic apic_x2apic_phys = { .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = 
x2apic_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, @@ -131,10 +126,7 @@ static struct apic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 293b41df54ef..8e9dcfd630e4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -204,7 +204,6 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { -#ifdef CONFIG_SMP unsigned long val; int pnode; @@ -223,7 +222,6 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); atomic_set(&init_deasserted, 1); -#endif return 0; } @@ -365,21 +363,16 @@ static struct apic __refdata apic_x2apic_uv_x = { .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = uv_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, @@ -394,10 +387,7 @@ static struct apic __refdata apic_x2apic_uv_x = { .send_IPI_self = uv_send_IPI_self, .wakeup_secondary_cpu = uv_wakeup_secondary, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - 
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 7fd54f09b011..e27b49d7c922 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -13,10 +13,13 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o scattered.o topology.o -obj-y += proc.o capflags.o powerflags.o common.o +obj-y += common.o obj-y += rdrand.o obj-y += match.o +obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o + obj-$(CONFIG_X86_32) += bugs.o obj-$(CONFIG_X86_64) += bugs_64.o @@ -36,7 +39,12 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o endif obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_rapl.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o + +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ + perf_event_intel_uncore_snb.o \ + perf_event_intel_uncore_snbep.o \ + perf_event_intel_uncore_nhmex.o endif @@ -48,6 +56,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o +ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ @@ -56,3 +65,4 @@ cpufeature = $(src)/../../include/asm/cpufeature.h targets += capflags.c $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) +endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 60e5497681f5..813d29d00a17 100644 --- a/arch/x86/kernel/cpu/amd.c +++ 
b/arch/x86/kernel/cpu/amd.c @@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c) } #endif + /* + * This is only needed to tell the kernel whether to use VMCALL + * and VMMCALL. VMMCALL is never executed except under virt, so + * we can set it unconditionally. + */ + set_cpu_cap(c, X86_FEATURE_VMMCALL); + /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) msr_set_bit(MSR_AMD64_LS_CFG, 15); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 333fd5209336..4b4f78c9ba19 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -148,6 +148,7 @@ static int __init x86_xsave_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + setup_clear_cpu_cap(X86_FEATURE_XSAVES); setup_clear_cpu_cap(X86_FEATURE_AVX); setup_clear_cpu_cap(X86_FEATURE_AVX2); return 1; @@ -161,6 +162,13 @@ static int __init x86_xsaveopt_setup(char *s) } __setup("noxsaveopt", x86_xsaveopt_setup); +static int __init x86_xsaves_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + return 1; +} +__setup("noxsaves", x86_xsaves_setup); + #ifdef CONFIG_X86_32 static int cachesize_override = -1; static int disable_x86_serial_nr = 1; @@ -338,8 +346,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) continue; printk(KERN_WARNING - "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", - x86_cap_flags[df->feature], df->level); + "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", + x86_cap_flag(df->feature), df->level); } } @@ -956,6 +964,7 @@ static void vgetcpu_set_mode(void) vgetcpu_mode = VGETCPU_LSL; } +#ifdef CONFIG_IA32_EMULATION /* May not be __init: called during resume */ static void syscall32_cpu_init(void) { @@ -967,7 +976,8 @@ static void syscall32_cpu_init(void) wrmsrl(MSR_CSTAR, ia32_cstar_target); } -#endif +#endif /* CONFIG_IA32_EMULATION */ +#endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_32 void 
enable_sep_cpu(void) @@ -1176,7 +1186,7 @@ void syscall_init(void) /* Flags to clear on syscall */ wrmsrl(MSR_SYSCALL_MASK, X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| - X86_EFLAGS_IOPL|X86_EFLAGS_AC); + X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } /* @@ -1190,9 +1200,9 @@ DEFINE_PER_CPU(int, debug_stack_usage); int is_debug_stack(unsigned long addr) { - return __get_cpu_var(debug_stack_usage) || - (addr <= __get_cpu_var(debug_stack_addr) && - addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); + return __this_cpu_read(debug_stack_usage) || + (addr <= __this_cpu_read(debug_stack_addr) && + addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ)); } NOKPROBE_SYMBOL(is_debug_stack); @@ -1258,6 +1268,19 @@ static void dbg_restore_debug_regs(void) #define dbg_restore_debug_regs() #endif /* ! CONFIG_KGDB */ +static void wait_for_master_cpu(int cpu) +{ +#ifdef CONFIG_SMP + /* + * wait for ACK from master CPU before continuing + * with AP initialization + */ + WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)); + while (!cpumask_test_cpu(cpu, cpu_callout_mask)) + cpu_relax(); +#endif +} + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT @@ -1273,16 +1296,17 @@ void cpu_init(void) struct task_struct *me; struct tss_struct *t; unsigned long v; - int cpu; + int cpu = stack_smp_processor_id(); int i; + wait_for_master_cpu(cpu); + /* * Load microcode on this cpu if a valid microcode is available. * This is early microcode loading procedure. 
*/ load_ucode_ap(); - cpu = stack_smp_processor_id(); t = &per_cpu(init_tss, cpu); oist = &per_cpu(orig_ist, cpu); @@ -1294,9 +1318,6 @@ void cpu_init(void) me = current; - if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) - panic("CPU#%d already initialized!\n", cpu); - pr_debug("Initializing CPU#%d\n", cpu); clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); @@ -1373,17 +1394,13 @@ void cpu_init(void) struct tss_struct *t = &per_cpu(init_tss, cpu); struct thread_struct *thread = &curr->thread; - show_ucode_info_early(); + wait_for_master_cpu(cpu); - if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) - local_irq_enable(); - } + show_ucode_info_early(); printk(KERN_INFO "Initializing CPU#%d\n", cpu); - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_current_idt(); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 74e804ddc5c7..9cc6b6f25f42 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -144,6 +144,21 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_ERMS); } } + + /* + * Intel Quark Core DevMan_001.pdf section 6.4.11 + * "The operating system also is required to invalidate (i.e., flush) + * the TLB when any changes are made to any of the page table entries. 
+ * The operating system must reload CR3 to cause the TLB to be flushed" + * + * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should + * be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE + * to be modified + */ + if (c->x86 == 5 && c->x86_model == 9) { + pr_info("Disabling PGE capability bit\n"); + setup_clear_cpu_cap(X86_FEATURE_PGE); + } } #ifdef CONFIG_X86_32 @@ -198,12 +213,13 @@ static void intel_workarounds(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_F00F_BUG /* - * All current models of Pentium and Pentium with MMX technology CPUs + * All models of Pentium and Pentium with MMX technology CPUs * have the F0 0F bug, which lets nonprivileged users lock up the * system. Announce that the fault handler will be checking for it. + * The Quark is also family 5, but does not have the same bug. */ clear_cpu_bug(c, X86_BUG_F00F); - if (!paravirt_enabled() && c->x86 == 5) { + if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) { static int f00f_workaround_enabled; set_cpu_bug(c, X86_BUG_F00F); @@ -382,6 +398,13 @@ static void init_intel(struct cpuinfo_x86 *c) } l2 = init_intel_cacheinfo(c); + + /* Detect legacy cache sizes if init_intel_cacheinfo did not */ + if (l2 == 0) { + cpu_detect_cache_sizes(c); + l2 = c->x86_cache_size; + } + if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ @@ -485,6 +508,13 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) */ if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) size = 256; + + /* + * Intel Quark SoC X1000 contains a 4-way set associative + * 16K cache with a 16 byte cache line and 256 lines per tag + */ + if ((c->x86 == 5) && (c->x86_model == 9)) + size = 16; return size; } #endif @@ -686,7 +716,8 @@ static const struct cpu_dev intel_cpu_dev = { [3] = "OverDrive PODP5V83", [4] = "Pentium MMX", [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" + [8] = "Mobile Pentium MMX", + [9] = "Quark 
SoC X1000", } }, { .family = 6, .model_names = diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 9c8f7394c612..c7035073dfc1 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -461,7 +461,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); - if (strict_strtoul(buf, 10, &val) < 0) + if (kstrtoul(buf, 10, &val) < 0) return -EINVAL; err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); @@ -511,7 +511,7 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) return -EINVAL; - if (strict_strtoul(buf, 16, &val) < 0) + if (kstrtoul(buf, 16, &val) < 0) return -EINVAL; if (amd_set_subcaches(cpu, val)) diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 5ac2d1fb28bc..4cfba4371a71 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -83,7 +83,7 @@ static DEFINE_MUTEX(mce_inject_mutex); static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { int cpu = smp_processor_id(); - struct mce *m = &__get_cpu_var(injectm); + struct mce *m = this_cpu_ptr(&injectm); if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) return NMI_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); @@ -97,7 +97,7 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) static void mce_irq_ipi(void *info) { int cpu = smp_processor_id(); - struct mce *m = &__get_cpu_var(injectm); + struct mce *m = this_cpu_ptr(&injectm); if (cpumask_test_cpu(cpu, mce_inject_cpumask) && m->inject_flags & MCJ_EXCEPTION) { @@ -109,7 +109,7 @@ static void mce_irq_ipi(void *info) /* Inject mce on current CPU */ static int raise_local(void) { - struct mce *m = &__get_cpu_var(injectm); + struct mce *m = this_cpu_ptr(&injectm); int 
context = MCJ_CTX(m->inject_flags); int ret = 0; int cpu = m->extcpu; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4fc57975acc1..61a9668cebfd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -400,7 +400,7 @@ static u64 mce_rdmsrl(u32 msr) if (offset < 0) return 0; - return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); + return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset); } if (rdmsrl_safe(msr, &v)) { @@ -422,7 +422,7 @@ static void mce_wrmsrl(u32 msr, u64 v) int offset = msr_to_offset(msr); if (offset >= 0) - *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; + *(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v; return; } wrmsrl(msr, v); @@ -478,7 +478,7 @@ static DEFINE_PER_CPU(struct mce_ring, mce_ring); /* Runs with CPU affinity in workqueue */ static int mce_ring_empty(void) { - struct mce_ring *r = &__get_cpu_var(mce_ring); + struct mce_ring *r = this_cpu_ptr(&mce_ring); return r->start == r->end; } @@ -490,7 +490,7 @@ static int mce_ring_get(unsigned long *pfn) *pfn = 0; get_cpu(); - r = &__get_cpu_var(mce_ring); + r = this_cpu_ptr(&mce_ring); if (r->start == r->end) goto out; *pfn = r->ring[r->start]; @@ -504,7 +504,7 @@ out: /* Always runs in MCE context with preempt off */ static int mce_ring_add(unsigned long pfn) { - struct mce_ring *r = &__get_cpu_var(mce_ring); + struct mce_ring *r = this_cpu_ptr(&mce_ring); unsigned next; next = (r->end + 1) % MCE_RING_SIZE; @@ -526,7 +526,7 @@ int mce_available(struct cpuinfo_x86 *c) static void mce_schedule_work(void) { if (!mce_ring_empty()) - schedule_work(&__get_cpu_var(mce_work)); + schedule_work(this_cpu_ptr(&mce_work)); } DEFINE_PER_CPU(struct irq_work, mce_irq_work); @@ -551,7 +551,7 @@ static void mce_report_event(struct pt_regs *regs) return; } - irq_work_queue(&__get_cpu_var(mce_irq_work)); + irq_work_queue(this_cpu_ptr(&mce_irq_work)); } /* @@ -1045,7 +1045,7 @@ void do_machine_check(struct pt_regs *regs, 
long error_code) mce_gather_info(&m, regs); - final = &__get_cpu_var(mces_seen); + final = this_cpu_ptr(&mces_seen); *final = m; memset(valid_banks, 0, sizeof(valid_banks)); @@ -1278,22 +1278,22 @@ static unsigned long (*mce_adjust_timer)(unsigned long interval) = static int cmc_error_seen(void) { - unsigned long *v = &__get_cpu_var(mce_polled_error); + unsigned long *v = this_cpu_ptr(&mce_polled_error); return test_and_clear_bit(0, v); } static void mce_timer_fn(unsigned long data) { - struct timer_list *t = &__get_cpu_var(mce_timer); + struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned long iv; int notify; WARN_ON(smp_processor_id() != data); - if (mce_available(__this_cpu_ptr(&cpu_info))) { + if (mce_available(this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); + this_cpu_ptr(&mce_poll_banks)); mce_intel_cmci_poll(); } @@ -1323,7 +1323,7 @@ static void mce_timer_fn(unsigned long data) */ void mce_timer_kick(unsigned long interval) { - struct timer_list *t = &__get_cpu_var(mce_timer); + struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned long when = jiffies + interval; unsigned long iv = __this_cpu_read(mce_next_interval); @@ -1659,7 +1659,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) static void __mcheck_cpu_init_timer(void) { - struct timer_list *t = &__get_cpu_var(mce_timer); + struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned int cpu = smp_processor_id(); setup_timer(t, mce_timer_fn, cpu); @@ -1702,8 +1702,8 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_timer(); - INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); - init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb); + INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work); + init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb); } /* @@ -1955,7 +1955,7 @@ static struct miscdevice mce_chrdev_device = { static void 
__mce_disable_bank(void *arg) { int bank = *((int *)arg); - __clear_bit(bank, __get_cpu_var(mce_poll_banks)); + __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); cmci_disable_bank(bank); } @@ -2065,7 +2065,7 @@ static void mce_syscore_shutdown(void) static void mce_syscore_resume(void) { __mcheck_cpu_init_generic(); - __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); + __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info)); } static struct syscore_ops mce_syscore_ops = { @@ -2080,7 +2080,7 @@ static struct syscore_ops mce_syscore_ops = { static void mce_cpu_restart(void *data) { - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); __mcheck_cpu_init_timer(); @@ -2096,14 +2096,14 @@ static void mce_restart(void) /* Toggle features for corrected errors */ static void mce_disable_cmci(void *data) { - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; cmci_clear(); } static void mce_enable_ce(void *all) { - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; cmci_reenable(); cmci_recheck(); @@ -2136,7 +2136,7 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, { u64 new; - if (strict_strtoull(buf, 0, &new) < 0) + if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; attr_to_bank(attr)->ctl = new; @@ -2174,7 +2174,7 @@ static ssize_t set_ignore_ce(struct device *s, { u64 new; - if (strict_strtoull(buf, 0, &new) < 0) + if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; if (mca_cfg.ignore_ce ^ !!new) { @@ -2198,7 +2198,7 @@ static ssize_t set_cmci_disabled(struct device *s, { u64 new; - if (strict_strtoull(buf, 0, &new) < 0) + if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; if (mca_cfg.cmci_disabled ^ !!new) { @@ -2336,7 +2336,7 @@ static void mce_disable_cpu(void *h) unsigned long action = *(unsigned long *)h; int i; - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if 
(!mce_available(raw_cpu_ptr(&cpu_info))) return; if (!(action & CPU_TASKS_FROZEN)) @@ -2354,7 +2354,7 @@ static void mce_reenable_cpu(void *h) unsigned long action = *(unsigned long *)h; int i; - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; if (!(action & CPU_TASKS_FROZEN)) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 603df4f74640..5d4999f95aec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -310,7 +310,7 @@ static void amd_threshold_interrupt(void) * event. */ machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); + this_cpu_ptr(&mce_poll_banks)); if (high & MASK_OVERFLOW_HI) { rdmsrl(address, m.misc); @@ -353,7 +353,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) if (!b->interrupt_capable) return -EINVAL; - if (strict_strtoul(buf, 0, &new) < 0) + if (kstrtoul(buf, 0, &new) < 0) return -EINVAL; b->interrupt_enable = !!new; @@ -372,7 +372,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) struct thresh_restart tr; unsigned long new; - if (strict_strtoul(buf, 0, &new) < 0) + if (kstrtoul(buf, 0, &new) < 0) return -EINVAL; if (new > THRESHOLD_MAX) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 9a316b21df8b..b3c97bafc123 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -42,7 +42,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); * cmci_discover_lock protects against parallel discovery attempts * which could race against each other. 
*/ -static DEFINE_SPINLOCK(cmci_discover_lock); +static DEFINE_RAW_SPINLOCK(cmci_discover_lock); #define CMCI_THRESHOLD 1 #define CMCI_POLL_INTERVAL (30 * HZ) @@ -86,7 +86,7 @@ void mce_intel_cmci_poll(void) { if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) return; - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); } void mce_intel_hcpu_update(unsigned long cpu) @@ -144,14 +144,14 @@ static void cmci_storm_disable_banks(void) int bank; u64 val; - spin_lock_irqsave(&cmci_discover_lock, flags); - owned = __get_cpu_var(mce_banks_owned); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); + owned = this_cpu_ptr(mce_banks_owned); for_each_set_bit(bank, owned, MAX_NR_BANKS) { rdmsrl(MSR_IA32_MCx_CTL2(bank), val); val &= ~MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(bank), val); } - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } static bool cmci_storm_detect(void) @@ -195,7 +195,7 @@ static void intel_threshold_interrupt(void) { if (cmci_storm_detect()) return; - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); mce_notify_irq(); } @@ -206,12 +206,12 @@ static void intel_threshold_interrupt(void) */ static void cmci_discover(int banks) { - unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); + unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned); unsigned long flags; int i; int bios_wrong_thresh = 0; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; int bios_zero_thresh = 0; @@ -228,7 +228,7 @@ static void cmci_discover(int banks) /* Already owned by someone else? 
*/ if (val & MCI_CTL2_CMCI_EN) { clear_bit(i, owned); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); + __clear_bit(i, this_cpu_ptr(mce_poll_banks)); continue; } @@ -252,7 +252,7 @@ static void cmci_discover(int banks) /* Did the enable bit stick? -- the bank supports CMCI */ if (val & MCI_CTL2_CMCI_EN) { set_bit(i, owned); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); + __clear_bit(i, this_cpu_ptr(mce_poll_banks)); /* * We are able to set thresholds for some banks that * had a threshold of 0. This means the BIOS has not @@ -263,10 +263,10 @@ static void cmci_discover(int banks) (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) bios_wrong_thresh = 1; } else { - WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); + WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks))); } } - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) { pr_info_once( "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); @@ -284,10 +284,10 @@ void cmci_recheck(void) unsigned long flags; int banks; - if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) + if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) return; local_irq_save(flags); - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); local_irq_restore(flags); } @@ -296,12 +296,12 @@ static void __cmci_disable_bank(int bank) { u64 val; - if (!test_bit(bank, __get_cpu_var(mce_banks_owned))) + if (!test_bit(bank, this_cpu_ptr(mce_banks_owned))) return; rdmsrl(MSR_IA32_MCx_CTL2(bank), val); val &= ~MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(bank), val); - __clear_bit(bank, __get_cpu_var(mce_banks_owned)); + __clear_bit(bank, this_cpu_ptr(mce_banks_owned)); } /* @@ -316,10 +316,10 @@ void cmci_clear(void) if (!cmci_supported(&banks)) return; - spin_lock_irqsave(&cmci_discover_lock, flags); + 
raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) __cmci_disable_bank(i); - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } static void cmci_rediscover_work_func(void *arg) @@ -360,9 +360,9 @@ void cmci_disable_bank(int bank) if (!cmci_supported(&banks)) return; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); __cmci_disable_bank(bank); - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } static void intel_init_cmci(void) diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 36a1bb6d1ee0..1af51b1586d7 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -498,8 +498,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG - "CPU%d: Thermal monitoring handled by SMI\n", cpu); + if (system_state == SYSTEM_BOOTING) + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); return; } diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c index 617a9e284245..7aa1acc79789 100644 --- a/arch/x86/kernel/cpu/microcode/amd_early.c +++ b/arch/x86/kernel/cpu/microcode/amd_early.c @@ -27,7 +27,7 @@ static u32 ucode_new_rev; u8 amd_ucode_patch[PATCH_MAX_SIZE]; static u16 this_equiv_id; -struct cpio_data ucode_cpio; +static struct cpio_data ucode_cpio; /* * Microcode patch container file is prepended to the initrd in cpio format. 
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index a276fa75d9b5..c6826d1e8082 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -127,7 +127,7 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) return get_matching_microcode(csig, cpf, mc_intel, crev); } -int apply_microcode(int cpu) +static int apply_microcode_intel(int cpu) { struct microcode_intel *mc_intel; struct ucode_cpu_info *uci; @@ -314,7 +314,7 @@ static struct microcode_ops microcode_intel_ops = { .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info, - .apply_microcode = apply_microcode, + .apply_microcode = apply_microcode_intel, .microcode_fini_cpu = microcode_fini_cpu, }; diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index 18f739129e72..b88343f7a3b3 100644 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c @@ -28,8 +28,8 @@ #include <asm/tlbflush.h> #include <asm/setup.h> -unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; -struct mc_saved_data { +static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; +static struct mc_saved_data { unsigned int mc_saved_count; struct microcode_intel **mc_saved; } mc_saved_data; @@ -415,7 +415,7 @@ static void __ref show_saved_mc(void) struct ucode_cpu_info uci; if (mc_saved_data.mc_saved_count == 0) { - pr_debug("no micorcode data saved.\n"); + pr_debug("no microcode data saved.\n"); return; } pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); @@ -506,7 +506,7 @@ int save_mc_for_early(u8 *mc) if (mc_saved && mc_saved_count) memcpy(mc_saved_tmp, mc_saved, - mc_saved_count * sizeof(struct mirocode_intel *)); + mc_saved_count * sizeof(struct microcode_intel *)); /* * Save the microcode patch mc in mc_save_tmp structure if it's a newer * version. 
@@ -526,7 +526,7 @@ int save_mc_for_early(u8 *mc) show_saved_mc(); /* - * Free old saved microcod data. + * Free old saved microcode data. */ if (mc_saved) { for (i = 0; i < mc_saved_count_init; i++) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index f961de9964c7..ea5f363a1948 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -707,7 +707,7 @@ void __init mtrr_bp_init(void) } else { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - if (cpu_has_k6_mtrr) { + if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) { /* Pre-Athlon (K6) AMD CPU MTRRs */ mtrr_if = mtrr_ops[X86_VENDOR_AMD]; size_or_mask = SIZE_OR_MASK_BITS(32); @@ -715,14 +715,14 @@ void __init mtrr_bp_init(void) } break; case X86_VENDOR_CENTAUR: - if (cpu_has_centaur_mcr) { + if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) { mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; } break; case X86_VENDOR_CYRIX: - if (cpu_has_cyrix_arr) { + if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) { mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2879ecdaac43..143e5f5dc855 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -243,7 +243,9 @@ static bool check_hw_exists(void) msr_fail: printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); - printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new); + printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", + boot_cpu_has(X86_FEATURE_HYPERVISOR) ? 
KERN_INFO : KERN_ERR, + reg, val_new); return false; } @@ -387,7 +389,7 @@ int x86_pmu_hw_config(struct perf_event *event) precise++; /* Support for IP fixup */ - if (x86_pmu.lbr_nr) + if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) precise++; } @@ -487,7 +489,7 @@ static int __x86_pmu_event_init(struct perf_event *event) void x86_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -505,7 +507,7 @@ void x86_pmu_disable_all(void) static void x86_pmu_disable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu_initialized()) return; @@ -522,7 +524,7 @@ static void x86_pmu_disable(struct pmu *pmu) void x86_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -869,7 +871,7 @@ static void x86_pmu_start(struct perf_event *event, int flags); static void x86_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; struct hw_perf_event *hwc; int i, added = cpuc->n_added; @@ -1020,7 +1022,7 @@ void x86_pmu_enable_event(struct perf_event *event) */ static int x86_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc; int assign[X86_PMC_IDX_MAX]; int n, n0, ret; @@ -1071,7 +1073,7 @@ out: static void x86_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = 
event->hw.idx; if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) @@ -1150,7 +1152,7 @@ void perf_event_print_debug(void) void x86_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { @@ -1172,7 +1174,7 @@ void x86_pmu_stop(struct perf_event *event, int flags) static void x86_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; /* @@ -1227,7 +1229,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); /* * Some chipsets need to unmask the LVTPC in a particular spot @@ -1636,7 +1638,7 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) */ static int x86_pmu_commit_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int assign[X86_PMC_IDX_MAX]; int n, ret; @@ -1995,7 +1997,7 @@ static unsigned long get_segment_base(unsigned int segment) if (idx > GDT_ENTRIES) return 0; - desc = __this_cpu_ptr(&gdt_page.gdt[0]); + desc = raw_cpu_ptr(gdt_page.gdt); } return get_desc_base(desc + idx); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 8ade93111e03..fc5eb390b368 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -67,8 +67,10 @@ struct event_constraint { */ #define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ #define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ -#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */ +#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style datala, store */ #define 
PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */ +#define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */ +#define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -252,18 +254,52 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. */ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \ + EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS) + +/* Check flags and event code, and set the HSW store flag */ +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) + +/* Check flags and event code, and set the HSW load flag */ +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) + +/* Check flags and event code/umask, and set the HSW store flag */ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) +/* 
Check flags and event code/umask, and set the HSW load flag */ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) + +/* Check flags and event code/umask, and set the HSW N/A flag */ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW) + + /* * We define the end marker as having a weight of -1 * to enable blacklisting of events using a counter bitmask diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index beeb7cc07044..28926311aac1 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -699,7 +699,7 @@ __init int amd_pmu_init(void) void amd_pmu_enable_virt(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); cpuc->perf_ctr_virt_mask = 0; @@ -711,7 +711,7 @@ EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); void amd_pmu_disable_virt(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); /* * We only mask out the Host-only bit so that host-only counting works diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2502d0d9d246..944bf019b74f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1045,7 +1045,7 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) static void intel_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); @@ -1058,7 +1058,7 @@ static void intel_pmu_disable_all(void) static void intel_pmu_enable_all(int added) { - struct
cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); intel_pmu_pebs_enable_all(); intel_pmu_lbr_enable_all(); @@ -1092,7 +1092,7 @@ static void intel_pmu_enable_all(int added) */ static void intel_pmu_nhm_workaround(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); static const unsigned long nhm_magic[4] = { 0x4300B5, 0x4300D2, @@ -1191,7 +1191,7 @@ static inline bool event_is_checkpointed(struct perf_event *event) static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { intel_pmu_disable_bts(); @@ -1255,7 +1255,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) static void intel_pmu_enable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { if (!__this_cpu_read(cpu_hw_events.enabled)) @@ -1349,7 +1349,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) u64 status; int handled; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); /* * No known reason to not always do late ACK, @@ -1781,7 +1781,7 @@ EXPORT_SYMBOL_GPL(perf_guest_get_msrs); static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; @@ -1802,7 +1802,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) static struct perf_guest_switch_msr 
*core_guest_get_msrs(int *nr) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; int idx; @@ -1836,7 +1836,7 @@ static void core_pmu_enable_event(struct perf_event *event) static void core_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -2367,15 +2367,15 @@ __init int intel_pmu_init(void) * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { - case 14: /* 65 nm core solo/duo, "Yonah" */ + case 14: /* 65nm Core "Yonah" */ pr_cont("Core events, "); break; - case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 15: /* 65nm Core2 "Merom" */ x86_add_quirk(intel_clovertown_quirk); - case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 29: /* six-core 45 nm xeon "Dunnington" */ + case 22: /* 65nm Core2 "Merom-L" */ + case 23: /* 45nm Core2 "Penryn" */ + case 29: /* 45nm Core2 "Dunnington" (MP) */ memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -2386,9 +2386,9 @@ __init int intel_pmu_init(void) pr_cont("Core2 events, "); break; - case 26: /* 45 nm nehalem, "Bloomfield" */ - case 30: /* 45 nm nehalem, "Lynnfield" */ - case 46: /* 45 nm nehalem-ex, "Beckton" */ + case 30: /* 45nm Nehalem */ + case 26: /* 45nm Nehalem-EP */ + case 46: /* 45nm Nehalem-EX */ memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -2415,11 +2415,11 @@ __init int intel_pmu_init(void) pr_cont("Nehalem events, "); break; - case 28: /* Atom */ - case 38: /* Lincroft */ - case 39: /* Penwell */ - case 53: /* Cloverview */ - case 54: /*
Cedarview */ + case 28: /* 45nm Atom "Pineview" */ + case 38: /* 45nm Atom "Lincroft" */ + case 39: /* 32nm Atom "Penwell" */ + case 53: /* 32nm Atom "Cloverview" */ + case 54: /* 32nm Atom "Cedarview" */ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -2430,8 +2430,8 @@ __init int intel_pmu_init(void) pr_cont("Atom events, "); break; - case 55: /* Atom 22nm "Silvermont" */ - case 77: /* Avoton "Silvermont" */ + case 55: /* 22nm Atom "Silvermont" */ + case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -2446,9 +2446,9 @@ __init int intel_pmu_init(void) pr_cont("Silvermont events, "); break; - case 37: /* 32 nm nehalem, "Clarkdale" */ - case 44: /* 32 nm nehalem, "Gulftown" */ - case 47: /* 32 nm Xeon E7 */ + case 37: /* 32nm Westmere */ + case 44: /* 32nm Westmere-EP */ + case 47: /* 32nm Westmere-EX */ memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -2474,8 +2474,8 @@ __init int intel_pmu_init(void) pr_cont("Westmere events, "); break; - case 42: /* SandyBridge */ - case 45: /* SandyBridge, "Romely-EP" */ + case 42: /* 32nm SandyBridge */ + case 45: /* 32nm SandyBridge-E/EN/EP */ x86_add_quirk(intel_sandybridge_quirk); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -2506,8 +2506,9 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; - case 58: /* IvyBridge */ - case 62: /* IvyBridge EP */ + + case 58: /* 22nm IvyBridge */ + case 62: /* 22nm IvyBridge-EP/EX */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); /* dTLB-load-misses on IVB is different than SNB */ @@ -2539,11 +2540,10 @@ __init int intel_pmu_init(void) break; - case 60: /* Haswell Client */ - case 70: - case 71: - case 63: - case 69: + case 60: /* 
22nm Haswell Core */ + case 63: /* 22nm Haswell Server */ + case 69: /* 22nm Haswell ULT */ + case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -2552,7 +2552,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_hsw_event_constraints; x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; - x86_pmu.extra_regs = intel_snb_extra_regs; + x86_pmu.extra_regs = intel_snbep_extra_regs; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 696ade311ded..46211bcc813e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -108,14 +108,16 @@ static u64 precise_store_data(u64 status) return val; } -static u64 precise_store_data_hsw(struct perf_event *event, u64 status) +static u64 precise_datala_hsw(struct perf_event *event, u64 status) { union perf_mem_data_src dse; - u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK; - dse.val = 0; - dse.mem_op = PERF_MEM_OP_STORE; - dse.mem_lvl = PERF_MEM_LVL_NA; + dse.val = PERF_MEM_NA; + + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + dse.mem_op = PERF_MEM_OP_STORE; + else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW) + dse.mem_op = PERF_MEM_OP_LOAD; /* * L1 info only valid for following events: @@ -125,15 +127,12 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status) * MEM_UOPS_RETIRED.SPLIT_STORES * MEM_UOPS_RETIRED.ALL_STORES */ - if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0) - return dse.mem_lvl; - - if (status & 1) - dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; - else - dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; - - /* 
Nothing else supported. Sorry. */ + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) { + if (status & 1) + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + else + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; + } return dse.val; } @@ -475,7 +474,7 @@ void intel_pmu_enable_bts(u64 config) void intel_pmu_disable_bts(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long debugctlmsr; if (!cpuc->ds) @@ -492,7 +491,7 @@ void intel_pmu_disable_bts(void) int intel_pmu_drain_bts_buffer(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; struct bts_record { u64 from; @@ -569,28 +568,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_slm_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */ - INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */ - INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */ - INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */ - INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */ - INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */ - INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */ - 
INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */ - INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */ - INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */ - INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */ - INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */ - INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */ - INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), EVENT_CONSTRAINT_END }; @@ -626,68 +607,44 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = { struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ - INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). 
*/ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), EVENT_CONSTRAINT_END }; struct event_constraint intel_ivb_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ - INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). 
*/ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), EVENT_CONSTRAINT_END }; struct event_constraint intel_hsw_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ - INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */ - INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */ - INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */ - INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */ - /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), - /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ - INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), - INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ - /* MEM_UOPS_RETIRED.SPLIT_STORES */ - INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), - INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ - INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ - INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */ - INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */ - INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */ - /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */ - INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf), - /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */ - INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf), - /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */ - INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf), - /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */ - INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf), - INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */ - INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* 
RTM_RETIRED.Abort */ - + INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), EVENT_CONSTRAINT_END }; @@ -712,7 +669,7 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) void intel_pmu_pebs_enable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; @@ -727,7 +684,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) void intel_pmu_pebs_disable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; cpuc->pebs_enabled &= ~(1ULL << hwc->idx); @@ -745,7 +702,7 @@ void 
intel_pmu_pebs_disable(struct perf_event *event) void intel_pmu_pebs_enable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->pebs_enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); @@ -753,7 +710,7 @@ void intel_pmu_pebs_enable_all(void) void intel_pmu_pebs_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->pebs_enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, 0); @@ -761,7 +718,7 @@ void intel_pmu_pebs_disable_all(void) static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long from = cpuc->lbr_entries[0].from; unsigned long old_to, to = cpuc->lbr_entries[0].to; unsigned long ip = regs->ip; @@ -864,51 +821,53 @@ static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs) static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { +#define PERF_X86_EVENT_PEBS_HSW_PREC \ + (PERF_X86_EVENT_PEBS_ST_HSW | \ + PERF_X86_EVENT_PEBS_LD_HSW | \ + PERF_X86_EVENT_PEBS_NA_HSW) /* * We cast to the biggest pebs_record but are careful not to * unconditionally access the 'extra' entries. 
*/ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct pebs_record_hsw *pebs = __pebs; struct perf_sample_data data; struct pt_regs regs; u64 sample_type; - int fll, fst; + int fll, fst, dsrc; + int fl = event->hw.flags; if (!intel_pmu_save_and_restart(event)) return; - fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; - fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST | - PERF_X86_EVENT_PEBS_ST_HSW); + sample_type = event->attr.sample_type; + dsrc = sample_type & PERF_SAMPLE_DATA_SRC; + + fll = fl & PERF_X86_EVENT_PEBS_LDLAT; + fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); perf_sample_data_init(&data, 0, event->hw.last_period); data.period = event->hw.last_period; - sample_type = event->attr.sample_type; /* - * if PEBS-LL or PreciseStore + * Use latency for weight (only avail with PEBS-LL) */ - if (fll || fst) { - /* - * Use latency for weight (only avail with PEBS-LL) - */ - if (fll && (sample_type & PERF_SAMPLE_WEIGHT)) - data.weight = pebs->lat; - - /* - * data.data_src encodes the data source - */ - if (sample_type & PERF_SAMPLE_DATA_SRC) { - if (fll) - data.data_src.val = load_latency_data(pebs->dse); - else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) - data.data_src.val = - precise_store_data_hsw(event, pebs->dse); - else - data.data_src.val = precise_store_data(pebs->dse); - } + if (fll && (sample_type & PERF_SAMPLE_WEIGHT)) + data.weight = pebs->lat; + + /* + * data.data_src encodes the data source + */ + if (dsrc) { + u64 val = PERF_MEM_NA; + if (fll) + val = load_latency_data(pebs->dse); + else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) + val = precise_datala_hsw(event, pebs->dse); + else if (fst) + val = precise_store_data(pebs->dse); + data.data_src.val = val; } /* @@ -935,16 +894,16 @@ static void __intel_pmu_pebs_event(struct perf_event *event, else regs.flags &= ~PERF_EFLAGS_EXACT; - if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && + if 
((sample_type & PERF_SAMPLE_ADDR) && x86_pmu.intel_cap.pebs_format >= 1) data.addr = pebs->dla; if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight. */ - if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll) + if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll) data.weight = intel_hsw_weight(pebs); - if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION) + if (sample_type & PERF_SAMPLE_TRANSACTION) data.txn = intel_hsw_transaction(pebs); } @@ -957,7 +916,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; struct perf_event *event = cpuc->events[0]; /* PMC0 only */ struct pebs_record_core *at, *top; @@ -998,7 +957,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; struct perf_event *event = NULL; void *at, *top; @@ -1055,7 +1014,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) * BTS, PEBS probe and setup */ -void intel_ds_init(void) +void __init intel_ds_init(void) { /* * No support for 32bit formats diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 9dd2459a4c73..45fa730a5283 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -133,7 +133,7 @@ static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); static void __intel_pmu_lbr_enable(void) { u64 debugctl; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->lbr_sel) wrmsrl(MSR_LBR_SELECT, 
cpuc->lbr_sel->config); @@ -183,7 +183,7 @@ void intel_pmu_lbr_reset(void) void intel_pmu_lbr_enable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) return; @@ -203,7 +203,7 @@ void intel_pmu_lbr_enable(struct perf_event *event) void intel_pmu_lbr_disable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) return; @@ -220,7 +220,7 @@ void intel_pmu_lbr_disable(struct perf_event *event) void intel_pmu_lbr_enable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->lbr_users) __intel_pmu_lbr_enable(); @@ -228,7 +228,7 @@ void intel_pmu_lbr_enable_all(void) void intel_pmu_lbr_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->lbr_users) __intel_pmu_lbr_disable(); @@ -332,7 +332,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) void intel_pmu_lbr_read(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!cpuc->lbr_users) return; @@ -697,7 +697,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { }; /* core */ -void intel_pmu_lbr_init_core(void) +void __init intel_pmu_lbr_init_core(void) { x86_pmu.lbr_nr = 4; x86_pmu.lbr_tos = MSR_LBR_TOS; @@ -712,7 +712,7 @@ void intel_pmu_lbr_init_core(void) } /* nehalem/westmere */ -void intel_pmu_lbr_init_nhm(void) +void __init intel_pmu_lbr_init_nhm(void) { x86_pmu.lbr_nr = 16; x86_pmu.lbr_tos = MSR_LBR_TOS; @@ -733,7 +733,7 @@ void intel_pmu_lbr_init_nhm(void) } /* sandy bridge */ -void intel_pmu_lbr_init_snb(void) +void __init intel_pmu_lbr_init_snb(void) { x86_pmu.lbr_nr = 16; 
x86_pmu.lbr_tos = MSR_LBR_TOS; @@ -753,7 +753,7 @@ void intel_pmu_lbr_init_snb(void) } /* atom */ -void intel_pmu_lbr_init_atom(void) +void __init intel_pmu_lbr_init_atom(void) { /* * only models starting at stepping 10 seems diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 619f7699487a..d64f275fe274 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -135,7 +135,7 @@ static inline u64 rapl_scale(u64 v) * or use ldexp(count, -32). * Watts = Joules/Time delta */ - return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit); + return v << (32 - __this_cpu_read(rapl_pmu->hw_unit)); } static u64 rapl_event_update(struct perf_event *event) @@ -187,7 +187,7 @@ static void rapl_stop_hrtimer(struct rapl_pmu *pmu) static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); struct perf_event *event; unsigned long flags; @@ -234,7 +234,7 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, static void rapl_pmu_event_start(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); unsigned long flags; spin_lock_irqsave(&pmu->lock, flags); @@ -244,7 +244,7 @@ static void rapl_pmu_event_start(struct perf_event *event, int mode) static void rapl_pmu_event_stop(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); struct hw_perf_event *hwc = &event->hw; unsigned long flags; @@ -278,7 +278,7 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) static int rapl_pmu_event_add(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); struct hw_perf_event *hwc = &event->hw; unsigned long flags; 
@@ -696,7 +696,7 @@ static int __init rapl_pmu_init(void) return -1; } - pmu = __get_cpu_var(rapl_pmu); + pmu = __this_cpu_read(rapl_pmu); pr_info("RAPL PMU detected, hw unit 2^-%d Joules," " API unit is 2^-32 Joules," diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index cfc6f9dfcd90..9762dbd9f3f7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -1,83 +1,39 @@ #include "perf_event_intel_uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; -static struct intel_uncore_type **msr_uncores = empty_uncore; -static struct intel_uncore_type **pci_uncores = empty_uncore; -/* pci bus to socket mapping */ -static int pcibus_to_physid[256] = { [0 ... 255] = -1, }; +struct intel_uncore_type **uncore_msr_uncores = empty_uncore; +struct intel_uncore_type **uncore_pci_uncores = empty_uncore; -static struct pci_dev *extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; +static bool pcidrv_registered; +struct pci_driver *uncore_pci_driver; +/* pci bus to socket mapping */ +int uncore_pcibus_to_physid[256] = { [0 ... 
255] = -1, }; +struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; static DEFINE_RAW_SPINLOCK(uncore_box_lock); - /* mask of cpus that collect uncore events */ static cpumask_t uncore_cpu_mask; /* constraint for the fixed counter */ -static struct event_constraint constraint_fixed = +static struct event_constraint uncore_constraint_fixed = EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); -static struct event_constraint constraint_empty = +struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); -#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ - ((1ULL << (n)) - 1))) - -DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); -DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); -DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); -DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); -DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); -DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); -DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); -DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); -DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); -DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); -DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); -DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); -DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); -DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); -DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); -DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); -DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); -DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); -DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22"); -DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); -DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60"); 
-DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); -DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); -DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); -DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31"); -DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51"); -DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35"); -DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31"); -DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17"); -DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12"); -DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8"); -DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4"); -DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63"); -DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51"); -DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35"); -DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31"); -DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17"); -DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12"); -DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8"); -DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); -DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); - -static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); -static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); -static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event); -static void uncore_pmu_event_read(struct perf_event *event); - -static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) +ssize_t uncore_event_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct uncore_event_desc *event = + container_of(attr, struct 
uncore_event_desc, attr); + return sprintf(buf, "%s", event->config); +} + +struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) { return container_of(event->pmu, struct intel_uncore_pmu, pmu); } -static struct intel_uncore_box * -uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) +struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { struct intel_uncore_box *box; @@ -86,6 +42,9 @@ uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) return box; raw_spin_lock(&uncore_box_lock); + /* Recheck in lock to handle races. */ + if (*per_cpu_ptr(pmu->box, cpu)) + goto out; list_for_each_entry(box, &pmu->box_list, list) { if (box->phys_id == topology_physical_package_id(cpu)) { atomic_inc(&box->refcnt); @@ -93,12 +52,13 @@ uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) break; } } +out: raw_spin_unlock(&uncore_box_lock); return *per_cpu_ptr(pmu->box, cpu); } -static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) +struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) { /* * perf core schedules event on the basis of cpu, uncore events are @@ -107,7 +67,7 @@ static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); } -static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) +u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) { u64 count; @@ -119,7 +79,7 @@ static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_eve /* * generic get constraint function for shared match/mask registers. 
*/ -static struct event_constraint * +struct event_constraint * uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) { struct intel_uncore_extra_reg *er; @@ -154,10 +114,10 @@ uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) return NULL; } - return &constraint_empty; + return &uncore_constraint_empty; } -static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) { struct intel_uncore_extra_reg *er; struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; @@ -178,7 +138,7 @@ static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_even reg1->alloc = 0; } -static u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) +u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) { struct intel_uncore_extra_reg *er; unsigned long flags; @@ -193,2936 +153,6 @@ static u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) return config; } -/* Sandy Bridge-EP uncore support */ -static struct intel_uncore_type snbep_uncore_cbox; -static struct intel_uncore_type snbep_uncore_pcu; - -static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) -{ - struct pci_dev *pdev = box->pci_dev; - int box_ctl = uncore_pci_box_ctl(box); - u32 config = 0; - - if (!pci_read_config_dword(pdev, box_ctl, &config)) { - config |= SNBEP_PMON_BOX_CTL_FRZ; - pci_write_config_dword(pdev, box_ctl, config); - } -} - -static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) -{ - struct pci_dev *pdev = box->pci_dev; - int box_ctl = uncore_pci_box_ctl(box); - u32 config = 0; - - if (!pci_read_config_dword(pdev, box_ctl, &config)) { - config &= ~SNBEP_PMON_BOX_CTL_FRZ; - pci_write_config_dword(pdev, box_ctl, config); - } -} - -static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; 
- struct hw_perf_event *hwc = &event->hw; - - pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); -} - -static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - - pci_write_config_dword(pdev, hwc->config_base, hwc->config); -} - -static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - u64 count = 0; - - pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); - pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); - - return count; -} - -static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) -{ - struct pci_dev *pdev = box->pci_dev; - - pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT); -} - -static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) -{ - u64 config; - unsigned msr; - - msr = uncore_msr_box_ctl(box); - if (msr) { - rdmsrl(msr, config); - config |= SNBEP_PMON_BOX_CTL_FRZ; - wrmsrl(msr, config); - } -} - -static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) -{ - u64 config; - unsigned msr; - - msr = uncore_msr_box_ctl(box); - if (msr) { - rdmsrl(msr, config); - config &= ~SNBEP_PMON_BOX_CTL_FRZ; - wrmsrl(msr, config); - } -} - -static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - - if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0)); - - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); -} - -static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, - struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - wrmsrl(hwc->config_base, hwc->config); -} - -static void 
snbep_uncore_msr_init_box(struct intel_uncore_box *box) -{ - unsigned msr = uncore_msr_box_ctl(box); - - if (msr) - wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); -} - -static struct attribute *snbep_uncore_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - NULL, -}; - -static struct attribute *snbep_uncore_ubox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh5.attr, - NULL, -}; - -static struct attribute *snbep_uncore_cbox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_tid_en.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - &format_attr_filter_tid.attr, - &format_attr_filter_nid.attr, - &format_attr_filter_state.attr, - &format_attr_filter_opc.attr, - NULL, -}; - -static struct attribute *snbep_uncore_pcu_formats_attr[] = { - &format_attr_event_ext.attr, - &format_attr_occ_sel.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh5.attr, - &format_attr_occ_invert.attr, - &format_attr_occ_edge.attr, - &format_attr_filter_band0.attr, - &format_attr_filter_band1.attr, - &format_attr_filter_band2.attr, - &format_attr_filter_band3.attr, - NULL, -}; - -static struct attribute *snbep_uncore_qpi_formats_attr[] = { - &format_attr_event_ext.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - &format_attr_match_rds.attr, - &format_attr_match_rnid30.attr, - &format_attr_match_rnid4.attr, - &format_attr_match_dnid.attr, - &format_attr_match_mc.attr, - &format_attr_match_opc.attr, - &format_attr_match_vnw.attr, - &format_attr_match0.attr, - &format_attr_match1.attr, - &format_attr_mask_rds.attr, - &format_attr_mask_rnid30.attr, - &format_attr_mask_rnid4.attr, - &format_attr_mask_dnid.attr, - &format_attr_mask_mc.attr, - 
&format_attr_mask_opc.attr, - &format_attr_mask_vnw.attr, - &format_attr_mask0.attr, - &format_attr_mask1.attr, - NULL, -}; - -static struct uncore_event_desc snbep_uncore_imc_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), - INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), - INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), - { /* end: all zeroes */ }, -}; - -static struct uncore_event_desc snbep_uncore_qpi_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), - INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), - INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"), - INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"), - { /* end: all zeroes */ }, -}; - -static struct attribute_group snbep_uncore_format_group = { - .name = "format", - .attrs = snbep_uncore_formats_attr, -}; - -static struct attribute_group snbep_uncore_ubox_format_group = { - .name = "format", - .attrs = snbep_uncore_ubox_formats_attr, -}; - -static struct attribute_group snbep_uncore_cbox_format_group = { - .name = "format", - .attrs = snbep_uncore_cbox_formats_attr, -}; - -static struct attribute_group snbep_uncore_pcu_format_group = { - .name = "format", - .attrs = snbep_uncore_pcu_formats_attr, -}; - -static struct attribute_group snbep_uncore_qpi_format_group = { - .name = "format", - .attrs = snbep_uncore_qpi_formats_attr, -}; - -#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ - .init_box = snbep_uncore_msr_init_box, \ - .disable_box = snbep_uncore_msr_disable_box, \ - .enable_box = snbep_uncore_msr_enable_box, \ - .disable_event = snbep_uncore_msr_disable_event, \ - .enable_event = snbep_uncore_msr_enable_event, \ - .read_counter = uncore_msr_read_counter - -static struct intel_uncore_ops snbep_uncore_msr_ops = { - SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), -}; - -#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \ - .init_box = snbep_uncore_pci_init_box, \ - .disable_box = 
snbep_uncore_pci_disable_box, \ - .enable_box = snbep_uncore_pci_enable_box, \ - .disable_event = snbep_uncore_pci_disable_event, \ - .read_counter = snbep_uncore_pci_read_counter - -static struct intel_uncore_ops snbep_uncore_pci_ops = { - SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), - .enable_event = snbep_uncore_pci_enable_event, \ -}; - -static struct event_constraint snbep_uncore_cbox_constraints[] = { - UNCORE_EVENT_CONSTRAINT(0x01, 0x1), - UNCORE_EVENT_CONSTRAINT(0x02, 0x3), - UNCORE_EVENT_CONSTRAINT(0x04, 0x3), - UNCORE_EVENT_CONSTRAINT(0x05, 0x3), - UNCORE_EVENT_CONSTRAINT(0x07, 0x3), - UNCORE_EVENT_CONSTRAINT(0x09, 0x3), - UNCORE_EVENT_CONSTRAINT(0x11, 0x1), - UNCORE_EVENT_CONSTRAINT(0x12, 0x3), - UNCORE_EVENT_CONSTRAINT(0x13, 0x3), - UNCORE_EVENT_CONSTRAINT(0x1b, 0xc), - UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), - UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), - UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), - EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), - UNCORE_EVENT_CONSTRAINT(0x21, 0x3), - UNCORE_EVENT_CONSTRAINT(0x23, 0x3), - UNCORE_EVENT_CONSTRAINT(0x31, 0x3), - UNCORE_EVENT_CONSTRAINT(0x32, 0x3), - UNCORE_EVENT_CONSTRAINT(0x33, 0x3), - UNCORE_EVENT_CONSTRAINT(0x34, 0x3), - UNCORE_EVENT_CONSTRAINT(0x35, 0x3), - UNCORE_EVENT_CONSTRAINT(0x36, 0x1), - UNCORE_EVENT_CONSTRAINT(0x37, 0x3), - UNCORE_EVENT_CONSTRAINT(0x38, 0x3), - UNCORE_EVENT_CONSTRAINT(0x39, 0x3), - UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), - EVENT_CONSTRAINT_END -}; - -static struct event_constraint snbep_uncore_r2pcie_constraints[] = { - UNCORE_EVENT_CONSTRAINT(0x10, 0x3), - UNCORE_EVENT_CONSTRAINT(0x11, 0x3), - UNCORE_EVENT_CONSTRAINT(0x12, 0x1), - UNCORE_EVENT_CONSTRAINT(0x23, 0x3), - UNCORE_EVENT_CONSTRAINT(0x24, 0x3), - UNCORE_EVENT_CONSTRAINT(0x25, 0x3), - UNCORE_EVENT_CONSTRAINT(0x26, 0x3), - UNCORE_EVENT_CONSTRAINT(0x32, 0x3), - UNCORE_EVENT_CONSTRAINT(0x33, 0x3), - UNCORE_EVENT_CONSTRAINT(0x34, 0x3), - EVENT_CONSTRAINT_END -}; - -static struct event_constraint snbep_uncore_r3qpi_constraints[] = { - 
UNCORE_EVENT_CONSTRAINT(0x10, 0x3), - UNCORE_EVENT_CONSTRAINT(0x11, 0x3), - UNCORE_EVENT_CONSTRAINT(0x12, 0x3), - UNCORE_EVENT_CONSTRAINT(0x13, 0x1), - UNCORE_EVENT_CONSTRAINT(0x20, 0x3), - UNCORE_EVENT_CONSTRAINT(0x21, 0x3), - UNCORE_EVENT_CONSTRAINT(0x22, 0x3), - UNCORE_EVENT_CONSTRAINT(0x23, 0x3), - UNCORE_EVENT_CONSTRAINT(0x24, 0x3), - UNCORE_EVENT_CONSTRAINT(0x25, 0x3), - UNCORE_EVENT_CONSTRAINT(0x26, 0x3), - UNCORE_EVENT_CONSTRAINT(0x28, 0x3), - UNCORE_EVENT_CONSTRAINT(0x29, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2a, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2b, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), - UNCORE_EVENT_CONSTRAINT(0x30, 0x3), - UNCORE_EVENT_CONSTRAINT(0x31, 0x3), - UNCORE_EVENT_CONSTRAINT(0x32, 0x3), - UNCORE_EVENT_CONSTRAINT(0x33, 0x3), - UNCORE_EVENT_CONSTRAINT(0x34, 0x3), - UNCORE_EVENT_CONSTRAINT(0x36, 0x3), - UNCORE_EVENT_CONSTRAINT(0x37, 0x3), - UNCORE_EVENT_CONSTRAINT(0x38, 0x3), - UNCORE_EVENT_CONSTRAINT(0x39, 0x3), - EVENT_CONSTRAINT_END -}; - -static struct intel_uncore_type snbep_uncore_ubox = { - .name = "ubox", - .num_counters = 2, - .num_boxes = 1, - .perf_ctr_bits = 44, - .fixed_ctr_bits = 48, - .perf_ctr = SNBEP_U_MSR_PMON_CTR0, - .event_ctl = SNBEP_U_MSR_PMON_CTL0, - .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, - .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, - .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, - .ops = &snbep_uncore_msr_ops, - .format_group = &snbep_uncore_ubox_format_group, -}; - -static struct extra_reg snbep_uncore_cbox_extra_regs[] = { - SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, - SNBEP_CBO_PMON_CTL_TID_EN, 0x1), - SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6), - SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6), - SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 
0xffff, 0x6), - SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), - SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa), - SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa), - SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa), - SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa), - SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2), - EVENT_EXTRA_END -}; - -static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct intel_uncore_extra_reg *er = &box->shared_regs[0]; - int i; - - if (uncore_box_is_fake(box)) - return; - - for (i = 0; i < 5; i++) { - if (reg1->alloc & (0x1 << i)) - atomic_sub(1 << (i * 6), &er->ref); - } - reg1->alloc = 0; -} - -static struct event_constraint * -__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event, - u64 (*cbox_filter_mask)(int fields)) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct intel_uncore_extra_reg *er = &box->shared_regs[0]; - int i, alloc = 0; - unsigned long flags; - u64 mask; - - if (reg1->idx == EXTRA_REG_NONE) - return NULL; - - raw_spin_lock_irqsave(&er->lock, flags); - for (i = 0; i < 5; i++) { - if (!(reg1->idx & (0x1 << i))) - continue; - if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) - continue; - - mask = cbox_filter_mask(0x1 << i); - if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) || - !((reg1->config ^ er->config) & mask)) { - 
atomic_add(1 << (i * 6), &er->ref); - er->config &= ~mask; - er->config |= reg1->config & mask; - alloc |= (0x1 << i); - } else { - break; - } - } - raw_spin_unlock_irqrestore(&er->lock, flags); - if (i < 5) - goto fail; - - if (!uncore_box_is_fake(box)) - reg1->alloc |= alloc; - - return NULL; -fail: - for (; i >= 0; i--) { - if (alloc & (0x1 << i)) - atomic_sub(1 << (i * 6), &er->ref); - } - return &constraint_empty; -} - -static u64 snbep_cbox_filter_mask(int fields) -{ - u64 mask = 0; - - if (fields & 0x1) - mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID; - if (fields & 0x2) - mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID; - if (fields & 0x4) - mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE; - if (fields & 0x8) - mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC; - - return mask; -} - -static struct event_constraint * -snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask); -} - -static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct extra_reg *er; - int idx = 0; - - for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) { - if (er->event != (event->hw.config & er->config_mask)) - continue; - idx |= er->idx; - } - - if (idx) { - reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + - SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; - reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx); - reg1->idx = idx; - } - return 0; -} - -static struct intel_uncore_ops snbep_uncore_cbox_ops = { - SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), - .hw_config = snbep_cbox_hw_config, - .get_constraint = snbep_cbox_get_constraint, - .put_constraint = snbep_cbox_put_constraint, -}; - -static struct intel_uncore_type snbep_uncore_cbox = { - .name = "cbox", - .num_counters = 4, - .num_boxes = 8, - .perf_ctr_bits = 44, - .event_ctl = SNBEP_C0_MSR_PMON_CTL0, - .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, - .event_mask = 
SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, - .msr_offset = SNBEP_CBO_MSR_OFFSET, - .num_shared_regs = 1, - .constraints = snbep_uncore_cbox_constraints, - .ops = &snbep_uncore_cbox_ops, - .format_group = &snbep_uncore_cbox_format_group, -}; - -static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - u64 config = reg1->config; - - if (new_idx > reg1->idx) - config <<= 8 * (new_idx - reg1->idx); - else - config >>= 8 * (reg1->idx - new_idx); - - if (modify) { - hwc->config += new_idx - reg1->idx; - reg1->config = config; - reg1->idx = new_idx; - } - return config; -} - -static struct event_constraint * -snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct intel_uncore_extra_reg *er = &box->shared_regs[0]; - unsigned long flags; - int idx = reg1->idx; - u64 mask, config1 = reg1->config; - bool ok = false; - - if (reg1->idx == EXTRA_REG_NONE || - (!uncore_box_is_fake(box) && reg1->alloc)) - return NULL; -again: - mask = 0xffULL << (idx * 8); - raw_spin_lock_irqsave(&er->lock, flags); - if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || - !((config1 ^ er->config) & mask)) { - atomic_add(1 << (idx * 8), &er->ref); - er->config &= ~mask; - er->config |= config1 & mask; - ok = true; - } - raw_spin_unlock_irqrestore(&er->lock, flags); - - if (!ok) { - idx = (idx + 1) % 4; - if (idx != reg1->idx) { - config1 = snbep_pcu_alter_er(event, idx, false); - goto again; - } - return &constraint_empty; - } - - if (!uncore_box_is_fake(box)) { - if (idx != reg1->idx) - snbep_pcu_alter_er(event, idx, true); - reg1->alloc = 1; - } - return NULL; -} - -static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct intel_uncore_extra_reg 
*er = &box->shared_regs[0]; - - if (uncore_box_is_fake(box) || !reg1->alloc) - return; - - atomic_sub(1 << (reg1->idx * 8), &er->ref); - reg1->alloc = 0; -} - -static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; - - if (ev_sel >= 0xb && ev_sel <= 0xe) { - reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; - reg1->idx = ev_sel - 0xb; - reg1->config = event->attr.config1 & (0xff << reg1->idx); - } - return 0; -} - -static struct intel_uncore_ops snbep_uncore_pcu_ops = { - SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), - .hw_config = snbep_pcu_hw_config, - .get_constraint = snbep_pcu_get_constraint, - .put_constraint = snbep_pcu_put_constraint, -}; - -static struct intel_uncore_type snbep_uncore_pcu = { - .name = "pcu", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, - .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, - .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, - .num_shared_regs = 1, - .ops = &snbep_uncore_pcu_ops, - .format_group = &snbep_uncore_pcu_format_group, -}; - -static struct intel_uncore_type *snbep_msr_uncores[] = { - &snbep_uncore_ubox, - &snbep_uncore_cbox, - &snbep_uncore_pcu, - NULL, -}; - -enum { - SNBEP_PCI_QPI_PORT0_FILTER, - SNBEP_PCI_QPI_PORT1_FILTER, -}; - -static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - - if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) { - reg1->idx = 0; - reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0; - reg1->config = event->attr.config1; - reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0; - reg2->config = event->attr.config2; - } - return 0; -} - -static void 
snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - - if (reg1->idx != EXTRA_REG_NONE) { - int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; - struct pci_dev *filter_pdev = extra_pci_dev[box->phys_id][idx]; - WARN_ON_ONCE(!filter_pdev); - if (filter_pdev) { - pci_write_config_dword(filter_pdev, reg1->reg, - (u32)reg1->config); - pci_write_config_dword(filter_pdev, reg1->reg + 4, - (u32)(reg1->config >> 32)); - pci_write_config_dword(filter_pdev, reg2->reg, - (u32)reg2->config); - pci_write_config_dword(filter_pdev, reg2->reg + 4, - (u32)(reg2->config >> 32)); - } - } - - pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); -} - -static struct intel_uncore_ops snbep_uncore_qpi_ops = { - SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), - .enable_event = snbep_qpi_enable_event, - .hw_config = snbep_qpi_hw_config, - .get_constraint = uncore_get_constraint, - .put_constraint = uncore_put_constraint, -}; - -#define SNBEP_UNCORE_PCI_COMMON_INIT() \ - .perf_ctr = SNBEP_PCI_PMON_CTR0, \ - .event_ctl = SNBEP_PCI_PMON_CTL0, \ - .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ - .ops = &snbep_uncore_pci_ops, \ - .format_group = &snbep_uncore_format_group - -static struct intel_uncore_type snbep_uncore_ha = { - .name = "ha", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - SNBEP_UNCORE_PCI_COMMON_INIT(), -}; - -static struct intel_uncore_type snbep_uncore_imc = { - .name = "imc", - .num_counters = 4, - .num_boxes = 4, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, - .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, - .event_descs = snbep_uncore_imc_events, - SNBEP_UNCORE_PCI_COMMON_INIT(), -}; - -static struct intel_uncore_type snbep_uncore_qpi = 
{ - .name = "qpi", - .num_counters = 4, - .num_boxes = 2, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCI_PMON_CTR0, - .event_ctl = SNBEP_PCI_PMON_CTL0, - .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, - .num_shared_regs = 1, - .ops = &snbep_uncore_qpi_ops, - .event_descs = snbep_uncore_qpi_events, - .format_group = &snbep_uncore_qpi_format_group, -}; - - -static struct intel_uncore_type snbep_uncore_r2pcie = { - .name = "r2pcie", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 44, - .constraints = snbep_uncore_r2pcie_constraints, - SNBEP_UNCORE_PCI_COMMON_INIT(), -}; - -static struct intel_uncore_type snbep_uncore_r3qpi = { - .name = "r3qpi", - .num_counters = 3, - .num_boxes = 2, - .perf_ctr_bits = 44, - .constraints = snbep_uncore_r3qpi_constraints, - SNBEP_UNCORE_PCI_COMMON_INIT(), -}; - -enum { - SNBEP_PCI_UNCORE_HA, - SNBEP_PCI_UNCORE_IMC, - SNBEP_PCI_UNCORE_QPI, - SNBEP_PCI_UNCORE_R2PCIE, - SNBEP_PCI_UNCORE_R3QPI, -}; - -static struct intel_uncore_type *snbep_pci_uncores[] = { - [SNBEP_PCI_UNCORE_HA] = &snbep_uncore_ha, - [SNBEP_PCI_UNCORE_IMC] = &snbep_uncore_imc, - [SNBEP_PCI_UNCORE_QPI] = &snbep_uncore_qpi, - [SNBEP_PCI_UNCORE_R2PCIE] = &snbep_uncore_r2pcie, - [SNBEP_PCI_UNCORE_R3QPI] = &snbep_uncore_r3qpi, - NULL, -}; - -static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { - { /* Home Agent */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0), - }, - { /* MC Channel 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0), - }, - { /* MC Channel 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1), - }, - { /* MC Channel 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2), - }, - { /* MC Channel 3 */ 
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3), - }, - { /* QPI Port 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0), - }, - { /* QPI Port 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1), - }, - { /* R2PCIe */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0), - }, - { /* R3QPI Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0), - }, - { /* R3QPI Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1), - }, - { /* QPI Port 0 filter */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - SNBEP_PCI_QPI_PORT0_FILTER), - }, - { /* QPI Port 0 filter */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - SNBEP_PCI_QPI_PORT1_FILTER), - }, - { /* end: all zeroes */ } -}; - -static struct pci_driver snbep_uncore_pci_driver = { - .name = "snbep_uncore", - .id_table = snbep_uncore_pci_ids, -}; - -/* - * build pci bus to socket mapping - */ -static int snbep_pci2phy_map_init(int devid) -{ - struct pci_dev *ubox_dev = NULL; - int i, bus, nodeid; - int err = 0; - u32 config = 0; - - while (1) { - /* find the UBOX device */ - ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev); - if (!ubox_dev) - break; - bus = ubox_dev->bus->number; - /* get the Node ID of the local register */ - err = pci_read_config_dword(ubox_dev, 0x40, &config); - if (err) - break; - nodeid = config; - /* get the Node ID mapping */ - err = pci_read_config_dword(ubox_dev, 0x54, &config); - if (err) - 
break; - /* - * every three bits in the Node ID mapping register maps - * to a particular node. - */ - for (i = 0; i < 8; i++) { - if (nodeid == ((config >> (3 * i)) & 0x7)) { - pcibus_to_physid[bus] = i; - break; - } - } - } - - if (!err) { - /* - * For PCI bus with no UBOX device, find the next bus - * that has UBOX device and use its mapping. - */ - i = -1; - for (bus = 255; bus >= 0; bus--) { - if (pcibus_to_physid[bus] >= 0) - i = pcibus_to_physid[bus]; - else - pcibus_to_physid[bus] = i; - } - } - - if (ubox_dev) - pci_dev_put(ubox_dev); - - return err ? pcibios_err_to_errno(err) : 0; -} -/* end of Sandy Bridge-EP uncore support */ - -/* IvyTown uncore support */ -static void ivt_uncore_msr_init_box(struct intel_uncore_box *box) -{ - unsigned msr = uncore_msr_box_ctl(box); - if (msr) - wrmsrl(msr, IVT_PMON_BOX_CTL_INT); -} - -static void ivt_uncore_pci_init_box(struct intel_uncore_box *box) -{ - struct pci_dev *pdev = box->pci_dev; - - pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVT_PMON_BOX_CTL_INT); -} - -#define IVT_UNCORE_MSR_OPS_COMMON_INIT() \ - .init_box = ivt_uncore_msr_init_box, \ - .disable_box = snbep_uncore_msr_disable_box, \ - .enable_box = snbep_uncore_msr_enable_box, \ - .disable_event = snbep_uncore_msr_disable_event, \ - .enable_event = snbep_uncore_msr_enable_event, \ - .read_counter = uncore_msr_read_counter - -static struct intel_uncore_ops ivt_uncore_msr_ops = { - IVT_UNCORE_MSR_OPS_COMMON_INIT(), -}; - -static struct intel_uncore_ops ivt_uncore_pci_ops = { - .init_box = ivt_uncore_pci_init_box, - .disable_box = snbep_uncore_pci_disable_box, - .enable_box = snbep_uncore_pci_enable_box, - .disable_event = snbep_uncore_pci_disable_event, - .enable_event = snbep_uncore_pci_enable_event, - .read_counter = snbep_uncore_pci_read_counter, -}; - -#define IVT_UNCORE_PCI_COMMON_INIT() \ - .perf_ctr = SNBEP_PCI_PMON_CTR0, \ - .event_ctl = SNBEP_PCI_PMON_CTL0, \ - .event_mask = IVT_PMON_RAW_EVENT_MASK, \ - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, 
\ - .ops = &ivt_uncore_pci_ops, \ - .format_group = &ivt_uncore_format_group - -static struct attribute *ivt_uncore_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - NULL, -}; - -static struct attribute *ivt_uncore_ubox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh5.attr, - NULL, -}; - -static struct attribute *ivt_uncore_cbox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_tid_en.attr, - &format_attr_thresh8.attr, - &format_attr_filter_tid.attr, - &format_attr_filter_link.attr, - &format_attr_filter_state2.attr, - &format_attr_filter_nid2.attr, - &format_attr_filter_opc2.attr, - NULL, -}; - -static struct attribute *ivt_uncore_pcu_formats_attr[] = { - &format_attr_event_ext.attr, - &format_attr_occ_sel.attr, - &format_attr_edge.attr, - &format_attr_thresh5.attr, - &format_attr_occ_invert.attr, - &format_attr_occ_edge.attr, - &format_attr_filter_band0.attr, - &format_attr_filter_band1.attr, - &format_attr_filter_band2.attr, - &format_attr_filter_band3.attr, - NULL, -}; - -static struct attribute *ivt_uncore_qpi_formats_attr[] = { - &format_attr_event_ext.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_thresh8.attr, - &format_attr_match_rds.attr, - &format_attr_match_rnid30.attr, - &format_attr_match_rnid4.attr, - &format_attr_match_dnid.attr, - &format_attr_match_mc.attr, - &format_attr_match_opc.attr, - &format_attr_match_vnw.attr, - &format_attr_match0.attr, - &format_attr_match1.attr, - &format_attr_mask_rds.attr, - &format_attr_mask_rnid30.attr, - &format_attr_mask_rnid4.attr, - &format_attr_mask_dnid.attr, - &format_attr_mask_mc.attr, - &format_attr_mask_opc.attr, - &format_attr_mask_vnw.attr, - &format_attr_mask0.attr, - &format_attr_mask1.attr, - NULL, -}; - -static 
struct attribute_group ivt_uncore_format_group = { - .name = "format", - .attrs = ivt_uncore_formats_attr, -}; - -static struct attribute_group ivt_uncore_ubox_format_group = { - .name = "format", - .attrs = ivt_uncore_ubox_formats_attr, -}; - -static struct attribute_group ivt_uncore_cbox_format_group = { - .name = "format", - .attrs = ivt_uncore_cbox_formats_attr, -}; - -static struct attribute_group ivt_uncore_pcu_format_group = { - .name = "format", - .attrs = ivt_uncore_pcu_formats_attr, -}; - -static struct attribute_group ivt_uncore_qpi_format_group = { - .name = "format", - .attrs = ivt_uncore_qpi_formats_attr, -}; - -static struct intel_uncore_type ivt_uncore_ubox = { - .name = "ubox", - .num_counters = 2, - .num_boxes = 1, - .perf_ctr_bits = 44, - .fixed_ctr_bits = 48, - .perf_ctr = SNBEP_U_MSR_PMON_CTR0, - .event_ctl = SNBEP_U_MSR_PMON_CTL0, - .event_mask = IVT_U_MSR_PMON_RAW_EVENT_MASK, - .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, - .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, - .ops = &ivt_uncore_msr_ops, - .format_group = &ivt_uncore_ubox_format_group, -}; - -static struct extra_reg ivt_uncore_cbox_extra_regs[] = { - SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, - SNBEP_CBO_PMON_CTL_TID_EN, 0x1), - SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), - - SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18), - 
SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18), - SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), - SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), - SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8), - EVENT_EXTRA_END -}; - -static u64 ivt_cbox_filter_mask(int fields) -{ - u64 mask = 0; - - if (fields & 0x1) - mask |= IVT_CB0_MSR_PMON_BOX_FILTER_TID; - if (fields & 0x2) - mask |= IVT_CB0_MSR_PMON_BOX_FILTER_LINK; - if (fields & 0x4) - mask |= IVT_CB0_MSR_PMON_BOX_FILTER_STATE; - if (fields & 0x8) - mask |= IVT_CB0_MSR_PMON_BOX_FILTER_NID; - if (fields & 0x10) - mask |= IVT_CB0_MSR_PMON_BOX_FILTER_OPC; - - return mask; -} - -static struct event_constraint * -ivt_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - return __snbep_cbox_get_constraint(box, event, ivt_cbox_filter_mask); -} - -static int ivt_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct extra_reg *er; - int idx = 0; - - for (er = ivt_uncore_cbox_extra_regs; er->msr; er++) { - if (er->event != (event->hw.config & er->config_mask)) - continue; - idx |= er->idx; - } - - if (idx) { - reg1->reg 
= SNBEP_C0_MSR_PMON_BOX_FILTER + - SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; - reg1->config = event->attr.config1 & ivt_cbox_filter_mask(idx); - reg1->idx = idx; - } - return 0; -} - -static void ivt_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - - if (reg1->idx != EXTRA_REG_NONE) { - u64 filter = uncore_shared_reg_config(box, 0); - wrmsrl(reg1->reg, filter & 0xffffffff); - wrmsrl(reg1->reg + 6, filter >> 32); - } - - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); -} - -static struct intel_uncore_ops ivt_uncore_cbox_ops = { - .init_box = ivt_uncore_msr_init_box, - .disable_box = snbep_uncore_msr_disable_box, - .enable_box = snbep_uncore_msr_enable_box, - .disable_event = snbep_uncore_msr_disable_event, - .enable_event = ivt_cbox_enable_event, - .read_counter = uncore_msr_read_counter, - .hw_config = ivt_cbox_hw_config, - .get_constraint = ivt_cbox_get_constraint, - .put_constraint = snbep_cbox_put_constraint, -}; - -static struct intel_uncore_type ivt_uncore_cbox = { - .name = "cbox", - .num_counters = 4, - .num_boxes = 15, - .perf_ctr_bits = 44, - .event_ctl = SNBEP_C0_MSR_PMON_CTL0, - .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, - .event_mask = IVT_CBO_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, - .msr_offset = SNBEP_CBO_MSR_OFFSET, - .num_shared_regs = 1, - .constraints = snbep_uncore_cbox_constraints, - .ops = &ivt_uncore_cbox_ops, - .format_group = &ivt_uncore_cbox_format_group, -}; - -static struct intel_uncore_ops ivt_uncore_pcu_ops = { - IVT_UNCORE_MSR_OPS_COMMON_INIT(), - .hw_config = snbep_pcu_hw_config, - .get_constraint = snbep_pcu_get_constraint, - .put_constraint = snbep_pcu_put_constraint, -}; - -static struct intel_uncore_type ivt_uncore_pcu = { - .name = "pcu", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, - .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, 
- .event_mask = IVT_PCU_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, - .num_shared_regs = 1, - .ops = &ivt_uncore_pcu_ops, - .format_group = &ivt_uncore_pcu_format_group, -}; - -static struct intel_uncore_type *ivt_msr_uncores[] = { - &ivt_uncore_ubox, - &ivt_uncore_cbox, - &ivt_uncore_pcu, - NULL, -}; - -static struct intel_uncore_type ivt_uncore_ha = { - .name = "ha", - .num_counters = 4, - .num_boxes = 2, - .perf_ctr_bits = 48, - IVT_UNCORE_PCI_COMMON_INIT(), -}; - -static struct intel_uncore_type ivt_uncore_imc = { - .name = "imc", - .num_counters = 4, - .num_boxes = 8, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, - .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, - IVT_UNCORE_PCI_COMMON_INIT(), -}; - -/* registers in IRP boxes are not properly aligned */ -static unsigned ivt_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4}; -static unsigned ivt_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0}; - -static void ivt_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - - pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], - hwc->config | SNBEP_PMON_CTL_EN); -} - -static void ivt_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - - pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], hwc->config); -} - -static u64 ivt_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - u64 count = 0; - - pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx], (u32 *)&count); - pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); - - return count; -} - -static struct intel_uncore_ops ivt_uncore_irp_ops = { - .init_box = 
ivt_uncore_pci_init_box, - .disable_box = snbep_uncore_pci_disable_box, - .enable_box = snbep_uncore_pci_enable_box, - .disable_event = ivt_uncore_irp_disable_event, - .enable_event = ivt_uncore_irp_enable_event, - .read_counter = ivt_uncore_irp_read_counter, -}; - -static struct intel_uncore_type ivt_uncore_irp = { - .name = "irp", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_mask = IVT_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, - .ops = &ivt_uncore_irp_ops, - .format_group = &ivt_uncore_format_group, -}; - -static struct intel_uncore_ops ivt_uncore_qpi_ops = { - .init_box = ivt_uncore_pci_init_box, - .disable_box = snbep_uncore_pci_disable_box, - .enable_box = snbep_uncore_pci_enable_box, - .disable_event = snbep_uncore_pci_disable_event, - .enable_event = snbep_qpi_enable_event, - .read_counter = snbep_uncore_pci_read_counter, - .hw_config = snbep_qpi_hw_config, - .get_constraint = uncore_get_constraint, - .put_constraint = uncore_put_constraint, -}; - -static struct intel_uncore_type ivt_uncore_qpi = { - .name = "qpi", - .num_counters = 4, - .num_boxes = 3, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCI_PMON_CTR0, - .event_ctl = SNBEP_PCI_PMON_CTL0, - .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, - .num_shared_regs = 1, - .ops = &ivt_uncore_qpi_ops, - .format_group = &ivt_uncore_qpi_format_group, -}; - -static struct intel_uncore_type ivt_uncore_r2pcie = { - .name = "r2pcie", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 44, - .constraints = snbep_uncore_r2pcie_constraints, - IVT_UNCORE_PCI_COMMON_INIT(), -}; - -static struct intel_uncore_type ivt_uncore_r3qpi = { - .name = "r3qpi", - .num_counters = 3, - .num_boxes = 2, - .perf_ctr_bits = 44, - .constraints = snbep_uncore_r3qpi_constraints, - IVT_UNCORE_PCI_COMMON_INIT(), -}; - -enum { - IVT_PCI_UNCORE_HA, - IVT_PCI_UNCORE_IMC, - IVT_PCI_UNCORE_IRP, - IVT_PCI_UNCORE_QPI, - IVT_PCI_UNCORE_R2PCIE, - IVT_PCI_UNCORE_R3QPI, 
-}; - -static struct intel_uncore_type *ivt_pci_uncores[] = { - [IVT_PCI_UNCORE_HA] = &ivt_uncore_ha, - [IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc, - [IVT_PCI_UNCORE_IRP] = &ivt_uncore_irp, - [IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi, - [IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie, - [IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi, - NULL, -}; - -static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { - { /* Home Agent 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0), - }, - { /* Home Agent 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 1), - }, - { /* MC0 Channel 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 0), - }, - { /* MC0 Channel 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 1), - }, - { /* MC0 Channel 3 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 2), - }, - { /* MC0 Channel 4 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 3), - }, - { /* MC1 Channel 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 4), - }, - { /* MC1 Channel 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 5), - }, - { /* MC1 Channel 3 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 6), - }, - { /* MC1 Channel 4 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7), - }, - { /* IRP */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IRP, 0), - }, - { /* QPI0 Port 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0), - }, - { /* QPI0 Port 1 */ - 
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 1), - }, - { /* QPI1 Port 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 2), - }, - { /* R2PCIe */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R2PCIE, 0), - }, - { /* R3QPI0 Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 0), - }, - { /* R3QPI0 Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 1), - }, - { /* R3QPI1 Link 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2), - }, - { /* QPI Port 0 filter */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - SNBEP_PCI_QPI_PORT0_FILTER), - }, - { /* QPI Port 0 filter */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - SNBEP_PCI_QPI_PORT1_FILTER), - }, - { /* end: all zeroes */ } -}; - -static struct pci_driver ivt_uncore_pci_driver = { - .name = "ivt_uncore", - .id_table = ivt_uncore_pci_ids, -}; -/* end of IvyTown uncore support */ - -/* Sandy Bridge uncore support */ -static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->idx < UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); - else - wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); -} - -static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - wrmsrl(event->hw.config_base, 0); -} - -static void snb_uncore_msr_init_box(struct intel_uncore_box *box) -{ - if (box->pmu->pmu_idx == 0) { - wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, - SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); - } -} - -static struct uncore_event_desc 
snb_uncore_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), - { /* end: all zeroes */ }, -}; - -static struct attribute *snb_uncore_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_cmask5.attr, - NULL, -}; - -static struct attribute_group snb_uncore_format_group = { - .name = "format", - .attrs = snb_uncore_formats_attr, -}; - -static struct intel_uncore_ops snb_uncore_msr_ops = { - .init_box = snb_uncore_msr_init_box, - .disable_event = snb_uncore_msr_disable_event, - .enable_event = snb_uncore_msr_enable_event, - .read_counter = uncore_msr_read_counter, -}; - -static struct event_constraint snb_uncore_cbox_constraints[] = { - UNCORE_EVENT_CONSTRAINT(0x80, 0x1), - UNCORE_EVENT_CONSTRAINT(0x83, 0x1), - EVENT_CONSTRAINT_END -}; - -static struct intel_uncore_type snb_uncore_cbox = { - .name = "cbox", - .num_counters = 2, - .num_boxes = 4, - .perf_ctr_bits = 44, - .fixed_ctr_bits = 48, - .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, - .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, - .fixed_ctr = SNB_UNC_FIXED_CTR, - .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, - .single_fixed = 1, - .event_mask = SNB_UNC_RAW_EVENT_MASK, - .msr_offset = SNB_UNC_CBO_MSR_OFFSET, - .constraints = snb_uncore_cbox_constraints, - .ops = &snb_uncore_msr_ops, - .format_group = &snb_uncore_format_group, - .event_descs = snb_uncore_events, -}; - -static struct intel_uncore_type *snb_msr_uncores[] = { - &snb_uncore_cbox, - NULL, -}; - -enum { - SNB_PCI_UNCORE_IMC, -}; - -static struct uncore_event_desc snb_uncore_imc_events[] = { - INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"), - INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"), - INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"), - - INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"), - INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), - INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), - - { /* end: all zeroes */ }, -}; - 
-#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff -#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48 - -/* page size multiple covering all config regs */ -#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000 - -#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1 -#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050 -#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2 -#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 -#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE - -static struct attribute *snb_uncore_imc_formats_attr[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group snb_uncore_imc_format_group = { - .name = "format", - .attrs = snb_uncore_imc_formats_attr, -}; - -static void snb_uncore_imc_init_box(struct intel_uncore_box *box) -{ - struct pci_dev *pdev = box->pci_dev; - int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET; - resource_size_t addr; - u32 pci_dword; - - pci_read_config_dword(pdev, where, &pci_dword); - addr = pci_dword; - -#ifdef CONFIG_PHYS_ADDR_T_64BIT - pci_read_config_dword(pdev, where + 4, &pci_dword); - addr |= ((resource_size_t)pci_dword << 32); -#endif - - addr &= ~(PAGE_SIZE - 1); - - box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); - box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; -} - -static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) -{} - -static void snb_uncore_imc_disable_box(struct intel_uncore_box *box) -{} - -static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{} - -static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) -{} - -static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); -} - -/* - * custom event_init() function because we define our own fixed, free - * running counters, so we do not want to conflict with generic uncore - * logic. 
Also simplifies processing - */ -static int snb_uncore_imc_event_init(struct perf_event *event) -{ - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - struct hw_perf_event *hwc = &event->hw; - u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK; - int idx, base; - - if (event->attr.type != event->pmu->type) - return -ENOENT; - - pmu = uncore_event_to_pmu(event); - /* no device found for this pmu */ - if (pmu->func_id < 0) - return -ENOENT; - - /* Sampling not supported yet */ - if (hwc->sample_period) - return -EINVAL; - - /* unsupported modes and filters */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest || - event->attr.sample_period) /* no sampling */ - return -EINVAL; - - /* - * Place all uncore events for a particular physical package - * onto a single cpu - */ - if (event->cpu < 0) - return -EINVAL; - - /* check only supported bits are set */ - if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK) - return -EINVAL; - - box = uncore_pmu_to_box(pmu, event->cpu); - if (!box || box->cpu < 0) - return -EINVAL; - - event->cpu = box->cpu; - - event->hw.idx = -1; - event->hw.last_tag = ~0ULL; - event->hw.extra_reg.idx = EXTRA_REG_NONE; - event->hw.branch_reg.idx = EXTRA_REG_NONE; - /* - * check event is known (whitelist, determines counter) - */ - switch (cfg) { - case SNB_UNCORE_PCI_IMC_DATA_READS: - base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE; - idx = UNCORE_PMC_IDX_FIXED; - break; - case SNB_UNCORE_PCI_IMC_DATA_WRITES: - base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; - idx = UNCORE_PMC_IDX_FIXED + 1; - break; - default: - return -EINVAL; - } - - /* must be done before validate_group */ - event->hw.event_base = base; - event->hw.config = cfg; - event->hw.idx = idx; - - /* no group validation needed, we have free running counters */ - - return 0; -} - -static int snb_uncore_imc_hw_config(struct intel_uncore_box 
*box, struct perf_event *event) -{ - return 0; -} - -static void snb_uncore_imc_event_start(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - u64 count; - - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - - event->hw.state = 0; - box->n_active++; - - list_add_tail(&event->active_entry, &box->active_list); - - count = snb_uncore_imc_read_counter(box, event); - local64_set(&event->hw.prev_count, count); - - if (box->n_active == 1) - uncore_pmu_start_hrtimer(box); -} - -static void snb_uncore_imc_event_stop(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - struct hw_perf_event *hwc = &event->hw; - - if (!(hwc->state & PERF_HES_STOPPED)) { - box->n_active--; - - WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); - hwc->state |= PERF_HES_STOPPED; - - list_del(&event->active_entry); - - if (box->n_active == 0) - uncore_pmu_cancel_hrtimer(box); - } - - if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - /* - * Drain the remaining delta count out of a event - * that we are disabling: - */ - uncore_perf_event_update(box, event); - hwc->state |= PERF_HES_UPTODATE; - } -} - -static int snb_uncore_imc_event_add(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - struct hw_perf_event *hwc = &event->hw; - - if (!box) - return -ENODEV; - - hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - if (!(flags & PERF_EF_START)) - hwc->state |= PERF_HES_ARCH; - - snb_uncore_imc_event_start(event, 0); - - box->n_events++; - - return 0; -} - -static void snb_uncore_imc_event_del(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - int i; - - snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); - - for (i = 0; i < box->n_events; i++) { - if (event == box->event_list[i]) { - --box->n_events; - break; - } - } -} - -static int snb_pci2phy_map_init(int devid) 
-{ - struct pci_dev *dev = NULL; - int bus; - - dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); - if (!dev) - return -ENOTTY; - - bus = dev->bus->number; - - pcibus_to_physid[bus] = 0; - - pci_dev_put(dev); - - return 0; -} - -static struct pmu snb_uncore_imc_pmu = { - .task_ctx_nr = perf_invalid_context, - .event_init = snb_uncore_imc_event_init, - .add = snb_uncore_imc_event_add, - .del = snb_uncore_imc_event_del, - .start = snb_uncore_imc_event_start, - .stop = snb_uncore_imc_event_stop, - .read = uncore_pmu_event_read, -}; - -static struct intel_uncore_ops snb_uncore_imc_ops = { - .init_box = snb_uncore_imc_init_box, - .enable_box = snb_uncore_imc_enable_box, - .disable_box = snb_uncore_imc_disable_box, - .disable_event = snb_uncore_imc_disable_event, - .enable_event = snb_uncore_imc_enable_event, - .hw_config = snb_uncore_imc_hw_config, - .read_counter = snb_uncore_imc_read_counter, -}; - -static struct intel_uncore_type snb_uncore_imc = { - .name = "imc", - .num_counters = 2, - .num_boxes = 1, - .fixed_ctr_bits = 32, - .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE, - .event_descs = snb_uncore_imc_events, - .format_group = &snb_uncore_imc_format_group, - .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE, - .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK, - .ops = &snb_uncore_imc_ops, - .pmu = &snb_uncore_imc_pmu, -}; - -static struct intel_uncore_type *snb_pci_uncores[] = { - [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc, - NULL, -}; - -static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* end: all zeroes */ }, -}; - -static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* end: all zeroes */ }, -}; - -static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = { - { /* IMC */ - 
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* end: all zeroes */ }, -}; - -static struct pci_driver snb_uncore_pci_driver = { - .name = "snb_uncore", - .id_table = snb_uncore_pci_ids, -}; - -static struct pci_driver ivb_uncore_pci_driver = { - .name = "ivb_uncore", - .id_table = ivb_uncore_pci_ids, -}; - -static struct pci_driver hsw_uncore_pci_driver = { - .name = "hsw_uncore", - .id_table = hsw_uncore_pci_ids, -}; - -/* end of Sandy Bridge uncore support */ - -/* Nehalem uncore support */ -static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) -{ - wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); -} - -static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) -{ - wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); -} - -static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->idx < UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); - else - wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); -} - -static struct attribute *nhm_uncore_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_cmask8.attr, - NULL, -}; - -static struct attribute_group nhm_uncore_format_group = { - .name = "format", - .attrs = nhm_uncore_formats_attr, -}; - -static struct uncore_event_desc nhm_uncore_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), - INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"), - INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"), - INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"), - INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"), - INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"), - 
INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"), - INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"), - INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"), - { /* end: all zeroes */ }, -}; - -static struct intel_uncore_ops nhm_uncore_msr_ops = { - .disable_box = nhm_uncore_msr_disable_box, - .enable_box = nhm_uncore_msr_enable_box, - .disable_event = snb_uncore_msr_disable_event, - .enable_event = nhm_uncore_msr_enable_event, - .read_counter = uncore_msr_read_counter, -}; - -static struct intel_uncore_type nhm_uncore = { - .name = "", - .num_counters = 8, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .event_ctl = NHM_UNC_PERFEVTSEL0, - .perf_ctr = NHM_UNC_UNCORE_PMC0, - .fixed_ctr = NHM_UNC_FIXED_CTR, - .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL, - .event_mask = NHM_UNC_RAW_EVENT_MASK, - .event_descs = nhm_uncore_events, - .ops = &nhm_uncore_msr_ops, - .format_group = &nhm_uncore_format_group, -}; - -static struct intel_uncore_type *nhm_msr_uncores[] = { - &nhm_uncore, - NULL, -}; -/* end of Nehalem uncore support */ - -/* Nehalem-EX uncore support */ -DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5"); -DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7"); -DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63"); -DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63"); - -static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) -{ - wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); -} - -static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) -{ - unsigned msr = uncore_msr_box_ctl(box); - u64 config; - - if (msr) { - rdmsrl(msr, config); - config &= ~((1ULL << uncore_num_counters(box)) - 1); - /* WBox has a fixed counter */ - if (uncore_msr_fixed_ctl(box)) - config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN; - wrmsrl(msr, config); - } -} - -static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box) -{ - unsigned msr = 
uncore_msr_box_ctl(box); - u64 config; - - if (msr) { - rdmsrl(msr, config); - config |= (1ULL << uncore_num_counters(box)) - 1; - /* WBox has a fixed counter */ - if (uncore_msr_fixed_ctl(box)) - config |= NHMEX_W_PMON_GLOBAL_FIXED_EN; - wrmsrl(msr, config); - } -} - -static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - wrmsrl(event->hw.config_base, 0); -} - -static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->idx >= UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); - else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); - else - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); -} - -#define NHMEX_UNCORE_OPS_COMMON_INIT() \ - .init_box = nhmex_uncore_msr_init_box, \ - .disable_box = nhmex_uncore_msr_disable_box, \ - .enable_box = nhmex_uncore_msr_enable_box, \ - .disable_event = nhmex_uncore_msr_disable_event, \ - .read_counter = uncore_msr_read_counter - -static struct intel_uncore_ops nhmex_uncore_ops = { - NHMEX_UNCORE_OPS_COMMON_INIT(), - .enable_event = nhmex_uncore_msr_enable_event, -}; - -static struct attribute *nhmex_uncore_ubox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_edge.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_ubox_format_group = { - .name = "format", - .attrs = nhmex_uncore_ubox_formats_attr, -}; - -static struct intel_uncore_type nhmex_uncore_ubox = { - .name = "ubox", - .num_counters = 1, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_U_MSR_PMON_EV_SEL, - .perf_ctr = NHMEX_U_MSR_PMON_CTR, - .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL, - .ops = &nhmex_uncore_ops, - .format_group = &nhmex_uncore_ubox_format_group -}; - -static struct attribute *nhmex_uncore_cbox_formats_attr[] = { - 
&format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_cbox_format_group = { - .name = "format", - .attrs = nhmex_uncore_cbox_formats_attr, -}; - -/* msr offset for each instance of cbox */ -static unsigned nhmex_cbox_msr_offsets[] = { - 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0, -}; - -static struct intel_uncore_type nhmex_uncore_cbox = { - .name = "cbox", - .num_counters = 6, - .num_boxes = 10, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0, - .perf_ctr = NHMEX_C0_MSR_PMON_CTR0, - .event_mask = NHMEX_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL, - .msr_offsets = nhmex_cbox_msr_offsets, - .pair_ctr_ctl = 1, - .ops = &nhmex_uncore_ops, - .format_group = &nhmex_uncore_cbox_format_group -}; - -static struct uncore_event_desc nhmex_uncore_wbox_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"), - { /* end: all zeroes */ }, -}; - -static struct intel_uncore_type nhmex_uncore_wbox = { - .name = "wbox", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_W_MSR_PMON_CNT0, - .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0, - .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR, - .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL, - .event_mask = NHMEX_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_W_MSR_GLOBAL_CTL, - .pair_ctr_ctl = 1, - .event_descs = nhmex_uncore_wbox_events, - .ops = &nhmex_uncore_ops, - .format_group = &nhmex_uncore_cbox_format_group -}; - -static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - int ctr, ev_sel; - - ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >> - NHMEX_B_PMON_CTR_SHIFT; - ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >> - 
NHMEX_B_PMON_CTL_EV_SEL_SHIFT; - - /* events that do not use the match/mask registers */ - if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) || - (ctr == 2 && ev_sel != 0x4) || ctr == 3) - return 0; - - if (box->pmu->pmu_idx == 0) - reg1->reg = NHMEX_B0_MSR_MATCH; - else - reg1->reg = NHMEX_B1_MSR_MATCH; - reg1->idx = 0; - reg1->config = event->attr.config1; - reg2->config = event->attr.config2; - return 0; -} - -static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - - if (reg1->idx != EXTRA_REG_NONE) { - wrmsrl(reg1->reg, reg1->config); - wrmsrl(reg1->reg + 1, reg2->config); - } - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | - (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK)); -} - -/* - * The Bbox has 4 counters, but each counter monitors different events. - * Use bits 6-7 in the event config to select counter. 
- */ -static struct event_constraint nhmex_uncore_bbox_constraints[] = { - EVENT_CONSTRAINT(0 , 1, 0xc0), - EVENT_CONSTRAINT(0x40, 2, 0xc0), - EVENT_CONSTRAINT(0x80, 4, 0xc0), - EVENT_CONSTRAINT(0xc0, 8, 0xc0), - EVENT_CONSTRAINT_END, -}; - -static struct attribute *nhmex_uncore_bbox_formats_attr[] = { - &format_attr_event5.attr, - &format_attr_counter.attr, - &format_attr_match.attr, - &format_attr_mask.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_bbox_format_group = { - .name = "format", - .attrs = nhmex_uncore_bbox_formats_attr, -}; - -static struct intel_uncore_ops nhmex_uncore_bbox_ops = { - NHMEX_UNCORE_OPS_COMMON_INIT(), - .enable_event = nhmex_bbox_msr_enable_event, - .hw_config = nhmex_bbox_hw_config, - .get_constraint = uncore_get_constraint, - .put_constraint = uncore_put_constraint, -}; - -static struct intel_uncore_type nhmex_uncore_bbox = { - .name = "bbox", - .num_counters = 4, - .num_boxes = 2, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_B0_MSR_PMON_CTL0, - .perf_ctr = NHMEX_B0_MSR_PMON_CTR0, - .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL, - .msr_offset = NHMEX_B_MSR_OFFSET, - .pair_ctr_ctl = 1, - .num_shared_regs = 1, - .constraints = nhmex_uncore_bbox_constraints, - .ops = &nhmex_uncore_bbox_ops, - .format_group = &nhmex_uncore_bbox_format_group -}; - -static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - - /* only TO_R_PROG_EV event uses the match/mask register */ - if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) != - NHMEX_S_EVENT_TO_R_PROG_EV) - return 0; - - if (box->pmu->pmu_idx == 0) - reg1->reg = NHMEX_S0_MSR_MM_CFG; - else - reg1->reg = NHMEX_S1_MSR_MM_CFG; - reg1->idx = 0; - reg1->config = event->attr.config1; - reg2->config = event->attr.config2; - return 0; -} - -static void 
nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - - if (reg1->idx != EXTRA_REG_NONE) { - wrmsrl(reg1->reg, 0); - wrmsrl(reg1->reg + 1, reg1->config); - wrmsrl(reg1->reg + 2, reg2->config); - wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); - } - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); -} - -static struct attribute *nhmex_uncore_sbox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - &format_attr_match.attr, - &format_attr_mask.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_sbox_format_group = { - .name = "format", - .attrs = nhmex_uncore_sbox_formats_attr, -}; - -static struct intel_uncore_ops nhmex_uncore_sbox_ops = { - NHMEX_UNCORE_OPS_COMMON_INIT(), - .enable_event = nhmex_sbox_msr_enable_event, - .hw_config = nhmex_sbox_hw_config, - .get_constraint = uncore_get_constraint, - .put_constraint = uncore_put_constraint, -}; - -static struct intel_uncore_type nhmex_uncore_sbox = { - .name = "sbox", - .num_counters = 4, - .num_boxes = 2, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_S0_MSR_PMON_CTL0, - .perf_ctr = NHMEX_S0_MSR_PMON_CTR0, - .event_mask = NHMEX_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL, - .msr_offset = NHMEX_S_MSR_OFFSET, - .pair_ctr_ctl = 1, - .num_shared_regs = 1, - .ops = &nhmex_uncore_sbox_ops, - .format_group = &nhmex_uncore_sbox_format_group -}; - -enum { - EXTRA_REG_NHMEX_M_FILTER, - EXTRA_REG_NHMEX_M_DSP, - EXTRA_REG_NHMEX_M_ISS, - EXTRA_REG_NHMEX_M_MAP, - EXTRA_REG_NHMEX_M_MSC_THR, - EXTRA_REG_NHMEX_M_PGT, - EXTRA_REG_NHMEX_M_PLD, - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC, -}; - -static struct extra_reg nhmex_uncore_mbox_extra_regs[] = { - MBOX_INC_SEL_EXTAR_REG(0x0, DSP), - MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR), 
- MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR), - MBOX_INC_SEL_EXTAR_REG(0x9, ISS), - /* event 0xa uses two extra registers */ - MBOX_INC_SEL_EXTAR_REG(0xa, ISS), - MBOX_INC_SEL_EXTAR_REG(0xa, PLD), - MBOX_INC_SEL_EXTAR_REG(0xb, PLD), - /* events 0xd ~ 0x10 use the same extra register */ - MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC), - MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC), - MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC), - MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC), - MBOX_INC_SEL_EXTAR_REG(0x16, PGT), - MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP), - MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS), - MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT), - MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP), - EVENT_EXTRA_END -}; - -/* Nehalem-EX or Westmere-EX ? */ -static bool uncore_nhmex; - -static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config) -{ - struct intel_uncore_extra_reg *er; - unsigned long flags; - bool ret = false; - u64 mask; - - if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { - er = &box->shared_regs[idx]; - raw_spin_lock_irqsave(&er->lock, flags); - if (!atomic_read(&er->ref) || er->config == config) { - atomic_inc(&er->ref); - er->config = config; - ret = true; - } - raw_spin_unlock_irqrestore(&er->lock, flags); - - return ret; - } - /* - * The ZDP_CTL_FVC MSR has 4 fields which are used to control - * events 0xd ~ 0x10. Besides these 4 fields, there are additional - * fields which are shared. 
- */ - idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; - if (WARN_ON_ONCE(idx >= 4)) - return false; - - /* mask of the shared fields */ - if (uncore_nhmex) - mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK; - else - mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK; - er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; - - raw_spin_lock_irqsave(&er->lock, flags); - /* add mask of the non-shared field if it's in use */ - if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) { - if (uncore_nhmex) - mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - else - mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - } - - if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) { - atomic_add(1 << (idx * 8), &er->ref); - if (uncore_nhmex) - mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK | - NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - else - mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK | - WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - er->config &= ~mask; - er->config |= (config & mask); - ret = true; - } - raw_spin_unlock_irqrestore(&er->lock, flags); - - return ret; -} - -static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx) -{ - struct intel_uncore_extra_reg *er; - - if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { - er = &box->shared_regs[idx]; - atomic_dec(&er->ref); - return; - } - - idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; - er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; - atomic_sub(1 << (idx * 8), &er->ref); -} - -static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); - u64 config = reg1->config; - - /* get the non-shared control bits and shift them */ - idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; - if (uncore_nhmex) - config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - else - config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - if (new_idx > orig_idx) { - idx = new_idx - orig_idx; - config <<= 3 * idx; - } 
else { - idx = orig_idx - new_idx; - config >>= 3 * idx; - } - - /* add the shared control bits back */ - if (uncore_nhmex) - config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; - else - config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; - config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; - if (modify) { - /* adjust the main event selector */ - if (new_idx > orig_idx) - hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; - else - hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; - reg1->config = config; - reg1->idx = ~0xff | new_idx; - } - return config; -} - -static struct event_constraint * -nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; - int i, idx[2], alloc = 0; - u64 config1 = reg1->config; - - idx[0] = __BITS_VALUE(reg1->idx, 0, 8); - idx[1] = __BITS_VALUE(reg1->idx, 1, 8); -again: - for (i = 0; i < 2; i++) { - if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) - idx[i] = 0xff; - - if (idx[i] == 0xff) - continue; - - if (!nhmex_mbox_get_shared_reg(box, idx[i], - __BITS_VALUE(config1, i, 32))) - goto fail; - alloc |= (0x1 << i); - } - - /* for the match/mask registers */ - if (reg2->idx != EXTRA_REG_NONE && - (uncore_box_is_fake(box) || !reg2->alloc) && - !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config)) - goto fail; - - /* - * If it's a fake box -- as per validate_{group,event}() we - * shouldn't touch event state and we can avoid doing so - * since both will only call get_event_constraints() once - * on each event, this avoids the need for reg->alloc. 
- */ - if (!uncore_box_is_fake(box)) { - if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) - nhmex_mbox_alter_er(event, idx[0], true); - reg1->alloc |= alloc; - if (reg2->idx != EXTRA_REG_NONE) - reg2->alloc = 1; - } - return NULL; -fail: - if (idx[0] != 0xff && !(alloc & 0x1) && - idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { - /* - * events 0xd ~ 0x10 are functional identical, but are - * controlled by different fields in the ZDP_CTL_FVC - * register. If we failed to take one field, try the - * rest 3 choices. - */ - BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff); - idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; - idx[0] = (idx[0] + 1) % 4; - idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; - if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) { - config1 = nhmex_mbox_alter_er(event, idx[0], false); - goto again; - } - } - - if (alloc & 0x1) - nhmex_mbox_put_shared_reg(box, idx[0]); - if (alloc & 0x2) - nhmex_mbox_put_shared_reg(box, idx[1]); - return &constraint_empty; -} - -static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; - - if (uncore_box_is_fake(box)) - return; - - if (reg1->alloc & 0x1) - nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8)); - if (reg1->alloc & 0x2) - nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8)); - reg1->alloc = 0; - - if (reg2->alloc) { - nhmex_mbox_put_shared_reg(box, reg2->idx); - reg2->alloc = 0; - } -} - -static int nhmex_mbox_extra_reg_idx(struct extra_reg *er) -{ - if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) - return er->idx; - return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd; -} - -static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct intel_uncore_type *type = box->pmu->type; - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct hw_perf_event_extra *reg2 = 
&event->hw.branch_reg; - struct extra_reg *er; - unsigned msr; - int reg_idx = 0; - /* - * The mbox events may require 2 extra MSRs at the most. But only - * the lower 32 bits in these MSRs are significant, so we can use - * config1 to pass two MSRs' config. - */ - for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) { - if (er->event != (event->hw.config & er->config_mask)) - continue; - if (event->attr.config1 & ~er->valid_mask) - return -EINVAL; - - msr = er->msr + type->msr_offset * box->pmu->pmu_idx; - if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff)) - return -EINVAL; - - /* always use the 32~63 bits to pass the PLD config */ - if (er->idx == EXTRA_REG_NHMEX_M_PLD) - reg_idx = 1; - else if (WARN_ON_ONCE(reg_idx > 0)) - return -EINVAL; - - reg1->idx &= ~(0xff << (reg_idx * 8)); - reg1->reg &= ~(0xffff << (reg_idx * 16)); - reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8); - reg1->reg |= msr << (reg_idx * 16); - reg1->config = event->attr.config1; - reg_idx++; - } - /* - * The mbox only provides ability to perform address matching - * for the PLD events. 
- */ - if (reg_idx == 2) { - reg2->idx = EXTRA_REG_NHMEX_M_FILTER; - if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN) - reg2->config = event->attr.config2; - else - reg2->config = ~0ULL; - if (box->pmu->pmu_idx == 0) - reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG; - else - reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG; - } - return 0; -} - -static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx) -{ - struct intel_uncore_extra_reg *er; - unsigned long flags; - u64 config; - - if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) - return box->shared_regs[idx].config; - - er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; - raw_spin_lock_irqsave(&er->lock, flags); - config = er->config; - raw_spin_unlock_irqrestore(&er->lock, flags); - return config; -} - -static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - int idx; - - idx = __BITS_VALUE(reg1->idx, 0, 8); - if (idx != 0xff) - wrmsrl(__BITS_VALUE(reg1->reg, 0, 16), - nhmex_mbox_shared_reg_config(box, idx)); - idx = __BITS_VALUE(reg1->idx, 1, 8); - if (idx != 0xff) - wrmsrl(__BITS_VALUE(reg1->reg, 1, 16), - nhmex_mbox_shared_reg_config(box, idx)); - - if (reg2->idx != EXTRA_REG_NONE) { - wrmsrl(reg2->reg, 0); - if (reg2->config != ~0ULL) { - wrmsrl(reg2->reg + 1, - reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK); - wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & - (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT)); - wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); - } - } - - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); -} - -DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3"); -DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5"); -DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6"); -DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7"); 
-DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13"); -DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21"); -DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63"); -DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33"); -DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61"); -DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63"); - -static struct attribute *nhmex_uncore_mbox_formats_attr[] = { - &format_attr_count_mode.attr, - &format_attr_storage_mode.attr, - &format_attr_wrap_mode.attr, - &format_attr_flag_mode.attr, - &format_attr_inc_sel.attr, - &format_attr_set_flag_sel.attr, - &format_attr_filter_cfg_en.attr, - &format_attr_filter_match.attr, - &format_attr_filter_mask.attr, - &format_attr_dsp.attr, - &format_attr_thr.attr, - &format_attr_fvc.attr, - &format_attr_pgt.attr, - &format_attr_map.attr, - &format_attr_iss.attr, - &format_attr_pld.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_mbox_format_group = { - .name = "format", - .attrs = nhmex_uncore_mbox_formats_attr, -}; - -static struct uncore_event_desc nhmex_uncore_mbox_events[] = { - INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"), - INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"), - { /* end: all zeroes */ }, -}; - -static struct uncore_event_desc wsmex_uncore_mbox_events[] = { - INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"), - INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"), - { /* end: all zeroes */ }, -}; - -static struct intel_uncore_ops nhmex_uncore_mbox_ops = { - NHMEX_UNCORE_OPS_COMMON_INIT(), - .enable_event = 
nhmex_mbox_msr_enable_event, - .hw_config = nhmex_mbox_hw_config, - .get_constraint = nhmex_mbox_get_constraint, - .put_constraint = nhmex_mbox_put_constraint, -}; - -static struct intel_uncore_type nhmex_uncore_mbox = { - .name = "mbox", - .num_counters = 6, - .num_boxes = 2, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_M0_MSR_PMU_CTL0, - .perf_ctr = NHMEX_M0_MSR_PMU_CNT0, - .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL, - .msr_offset = NHMEX_M_MSR_OFFSET, - .pair_ctr_ctl = 1, - .num_shared_regs = 8, - .event_descs = nhmex_uncore_mbox_events, - .ops = &nhmex_uncore_mbox_ops, - .format_group = &nhmex_uncore_mbox_format_group, -}; - -static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - - /* adjust the main event selector and extra register index */ - if (reg1->idx % 2) { - reg1->idx--; - hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; - } else { - reg1->idx++; - hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; - } - - /* adjust extra register config */ - switch (reg1->idx % 6) { - case 2: - /* shift the 8~15 bits to the 0~7 bits */ - reg1->config >>= 8; - break; - case 3: - /* shift the 0~7 bits to the 8~15 bits */ - reg1->config <<= 8; - break; - }; -} - -/* - * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7. - * An event set consists of 6 events, the 3rd and 4th events in - * an event set use the same extra register. So an event set uses - * 5 extra registers. 
- */ -static struct event_constraint * -nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - struct intel_uncore_extra_reg *er; - unsigned long flags; - int idx, er_idx; - u64 config1; - bool ok = false; - - if (!uncore_box_is_fake(box) && reg1->alloc) - return NULL; - - idx = reg1->idx % 6; - config1 = reg1->config; -again: - er_idx = idx; - /* the 3rd and 4th events use the same extra register */ - if (er_idx > 2) - er_idx--; - er_idx += (reg1->idx / 6) * 5; - - er = &box->shared_regs[er_idx]; - raw_spin_lock_irqsave(&er->lock, flags); - if (idx < 2) { - if (!atomic_read(&er->ref) || er->config == reg1->config) { - atomic_inc(&er->ref); - er->config = reg1->config; - ok = true; - } - } else if (idx == 2 || idx == 3) { - /* - * these two events use different fields in a extra register, - * the 0~7 bits and the 8~15 bits respectively. - */ - u64 mask = 0xff << ((idx - 2) * 8); - if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) || - !((er->config ^ config1) & mask)) { - atomic_add(1 << ((idx - 2) * 8), &er->ref); - er->config &= ~mask; - er->config |= config1 & mask; - ok = true; - } - } else { - if (!atomic_read(&er->ref) || - (er->config == (hwc->config >> 32) && - er->config1 == reg1->config && - er->config2 == reg2->config)) { - atomic_inc(&er->ref); - er->config = (hwc->config >> 32); - er->config1 = reg1->config; - er->config2 = reg2->config; - ok = true; - } - } - raw_spin_unlock_irqrestore(&er->lock, flags); - - if (!ok) { - /* - * The Rbox events are always in pairs. The paired - * events are functional identical, but use different - * extra registers. If we failed to take an extra - * register, try the alternative. 
- */ - idx ^= 1; - if (idx != reg1->idx % 6) { - if (idx == 2) - config1 >>= 8; - else if (idx == 3) - config1 <<= 8; - goto again; - } - } else { - if (!uncore_box_is_fake(box)) { - if (idx != reg1->idx % 6) - nhmex_rbox_alter_er(box, event); - reg1->alloc = 1; - } - return NULL; - } - return &constraint_empty; -} - -static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct intel_uncore_extra_reg *er; - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - int idx, er_idx; - - if (uncore_box_is_fake(box) || !reg1->alloc) - return; - - idx = reg1->idx % 6; - er_idx = idx; - if (er_idx > 2) - er_idx--; - er_idx += (reg1->idx / 6) * 5; - - er = &box->shared_regs[er_idx]; - if (idx == 2 || idx == 3) - atomic_sub(1 << ((idx - 2) * 8), &er->ref); - else - atomic_dec(&er->ref); - - reg1->alloc = 0; -} - -static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; - int idx; - - idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >> - NHMEX_R_PMON_CTL_EV_SEL_SHIFT; - if (idx >= 0x18) - return -EINVAL; - - reg1->idx = idx; - reg1->config = event->attr.config1; - - switch (idx % 6) { - case 4: - case 5: - hwc->config |= event->attr.config & (~0ULL << 32); - reg2->config = event->attr.config2; - break; - }; - return 0; -} - -static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - int idx, port; - - idx = reg1->idx; - port = idx / 6 + box->pmu->pmu_idx * 4; - - switch (idx % 6) { - case 0: - wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config); - break; - case 1: - wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config); - break; - 
case 2: - case 3: - wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port), - uncore_shared_reg_config(box, 2 + (idx / 6) * 5)); - break; - case 4: - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), - hwc->config >> 32); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config); - break; - case 5: - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port), - hwc->config >> 32); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config); - break; - }; - - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | - (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK)); -} - -DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63"); -DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63"); -DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63"); -DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15"); -DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31"); - -static struct attribute *nhmex_uncore_rbox_formats_attr[] = { - &format_attr_event5.attr, - &format_attr_xbr_mm_cfg.attr, - &format_attr_xbr_match.attr, - &format_attr_xbr_mask.attr, - &format_attr_qlx_cfg.attr, - &format_attr_iperf_cfg.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_rbox_format_group = { - .name = "format", - .attrs = nhmex_uncore_rbox_formats_attr, -}; - -static struct uncore_event_desc nhmex_uncore_rbox_events[] = { - INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"), - INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"), - INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"), - INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"), - INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"), - INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"), - { /* end: all zeroes */ }, -}; - -static struct intel_uncore_ops 
nhmex_uncore_rbox_ops = { - NHMEX_UNCORE_OPS_COMMON_INIT(), - .enable_event = nhmex_rbox_msr_enable_event, - .hw_config = nhmex_rbox_hw_config, - .get_constraint = nhmex_rbox_get_constraint, - .put_constraint = nhmex_rbox_put_constraint, -}; - -static struct intel_uncore_type nhmex_uncore_rbox = { - .name = "rbox", - .num_counters = 8, - .num_boxes = 2, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_R_MSR_PMON_CTL0, - .perf_ctr = NHMEX_R_MSR_PMON_CNT0, - .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_R_MSR_GLOBAL_CTL, - .msr_offset = NHMEX_R_MSR_OFFSET, - .pair_ctr_ctl = 1, - .num_shared_regs = 20, - .event_descs = nhmex_uncore_rbox_events, - .ops = &nhmex_uncore_rbox_ops, - .format_group = &nhmex_uncore_rbox_format_group -}; - -static struct intel_uncore_type *nhmex_msr_uncores[] = { - &nhmex_uncore_ubox, - &nhmex_uncore_cbox, - &nhmex_uncore_bbox, - &nhmex_uncore_sbox, - &nhmex_uncore_mbox, - &nhmex_uncore_rbox, - &nhmex_uncore_wbox, - NULL, -}; -/* end of Nehalem-EX uncore support */ - static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx) { struct hw_perf_event *hwc = &event->hw; @@ -3140,7 +170,7 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_eve hwc->event_base = uncore_perf_ctr(box, hwc->idx); } -static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) +void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) { u64 prev_count, new_count, delta; int shift; @@ -3201,14 +231,14 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) return HRTIMER_RESTART; } -static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) +void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) { __hrtimer_start_range_ns(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), 0, HRTIMER_MODE_REL_PINNED, 0); } -static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) +void 
uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) { hrtimer_cancel(&box->hrtimer); } @@ -3291,7 +321,7 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve } if (event->attr.config == UNCORE_FIXED_EVENT) - return &constraint_fixed; + return &uncore_constraint_fixed; if (type->constraints) { for_each_event_constraint(c, type->constraints) { @@ -3496,7 +526,7 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags) event->hw.last_tag = ~0ULL; } -static void uncore_pmu_event_read(struct perf_event *event) +void uncore_pmu_event_read(struct perf_event *event) { struct intel_uncore_box *box = uncore_event_to_box(event); uncore_perf_event_update(box, event); @@ -3635,7 +665,7 @@ static struct attribute_group uncore_pmu_attr_group = { .attrs = uncore_pmu_attrs, }; -static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) +static int uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -3758,9 +788,6 @@ fail: return ret; } -static struct pci_driver *uncore_pci_driver; -static bool pcidrv_registered; - /* * add a pci uncore device */ @@ -3770,18 +797,20 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id struct intel_uncore_box *box; struct intel_uncore_type *type; int phys_id; + bool first_box = false; - phys_id = pcibus_to_physid[pdev->bus->number]; + phys_id = uncore_pcibus_to_physid[pdev->bus->number]; if (phys_id < 0) return -ENODEV; if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { - extra_pci_dev[phys_id][UNCORE_PCI_DEV_IDX(id->driver_data)] = pdev; + int idx = UNCORE_PCI_DEV_IDX(id->driver_data); + uncore_extra_pci_dev[phys_id][idx] = pdev; pci_set_drvdata(pdev, NULL); return 0; } - type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; box = uncore_alloc_box(type, NUMA_NO_NODE); if (!box) return -ENOMEM; @@ -3803,9 +832,13 @@ static int uncore_pci_probe(struct pci_dev 
*pdev, const struct pci_device_id *id pci_set_drvdata(pdev, box); raw_spin_lock(&uncore_box_lock); + if (list_empty(&pmu->box_list)) + first_box = true; list_add_tail(&box->list, &pmu->box_list); raw_spin_unlock(&uncore_box_lock); + if (first_box) + uncore_pmu_register(pmu); return 0; } @@ -3813,13 +846,14 @@ static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box = pci_get_drvdata(pdev); struct intel_uncore_pmu *pmu; - int i, cpu, phys_id = pcibus_to_physid[pdev->bus->number]; + int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number]; + bool last_box = false; box = pci_get_drvdata(pdev); if (!box) { for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { - if (extra_pci_dev[phys_id][i] == pdev) { - extra_pci_dev[phys_id][i] = NULL; + if (uncore_extra_pci_dev[phys_id][i] == pdev) { + uncore_extra_pci_dev[phys_id][i] = NULL; break; } } @@ -3835,6 +869,8 @@ static void uncore_pci_remove(struct pci_dev *pdev) raw_spin_lock(&uncore_box_lock); list_del(&box->list); + if (list_empty(&pmu->box_list)) + last_box = true; raw_spin_unlock(&uncore_box_lock); for_each_possible_cpu(cpu) { @@ -3846,6 +882,9 @@ static void uncore_pci_remove(struct pci_dev *pdev) WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); kfree(box); + + if (last_box) + perf_pmu_unregister(&pmu->pmu); } static int __init uncore_pci_init(void) @@ -3854,46 +893,32 @@ static int __init uncore_pci_init(void) switch (boot_cpu_data.x86_model) { case 45: /* Sandy Bridge-EP */ - ret = snbep_pci2phy_map_init(0x3ce0); - if (ret) - return ret; - pci_uncores = snbep_pci_uncores; - uncore_pci_driver = &snbep_uncore_pci_driver; + ret = snbep_uncore_pci_init(); break; - case 62: /* IvyTown */ - ret = snbep_pci2phy_map_init(0x0e1e); - if (ret) - return ret; - pci_uncores = ivt_pci_uncores; - uncore_pci_driver = &ivt_uncore_pci_driver; + case 62: /* Ivy Bridge-EP */ + ret = ivbep_uncore_pci_init(); + break; + case 63: /* Haswell-EP */ + ret = hswep_uncore_pci_init(); break; case 42: /* Sandy Bridge 
*/ - ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC); - if (ret) - return ret; - pci_uncores = snb_pci_uncores; - uncore_pci_driver = &snb_uncore_pci_driver; + ret = snb_uncore_pci_init(); break; case 58: /* Ivy Bridge */ - ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC); - if (ret) - return ret; - pci_uncores = snb_pci_uncores; - uncore_pci_driver = &ivb_uncore_pci_driver; + ret = ivb_uncore_pci_init(); break; case 60: /* Haswell */ case 69: /* Haswell Celeron */ - ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC); - if (ret) - return ret; - pci_uncores = snb_pci_uncores; - uncore_pci_driver = &hsw_uncore_pci_driver; + ret = hsw_uncore_pci_init(); break; default: return 0; } - ret = uncore_types_init(pci_uncores); + if (ret) + return ret; + + ret = uncore_types_init(uncore_pci_uncores); if (ret) return ret; @@ -3904,7 +929,7 @@ static int __init uncore_pci_init(void) if (ret == 0) pcidrv_registered = true; else - uncore_types_exit(pci_uncores); + uncore_types_exit(uncore_pci_uncores); return ret; } @@ -3914,7 +939,7 @@ static void __init uncore_pci_exit(void) if (pcidrv_registered) { pcidrv_registered = false; pci_unregister_driver(uncore_pci_driver); - uncore_types_exit(pci_uncores); + uncore_types_exit(uncore_pci_uncores); } } @@ -3940,8 +965,8 @@ static void uncore_cpu_dying(int cpu) struct intel_uncore_box *box; int i, j; - for (i = 0; msr_uncores[i]; i++) { - type = msr_uncores[i]; + for (i = 0; uncore_msr_uncores[i]; i++) { + type = uncore_msr_uncores[i]; for (j = 0; j < type->num_boxes; j++) { pmu = &type->pmus[j]; box = *per_cpu_ptr(pmu->box, cpu); @@ -3961,8 +986,8 @@ static int uncore_cpu_starting(int cpu) phys_id = topology_physical_package_id(cpu); - for (i = 0; msr_uncores[i]; i++) { - type = msr_uncores[i]; + for (i = 0; uncore_msr_uncores[i]; i++) { + type = uncore_msr_uncores[i]; for (j = 0; j < type->num_boxes; j++) { pmu = &type->pmus[j]; box = *per_cpu_ptr(pmu->box, cpu); @@ -4002,8 +1027,8 @@ static int 
uncore_cpu_prepare(int cpu, int phys_id) struct intel_uncore_box *box; int i, j; - for (i = 0; msr_uncores[i]; i++) { - type = msr_uncores[i]; + for (i = 0; uncore_msr_uncores[i]; i++) { + type = uncore_msr_uncores[i]; for (j = 0; j < type->num_boxes; j++) { pmu = &type->pmus[j]; if (pmu->func_id < 0) @@ -4083,8 +1108,8 @@ static void uncore_event_exit_cpu(int cpu) if (target >= 0) cpumask_set_cpu(target, &uncore_cpu_mask); - uncore_change_context(msr_uncores, cpu, target); - uncore_change_context(pci_uncores, cpu, target); + uncore_change_context(uncore_msr_uncores, cpu, target); + uncore_change_context(uncore_pci_uncores, cpu, target); } static void uncore_event_init_cpu(int cpu) @@ -4099,8 +1124,8 @@ static void uncore_event_init_cpu(int cpu) cpumask_set_cpu(cpu, &uncore_cpu_mask); - uncore_change_context(msr_uncores, -1, cpu); - uncore_change_context(pci_uncores, -1, cpu); + uncore_change_context(uncore_msr_uncores, -1, cpu); + uncore_change_context(uncore_pci_uncores, -1, cpu); } static int uncore_cpu_notifier(struct notifier_block *self, @@ -4160,47 +1185,37 @@ static void __init uncore_cpu_setup(void *dummy) static int __init uncore_cpu_init(void) { - int ret, max_cores; + int ret; - max_cores = boot_cpu_data.x86_max_cores; switch (boot_cpu_data.x86_model) { case 26: /* Nehalem */ case 30: case 37: /* Westmere */ case 44: - msr_uncores = nhm_msr_uncores; + nhm_uncore_cpu_init(); break; case 42: /* Sandy Bridge */ case 58: /* Ivy Bridge */ - if (snb_uncore_cbox.num_boxes > max_cores) - snb_uncore_cbox.num_boxes = max_cores; - msr_uncores = snb_msr_uncores; + snb_uncore_cpu_init(); break; case 45: /* Sandy Bridge-EP */ - if (snbep_uncore_cbox.num_boxes > max_cores) - snbep_uncore_cbox.num_boxes = max_cores; - msr_uncores = snbep_msr_uncores; + snbep_uncore_cpu_init(); break; case 46: /* Nehalem-EX */ - uncore_nhmex = true; case 47: /* Westmere-EX aka. 
Xeon E7 */ - if (!uncore_nhmex) - nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; - if (nhmex_uncore_cbox.num_boxes > max_cores) - nhmex_uncore_cbox.num_boxes = max_cores; - msr_uncores = nhmex_msr_uncores; + nhmex_uncore_cpu_init(); break; - case 62: /* IvyTown */ - if (ivt_uncore_cbox.num_boxes > max_cores) - ivt_uncore_cbox.num_boxes = max_cores; - msr_uncores = ivt_msr_uncores; + case 62: /* Ivy Bridge-EP */ + ivbep_uncore_cpu_init(); + break; + case 63: /* Haswell-EP */ + hswep_uncore_cpu_init(); break; - default: return 0; } - ret = uncore_types_init(msr_uncores); + ret = uncore_types_init(uncore_msr_uncores); if (ret) return ret; @@ -4213,16 +1228,8 @@ static int __init uncore_pmus_register(void) struct intel_uncore_type *type; int i, j; - for (i = 0; msr_uncores[i]; i++) { - type = msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - uncore_pmu_register(pmu); - } - } - - for (i = 0; pci_uncores[i]; i++) { - type = pci_uncores[i]; + for (i = 0; uncore_msr_uncores[i]; i++) { + type = uncore_msr_uncores[i]; for (j = 0; j < type->num_boxes; j++) { pmu = &type->pmus[j]; uncore_pmu_register(pmu); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 90236f0c94a9..18eb78bbdd10 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -24,395 +24,6 @@ #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) -/* SNB event control */ -#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff -#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 -#define SNB_UNC_CTL_EDGE_DET (1 << 18) -#define SNB_UNC_CTL_EN (1 << 22) -#define SNB_UNC_CTL_INVERT (1 << 23) -#define SNB_UNC_CTL_CMASK_MASK 0x1f000000 -#define NHM_UNC_CTL_CMASK_MASK 0xff000000 -#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0) - -#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ - SNB_UNC_CTL_UMASK_MASK | \ - SNB_UNC_CTL_EDGE_DET | \ - SNB_UNC_CTL_INVERT | \ - 
SNB_UNC_CTL_CMASK_MASK) - -#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ - SNB_UNC_CTL_UMASK_MASK | \ - SNB_UNC_CTL_EDGE_DET | \ - SNB_UNC_CTL_INVERT | \ - NHM_UNC_CTL_CMASK_MASK) - -/* SNB global control register */ -#define SNB_UNC_PERF_GLOBAL_CTL 0x391 -#define SNB_UNC_FIXED_CTR_CTRL 0x394 -#define SNB_UNC_FIXED_CTR 0x395 - -/* SNB uncore global control */ -#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1) -#define SNB_UNC_GLOBAL_CTL_EN (1 << 29) - -/* SNB Cbo register */ -#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700 -#define SNB_UNC_CBO_0_PER_CTR0 0x706 -#define SNB_UNC_CBO_MSR_OFFSET 0x10 - -/* NHM global control register */ -#define NHM_UNC_PERF_GLOBAL_CTL 0x391 -#define NHM_UNC_FIXED_CTR 0x394 -#define NHM_UNC_FIXED_CTR_CTRL 0x395 - -/* NHM uncore global control */ -#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1) -#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32) - -/* NHM uncore register */ -#define NHM_UNC_PERFEVTSEL0 0x3c0 -#define NHM_UNC_UNCORE_PMC0 0x3b0 - -/* SNB-EP Box level control */ -#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) -#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1) -#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8) -#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16) -#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ - SNBEP_PMON_BOX_CTL_RST_CTRS | \ - SNBEP_PMON_BOX_CTL_FRZ_EN) -/* SNB-EP event control */ -#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff -#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00 -#define SNBEP_PMON_CTL_RST (1 << 17) -#define SNBEP_PMON_CTL_EDGE_DET (1 << 18) -#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) -#define SNBEP_PMON_CTL_EN (1 << 22) -#define SNBEP_PMON_CTL_INVERT (1 << 23) -#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000 -#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_UMASK_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PMON_CTL_INVERT | \ - SNBEP_PMON_CTL_TRESH_MASK) - -/* SNB-EP Ubox event control */ -#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000 -#define 
SNBEP_U_MSR_PMON_RAW_EVENT_MASK \ - (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_UMASK_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PMON_CTL_INVERT | \ - SNBEP_U_MSR_PMON_CTL_TRESH_MASK) - -#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19) -#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ - SNBEP_CBO_PMON_CTL_TID_EN) - -/* SNB-EP PCU event control */ -#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000 -#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000 -#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30) -#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31) -#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ - (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PMON_CTL_EV_SEL_EXT | \ - SNBEP_PMON_CTL_INVERT | \ - SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) - -#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ - (SNBEP_PMON_RAW_EVENT_MASK | \ - SNBEP_PMON_CTL_EV_SEL_EXT) - -/* SNB-EP pci control register */ -#define SNBEP_PCI_PMON_BOX_CTL 0xf4 -#define SNBEP_PCI_PMON_CTL0 0xd8 -/* SNB-EP pci counter register */ -#define SNBEP_PCI_PMON_CTR0 0xa0 - -/* SNB-EP home agent register */ -#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40 -#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44 -#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48 -/* SNB-EP memory controller register */ -#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0 -#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0 -/* SNB-EP QPI register */ -#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228 -#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c -#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238 -#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c - -/* SNB-EP Ubox register */ -#define SNBEP_U_MSR_PMON_CTR0 0xc16 -#define SNBEP_U_MSR_PMON_CTL0 0xc10 - -#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08 -#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09 - -/* SNB-EP Cbo register */ -#define SNBEP_C0_MSR_PMON_CTR0 0xd16 
-#define SNBEP_C0_MSR_PMON_CTL0 0xd10 -#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04 -#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 -#define SNBEP_CBO_MSR_OFFSET 0x20 - -#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID 0x1f -#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID 0x3fc00 -#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE 0x7c0000 -#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC 0xff800000 - -#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) { \ - .event = (e), \ - .msr = SNBEP_C0_MSR_PMON_BOX_FILTER, \ - .config_mask = (m), \ - .idx = (i) \ -} - -/* SNB-EP PCU register */ -#define SNBEP_PCU_MSR_PMON_CTR0 0xc36 -#define SNBEP_PCU_MSR_PMON_CTL0 0xc30 -#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24 -#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34 -#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff -#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc -#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd - -/* IVT event control */ -#define IVT_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ - SNBEP_PMON_BOX_CTL_RST_CTRS) -#define IVT_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_UMASK_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PMON_CTL_TRESH_MASK) -/* IVT Ubox */ -#define IVT_U_MSR_PMON_GLOBAL_CTL 0xc00 -#define IVT_U_PMON_GLOBAL_FRZ_ALL (1 << 31) -#define IVT_U_PMON_GLOBAL_UNFRZ_ALL (1 << 29) - -#define IVT_U_MSR_PMON_RAW_EVENT_MASK \ - (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_UMASK_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_U_MSR_PMON_CTL_TRESH_MASK) -/* IVT Cbo */ -#define IVT_CBO_MSR_PMON_RAW_EVENT_MASK (IVT_PMON_RAW_EVENT_MASK | \ - SNBEP_CBO_PMON_CTL_TID_EN) - -#define IVT_CB0_MSR_PMON_BOX_FILTER_TID (0x1fULL << 0) -#define IVT_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 5) -#define IVT_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17) -#define IVT_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32) -#define IVT_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52) -#define IVT_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) -#define IVT_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) -#define 
IVT_CB0_MSR_PMON_BOX_FILTER_IOSC (0x1ULL << 63) - -/* IVT home agent */ -#define IVT_HA_PCI_PMON_CTL_Q_OCC_RST (1 << 16) -#define IVT_HA_PCI_PMON_RAW_EVENT_MASK \ - (IVT_PMON_RAW_EVENT_MASK | \ - IVT_HA_PCI_PMON_CTL_Q_OCC_RST) -/* IVT PCU */ -#define IVT_PCU_MSR_PMON_RAW_EVENT_MASK \ - (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_EV_SEL_EXT | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) -/* IVT QPI */ -#define IVT_QPI_PCI_PMON_RAW_EVENT_MASK \ - (IVT_PMON_RAW_EVENT_MASK | \ - SNBEP_PMON_CTL_EV_SEL_EXT) - -/* NHM-EX event control */ -#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff -#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00 -#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0) -#define NHMEX_PMON_CTL_EDGE_DET (1 << 18) -#define NHMEX_PMON_CTL_PMI_EN (1 << 20) -#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22) -#define NHMEX_PMON_CTL_INVERT (1 << 23) -#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000 -#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \ - NHMEX_PMON_CTL_UMASK_MASK | \ - NHMEX_PMON_CTL_EDGE_DET | \ - NHMEX_PMON_CTL_INVERT | \ - NHMEX_PMON_CTL_TRESH_MASK) - -/* NHM-EX Ubox */ -#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00 -#define NHMEX_U_MSR_PMON_CTR 0xc11 -#define NHMEX_U_MSR_PMON_EV_SEL 0xc10 - -#define NHMEX_U_PMON_GLOBAL_EN (1 << 0) -#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e -#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28) -#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29) -#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31) - -#define NHMEX_U_PMON_RAW_EVENT_MASK \ - (NHMEX_PMON_CTL_EV_SEL_MASK | \ - NHMEX_PMON_CTL_EDGE_DET) - -/* NHM-EX Cbox */ -#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00 -#define NHMEX_C0_MSR_PMON_CTR0 0xd11 -#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10 -#define NHMEX_C_MSR_OFFSET 0x20 - -/* NHM-EX Bbox */ -#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20 -#define NHMEX_B0_MSR_PMON_CTR0 0xc31 -#define 
NHMEX_B0_MSR_PMON_CTL0 0xc30 -#define NHMEX_B_MSR_OFFSET 0x40 -#define NHMEX_B0_MSR_MATCH 0xe45 -#define NHMEX_B0_MSR_MASK 0xe46 -#define NHMEX_B1_MSR_MATCH 0xe4d -#define NHMEX_B1_MSR_MASK 0xe4e - -#define NHMEX_B_PMON_CTL_EN (1 << 0) -#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1 -#define NHMEX_B_PMON_CTL_EV_SEL_MASK \ - (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT) -#define NHMEX_B_PMON_CTR_SHIFT 6 -#define NHMEX_B_PMON_CTR_MASK \ - (0x3 << NHMEX_B_PMON_CTR_SHIFT) -#define NHMEX_B_PMON_RAW_EVENT_MASK \ - (NHMEX_B_PMON_CTL_EV_SEL_MASK | \ - NHMEX_B_PMON_CTR_MASK) - -/* NHM-EX Sbox */ -#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40 -#define NHMEX_S0_MSR_PMON_CTR0 0xc51 -#define NHMEX_S0_MSR_PMON_CTL0 0xc50 -#define NHMEX_S_MSR_OFFSET 0x80 -#define NHMEX_S0_MSR_MM_CFG 0xe48 -#define NHMEX_S0_MSR_MATCH 0xe49 -#define NHMEX_S0_MSR_MASK 0xe4a -#define NHMEX_S1_MSR_MM_CFG 0xe58 -#define NHMEX_S1_MSR_MATCH 0xe59 -#define NHMEX_S1_MSR_MASK 0xe5a - -#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63) -#define NHMEX_S_EVENT_TO_R_PROG_EV 0 - -/* NHM-EX Mbox */ -#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0 -#define NHMEX_M0_MSR_PMU_DSP 0xca5 -#define NHMEX_M0_MSR_PMU_ISS 0xca6 -#define NHMEX_M0_MSR_PMU_MAP 0xca7 -#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8 -#define NHMEX_M0_MSR_PMU_PGT 0xca9 -#define NHMEX_M0_MSR_PMU_PLD 0xcaa -#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab -#define NHMEX_M0_MSR_PMU_CTL0 0xcb0 -#define NHMEX_M0_MSR_PMU_CNT0 0xcb1 -#define NHMEX_M_MSR_OFFSET 0x40 -#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54 -#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c - -#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63) -#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL -#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL -#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34 - -#define NHMEX_M_PMON_CTL_EN (1 << 0) -#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1) -#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2 -#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \ - (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT) -#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4 -#define 
NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \ - (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT) -#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6) -#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7) -#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9 -#define NHMEX_M_PMON_CTL_INC_SEL_MASK \ - (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) -#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19 -#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \ - (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) -#define NHMEX_M_PMON_RAW_EVENT_MASK \ - (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \ - NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \ - NHMEX_M_PMON_CTL_WRAP_MODE | \ - NHMEX_M_PMON_CTL_FLAG_MODE | \ - NHMEX_M_PMON_CTL_INC_SEL_MASK | \ - NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) - -#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) -#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n))) - -#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) -#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n))) - -/* - * use the 9~13 bits to select event If the 7th bit is not set, - * otherwise use the 19~21 bits to select event. 
- */ -#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) -#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \ - NHMEX_M_PMON_CTL_FLAG_MODE) -#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \ - NHMEX_M_PMON_CTL_FLAG_MODE) -#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \ - NHMEX_M_PMON_CTL_FLAG_MODE) -#define MBOX_INC_SEL_EXTAR_REG(c, r) \ - EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \ - MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r) -#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \ - EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \ - MBOX_SET_FLAG_SEL_MASK, \ - (u64)-1, NHMEX_M_##r) - -/* NHM-EX Rbox */ -#define NHMEX_R_MSR_GLOBAL_CTL 0xe00 -#define NHMEX_R_MSR_PMON_CTL0 0xe10 -#define NHMEX_R_MSR_PMON_CNT0 0xe11 -#define NHMEX_R_MSR_OFFSET 0x20 - -#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \ - ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4)) -#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n)) -#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n)) -#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \ - (((n) < 4 ? 
0 : 0x10) + (n) * 4) -#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \ - (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) -#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \ - (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1) -#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \ - (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2) -#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \ - (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) -#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \ - (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1) -#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \ - (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2) - -#define NHMEX_R_PMON_CTL_EN (1 << 0) -#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1 -#define NHMEX_R_PMON_CTL_EV_SEL_MASK \ - (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT) -#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6) -#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK - -/* NHM-EX Wbox */ -#define NHMEX_W_MSR_GLOBAL_CTL 0xc80 -#define NHMEX_W_MSR_PMON_CNT0 0xc90 -#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91 -#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394 -#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395 - -#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31) - struct intel_uncore_ops; struct intel_uncore_pmu; struct intel_uncore_box; @@ -505,6 +116,9 @@ struct uncore_event_desc { const char *config; }; +ssize_t uncore_event_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf); + #define INTEL_UNCORE_EVENT_DESC(_name, _config) \ { \ .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \ @@ -522,15 +136,6 @@ static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ static struct kobj_attribute format_attr_##_var = \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL) - -static ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct uncore_event_desc *event = - container_of(attr, struct uncore_event_desc, attr); - return sprintf(buf, "%s", event->config); -} - static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) { return box->pmu->type->box_ctl; 
@@ -694,3 +299,41 @@ static inline bool uncore_box_is_fake(struct intel_uncore_box *box) { return (box->phys_id < 0); } + +struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event); +struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu); +struct intel_uncore_box *uncore_event_to_box(struct perf_event *event); +u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); +void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); +void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); +void uncore_pmu_event_read(struct perf_event *event); +void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event); +struct event_constraint * +uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event); +void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event); +u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); + +extern struct intel_uncore_type **uncore_msr_uncores; +extern struct intel_uncore_type **uncore_pci_uncores; +extern struct pci_driver *uncore_pci_driver; +extern int uncore_pcibus_to_physid[256]; +extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; +extern struct event_constraint uncore_constraint_empty; + +/* perf_event_intel_uncore_snb.c */ +int snb_uncore_pci_init(void); +int ivb_uncore_pci_init(void); +int hsw_uncore_pci_init(void); +void snb_uncore_cpu_init(void); +void nhm_uncore_cpu_init(void); + +/* perf_event_intel_uncore_snbep.c */ +int snbep_uncore_pci_init(void); +void snbep_uncore_cpu_init(void); +int ivbep_uncore_pci_init(void); +void ivbep_uncore_cpu_init(void); +int hswep_uncore_pci_init(void); +void hswep_uncore_cpu_init(void); + +/* perf_event_intel_uncore_nhmex.c */ +void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c new file mode 100644 index 
000000000000..2749965afed0 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c @@ -0,0 +1,1221 @@ +/* Nehalem-EX/Westmere-EX uncore support */ +#include "perf_event_intel_uncore.h" + +/* NHM-EX event control */ +#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff +#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00 +#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0) +#define NHMEX_PMON_CTL_EDGE_DET (1 << 18) +#define NHMEX_PMON_CTL_PMI_EN (1 << 20) +#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22) +#define NHMEX_PMON_CTL_INVERT (1 << 23) +#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000 +#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \ + NHMEX_PMON_CTL_UMASK_MASK | \ + NHMEX_PMON_CTL_EDGE_DET | \ + NHMEX_PMON_CTL_INVERT | \ + NHMEX_PMON_CTL_TRESH_MASK) + +/* NHM-EX Ubox */ +#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00 +#define NHMEX_U_MSR_PMON_CTR 0xc11 +#define NHMEX_U_MSR_PMON_EV_SEL 0xc10 + +#define NHMEX_U_PMON_GLOBAL_EN (1 << 0) +#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e +#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28) +#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29) +#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31) + +#define NHMEX_U_PMON_RAW_EVENT_MASK \ + (NHMEX_PMON_CTL_EV_SEL_MASK | \ + NHMEX_PMON_CTL_EDGE_DET) + +/* NHM-EX Cbox */ +#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00 +#define NHMEX_C0_MSR_PMON_CTR0 0xd11 +#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10 +#define NHMEX_C_MSR_OFFSET 0x20 + +/* NHM-EX Bbox */ +#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20 +#define NHMEX_B0_MSR_PMON_CTR0 0xc31 +#define NHMEX_B0_MSR_PMON_CTL0 0xc30 +#define NHMEX_B_MSR_OFFSET 0x40 +#define NHMEX_B0_MSR_MATCH 0xe45 +#define NHMEX_B0_MSR_MASK 0xe46 +#define NHMEX_B1_MSR_MATCH 0xe4d +#define NHMEX_B1_MSR_MASK 0xe4e + +#define NHMEX_B_PMON_CTL_EN (1 << 0) +#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1 +#define NHMEX_B_PMON_CTL_EV_SEL_MASK \ + (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT) +#define NHMEX_B_PMON_CTR_SHIFT 6 +#define NHMEX_B_PMON_CTR_MASK \ + (0x3 << NHMEX_B_PMON_CTR_SHIFT) +#define 
NHMEX_B_PMON_RAW_EVENT_MASK \ + (NHMEX_B_PMON_CTL_EV_SEL_MASK | \ + NHMEX_B_PMON_CTR_MASK) + +/* NHM-EX Sbox */ +#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40 +#define NHMEX_S0_MSR_PMON_CTR0 0xc51 +#define NHMEX_S0_MSR_PMON_CTL0 0xc50 +#define NHMEX_S_MSR_OFFSET 0x80 +#define NHMEX_S0_MSR_MM_CFG 0xe48 +#define NHMEX_S0_MSR_MATCH 0xe49 +#define NHMEX_S0_MSR_MASK 0xe4a +#define NHMEX_S1_MSR_MM_CFG 0xe58 +#define NHMEX_S1_MSR_MATCH 0xe59 +#define NHMEX_S1_MSR_MASK 0xe5a + +#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63) +#define NHMEX_S_EVENT_TO_R_PROG_EV 0 + +/* NHM-EX Mbox */ +#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0 +#define NHMEX_M0_MSR_PMU_DSP 0xca5 +#define NHMEX_M0_MSR_PMU_ISS 0xca6 +#define NHMEX_M0_MSR_PMU_MAP 0xca7 +#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8 +#define NHMEX_M0_MSR_PMU_PGT 0xca9 +#define NHMEX_M0_MSR_PMU_PLD 0xcaa +#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab +#define NHMEX_M0_MSR_PMU_CTL0 0xcb0 +#define NHMEX_M0_MSR_PMU_CNT0 0xcb1 +#define NHMEX_M_MSR_OFFSET 0x40 +#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54 +#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c + +#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63) +#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL +#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL +#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34 + +#define NHMEX_M_PMON_CTL_EN (1 << 0) +#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1) +#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2 +#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \ + (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT) +#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4 +#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \ + (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT) +#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6) +#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7) +#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9 +#define NHMEX_M_PMON_CTL_INC_SEL_MASK \ + (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) +#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19 +#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \ + (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) +#define 
NHMEX_M_PMON_RAW_EVENT_MASK \ + (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \ + NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \ + NHMEX_M_PMON_CTL_WRAP_MODE | \ + NHMEX_M_PMON_CTL_FLAG_MODE | \ + NHMEX_M_PMON_CTL_INC_SEL_MASK | \ + NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) + +#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) +#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n))) + +#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) +#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n))) + +/* + * use the 9~13 bits to select event If the 7th bit is not set, + * otherwise use the 19~21 bits to select event. + */ +#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) +#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_INC_SEL_EXTAR_REG(c, r) \ + EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \ + MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r) +#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \ + EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \ + MBOX_SET_FLAG_SEL_MASK, \ + (u64)-1, NHMEX_M_##r) + +/* NHM-EX Rbox */ +#define NHMEX_R_MSR_GLOBAL_CTL 0xe00 +#define NHMEX_R_MSR_PMON_CTL0 0xe10 +#define NHMEX_R_MSR_PMON_CNT0 0xe11 +#define NHMEX_R_MSR_OFFSET 0x20 + +#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \ + ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4)) +#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n)) +#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n)) +#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \ + (((n) < 4 ? 
0 : 0x10) + (n) * 4) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \ + (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \ + (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2) + +#define NHMEX_R_PMON_CTL_EN (1 << 0) +#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1 +#define NHMEX_R_PMON_CTL_EV_SEL_MASK \ + (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT) +#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6) +#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK + +/* NHM-EX Wbox */ +#define NHMEX_W_MSR_GLOBAL_CTL 0xc80 +#define NHMEX_W_MSR_PMON_CNT0 0xc90 +#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91 +#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394 +#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395 + +#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31) + +#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ + ((1ULL << (n)) - 1))) + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7"); +DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63"); + +static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) +{ + wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); +} + +static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + u64 config; 
+ + if (msr) { + rdmsrl(msr, config); + config &= ~((1ULL << uncore_num_counters(box)) - 1); + /* WBox has a fixed counter */ + if (uncore_msr_fixed_ctl(box)) + config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN; + wrmsrl(msr, config); + } +} + +static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + u64 config; + + if (msr) { + rdmsrl(msr, config); + config |= (1ULL << uncore_num_counters(box)) - 1; + /* WBox has a fixed counter */ + if (uncore_msr_fixed_ctl(box)) + config |= NHMEX_W_PMON_GLOBAL_FIXED_EN; + wrmsrl(msr, config); + } +} + +static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + wrmsrl(event->hw.config_base, 0); +} + +static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx >= UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); + else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); + else + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); +} + +#define NHMEX_UNCORE_OPS_COMMON_INIT() \ + .init_box = nhmex_uncore_msr_init_box, \ + .disable_box = nhmex_uncore_msr_disable_box, \ + .enable_box = nhmex_uncore_msr_enable_box, \ + .disable_event = nhmex_uncore_msr_disable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops nhmex_uncore_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_uncore_msr_enable_event, +}; + +static struct attribute *nhmex_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_edge.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_ubox_format_group = { + .name = "format", + .attrs = nhmex_uncore_ubox_formats_attr, +}; + +static struct intel_uncore_type nhmex_uncore_ubox = { + .name = "ubox", + .num_counters = 1, + .num_boxes = 1, + .perf_ctr_bits 
= 48, + .event_ctl = NHMEX_U_MSR_PMON_EV_SEL, + .perf_ctr = NHMEX_U_MSR_PMON_CTR, + .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_ubox_format_group +}; + +static struct attribute *nhmex_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_cbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_cbox_formats_attr, +}; + +/* msr offset for each instance of cbox */ +static unsigned nhmex_cbox_msr_offsets[] = { + 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0, +}; + +static struct intel_uncore_type nhmex_uncore_cbox = { + .name = "cbox", + .num_counters = 6, + .num_boxes = 10, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0, + .perf_ctr = NHMEX_C0_MSR_PMON_CTR0, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL, + .msr_offsets = nhmex_cbox_msr_offsets, + .pair_ctr_ctl = 1, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_cbox_format_group +}; + +static struct uncore_event_desc nhmex_uncore_wbox_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type nhmex_uncore_wbox = { + .name = "wbox", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_W_MSR_PMON_CNT0, + .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0, + .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR, + .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_W_MSR_GLOBAL_CTL, + .pair_ctr_ctl = 1, + .event_descs = nhmex_uncore_wbox_events, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_cbox_format_group +}; + +static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct 
hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int ctr, ev_sel; + + ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >> + NHMEX_B_PMON_CTR_SHIFT; + ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >> + NHMEX_B_PMON_CTL_EV_SEL_SHIFT; + + /* events that do not use the match/mask registers */ + if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) || + (ctr == 2 && ev_sel != 0x4) || ctr == 3) + return 0; + + if (box->pmu->pmu_idx == 0) + reg1->reg = NHMEX_B0_MSR_MATCH; + else + reg1->reg = NHMEX_B1_MSR_MATCH; + reg1->idx = 0; + reg1->config = event->attr.config1; + reg2->config = event->attr.config2; + return 0; +} + +static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + wrmsrl(reg1->reg, reg1->config); + wrmsrl(reg1->reg + 1, reg2->config); + } + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK)); +} + +/* + * The Bbox has 4 counters, but each counter monitors different events. + * Use bits 6-7 in the event config to select counter. 
+ */ +static struct event_constraint nhmex_uncore_bbox_constraints[] = { + EVENT_CONSTRAINT(0 , 1, 0xc0), + EVENT_CONSTRAINT(0x40, 2, 0xc0), + EVENT_CONSTRAINT(0x80, 4, 0xc0), + EVENT_CONSTRAINT(0xc0, 8, 0xc0), + EVENT_CONSTRAINT_END, +}; + +static struct attribute *nhmex_uncore_bbox_formats_attr[] = { + &format_attr_event5.attr, + &format_attr_counter.attr, + &format_attr_match.attr, + &format_attr_mask.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_bbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_bbox_formats_attr, +}; + +static struct intel_uncore_ops nhmex_uncore_bbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_bbox_msr_enable_event, + .hw_config = nhmex_bbox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_bbox = { + .name = "bbox", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_B0_MSR_PMON_CTL0, + .perf_ctr = NHMEX_B0_MSR_PMON_CTR0, + .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_B_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 1, + .constraints = nhmex_uncore_bbox_constraints, + .ops = &nhmex_uncore_bbox_ops, + .format_group = &nhmex_uncore_bbox_format_group +}; + +static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + /* only TO_R_PROG_EV event uses the match/mask register */ + if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) != + NHMEX_S_EVENT_TO_R_PROG_EV) + return 0; + + if (box->pmu->pmu_idx == 0) + reg1->reg = NHMEX_S0_MSR_MM_CFG; + else + reg1->reg = NHMEX_S1_MSR_MM_CFG; + reg1->idx = 0; + reg1->config = event->attr.config1; + reg2->config = event->attr.config2; + return 0; +} + +static void 
nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + wrmsrl(reg1->reg, 0); + wrmsrl(reg1->reg + 1, reg1->config); + wrmsrl(reg1->reg + 2, reg2->config); + wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); + } + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); +} + +static struct attribute *nhmex_uncore_sbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_match.attr, + &format_attr_mask.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_sbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_sbox_formats_attr, +}; + +static struct intel_uncore_ops nhmex_uncore_sbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_sbox_msr_enable_event, + .hw_config = nhmex_sbox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_S0_MSR_PMON_CTL0, + .perf_ctr = NHMEX_S0_MSR_PMON_CTR0, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_S_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 1, + .ops = &nhmex_uncore_sbox_ops, + .format_group = &nhmex_uncore_sbox_format_group +}; + +enum { + EXTRA_REG_NHMEX_M_FILTER, + EXTRA_REG_NHMEX_M_DSP, + EXTRA_REG_NHMEX_M_ISS, + EXTRA_REG_NHMEX_M_MAP, + EXTRA_REG_NHMEX_M_MSC_THR, + EXTRA_REG_NHMEX_M_PGT, + EXTRA_REG_NHMEX_M_PLD, + EXTRA_REG_NHMEX_M_ZDP_CTL_FVC, +}; + +static struct extra_reg nhmex_uncore_mbox_extra_regs[] = { + MBOX_INC_SEL_EXTAR_REG(0x0, DSP), + MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR), 
+ MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR), + MBOX_INC_SEL_EXTAR_REG(0x9, ISS), + /* event 0xa uses two extra registers */ + MBOX_INC_SEL_EXTAR_REG(0xa, ISS), + MBOX_INC_SEL_EXTAR_REG(0xa, PLD), + MBOX_INC_SEL_EXTAR_REG(0xb, PLD), + /* events 0xd ~ 0x10 use the same extra register */ + MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0x16, PGT), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP), + EVENT_EXTRA_END +}; + +/* Nehalem-EX or Westmere-EX ? */ +static bool uncore_nhmex; + +static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + bool ret = false; + u64 mask; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + er = &box->shared_regs[idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (!atomic_read(&er->ref) || er->config == config) { + atomic_inc(&er->ref); + er->config = config; + ret = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + return ret; + } + /* + * The ZDP_CTL_FVC MSR has 4 fields which are used to control + * events 0xd ~ 0x10. Besides these 4 fields, there are additional + * fields which are shared. 
+ */ + idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (WARN_ON_ONCE(idx >= 4)) + return false; + + /* mask of the shared fields */ + if (uncore_nhmex) + mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK; + else + mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK; + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + + raw_spin_lock_irqsave(&er->lock, flags); + /* add mask of the non-shared field if it's in use */ + if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) { + if (uncore_nhmex) + mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + } + + if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) { + atomic_add(1 << (idx * 8), &er->ref); + if (uncore_nhmex) + mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK | + NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK | + WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + er->config &= ~mask; + er->config |= (config & mask); + ret = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + return ret; +} + +static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + er = &box->shared_regs[idx]; + atomic_dec(&er->ref); + return; + } + + idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + atomic_sub(1 << (idx * 8), &er->ref); +} + +static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); + u64 config = reg1->config; + + /* get the non-shared control bits and shift them */ + idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (uncore_nhmex) + config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + if (new_idx > orig_idx) { + idx = new_idx - orig_idx; + config <<= 3 * idx; + } 
else { + idx = orig_idx - new_idx; + config >>= 3 * idx; + } + + /* add the shared control bits back */ + if (uncore_nhmex) + config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + else + config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + if (modify) { + /* adjust the main event selector */ + if (new_idx > orig_idx) + hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; + else + hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; + reg1->config = config; + reg1->idx = ~0xff | new_idx; + } + return config; +} + +static struct event_constraint * +nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + int i, idx[2], alloc = 0; + u64 config1 = reg1->config; + + idx[0] = __BITS_VALUE(reg1->idx, 0, 8); + idx[1] = __BITS_VALUE(reg1->idx, 1, 8); +again: + for (i = 0; i < 2; i++) { + if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) + idx[i] = 0xff; + + if (idx[i] == 0xff) + continue; + + if (!nhmex_mbox_get_shared_reg(box, idx[i], + __BITS_VALUE(config1, i, 32))) + goto fail; + alloc |= (0x1 << i); + } + + /* for the match/mask registers */ + if (reg2->idx != EXTRA_REG_NONE && + (uncore_box_is_fake(box) || !reg2->alloc) && + !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config)) + goto fail; + + /* + * If it's a fake box -- as per validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. 
+ */ + if (!uncore_box_is_fake(box)) { + if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) + nhmex_mbox_alter_er(event, idx[0], true); + reg1->alloc |= alloc; + if (reg2->idx != EXTRA_REG_NONE) + reg2->alloc = 1; + } + return NULL; +fail: + if (idx[0] != 0xff && !(alloc & 0x1) && + idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + /* + * events 0xd ~ 0x10 are functional identical, but are + * controlled by different fields in the ZDP_CTL_FVC + * register. If we failed to take one field, try the + * rest 3 choices. + */ + BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff); + idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + idx[0] = (idx[0] + 1) % 4; + idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) { + config1 = nhmex_mbox_alter_er(event, idx[0], false); + goto again; + } + } + + if (alloc & 0x1) + nhmex_mbox_put_shared_reg(box, idx[0]); + if (alloc & 0x2) + nhmex_mbox_put_shared_reg(box, idx[1]); + return &uncore_constraint_empty; +} + +static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + + if (uncore_box_is_fake(box)) + return; + + if (reg1->alloc & 0x1) + nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8)); + if (reg1->alloc & 0x2) + nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8)); + reg1->alloc = 0; + + if (reg2->alloc) { + nhmex_mbox_put_shared_reg(box, reg2->idx); + reg2->alloc = 0; + } +} + +static int nhmex_mbox_extra_reg_idx(struct extra_reg *er) +{ + if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) + return er->idx; + return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd; +} + +static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_type *type = box->pmu->type; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = 
&event->hw.branch_reg; + struct extra_reg *er; + unsigned msr; + int reg_idx = 0; + /* + * The mbox events may require 2 extra MSRs at the most. But only + * the lower 32 bits in these MSRs are significant, so we can use + * config1 to pass two MSRs' config. + */ + for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + if (event->attr.config1 & ~er->valid_mask) + return -EINVAL; + + msr = er->msr + type->msr_offset * box->pmu->pmu_idx; + if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff)) + return -EINVAL; + + /* always use the 32~63 bits to pass the PLD config */ + if (er->idx == EXTRA_REG_NHMEX_M_PLD) + reg_idx = 1; + else if (WARN_ON_ONCE(reg_idx > 0)) + return -EINVAL; + + reg1->idx &= ~(0xff << (reg_idx * 8)); + reg1->reg &= ~(0xffff << (reg_idx * 16)); + reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8); + reg1->reg |= msr << (reg_idx * 16); + reg1->config = event->attr.config1; + reg_idx++; + } + /* + * The mbox only provides ability to perform address matching + * for the PLD events. 
+ */ + if (reg_idx == 2) { + reg2->idx = EXTRA_REG_NHMEX_M_FILTER; + if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN) + reg2->config = event->attr.config2; + else + reg2->config = ~0ULL; + if (box->pmu->pmu_idx == 0) + reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG; + else + reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG; + } + return 0; +} + +static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + u64 config; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) + return box->shared_regs[idx].config; + + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + raw_spin_lock_irqsave(&er->lock, flags); + config = er->config; + raw_spin_unlock_irqrestore(&er->lock, flags); + return config; +} + +static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int idx; + + idx = __BITS_VALUE(reg1->idx, 0, 8); + if (idx != 0xff) + wrmsrl(__BITS_VALUE(reg1->reg, 0, 16), + nhmex_mbox_shared_reg_config(box, idx)); + idx = __BITS_VALUE(reg1->idx, 1, 8); + if (idx != 0xff) + wrmsrl(__BITS_VALUE(reg1->reg, 1, 16), + nhmex_mbox_shared_reg_config(box, idx)); + + if (reg2->idx != EXTRA_REG_NONE) { + wrmsrl(reg2->reg, 0); + if (reg2->config != ~0ULL) { + wrmsrl(reg2->reg + 1, + reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK); + wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & + (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT)); + wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); + } + } + + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); +} + +DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3"); +DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5"); +DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6"); +DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7"); 
+DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13"); +DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21"); +DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63"); +DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33"); +DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61"); +DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63"); + +static struct attribute *nhmex_uncore_mbox_formats_attr[] = { + &format_attr_count_mode.attr, + &format_attr_storage_mode.attr, + &format_attr_wrap_mode.attr, + &format_attr_flag_mode.attr, + &format_attr_inc_sel.attr, + &format_attr_set_flag_sel.attr, + &format_attr_filter_cfg_en.attr, + &format_attr_filter_match.attr, + &format_attr_filter_mask.attr, + &format_attr_dsp.attr, + &format_attr_thr.attr, + &format_attr_fvc.attr, + &format_attr_pgt.attr, + &format_attr_map.attr, + &format_attr_iss.attr, + &format_attr_pld.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_mbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_mbox_formats_attr, +}; + +static struct uncore_event_desc nhmex_uncore_mbox_events[] = { + INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"), + INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"), + { /* end: all zeroes */ }, +}; + +static struct uncore_event_desc wsmex_uncore_mbox_events[] = { + INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"), + INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhmex_uncore_mbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = 
nhmex_mbox_msr_enable_event, + .hw_config = nhmex_mbox_hw_config, + .get_constraint = nhmex_mbox_get_constraint, + .put_constraint = nhmex_mbox_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_mbox = { + .name = "mbox", + .num_counters = 6, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_M0_MSR_PMU_CTL0, + .perf_ctr = NHMEX_M0_MSR_PMU_CNT0, + .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL, + .msr_offset = NHMEX_M_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 8, + .event_descs = nhmex_uncore_mbox_events, + .ops = &nhmex_uncore_mbox_ops, + .format_group = &nhmex_uncore_mbox_format_group, +}; + +static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + /* adjust the main event selector and extra register index */ + if (reg1->idx % 2) { + reg1->idx--; + hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + } else { + reg1->idx++; + hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + } + + /* adjust extra register config */ + switch (reg1->idx % 6) { + case 2: + /* shift the 8~15 bits to the 0~7 bits */ + reg1->config >>= 8; + break; + case 3: + /* shift the 0~7 bits to the 8~15 bits */ + reg1->config <<= 8; + break; + } +} + +/* + * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7. + * An event set consists of 6 events, the 3rd and 4th events in + * an event set use the same extra register. So an event set uses + * 5 extra registers. 
+ */ +static struct event_constraint * +nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + struct intel_uncore_extra_reg *er; + unsigned long flags; + int idx, er_idx; + u64 config1; + bool ok = false; + + if (!uncore_box_is_fake(box) && reg1->alloc) + return NULL; + + idx = reg1->idx % 6; + config1 = reg1->config; +again: + er_idx = idx; + /* the 3rd and 4th events use the same extra register */ + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + er = &box->shared_regs[er_idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (idx < 2) { + if (!atomic_read(&er->ref) || er->config == reg1->config) { + atomic_inc(&er->ref); + er->config = reg1->config; + ok = true; + } + } else if (idx == 2 || idx == 3) { + /* + * these two events use different fields in a extra register, + * the 0~7 bits and the 8~15 bits respectively. + */ + u64 mask = 0xff << ((idx - 2) * 8); + if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) || + !((er->config ^ config1) & mask)) { + atomic_add(1 << ((idx - 2) * 8), &er->ref); + er->config &= ~mask; + er->config |= config1 & mask; + ok = true; + } + } else { + if (!atomic_read(&er->ref) || + (er->config == (hwc->config >> 32) && + er->config1 == reg1->config && + er->config2 == reg2->config)) { + atomic_inc(&er->ref); + er->config = (hwc->config >> 32); + er->config1 = reg1->config; + er->config2 = reg2->config; + ok = true; + } + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (!ok) { + /* + * The Rbox events are always in pairs. The paired + * events are functional identical, but use different + * extra registers. If we failed to take an extra + * register, try the alternative. 
+ */ + idx ^= 1; + if (idx != reg1->idx % 6) { + if (idx == 2) + config1 >>= 8; + else if (idx == 3) + config1 <<= 8; + goto again; + } + } else { + if (!uncore_box_is_fake(box)) { + if (idx != reg1->idx % 6) + nhmex_rbox_alter_er(box, event); + reg1->alloc = 1; + } + return NULL; + } + return &uncore_constraint_empty; +} + +static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + int idx, er_idx; + + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + idx = reg1->idx % 6; + er_idx = idx; + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + er = &box->shared_regs[er_idx]; + if (idx == 2 || idx == 3) + atomic_sub(1 << ((idx - 2) * 8), &er->ref); + else + atomic_dec(&er->ref); + + reg1->alloc = 0; +} + +static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + int idx; + + idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >> + NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + if (idx >= 0x18) + return -EINVAL; + + reg1->idx = idx; + reg1->config = event->attr.config1; + + switch (idx % 6) { + case 4: + case 5: + hwc->config |= event->attr.config & (~0ULL << 32); + reg2->config = event->attr.config2; + break; + } + return 0; +} + +static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int idx, port; + + idx = reg1->idx; + port = idx / 6 + box->pmu->pmu_idx * 4; + + switch (idx % 6) { + case 0: + wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config); + break; + case 1: + wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config); + 
break; + case 2: + case 3: + wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port), + uncore_shared_reg_config(box, 2 + (idx / 6) * 5)); + break; + case 4: + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), + hwc->config >> 32); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config); + break; + case 5: + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port), + hwc->config >> 32); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config); + break; + } + + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK)); +} + +DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63"); +DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15"); +DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31"); + +static struct attribute *nhmex_uncore_rbox_formats_attr[] = { + &format_attr_event5.attr, + &format_attr_xbr_mm_cfg.attr, + &format_attr_xbr_match.attr, + &format_attr_xbr_mask.attr, + &format_attr_qlx_cfg.attr, + &format_attr_iperf_cfg.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_rbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_rbox_formats_attr, +}; + +static struct uncore_event_desc nhmex_uncore_rbox_events[] = { + INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"), + INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"), + INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"), + INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"), + INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"), + INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops 
nhmex_uncore_rbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_rbox_msr_enable_event, + .hw_config = nhmex_rbox_hw_config, + .get_constraint = nhmex_rbox_get_constraint, + .put_constraint = nhmex_rbox_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_rbox = { + .name = "rbox", + .num_counters = 8, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_R_MSR_PMON_CTL0, + .perf_ctr = NHMEX_R_MSR_PMON_CNT0, + .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_R_MSR_GLOBAL_CTL, + .msr_offset = NHMEX_R_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 20, + .event_descs = nhmex_uncore_rbox_events, + .ops = &nhmex_uncore_rbox_ops, + .format_group = &nhmex_uncore_rbox_format_group +}; + +static struct intel_uncore_type *nhmex_msr_uncores[] = { + &nhmex_uncore_ubox, + &nhmex_uncore_cbox, + &nhmex_uncore_bbox, + &nhmex_uncore_sbox, + &nhmex_uncore_mbox, + &nhmex_uncore_rbox, + &nhmex_uncore_wbox, + NULL, +}; + +void nhmex_uncore_cpu_init(void) +{ + if (boot_cpu_data.x86_model == 46) + uncore_nhmex = true; + else + nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; + if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = nhmex_msr_uncores; +} +/* end of Nehalem-EX uncore support */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c new file mode 100644 index 000000000000..3001015b755c --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -0,0 +1,636 @@ +/* Nehalem/SandBridge/Haswell uncore support */ +#include "perf_event_intel_uncore.h" + +/* SNB event control */ +#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff +#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 +#define SNB_UNC_CTL_EDGE_DET (1 << 18) +#define SNB_UNC_CTL_EN (1 << 22) +#define SNB_UNC_CTL_INVERT (1 << 23) +#define SNB_UNC_CTL_CMASK_MASK 0x1f000000 +#define NHM_UNC_CTL_CMASK_MASK 
0xff000000 +#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0) + +#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + SNB_UNC_CTL_CMASK_MASK) + +#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + NHM_UNC_CTL_CMASK_MASK) + +/* SNB global control register */ +#define SNB_UNC_PERF_GLOBAL_CTL 0x391 +#define SNB_UNC_FIXED_CTR_CTRL 0x394 +#define SNB_UNC_FIXED_CTR 0x395 + +/* SNB uncore global control */ +#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1) +#define SNB_UNC_GLOBAL_CTL_EN (1 << 29) + +/* SNB Cbo register */ +#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700 +#define SNB_UNC_CBO_0_PER_CTR0 0x706 +#define SNB_UNC_CBO_MSR_OFFSET 0x10 + +/* NHM global control register */ +#define NHM_UNC_PERF_GLOBAL_CTL 0x391 +#define NHM_UNC_FIXED_CTR 0x394 +#define NHM_UNC_FIXED_CTR_CTRL 0x395 + +/* NHM uncore global control */ +#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1) +#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32) + +/* NHM uncore register */ +#define NHM_UNC_PERFEVTSEL0 0x3c0 +#define NHM_UNC_UNCORE_PMC0 0x3b0 + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); + +/* Sandy Bridge uncore support */ +static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx < UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + else + wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); +} + +static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + wrmsrl(event->hw.config_base, 
0); +} + +static void snb_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); + } +} + +static struct uncore_event_desc snb_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + { /* end: all zeroes */ }, +}; + +static struct attribute *snb_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask5.attr, + NULL, +}; + +static struct attribute_group snb_uncore_format_group = { + .name = "format", + .attrs = snb_uncore_formats_attr, +}; + +static struct intel_uncore_ops snb_uncore_msr_ops = { + .init_box = snb_uncore_msr_init_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct event_constraint snb_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x80, 0x1), + UNCORE_EVENT_CONSTRAINT(0x83, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snb_uncore_cbox = { + .name = "cbox", + .num_counters = 2, + .num_boxes = 4, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .constraints = snb_uncore_cbox_constraints, + .ops = &snb_uncore_msr_ops, + .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, +}; + +static struct intel_uncore_type *snb_msr_uncores[] = { + &snb_uncore_cbox, + NULL, +}; + +void snb_uncore_cpu_init(void) +{ + uncore_msr_uncores = snb_msr_uncores; + if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; +} + +enum { + SNB_PCI_UNCORE_IMC, +}; 
+ +static struct uncore_event_desc snb_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"), + INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"), + INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), + + { /* end: all zeroes */ }, +}; + +#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff +#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48 + +/* page size multiple covering all config regs */ +#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000 + +#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1 +#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050 +#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2 +#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 +#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE + +static struct attribute *snb_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group snb_uncore_imc_format_group = { + .name = "format", + .attrs = snb_uncore_imc_formats_attr, +}; + +static void snb_uncore_imc_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET; + resource_size_t addr; + u32 pci_dword; + + pci_read_config_dword(pdev, where, &pci_dword); + addr = pci_dword; + +#ifdef CONFIG_PHYS_ADDR_T_64BIT + pci_read_config_dword(pdev, where + 4, &pci_dword); + addr |= ((resource_size_t)pci_dword << 32); +#endif + + addr &= ~(PAGE_SIZE - 1); + + box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); + box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; +} + +static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) +{} + +static void snb_uncore_imc_disable_box(struct intel_uncore_box *box) +{} + +static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{} + +static void snb_uncore_imc_disable_event(struct 
intel_uncore_box *box, struct perf_event *event) +{} + +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); +} + +/* + * custom event_init() function because we define our own fixed, free + * running counters, so we do not want to conflict with generic uncore + * logic. Also simplifies processing + */ +static int snb_uncore_imc_event_init(struct perf_event *event) +{ + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + struct hw_perf_event *hwc = &event->hw; + u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK; + int idx, base; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + pmu = uncore_event_to_pmu(event); + /* no device found for this pmu */ + if (pmu->func_id < 0) + return -ENOENT; + + /* Sampling not supported yet */ + if (hwc->sample_period) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + event->attr.sample_period) /* no sampling */ + return -EINVAL; + + /* + * Place all uncore events for a particular physical package + * onto a single cpu + */ + if (event->cpu < 0) + return -EINVAL; + + /* check only supported bits are set */ + if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK) + return -EINVAL; + + box = uncore_pmu_to_box(pmu, event->cpu); + if (!box || box->cpu < 0) + return -EINVAL; + + event->cpu = box->cpu; + + event->hw.idx = -1; + event->hw.last_tag = ~0ULL; + event->hw.extra_reg.idx = EXTRA_REG_NONE; + event->hw.branch_reg.idx = EXTRA_REG_NONE; + /* + * check event is known (whitelist, determines counter) + */ + switch (cfg) { + case SNB_UNCORE_PCI_IMC_DATA_READS: + base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE; + idx = UNCORE_PMC_IDX_FIXED; + break; + case 
SNB_UNCORE_PCI_IMC_DATA_WRITES: + base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; + idx = UNCORE_PMC_IDX_FIXED + 1; + break; + default: + return -EINVAL; + } + + /* must be done before validate_group */ + event->hw.event_base = base; + event->hw.config = cfg; + event->hw.idx = idx; + + /* no group validation needed, we have free running counters */ + + return 0; +} + +static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + return 0; +} + +static void snb_uncore_imc_event_start(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + u64 count; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + event->hw.state = 0; + box->n_active++; + + list_add_tail(&event->active_entry, &box->active_list); + + count = snb_uncore_imc_read_counter(box, event); + local64_set(&event->hw.prev_count, count); + + if (box->n_active == 1) + uncore_pmu_start_hrtimer(box); +} + +static void snb_uncore_imc_event_stop(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + box->n_active--; + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + list_del(&event->active_entry); + + if (box->n_active == 0) + uncore_pmu_cancel_hrtimer(box); + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + uncore_perf_event_update(box, event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int snb_uncore_imc_event_add(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + struct hw_perf_event *hwc = &event->hw; + + if (!box) + return -ENODEV; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + 
snb_uncore_imc_event_start(event, 0); + + box->n_events++; + + return 0; +} + +static void snb_uncore_imc_event_del(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + int i; + + snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); + + for (i = 0; i < box->n_events; i++) { + if (event == box->event_list[i]) { + --box->n_events; + break; + } + } +} + +static int snb_pci2phy_map_init(int devid) +{ + struct pci_dev *dev = NULL; + int bus; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); + if (!dev) + return -ENOTTY; + + bus = dev->bus->number; + + uncore_pcibus_to_physid[bus] = 0; + + pci_dev_put(dev); + + return 0; +} + +static struct pmu snb_uncore_imc_pmu = { + .task_ctx_nr = perf_invalid_context, + .event_init = snb_uncore_imc_event_init, + .add = snb_uncore_imc_event_add, + .del = snb_uncore_imc_event_del, + .start = snb_uncore_imc_event_start, + .stop = snb_uncore_imc_event_stop, + .read = uncore_pmu_event_read, +}; + +static struct intel_uncore_ops snb_uncore_imc_ops = { + .init_box = snb_uncore_imc_init_box, + .enable_box = snb_uncore_imc_enable_box, + .disable_box = snb_uncore_imc_disable_box, + .disable_event = snb_uncore_imc_disable_event, + .enable_event = snb_uncore_imc_enable_event, + .hw_config = snb_uncore_imc_hw_config, + .read_counter = snb_uncore_imc_read_counter, +}; + +static struct intel_uncore_type snb_uncore_imc = { + .name = "imc", + .num_counters = 2, + .num_boxes = 1, + .fixed_ctr_bits = 32, + .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE, + .event_descs = snb_uncore_imc_events, + .format_group = &snb_uncore_imc_format_group, + .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE, + .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK, + .ops = &snb_uncore_imc_ops, + .pmu = &snb_uncore_imc_pmu, +}; + +static struct intel_uncore_type *snb_pci_uncores[] = { + [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc, + NULL, +}; + +static const struct pci_device_id snb_uncore_pci_ids[] = { + { /* IMC */ + 
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + +static const struct pci_device_id ivb_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + +static const struct pci_device_id hsw_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + +static struct pci_driver snb_uncore_pci_driver = { + .name = "snb_uncore", + .id_table = snb_uncore_pci_ids, +}; + +static struct pci_driver ivb_uncore_pci_driver = { + .name = "ivb_uncore", + .id_table = ivb_uncore_pci_ids, +}; + +static struct pci_driver hsw_uncore_pci_driver = { + .name = "hsw_uncore", + .id_table = hsw_uncore_pci_ids, +}; + +struct imc_uncore_pci_dev { + __u32 pci_id; + struct pci_driver *driver; +}; +#define IMC_DEV(a, d) \ + { .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) } + +static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { + IMC_DEV(SNB_IMC, &snb_uncore_pci_driver), + IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */ + IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ + IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ + { /* end marker */ } +}; + + +#define for_each_imc_pci_id(x, t) \ + for (x = (t); (x)->pci_id; x++) + +static struct pci_driver *imc_uncore_find_dev(void) +{ + const struct imc_uncore_pci_dev *p; + int ret; + + for_each_imc_pci_id(p, desktop_imc_pci_ids) { + ret = snb_pci2phy_map_init(p->pci_id); + if (ret == 0) + return p->driver; + } + return NULL; 
+} + +static int imc_uncore_pci_init(void) +{ + struct pci_driver *imc_drv = imc_uncore_find_dev(); + + if (!imc_drv) + return -ENODEV; + + uncore_pci_uncores = snb_pci_uncores; + uncore_pci_driver = imc_drv; + + return 0; +} + +int snb_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + +int ivb_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} +int hsw_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + +/* end of Sandy Bridge uncore support */ + +/* Nehalem uncore support */ +static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); +} + +static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); +} + +static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx < UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + else + wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); +} + +static struct attribute *nhm_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask8.attr, + NULL, +}; + +static struct attribute_group nhm_uncore_format_group = { + .name = "format", + .attrs = nhm_uncore_formats_attr, +}; + +static struct uncore_event_desc nhm_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"), + INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"), + INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"), + INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, 
"event=0x20,umask=0x08"), + INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"), + INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhm_uncore_msr_ops = { + .disable_box = nhm_uncore_msr_disable_box, + .enable_box = nhm_uncore_msr_enable_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = nhm_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type nhm_uncore = { + .name = "", + .num_counters = 8, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .event_ctl = NHM_UNC_PERFEVTSEL0, + .perf_ctr = NHM_UNC_UNCORE_PMC0, + .fixed_ctr = NHM_UNC_FIXED_CTR, + .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL, + .event_mask = NHM_UNC_RAW_EVENT_MASK, + .event_descs = nhm_uncore_events, + .ops = &nhm_uncore_msr_ops, + .format_group = &nhm_uncore_format_group, +}; + +static struct intel_uncore_type *nhm_msr_uncores[] = { + &nhm_uncore, + NULL, +}; + +void nhm_uncore_cpu_init(void) +{ + uncore_msr_uncores = nhm_msr_uncores; +} + +/* end of Nehalem uncore support */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c new file mode 100644 index 000000000000..adf138eac85c --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -0,0 +1,2258 @@ +/* SandyBridge-EP/IvyTown uncore support */ +#include "perf_event_intel_uncore.h" + + +/* SNB-EP Box level control */ +#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) +#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1) +#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8) +#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16) +#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ + SNBEP_PMON_BOX_CTL_RST_CTRS | \ + SNBEP_PMON_BOX_CTL_FRZ_EN) +/* SNB-EP event control */ +#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff +#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00 +#define SNBEP_PMON_CTL_RST (1 << 
17) +#define SNBEP_PMON_CTL_EDGE_DET (1 << 18) +#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) +#define SNBEP_PMON_CTL_EN (1 << 22) +#define SNBEP_PMON_CTL_INVERT (1 << 23) +#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000 +#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_PMON_CTL_TRESH_MASK) + +/* SNB-EP Ubox event control */ +#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000 +#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_U_MSR_PMON_CTL_TRESH_MASK) + +#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19) +#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + +/* SNB-EP PCU event control */ +#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000 +#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000 +#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30) +#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31) +#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_EV_SEL_EXT | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) + +#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT) + +/* SNB-EP pci control register */ +#define SNBEP_PCI_PMON_BOX_CTL 0xf4 +#define SNBEP_PCI_PMON_CTL0 0xd8 +/* SNB-EP pci counter register */ +#define SNBEP_PCI_PMON_CTR0 0xa0 + +/* SNB-EP home agent register */ +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40 +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44 +#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48 +/* SNB-EP memory controller register */ +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0 +#define 
SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0 +/* SNB-EP QPI register */ +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228 +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238 +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c + +/* SNB-EP Ubox register */ +#define SNBEP_U_MSR_PMON_CTR0 0xc16 +#define SNBEP_U_MSR_PMON_CTL0 0xc10 + +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08 +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09 + +/* SNB-EP Cbo register */ +#define SNBEP_C0_MSR_PMON_CTR0 0xd16 +#define SNBEP_C0_MSR_PMON_CTL0 0xd10 +#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04 +#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 +#define SNBEP_CBO_MSR_OFFSET 0x20 + +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID 0x1f +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID 0x3fc00 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE 0x7c0000 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC 0xff800000 + +#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) { \ + .event = (e), \ + .msr = SNBEP_C0_MSR_PMON_BOX_FILTER, \ + .config_mask = (m), \ + .idx = (i) \ +} + +/* SNB-EP PCU register */ +#define SNBEP_PCU_MSR_PMON_CTR0 0xc36 +#define SNBEP_PCU_MSR_PMON_CTL0 0xc30 +#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff +#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc +#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd + +/* IVBEP event control */ +#define IVBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ + SNBEP_PMON_BOX_CTL_RST_CTRS) +#define IVBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_TRESH_MASK) +/* IVBEP Ubox */ +#define IVBEP_U_MSR_PMON_GLOBAL_CTL 0xc00 +#define IVBEP_U_PMON_GLOBAL_FRZ_ALL (1 << 31) +#define IVBEP_U_PMON_GLOBAL_UNFRZ_ALL (1 << 29) + +#define IVBEP_U_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_U_MSR_PMON_CTL_TRESH_MASK) +/* IVBEP Cbo */ 
+#define IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK (IVBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_TID (0x1fULL << 0) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 5) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63) + +/* IVBEP home agent */ +#define IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST (1 << 16) +#define IVBEP_HA_PCI_PMON_RAW_EVENT_MASK \ + (IVBEP_PMON_RAW_EVENT_MASK | \ + IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST) +/* IVBEP PCU */ +#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) +/* IVBEP QPI */ +#define IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ + (IVBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT) + +#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ + ((1ULL << (n)) - 1))) + +/* Haswell-EP Ubox */ +#define HSWEP_U_MSR_PMON_CTR0 0x705 +#define HSWEP_U_MSR_PMON_CTL0 0x709 +#define HSWEP_U_MSR_PMON_FILTER 0x707 + +#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL 0x703 +#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTR 0x704 + +#define HSWEP_U_MSR_PMON_BOX_FILTER_TID (0x1 << 0) +#define HSWEP_U_MSR_PMON_BOX_FILTER_CID (0x1fULL << 1) +#define HSWEP_U_MSR_PMON_BOX_FILTER_MASK \ + (HSWEP_U_MSR_PMON_BOX_FILTER_TID | \ + HSWEP_U_MSR_PMON_BOX_FILTER_CID) + +/* Haswell-EP CBo */ +#define HSWEP_C0_MSR_PMON_CTR0 0xe08 +#define HSWEP_C0_MSR_PMON_CTL0 0xe01 +#define HSWEP_C0_MSR_PMON_BOX_CTL 0xe00 +#define HSWEP_C0_MSR_PMON_BOX_FILTER0 0xe05 +#define HSWEP_CBO_MSR_OFFSET 0x10 + + +#define 
HSWEP_CB0_MSR_PMON_BOX_FILTER_TID (0x3fULL << 0) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 6) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x7fULL << 17) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63) + + +/* Haswell-EP Sbox */ +#define HSWEP_S0_MSR_PMON_CTR0 0x726 +#define HSWEP_S0_MSR_PMON_CTL0 0x721 +#define HSWEP_S0_MSR_PMON_BOX_CTL 0x720 +#define HSWEP_SBOX_MSR_OFFSET 0xa +#define HSWEP_S_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + +/* Haswell-EP PCU */ +#define HSWEP_PCU_MSR_PMON_CTR0 0x717 +#define HSWEP_PCU_MSR_PMON_CTL0 0x711 +#define HSWEP_PCU_MSR_PMON_BOX_CTL 0x710 +#define HSWEP_PCU_MSR_PMON_BOX_FILTER 0x715 + + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); +DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); +DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5"); +DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); 
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); /* same bits as SNBEP_CB0_MSR_PMON_BOX_FILTER_NID (0x3fc00) */
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); /* same bits as the IVBEP/HSWEP ..._FILTER_NID masks (0xffffULL << 32) */
+DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); /* same bits as SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x7c0000) */
+DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22"); /* same bits as IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17) */
+DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23"); /* same bits as HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x7fULL << 17) */
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); /* same bits as SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC (0xff800000) */
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60"); /* same bits as the IVBEP/HSWEP ..._FILTER_OPC masks (0x1ffULL << 52) */
+DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62"); /* same bit as ..._FILTER_NC (0x1ULL << 62) */
+DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61"); /* same bit as ..._FILTER_C6 (0x1ULL << 61) */
+DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63"); /* same bit as ..._FILTER_ISOC (0x1ULL << 63) */
+DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); /* filter_band0..3: four consecutive 8-bit fields of config1, listed in snbep_uncore_pcu_formats_attr[] */
+DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51"); /* match_*: config1 fields listed in snbep_uncore_qpi_formats_attr[] */
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
+DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31"); /* low 32 bits of config1 as one field */
+DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63"); /* high 32 bits of config1 as one field */
+DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51"); /* mask_*: same bit positions as the match_* fields, but in config2 */
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
+DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); +DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); + +static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + u32 config = 0; + + if (!pci_read_config_dword(pdev, box_ctl, &config)) { + config |= SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); + } +} + +static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + u32 config = 0; + + if (!pci_read_config_dword(pdev, box_ctl, &config)) { + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); + } +} + +static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config); +} + +static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); + pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); + + return count; +} + +static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT); +} + +static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + u64 
config; + unsigned msr; + + msr = uncore_msr_box_ctl(box); + if (msr) { + rdmsrl(msr, config); + config |= SNBEP_PMON_BOX_CTL_FRZ; + wrmsrl(msr, config); + } +} + +static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + u64 config; + unsigned msr; + + msr = uncore_msr_box_ctl(box); + if (msr) { + rdmsrl(msr, config); + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + wrmsrl(msr, config); + } +} + +static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0)); + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); +} + +static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + + if (msr) + wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); +} + +static struct attribute *snbep_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute *snbep_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + NULL, +}; + +static struct attribute *snbep_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid.attr, + &format_attr_filter_nid.attr, + &format_attr_filter_state.attr, + &format_attr_filter_opc.attr, + NULL, +}; + +static struct attribute *snbep_uncore_pcu_formats_attr[] = { + 
&format_attr_event_ext.attr, + &format_attr_occ_sel.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute *snbep_uncore_qpi_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_match_rds.attr, + &format_attr_match_rnid30.attr, + &format_attr_match_rnid4.attr, + &format_attr_match_dnid.attr, + &format_attr_match_mc.attr, + &format_attr_match_opc.attr, + &format_attr_match_vnw.attr, + &format_attr_match0.attr, + &format_attr_match1.attr, + &format_attr_mask_rds.attr, + &format_attr_mask_rnid30.attr, + &format_attr_mask_rnid4.attr, + &format_attr_mask_dnid.attr, + &format_attr_mask_mc.attr, + &format_attr_mask_opc.attr, + &format_attr_mask_vnw.attr, + &format_attr_mask0.attr, + &format_attr_mask1.attr, + NULL, +}; + +static struct uncore_event_desc snbep_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), + { /* end: all zeroes */ }, +}; + +static struct uncore_event_desc snbep_uncore_qpi_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), + INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), + INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"), + INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"), + { /* end: all zeroes */ }, +}; + +static struct attribute_group snbep_uncore_format_group = { + .name = "format", + .attrs = snbep_uncore_formats_attr, +}; + +static struct attribute_group snbep_uncore_ubox_format_group = { + .name = "format", + .attrs = 
snbep_uncore_ubox_formats_attr, +}; + +static struct attribute_group snbep_uncore_cbox_format_group = { + .name = "format", + .attrs = snbep_uncore_cbox_formats_attr, +}; + +static struct attribute_group snbep_uncore_pcu_format_group = { + .name = "format", + .attrs = snbep_uncore_pcu_formats_attr, +}; + +static struct attribute_group snbep_uncore_qpi_format_group = { + .name = "format", + .attrs = snbep_uncore_qpi_formats_attr, +}; + +#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ + .init_box = snbep_uncore_msr_init_box, \ + .disable_box = snbep_uncore_msr_disable_box, \ + .enable_box = snbep_uncore_msr_enable_box, \ + .disable_event = snbep_uncore_msr_disable_event, \ + .enable_event = snbep_uncore_msr_enable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops snbep_uncore_msr_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), +}; + +#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \ + .init_box = snbep_uncore_pci_init_box, \ + .disable_box = snbep_uncore_pci_disable_box, \ + .enable_box = snbep_uncore_pci_enable_box, \ + .disable_event = snbep_uncore_pci_disable_event, \ + .read_counter = snbep_uncore_pci_read_counter + +static struct intel_uncore_ops snbep_uncore_pci_ops = { + SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), + .enable_event = snbep_uncore_pci_enable_event, \ +}; + +static struct event_constraint snbep_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x1), + UNCORE_EVENT_CONSTRAINT(0x02, 0x3), + UNCORE_EVENT_CONSTRAINT(0x04, 0x3), + UNCORE_EVENT_CONSTRAINT(0x05, 0x3), + UNCORE_EVENT_CONSTRAINT(0x07, 0x3), + UNCORE_EVENT_CONSTRAINT(0x09, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x3), + UNCORE_EVENT_CONSTRAINT(0x1b, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), + EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + 
UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x35, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x1), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x24, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r3qpi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x20, 0x3), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x22, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x24, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2a, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2b, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x30, 0x3), + UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x3), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + 
UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snbep_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNBEP_U_MSR_PMON_CTR0, + .event_ctl = SNBEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &snbep_uncore_msr_ops, + .format_group = &snbep_uncore_ubox_format_group, +}; + +static struct extra_reg snbep_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2), + EVENT_EXTRA_END +}; + +static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = 
&event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + int i; + + if (uncore_box_is_fake(box)) + return; + + for (i = 0; i < 5; i++) { + if (reg1->alloc & (0x1 << i)) + atomic_sub(1 << (i * 6), &er->ref); + } + reg1->alloc = 0; +} + +static struct event_constraint * +__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event, + u64 (*cbox_filter_mask)(int fields)) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + int i, alloc = 0; + unsigned long flags; + u64 mask; + + if (reg1->idx == EXTRA_REG_NONE) + return NULL; + + raw_spin_lock_irqsave(&er->lock, flags); + for (i = 0; i < 5; i++) { + if (!(reg1->idx & (0x1 << i))) + continue; + if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) + continue; + + mask = cbox_filter_mask(0x1 << i); + if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) || + !((reg1->config ^ er->config) & mask)) { + atomic_add(1 << (i * 6), &er->ref); + er->config &= ~mask; + er->config |= reg1->config & mask; + alloc |= (0x1 << i); + } else { + break; + } + } + raw_spin_unlock_irqrestore(&er->lock, flags); + if (i < 5) + goto fail; + + if (!uncore_box_is_fake(box)) + reg1->alloc |= alloc; + + return NULL; +fail: + for (; i >= 0; i--) { + if (alloc & (0x1 << i)) + atomic_sub(1 << (i * 6), &er->ref); + } + return &uncore_constraint_empty; +} + +static u64 snbep_cbox_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x4) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC; + + return mask; +} + +static struct event_constraint * +snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask); +} + +static int 
snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + + SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static struct intel_uncore_ops snbep_uncore_cbox_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_cbox_hw_config, + .get_constraint = snbep_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type snbep_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 44, + .event_ctl = SNBEP_C0_MSR_PMON_CTL0, + .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, + .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = SNBEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = snbep_uncore_cbox_constraints, + .ops = &snbep_uncore_cbox_ops, + .format_group = &snbep_uncore_cbox_format_group, +}; + +static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + u64 config = reg1->config; + + if (new_idx > reg1->idx) + config <<= 8 * (new_idx - reg1->idx); + else + config >>= 8 * (reg1->idx - new_idx); + + if (modify) { + hwc->config += new_idx - reg1->idx; + reg1->config = config; + reg1->idx = new_idx; + } + return config; +} + +static struct event_constraint * +snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + unsigned long flags; + 
int idx = reg1->idx; + u64 mask, config1 = reg1->config; + bool ok = false; + + if (reg1->idx == EXTRA_REG_NONE || + (!uncore_box_is_fake(box) && reg1->alloc)) + return NULL; +again: + mask = 0xffULL << (idx * 8); + raw_spin_lock_irqsave(&er->lock, flags); + if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || + !((config1 ^ er->config) & mask)) { + atomic_add(1 << (idx * 8), &er->ref); + er->config &= ~mask; + er->config |= config1 & mask; + ok = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (!ok) { + idx = (idx + 1) % 4; + if (idx != reg1->idx) { + config1 = snbep_pcu_alter_er(event, idx, false); + goto again; + } + return &uncore_constraint_empty; + } + + if (!uncore_box_is_fake(box)) { + if (idx != reg1->idx) + snbep_pcu_alter_er(event, idx, true); + reg1->alloc = 1; + } + return NULL; +} + +static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + atomic_sub(1 << (reg1->idx * 8), &er->ref); + reg1->alloc = 0; +} + +static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xffULL << (reg1->idx * 8)); /* 64-bit mask: 0xff << 24 does not fit in int and would sign-extend into bits 63:32 */ + } + return 0; +} + +static struct intel_uncore_ops snbep_uncore_pcu_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type snbep_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = 
SNBEP_PCU_MSR_PMON_CTR0, + .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_pcu_ops, + .format_group = &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *snbep_msr_uncores[] = { + &snbep_uncore_ubox, + &snbep_uncore_cbox, + &snbep_uncore_pcu, + NULL, +}; + +void snbep_uncore_cpu_init(void) +{ + if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = snbep_msr_uncores; +} + +enum { + SNBEP_PCI_QPI_PORT0_FILTER, + SNBEP_PCI_QPI_PORT1_FILTER, +}; + +static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) { + reg1->idx = 0; + reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0; + reg1->config = event->attr.config1; + reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0; + reg2->config = event->attr.config2; + } + return 0; +} + +static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; + struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx]; + if (filter_pdev) { + pci_write_config_dword(filter_pdev, reg1->reg, + (u32)reg1->config); + pci_write_config_dword(filter_pdev, reg1->reg + 4, + (u32)(reg1->config >> 32)); + pci_write_config_dword(filter_pdev, reg2->reg, + (u32)reg2->config); + pci_write_config_dword(filter_pdev, reg2->reg + 4, + (u32)(reg2->config >> 32)); + } + } + + 
pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops snbep_uncore_qpi_ops = { + SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), + .enable_event = snbep_qpi_enable_event, + .hw_config = snbep_qpi_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +#define SNBEP_UNCORE_PCI_COMMON_INIT() \ + .perf_ctr = SNBEP_PCI_PMON_CTR0, \ + .event_ctl = SNBEP_PCI_PMON_CTL0, \ + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ + .ops = &snbep_uncore_pci_ops, \ + .format_group = &snbep_uncore_format_group + +static struct intel_uncore_type snbep_uncore_ha = { + .name = "ha", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = snbep_uncore_imc_events, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_qpi_ops, + .event_descs = snbep_uncore_qpi_events, + .format_group = &snbep_uncore_qpi_format_group, +}; + + +static struct intel_uncore_type snbep_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r2pcie_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 2, + .perf_ctr_bits = 44, + .constraints = 
snbep_uncore_r3qpi_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + SNBEP_PCI_UNCORE_HA, + SNBEP_PCI_UNCORE_IMC, + SNBEP_PCI_UNCORE_QPI, + SNBEP_PCI_UNCORE_R2PCIE, + SNBEP_PCI_UNCORE_R3QPI, +}; + +static struct intel_uncore_type *snbep_pci_uncores[] = { + [SNBEP_PCI_UNCORE_HA] = &snbep_uncore_ha, + [SNBEP_PCI_UNCORE_IMC] = &snbep_uncore_imc, + [SNBEP_PCI_UNCORE_QPI] = &snbep_uncore_qpi, + [SNBEP_PCI_UNCORE_R2PCIE] = &snbep_uncore_r2pcie, + [SNBEP_PCI_UNCORE_R3QPI] = &snbep_uncore_r3qpi, + NULL, +}; + +static const struct pci_device_id snbep_uncore_pci_ids[] = { + { /* Home Agent */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0), + }, + { /* MC Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0), + }, + { /* MC Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1), + }, + { /* MC Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2), + }, + { /* MC Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3), + }, + { /* QPI Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0), + }, + { /* QPI Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1), + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0), + }, + { /* R3QPI Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI Link 1 
*/ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 1 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver snbep_uncore_pci_driver = { + .name = "snbep_uncore", + .id_table = snbep_uncore_pci_ids, +}; + +/* + * build pci bus to socket mapping + */ +static int snbep_pci2phy_map_init(int devid) +{ + struct pci_dev *ubox_dev = NULL; + int i, bus, nodeid; + int err = 0; + u32 config = 0; + + while (1) { + /* find the UBOX device */ + ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev); + if (!ubox_dev) + break; + bus = ubox_dev->bus->number; + /* get the Node ID of the local register */ + err = pci_read_config_dword(ubox_dev, 0x40, &config); + if (err) + break; + nodeid = config & 0x7; /* keep only three bits so it can equal a 3-bit field of the mapping register */ + /* get the Node ID mapping */ + err = pci_read_config_dword(ubox_dev, 0x54, &config); + if (err) + break; + /* + * every three bits in the Node ID mapping register maps + * to a particular node. + */ + for (i = 0; i < 8; i++) { + if (nodeid == ((config >> (3 * i)) & 0x7)) { + uncore_pcibus_to_physid[bus] = i; + break; + } + } + } + + if (!err) { + /* + * For PCI bus with no UBOX device, find the next bus + * that has UBOX device and use its mapping. + */ + i = -1; + for (bus = 255; bus >= 0; bus--) { + if (uncore_pcibus_to_physid[bus] >= 0) + i = uncore_pcibus_to_physid[bus]; + else + uncore_pcibus_to_physid[bus] = i; + } + } + + if (ubox_dev) + pci_dev_put(ubox_dev); + + return err ? 
pcibios_err_to_errno(err) : 0; +} + +int snbep_uncore_pci_init(void) +{ + int ret = snbep_pci2phy_map_init(0x3ce0); + if (ret) + return ret; + uncore_pci_uncores = snbep_pci_uncores; + uncore_pci_driver = &snbep_uncore_pci_driver; + return 0; +} +/* end of Sandy Bridge-EP uncore support */ + +/* IvyTown uncore support */ +static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + if (msr) + wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT); +} + +static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT); +} + +#define IVBEP_UNCORE_MSR_OPS_COMMON_INIT() \ + .init_box = ivbep_uncore_msr_init_box, \ + .disable_box = snbep_uncore_msr_disable_box, \ + .enable_box = snbep_uncore_msr_enable_box, \ + .disable_event = snbep_uncore_msr_disable_event, \ + .enable_event = snbep_uncore_msr_enable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops ivbep_uncore_msr_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), +}; + +static struct intel_uncore_ops ivbep_uncore_pci_ops = { + .init_box = ivbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +#define IVBEP_UNCORE_PCI_COMMON_INIT() \ + .perf_ctr = SNBEP_PCI_PMON_CTR0, \ + .event_ctl = SNBEP_PCI_PMON_CTL0, \ + .event_mask = IVBEP_PMON_RAW_EVENT_MASK, \ + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ + .ops = &ivbep_uncore_pci_ops, \ + .format_group = &ivbep_uncore_format_group + +static struct attribute *ivbep_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute 
*ivbep_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid.attr, + &format_attr_filter_link.attr, + &format_attr_filter_state2.attr, + &format_attr_filter_nid2.attr, + &format_attr_filter_opc2.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_c6.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_pcu_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_occ_sel.attr, + &format_attr_edge.attr, + &format_attr_thresh5.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_qpi_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_thresh8.attr, + &format_attr_match_rds.attr, + &format_attr_match_rnid30.attr, + &format_attr_match_rnid4.attr, + &format_attr_match_dnid.attr, + &format_attr_match_mc.attr, + &format_attr_match_opc.attr, + &format_attr_match_vnw.attr, + &format_attr_match0.attr, + &format_attr_match1.attr, + &format_attr_mask_rds.attr, + &format_attr_mask_rnid30.attr, + &format_attr_mask_rnid4.attr, + &format_attr_mask_dnid.attr, + &format_attr_mask_mc.attr, + &format_attr_mask_opc.attr, + &format_attr_mask_vnw.attr, + &format_attr_mask0.attr, + &format_attr_mask1.attr, + NULL, +}; + +static struct attribute_group ivbep_uncore_format_group = { + .name = "format", + .attrs = ivbep_uncore_formats_attr, +}; + +static struct attribute_group ivbep_uncore_ubox_format_group = { + .name = "format", + 
.attrs = ivbep_uncore_ubox_formats_attr, +}; + +static struct attribute_group ivbep_uncore_cbox_format_group = { + .name = "format", + .attrs = ivbep_uncore_cbox_formats_attr, +}; + +static struct attribute_group ivbep_uncore_pcu_format_group = { + .name = "format", + .attrs = ivbep_uncore_pcu_formats_attr, +}; + +static struct attribute_group ivbep_uncore_qpi_format_group = { + .name = "format", + .attrs = ivbep_uncore_qpi_formats_attr, +}; + +static struct intel_uncore_type ivbep_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNBEP_U_MSR_PMON_CTR0, + .event_ctl = SNBEP_U_MSR_PMON_CTL0, + .event_mask = IVBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_ubox_format_group, +}; + +static struct extra_reg ivbep_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8), + 
SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8), + EVENT_EXTRA_END +}; + +static u64 ivbep_cbox_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK; + if (fields & 0x4) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x10) { + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC; + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC; + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6; + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC; + } + + return mask; +} + +static struct event_constraint * +ivbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, ivbep_cbox_filter_mask); +} + +static int ivbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = ivbep_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + 
reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + + SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & ivbep_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + u64 filter = uncore_shared_reg_config(box, 0); + wrmsrl(reg1->reg, filter & 0xffffffff); + wrmsrl(reg1->reg + 6, filter >> 32); + } + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops ivbep_uncore_cbox_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = ivbep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = ivbep_cbox_hw_config, + .get_constraint = ivbep_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type ivbep_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 15, + .perf_ctr_bits = 44, + .event_ctl = SNBEP_C0_MSR_PMON_CTL0, + .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, + .event_mask = IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = SNBEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = snbep_uncore_cbox_constraints, + .ops = &ivbep_uncore_cbox_ops, + .format_group = &ivbep_uncore_cbox_format_group, +}; + +static struct intel_uncore_ops ivbep_uncore_pcu_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type ivbep_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, + 
.event_ctl = SNBEP_PCU_MSR_PMON_CTL0, + .event_mask = IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &ivbep_uncore_pcu_ops, + .format_group = &ivbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *ivbep_msr_uncores[] = { + &ivbep_uncore_ubox, + &ivbep_uncore_cbox, + &ivbep_uncore_pcu, + NULL, +}; + +void ivbep_uncore_cpu_init(void) +{ + if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = ivbep_msr_uncores; +} + +static struct intel_uncore_type ivbep_uncore_ha = { + .name = "ha", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type ivbep_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = snbep_uncore_imc_events, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +/* registers in IRP boxes are not properly aligned */ +static unsigned ivbep_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4}; +static unsigned ivbep_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0}; + +static void ivbep_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], + hwc->config | SNBEP_PMON_CTL_EN); +} + +static void ivbep_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], hwc->config); +} + +static u64 ivbep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct 
hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx], (u32 *)&count); + pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); + + return count; +} + +static struct intel_uncore_ops ivbep_uncore_irp_ops = { + .init_box = ivbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = ivbep_uncore_irp_disable_event, + .enable_event = ivbep_uncore_irp_enable_event, + .read_counter = ivbep_uncore_irp_read_counter, +}; + +static struct intel_uncore_type ivbep_uncore_irp = { + .name = "irp", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_mask = IVBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_irp_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct intel_uncore_ops ivbep_uncore_qpi_ops = { + .init_box = ivbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_qpi_enable_event, + .read_counter = snbep_uncore_pci_read_counter, + .hw_config = snbep_qpi_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type ivbep_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &ivbep_uncore_qpi_ops, + .format_group = &ivbep_uncore_qpi_format_group, +}; + +static struct intel_uncore_type ivbep_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r2pcie_constraints, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct 
intel_uncore_type ivbep_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 2, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r3qpi_constraints, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + IVBEP_PCI_UNCORE_HA, + IVBEP_PCI_UNCORE_IMC, + IVBEP_PCI_UNCORE_IRP, + IVBEP_PCI_UNCORE_QPI, + IVBEP_PCI_UNCORE_R2PCIE, + IVBEP_PCI_UNCORE_R3QPI, +}; + +static struct intel_uncore_type *ivbep_pci_uncores[] = { + [IVBEP_PCI_UNCORE_HA] = &ivbep_uncore_ha, + [IVBEP_PCI_UNCORE_IMC] = &ivbep_uncore_imc, + [IVBEP_PCI_UNCORE_IRP] = &ivbep_uncore_irp, + [IVBEP_PCI_UNCORE_QPI] = &ivbep_uncore_qpi, + [IVBEP_PCI_UNCORE_R2PCIE] = &ivbep_uncore_r2pcie, + [IVBEP_PCI_UNCORE_R3QPI] = &ivbep_uncore_r3qpi, + NULL, +}; + +static const struct pci_device_id ivbep_uncore_pci_ids[] = { + { /* Home Agent 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 0), + }, + { /* Home Agent 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 1), + }, + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 0), + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 1), + }, + { /* MC0 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 2), + }, + { /* MC0 Channel 4 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5), + }, + { /* MC1 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 6), + }, + { /* 
MC1 Channel 4 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 7), + }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IRP, 0), + }, + { /* QPI0 Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 0), + }, + { /* QPI0 Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 1), + }, + { /* QPI1 Port 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 2), + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R2PCIE, 0), + }, + { /* R3QPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 1), + }, + { /* R3QPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 2), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 1 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver ivbep_uncore_pci_driver = { + .name = "ivbep_uncore", + .id_table = ivbep_uncore_pci_ids, +}; + +int ivbep_uncore_pci_init(void) +{ + int ret = snbep_pci2phy_map_init(0x0e1e); + if (ret) + return ret; + uncore_pci_uncores = ivbep_pci_uncores; + uncore_pci_driver = &ivbep_uncore_pci_driver; + return 0; +} +/* end of IvyTown uncore support */ + +/* Haswell-EP uncore support */ +static struct attribute 
*hswep_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + &format_attr_filter_tid2.attr, + &format_attr_filter_cid.attr, + NULL, +}; + +static struct attribute_group hswep_uncore_ubox_format_group = { + .name = "format", + .attrs = hswep_uncore_ubox_formats_attr, +}; + +static int hswep_ubox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + reg1->reg = HSWEP_U_MSR_PMON_FILTER; + reg1->config = event->attr.config1 & HSWEP_U_MSR_PMON_BOX_FILTER_MASK; + reg1->idx = 0; + return 0; +} + +static struct intel_uncore_ops hswep_uncore_ubox_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = hswep_ubox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type hswep_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = HSWEP_U_MSR_PMON_CTR0, + .event_ctl = HSWEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL, + .num_shared_regs = 1, + .ops = &hswep_uncore_ubox_ops, + .format_group = &hswep_uncore_ubox_format_group, +}; + +static struct attribute *hswep_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid3.attr, + &format_attr_filter_link2.attr, + &format_attr_filter_state3.attr, + &format_attr_filter_nid2.attr, + &format_attr_filter_opc2.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_c6.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static struct attribute_group hswep_uncore_cbox_format_group = { + .name = "format", + .attrs = 
hswep_uncore_cbox_formats_attr, +}; + +static struct event_constraint hswep_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x1), + UNCORE_EVENT_CONSTRAINT(0x09, 0x1), + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), + UNCORE_EVENT_CONSTRAINT(0x3e, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg hswep_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4028, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4032, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4029, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4033, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x402A, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x12), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 
0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8), + EVENT_EXTRA_END +}; + +static u64 hswep_cbox_filter_mask(int fields) +{ + u64 mask = 0; + if (fields & 0x1) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK; + if (fields & 0x4) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x10) { + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC; + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NC; + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_C6; + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC; + } + return mask; +} + +static struct event_constraint * +hswep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, hswep_cbox_filter_mask); +} + +static int hswep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = hswep_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 + + HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & hswep_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static void hswep_cbox_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + u64 filter = uncore_shared_reg_config(box, 0); + 
wrmsrl(reg1->reg, filter & 0xffffffff); + wrmsrl(reg1->reg + 1, filter >> 32); + } + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops hswep_uncore_cbox_ops = { + .init_box = snbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = hswep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = hswep_cbox_hw_config, + .get_constraint = hswep_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type hswep_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 18, + .perf_ctr_bits = 44, + .event_ctl = HSWEP_C0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, + .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = hswep_uncore_cbox_constraints, + .ops = &hswep_uncore_cbox_ops, + .format_group = &hswep_uncore_cbox_format_group, +}; + +static struct attribute *hswep_uncore_sbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group hswep_uncore_sbox_format_group = { + .name = "format", + .attrs = hswep_uncore_sbox_formats_attr, +}; + +static struct intel_uncore_type hswep_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 44, + .event_ctl = HSWEP_S0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_S0_MSR_PMON_CTR0, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_SBOX_MSR_OFFSET, + .ops = &snbep_uncore_msr_ops, + .format_group = &hswep_uncore_sbox_format_group, +}; + +static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct 
perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xff << reg1->idx); + } + return 0; +} + +static struct intel_uncore_ops hswep_uncore_pcu_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = hswep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type hswep_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0, + .event_ctl = HSWEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &hswep_uncore_pcu_ops, + .format_group = &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *hswep_msr_uncores[] = { + &hswep_uncore_ubox, + &hswep_uncore_cbox, + &hswep_uncore_sbox, + &hswep_uncore_pcu, + NULL, +}; + +void hswep_uncore_cpu_init(void) +{ + if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = hswep_msr_uncores; +} + +static struct intel_uncore_type hswep_uncore_ha = { + .name = "ha", + .num_counters = 5, + .num_boxes = 2, + .perf_ctr_bits = 48, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct uncore_event_desc hswep_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type hswep_uncore_imc = { + .name = "imc", + .num_counters = 5, + .num_boxes = 8, + .perf_ctr_bits = 48, + 
.fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = hswep_uncore_imc_events, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_ops hswep_uncore_irp_ops = { + .init_box = snbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = ivbep_uncore_irp_disable_event, + .enable_event = ivbep_uncore_irp_enable_event, + .read_counter = ivbep_uncore_irp_read_counter, +}; + +static struct intel_uncore_type hswep_uncore_irp = { + .name = "irp", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &hswep_uncore_irp_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type hswep_uncore_qpi = { + .name = "qpi", + .num_counters = 5, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_qpi_ops, + .format_group = &snbep_uncore_qpi_format_group, +}; + +static struct event_constraint hswep_uncore_r2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x23, 0x1), + UNCORE_EVENT_CONSTRAINT(0x24, 0x1), + UNCORE_EVENT_CONSTRAINT(0x25, 0x1), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x27, 0x1), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2a, 0x1), + UNCORE_EVENT_CONSTRAINT(0x2b, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x35, 0x3), 
+ EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type hswep_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .constraints = hswep_uncore_r2pcie_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct event_constraint hswep_uncore_r3qpi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x3), + UNCORE_EVENT_CONSTRAINT(0x07, 0x7), + UNCORE_EVENT_CONSTRAINT(0x08, 0x7), + UNCORE_EVENT_CONSTRAINT(0x09, 0x7), + UNCORE_EVENT_CONSTRAINT(0x0a, 0x7), + UNCORE_EVENT_CONSTRAINT(0x0e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x15, 0x3), + UNCORE_EVENT_CONSTRAINT(0x1f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x20, 0x3), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x22, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x3), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type hswep_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 44, + .constraints = hswep_uncore_r3qpi_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + HSWEP_PCI_UNCORE_HA, + HSWEP_PCI_UNCORE_IMC, + HSWEP_PCI_UNCORE_IRP, + HSWEP_PCI_UNCORE_QPI, + HSWEP_PCI_UNCORE_R2PCIE, + HSWEP_PCI_UNCORE_R3QPI, +}; + 
+static struct intel_uncore_type *hswep_pci_uncores[] = { + [HSWEP_PCI_UNCORE_HA] = &hswep_uncore_ha, + [HSWEP_PCI_UNCORE_IMC] = &hswep_uncore_imc, + [HSWEP_PCI_UNCORE_IRP] = &hswep_uncore_irp, + [HSWEP_PCI_UNCORE_QPI] = &hswep_uncore_qpi, + [HSWEP_PCI_UNCORE_R2PCIE] = &hswep_uncore_r2pcie, + [HSWEP_PCI_UNCORE_R3QPI] = &hswep_uncore_r3qpi, + NULL, +}; + +static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = { + { /* Home Agent 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0), + }, + { /* Home Agent 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f38), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 1), + }, + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb0), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 0), + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb1), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 1), + }, + { /* MC0 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb4), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 2), + }, + { /* MC0 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb5), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd0), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd1), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 5), + }, + { /* MC1 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd4), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 6), + }, + { /* MC1 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd5), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 7), + }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f39), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IRP, 0), + }, + { /* QPI0 Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f32), + .driver_data = 
UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 0), + }, + { /* QPI0 Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f33), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 1), + }, + { /* QPI1 Port 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3a), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 2), + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f34), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R2PCIE, 0), + }, + { /* R3QPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f36), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f37), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 1), + }, + { /* R3QPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3e), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 2), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 1 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver hswep_uncore_pci_driver = { + .name = "hswep_uncore", + .id_table = hswep_uncore_pci_ids, +}; + +int hswep_uncore_pci_init(void) +{ + int ret = snbep_pci2phy_map_init(0x2f1e); + if (ret) + return ret; + uncore_pci_uncores = hswep_pci_uncores; + uncore_pci_driver = &hswep_uncore_pci_driver; + return 0; +} +/* end of Haswell-EP uncore support */ diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c index 838fa8772c62..5b0c232d1ee6 100644 --- a/arch/x86/kernel/cpu/perf_event_knc.c +++ b/arch/x86/kernel/cpu/perf_event_knc.c @@ -217,7 +217,7 @@ static int knc_pmu_handle_irq(struct pt_regs *regs) int bit, loops; u64 status; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); 
knc_pmu_disable_all(); diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 5d466b7d8609..f2e56783af3d 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -915,7 +915,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) static void p4_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -984,7 +984,7 @@ static void p4_pmu_enable_event(struct perf_event *event) static void p4_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -1004,7 +1004,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); for (idx = 0; idx < x86_pmu.num_counters; idx++) { int overflow; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 507de8066594..f5ab56d14287 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -4,9 +4,14 @@ * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) * * Copyright (C) IBM Corporation, 2004. All rights reserved. + * Copyright (C) Red Hat Inc., 2014. All rights reserved. 
+ * Authors: + * Vivek Goyal <vgoyal@redhat.com> * */ +#define pr_fmt(fmt) "kexec: " fmt + #include <linux/types.h> #include <linux/kernel.h> #include <linux/smp.h> @@ -16,6 +21,7 @@ #include <linux/elf.h> #include <linux/elfcore.h> #include <linux/module.h> +#include <linux/slab.h> #include <asm/processor.h> #include <asm/hardirq.h> @@ -28,6 +34,45 @@ #include <asm/reboot.h> #include <asm/virtext.h> +/* Alignment required for elf header segment */ +#define ELF_CORE_HEADER_ALIGN 4096 + +/* This primarily represents number of split ranges due to exclusion */ +#define CRASH_MAX_RANGES 16 + +struct crash_mem_range { + u64 start, end; +}; + +struct crash_mem { + unsigned int nr_ranges; + struct crash_mem_range ranges[CRASH_MAX_RANGES]; +}; + +/* Misc data about ram ranges needed to prepare elf headers */ +struct crash_elf_data { + struct kimage *image; + /* + * Total number of ram ranges we have after various adjustments for + * GART, crash reserved region etc. + */ + unsigned int max_nr_ranges; + unsigned long gart_start, gart_end; + + /* Pointer to elf header */ + void *ehdr; + /* Pointer to next phdr */ + void *bufp; + struct crash_mem mem; +}; + +/* Used while preparing memory map entries for second kernel */ +struct crash_memmap_data { + struct boot_params *params; + /* Type of memory */ + unsigned int type; +}; + int in_crash_kexec; /* @@ -39,6 +84,7 @@ int in_crash_kexec; */ crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL; EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); +unsigned long crash_zero_bytes; static inline void cpu_crash_vmclear_loaded_vmcss(void) { @@ -135,3 +181,520 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #endif crash_save_cpu(regs, safe_smp_processor_id()); } + +#ifdef CONFIG_KEXEC_FILE +static int get_nr_ram_ranges_callback(unsigned long start_pfn, + unsigned long nr_pfn, void *arg) +{ + int *nr_ranges = arg; + + (*nr_ranges)++; + return 0; +} + +static int get_gart_ranges_callback(u64 start, u64 end, void *arg) +{ + 
struct crash_elf_data *ced = arg; + + ced->gart_start = start; + ced->gart_end = end; + + /* Not expecting more than 1 gart aperture */ + return 1; +} + + +/* Gather all the required information to prepare elf headers for ram regions */ +static void fill_up_crash_elf_data(struct crash_elf_data *ced, + struct kimage *image) +{ + unsigned int nr_ranges = 0; + + ced->image = image; + + walk_system_ram_range(0, -1, &nr_ranges, + get_nr_ram_ranges_callback); + + ced->max_nr_ranges = nr_ranges; + + /* + * We don't create ELF headers for GART aperture as an attempt + * to dump this memory in second kernel leads to hang/crash. + * If gart aperture is present, one needs to exclude that region + * and that could lead to need of extra phdr. + */ + walk_iomem_res("GART", IORESOURCE_MEM, 0, -1, + ced, get_gart_ranges_callback); + + /* + * If we have gart region, excluding that could potentially split + * a memory range, resulting in extra header. Account for that. + */ + if (ced->gart_end) + ced->max_nr_ranges++; + + /* Exclusion of crash region could split memory ranges */ + ced->max_nr_ranges++; + + /* If crashk_low_res is not 0, another range split possible */ + if (crashk_low_res.end) + ced->max_nr_ranges++; +} + +static int exclude_mem_range(struct crash_mem *mem, + unsigned long long mstart, unsigned long long mend) +{ + int i, j; + unsigned long long start, end; + struct crash_mem_range temp_range = {0, 0}; + + for (i = 0; i < mem->nr_ranges; i++) { + start = mem->ranges[i].start; + end = mem->ranges[i].end; + + if (mstart > end || mend < start) + continue; + + /* Truncate any area outside of range */ + if (mstart < start) + mstart = start; + if (mend > end) + mend = end; + + /* Found completely overlapping range */ + if (mstart == start && mend == end) { + mem->ranges[i].start = 0; + mem->ranges[i].end = 0; + if (i < mem->nr_ranges - 1) { + /* Shift rest of the ranges to left */ + for (j = i; j < mem->nr_ranges - 1; j++) { + mem->ranges[j].start = + 
mem->ranges[j+1].start; + mem->ranges[j].end = + mem->ranges[j+1].end; + } + } + mem->nr_ranges--; + return 0; + } + + if (mstart > start && mend < end) { + /* Split original range */ + mem->ranges[i].end = mstart - 1; + temp_range.start = mend + 1; + temp_range.end = end; + } else if (mstart != start) + mem->ranges[i].end = mstart - 1; + else + mem->ranges[i].start = mend + 1; + break; + } + + /* If a split happened, add the split to array */ + if (!temp_range.end) + return 0; + + /* Split happened */ + if (i == CRASH_MAX_RANGES - 1) { + pr_err("Too many crash ranges after split\n"); + return -ENOMEM; + } + + /* Location where new range should go */ + j = i + 1; + if (j < mem->nr_ranges) { + /* Move over all ranges one slot towards the end */ + for (i = mem->nr_ranges - 1; i >= j; i--) + mem->ranges[i + 1] = mem->ranges[i]; + } + + mem->ranges[j].start = temp_range.start; + mem->ranges[j].end = temp_range.end; + mem->nr_ranges++; + return 0; +} + +/* + * Look for any unwanted ranges between mstart, mend and remove them. 
This + * might lead to split and split ranges are put in ced->mem.ranges[] array + */ +static int elf_header_exclude_ranges(struct crash_elf_data *ced, + unsigned long long mstart, unsigned long long mend) +{ + struct crash_mem *cmem = &ced->mem; + int ret = 0; + + memset(cmem->ranges, 0, sizeof(cmem->ranges)); + + cmem->ranges[0].start = mstart; + cmem->ranges[0].end = mend; + cmem->nr_ranges = 1; + + /* Exclude crashkernel region */ + ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (ret) + return ret; + + if (crashk_low_res.end) { + ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + return ret; + } + + /* Exclude GART region */ + if (ced->gart_end) { + ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end); + if (ret) + return ret; + } + + return ret; +} + +static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg) +{ + struct crash_elf_data *ced = arg; + Elf64_Ehdr *ehdr; + Elf64_Phdr *phdr; + unsigned long mstart, mend; + struct kimage *image = ced->image; + struct crash_mem *cmem; + int ret, i; + + ehdr = ced->ehdr; + + /* Exclude unwanted mem ranges */ + ret = elf_header_exclude_ranges(ced, start, end); + if (ret) + return ret; + + /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */ + cmem = &ced->mem; + + for (i = 0; i < cmem->nr_ranges; i++) { + mstart = cmem->ranges[i].start; + mend = cmem->ranges[i].end; + + phdr = ced->bufp; + ced->bufp += sizeof(Elf64_Phdr); + + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_offset = mstart; + + /* + * If a range matches backup region, adjust offset to backup + * segment. 
+ */ + if (mstart == image->arch.backup_src_start && + (mend - mstart + 1) == image->arch.backup_src_sz) + phdr->p_offset = image->arch.backup_load_addr; + + phdr->p_paddr = mstart; + phdr->p_vaddr = (unsigned long long) __va(mstart); + phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; + phdr->p_align = 0; + ehdr->e_phnum++; + pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", + phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, + ehdr->e_phnum, phdr->p_offset); + } + + return ret; +} + +static int prepare_elf64_headers(struct crash_elf_data *ced, + void **addr, unsigned long *sz) +{ + Elf64_Ehdr *ehdr; + Elf64_Phdr *phdr; + unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz; + unsigned char *buf, *bufp; + unsigned int cpu; + unsigned long long notes_addr; + int ret; + + /* extra phdr for vmcoreinfo elf note */ + nr_phdr = nr_cpus + 1; + nr_phdr += ced->max_nr_ranges; + + /* + * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping + * area on x86_64 (ffffffff80000000 - ffffffffa0000000). + * I think this is required by tools like gdb. So same physical + * memory will be mapped in two elf headers. One will contain kernel + * text virtual addresses and other will have __va(physical) addresses. 
+ */ + + nr_phdr++; + elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr); + elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN); + + buf = vzalloc(elf_sz); + if (!buf) + return -ENOMEM; + + bufp = buf; + ehdr = (Elf64_Ehdr *)bufp; + bufp += sizeof(Elf64_Ehdr); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2LSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + ehdr->e_ident[EI_OSABI] = ELF_OSABI; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = ELF_ARCH; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + + /* Prepare one phdr of type PT_NOTE for each present cpu */ + for_each_present_cpu(cpu) { + phdr = (Elf64_Phdr *)bufp; + bufp += sizeof(Elf64_Phdr); + phdr->p_type = PT_NOTE; + notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); + phdr->p_offset = phdr->p_paddr = notes_addr; + phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t); + (ehdr->e_phnum)++; + } + + /* Prepare one PT_NOTE header for vmcoreinfo */ + phdr = (Elf64_Phdr *)bufp; + bufp += sizeof(Elf64_Phdr); + phdr->p_type = PT_NOTE; + phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note(); + phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note); + (ehdr->e_phnum)++; + +#ifdef CONFIG_X86_64 + /* Prepare PT_LOAD type program header for kernel text region */ + phdr = (Elf64_Phdr *)bufp; + bufp += sizeof(Elf64_Phdr); + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_vaddr = (Elf64_Addr)_text; + phdr->p_filesz = phdr->p_memsz = _end - _text; + phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); + (ehdr->e_phnum)++; +#endif + + /* Prepare PT_LOAD headers for system ram chunks. 
*/ + ced->ehdr = ehdr; + ced->bufp = bufp; + ret = walk_system_ram_res(0, -1, ced, + prepare_elf64_ram_headers_callback); + if (ret < 0) { + /* *addr is not set on this path, so free the header buffer here */ + vfree(buf); + return ret; + } + + *addr = buf; + *sz = elf_sz; + return 0; +} + +/* Prepare elf headers. Return addr and size */ +static int prepare_elf_headers(struct kimage *image, void **addr, + unsigned long *sz) +{ + struct crash_elf_data *ced; + int ret; + + ced = kzalloc(sizeof(*ced), GFP_KERNEL); + if (!ced) + return -ENOMEM; + + fill_up_crash_elf_data(ced, image); + + /* By default prepare 64bit headers */ + ret = prepare_elf64_headers(ced, addr, sz); + kfree(ced); + return ret; +} + +static int add_e820_entry(struct boot_params *params, struct e820entry *entry) +{ + unsigned int nr_e820_entries; + + nr_e820_entries = params->e820_entries; + if (nr_e820_entries >= E820MAX) + return 1; + + memcpy(&params->e820_map[nr_e820_entries], entry, + sizeof(struct e820entry)); + params->e820_entries++; + return 0; +} + +static int memmap_entry_callback(u64 start, u64 end, void *arg) +{ + struct crash_memmap_data *cmd = arg; + struct boot_params *params = cmd->params; + struct e820entry ei; + + ei.addr = start; + ei.size = end - start + 1; + ei.type = cmd->type; + add_e820_entry(params, &ei); + + return 0; +} + +static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem, + unsigned long long mstart, + unsigned long long mend) +{ + unsigned long start, end; + int ret = 0; + + cmem->ranges[0].start = mstart; + cmem->ranges[0].end = mend; + cmem->nr_ranges = 1; + + /* Exclude Backup region */ + start = image->arch.backup_load_addr; + end = start + image->arch.backup_src_sz - 1; + ret = exclude_mem_range(cmem, start, end); + if (ret) + return ret; + + /* Exclude elf header region */ + start = image->arch.elf_load_addr; + end = start + image->arch.elf_headers_sz - 1; + return exclude_mem_range(cmem, start, end); +} + +/* Prepare memory map for crash dump kernel */ +int crash_setup_memmap_entries(struct kimage *image, struct boot_params 
*params) +{ + int i, ret = 0; + unsigned long flags; + struct e820entry ei; + struct crash_memmap_data cmd; + struct crash_mem *cmem; + + cmem = vzalloc(sizeof(struct crash_mem)); + if (!cmem) + return -ENOMEM; + + memset(&cmd, 0, sizeof(struct crash_memmap_data)); + cmd.params = params; + + /* Add first 640K segment */ + ei.addr = image->arch.backup_src_start; + ei.size = image->arch.backup_src_sz; + ei.type = E820_RAM; + add_e820_entry(params, &ei); + + /* Add ACPI tables */ + cmd.type = E820_ACPI; + flags = IORESOURCE_MEM | IORESOURCE_BUSY; + walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd, + memmap_entry_callback); + + /* Add ACPI Non-volatile Storage */ + cmd.type = E820_NVS; + walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd, + memmap_entry_callback); + + /* Add crashk_low_res region */ + if (crashk_low_res.end) { + ei.addr = crashk_low_res.start; + ei.size = crashk_low_res.end - crashk_low_res.start + 1; + ei.type = E820_RAM; + add_e820_entry(params, &ei); + } + + /* Exclude some ranges from crashk_res and add rest to memmap */ + ret = memmap_exclude_ranges(image, cmem, crashk_res.start, + crashk_res.end); + if (ret) + goto out; + + for (i = 0; i < cmem->nr_ranges; i++) { + ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1; + + /* If entry is less than a page, skip it */ + if (ei.size < PAGE_SIZE) + continue; + ei.addr = cmem->ranges[i].start; + ei.type = E820_RAM; + add_e820_entry(params, &ei); + } + +out: + vfree(cmem); + return ret; +} + +static int determine_backup_region(u64 start, u64 end, void *arg) +{ + struct kimage *image = arg; + + image->arch.backup_src_start = start; + image->arch.backup_src_sz = end - start + 1; + + /* Expecting only one range for backup region */ + return 1; +} + +int crash_load_segments(struct kimage *image) +{ + unsigned long src_start, src_sz, elf_sz; + void *elf_addr; + int ret; + + /* + * Determine and load a segment for backup area. 
First 640K RAM + * region is backup source + */ + + ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END, + image, determine_backup_region); + + /* Zero or positive return values are ok */ + if (ret < 0) + return ret; + + src_start = image->arch.backup_src_start; + src_sz = image->arch.backup_src_sz; + + /* Add backup segment. */ + if (src_sz) { + /* + * Ideally there is no source for backup segment. This is + * copied in purgatory after crash. Just add a zero filled + * segment for now to make sure checksum logic works fine. + */ + ret = kexec_add_buffer(image, (char *)&crash_zero_bytes, + sizeof(crash_zero_bytes), src_sz, + PAGE_SIZE, 0, -1, 0, + &image->arch.backup_load_addr); + if (ret) + return ret; + pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n", + image->arch.backup_load_addr, src_start, src_sz); + } + + /* Prepare elf headers and add a segment */ + ret = prepare_elf_headers(image, &elf_addr, &elf_sz); + if (ret) + return ret; + + image->arch.elf_headers = elf_addr; + image->arch.elf_headers_sz = elf_sz; + + ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz, + ELF_CORE_HEADER_ALIGN, 0, -1, 0, + &image->arch.elf_load_addr); + if (ret) { + vfree((void *)image->arch.elf_headers); + return ret; + } + pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->arch.elf_load_addr, elf_sz, elf_sz); + + return ret; +} +#endif /* CONFIG_KEXEC_FILE */ diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 7db54b5d5f86..3d3503351242 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -21,6 +21,7 @@ #include <asm/apic.h> #include <asm/pci_x86.h> #include <asm/setup.h> +#include <asm/i8259.h> __initdata u64 initial_dtb; char __initdata cmd_line[COMMAND_LINE_SIZE]; @@ -165,82 +166,6 @@ static void __init dtb_lapic_setup(void) #ifdef CONFIG_X86_IO_APIC static unsigned int ioapic_id; -static void __init dtb_add_ioapic(struct device_node *dn) -{ - 
struct resource r; - int ret; - - ret = of_address_to_resource(dn, 0, &r); - if (ret) { - printk(KERN_ERR "Can't obtain address from node %s.\n", - dn->full_name); - return; - } - mp_register_ioapic(++ioapic_id, r.start, gsi_top); -} - -static void __init dtb_ioapic_setup(void) -{ - struct device_node *dn; - - for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic") - dtb_add_ioapic(dn); - - if (nr_ioapics) { - of_ioapic = 1; - return; - } - printk(KERN_ERR "Error: No information about IO-APIC in OF.\n"); -} -#else -static void __init dtb_ioapic_setup(void) {} -#endif - -static void __init dtb_apic_setup(void) -{ - dtb_lapic_setup(); - dtb_ioapic_setup(); -} - -#ifdef CONFIG_OF_FLATTREE -static void __init x86_flattree_get_config(void) -{ - u32 size, map_len; - void *dt; - - if (!initial_dtb) - return; - - map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128); - - initial_boot_params = dt = early_memremap(initial_dtb, map_len); - size = of_get_flat_dt_size(); - if (map_len < size) { - early_iounmap(dt, map_len); - initial_boot_params = dt = early_memremap(initial_dtb, size); - map_len = size; - } - - unflatten_and_copy_device_tree(); - early_iounmap(dt, map_len); -} -#else -static inline void x86_flattree_get_config(void) { } -#endif - -void __init x86_dtb_init(void) -{ - x86_flattree_get_config(); - - if (!of_have_populated_dt()) - return; - - dtb_setup_hpet(); - dtb_apic_setup(); -} - -#ifdef CONFIG_X86_IO_APIC - struct of_ioapic_type { u32 out_type; u32 trigger; @@ -276,10 +201,8 @@ static int ioapic_xlate(struct irq_domain *domain, const u32 *intspec, u32 intsize, irq_hw_number_t *out_hwirq, u32 *out_type) { - struct io_apic_irq_attr attr; struct of_ioapic_type *it; - u32 line, idx; - int rc; + u32 line, idx, gsi; if (WARN_ON(intsize < 2)) return -EINVAL; @@ -291,13 +214,10 @@ static int ioapic_xlate(struct irq_domain *domain, it = &of_ioapic_type[intspec[1]]; - idx = (u32) domain->host_data; - set_io_apic_irq_attr(&attr, idx, line, it->trigger, 
it->polarity); - - rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line), - cpu_to_node(0), &attr); - if (rc) - return rc; + idx = (u32)(long)domain->host_data; + gsi = mp_pin_to_gsi(idx, line); + if (mp_set_gsi_attr(gsi, it->trigger, it->polarity, cpu_to_node(0))) + return -EBUSY; *out_hwirq = line; *out_type = it->out_type; @@ -305,81 +225,86 @@ static int ioapic_xlate(struct irq_domain *domain, } const struct irq_domain_ops ioapic_irq_domain_ops = { + .map = mp_irqdomain_map, + .unmap = mp_irqdomain_unmap, .xlate = ioapic_xlate, }; -static void dt_add_ioapic_domain(unsigned int ioapic_num, - struct device_node *np) +static void __init dtb_add_ioapic(struct device_node *dn) { - struct irq_domain *id; - struct mp_ioapic_gsi *gsi_cfg; + struct resource r; int ret; - int num; - - gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); - num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; - - id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, - (void *)ioapic_num); - BUG_ON(!id); - if (gsi_cfg->gsi_base == 0) { - /* - * The first NR_IRQS_LEGACY irq descs are allocated in - * early_irq_init() and need just a mapping. The - * remaining irqs need both. All of them are preallocated - * and assigned so we can keep the 1:1 mapping which the ioapic - * is having. 
- */ - irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); - - if (num > NR_IRQS_LEGACY) { - ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, - NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); - if (ret) - pr_err("Error creating mapping for the " - "remaining IRQs: %d\n", ret); - } - irq_set_default_host(id); - } else { - ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); - if (ret) - pr_err("Error creating IRQ mapping: %d\n", ret); + struct ioapic_domain_cfg cfg = { + .type = IOAPIC_DOMAIN_DYNAMIC, + .ops = &ioapic_irq_domain_ops, + .dev = dn, + }; + + ret = of_address_to_resource(dn, 0, &r); + if (ret) { + printk(KERN_ERR "Can't obtain address from node %s.\n", + dn->full_name); + return; } + mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg); } -static void __init ioapic_add_ofnode(struct device_node *np) +static void __init dtb_ioapic_setup(void) { - struct resource r; - int i, ret; + struct device_node *dn; - ret = of_address_to_resource(np, 0, &r); - if (ret) { - printk(KERN_ERR "Failed to obtain address for %s\n", - np->full_name); + for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic") + dtb_add_ioapic(dn); + + if (nr_ioapics) { + of_ioapic = 1; return; } + printk(KERN_ERR "Error: No information about IO-APIC in OF.\n"); +} +#else +static void __init dtb_ioapic_setup(void) {} +#endif - for (i = 0; i < nr_ioapics; i++) { - if (r.start == mpc_ioapic_addr(i)) { - dt_add_ioapic_domain(i, np); - return; - } - } - printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name); +static void __init dtb_apic_setup(void) +{ + dtb_lapic_setup(); + dtb_ioapic_setup(); } -void __init x86_add_irq_domains(void) +#ifdef CONFIG_OF_FLATTREE +static void __init x86_flattree_get_config(void) { - struct device_node *dp; + u32 size, map_len; + void *dt; - if (!of_have_populated_dt()) + if (!initial_dtb) return; - for_each_node_with_property(dp, "interrupt-controller") { - if (of_device_is_compatible(dp, "intel,ce4100-ioapic")) - ioapic_add_ofnode(dp); + 
map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128); + + initial_boot_params = dt = early_memremap(initial_dtb, map_len); + size = of_get_flat_dt_size(); + if (map_len < size) { + early_iounmap(dt, map_len); + initial_boot_params = dt = early_memremap(initial_dtb, size); + map_len = size; } + + unflatten_and_copy_device_tree(); + early_iounmap(dt, map_len); } #else -void __init x86_add_irq_domains(void) { } +static inline void x86_flattree_get_config(void) { } #endif + +void __init x86_dtb_init(void) +{ + x86_flattree_get_config(); + + if (!of_have_populated_dt()) + return; + + dtb_setup_hpet(); + dtb_apic_setup(); +} diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 988c00a1f60d..49f886481615 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -682,15 +682,14 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len) * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). * * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. + * overlapping entries. 
*/ void __init e820_mark_nosave_regions(unsigned long limit_pfn) { int i; - unsigned long pfn; + unsigned long pfn = 0; - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { + for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; if (pfn < PFN_UP(ei->addr)) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 47c410d99f5d..344b63f18d14 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -447,16 +447,14 @@ sysenter_exit: sysenter_audit: testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry - addl $4,%esp - CFI_ADJUST_CFA_OFFSET -4 - /* %esi already in 8(%esp) 6th arg: 4th syscall arg */ - /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */ - /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */ - movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ - movl %eax,%edx /* 2nd arg: syscall number */ - movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ + /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */ + movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */ + /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */ + pushl_cfi PT_ESI(%esp) /* a3: 5th arg */ + pushl_cfi PT_EDX+4(%esp) /* a2: 4th arg */ call __audit_syscall_entry - pushl_cfi %ebx + popl_cfi %ecx /* get that remapped edx off the stack */ + popl_cfi %ecx /* get that remapped esi off the stack */ movl PT_EAX(%esp),%eax /* reload syscall number */ jmp sysenter_do_call @@ -683,7 +681,7 @@ END(syscall_badsys) sysenter_badsys: movl $-ENOSYS,%eax jmp sysenter_after_call -END(syscall_badsys) +END(sysenter_badsys) CFI_ENDPROC .macro FIXUP_ESPFIX_STACK diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 2fac1343a90b..df088bb03fb3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -404,8 +404,8 @@ GLOBAL(system_call_after_swapgs) * and short: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,0 - movq 
%rax,ORIG_RAX-ARGOFFSET(%rsp) + SAVE_ARGS 8, 0, rax_enosys=1 + movq_cfi rax,(ORIG_RAX-ARGOFFSET) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) @@ -417,7 +417,7 @@ system_call_fastpath: andl $__SYSCALL_MASK,%eax cmpl $__NR_syscall_max,%eax #endif - ja badsys + ja ret_from_sys_call /* and return regs->ax */ movq %r10,%rcx call *sys_call_table(,%rax,8) # XXX: rip relative movq %rax,RAX-ARGOFFSET(%rsp) @@ -476,28 +476,8 @@ sysret_signal: FIXUP_TOP_OF_STACK %r11, -ARGOFFSET jmp int_check_syscall_exit_work -badsys: - movq $-ENOSYS,RAX-ARGOFFSET(%rsp) - jmp ret_from_sys_call - #ifdef CONFIG_AUDITSYSCALL /* - * Fast path for syscall audit without full syscall trace. - * We just call __audit_syscall_entry() directly, and then - * jump back to the normal fast path. - */ -auditsys: - movq %r10,%r9 /* 6th arg: 4th syscall arg */ - movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ - movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ - movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ - movq %rax,%rsi /* 2nd arg: syscall number */ - movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ - call __audit_syscall_entry - LOAD_ARGS 0 /* reload call-clobbered registers */ - jmp system_call_fastpath - - /* * Return fast path for syscall audit. Call __audit_syscall_exit() * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT * masked off. 
@@ -514,18 +494,25 @@ sysret_audit: /* Do syscall tracing */ tracesys: -#ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) - jz auditsys -#endif + leaq -REST_SKIP(%rsp), %rdi + movq $AUDIT_ARCH_X86_64, %rsi + call syscall_trace_enter_phase1 + test %rax, %rax + jnz tracesys_phase2 /* if needed, run the slow path */ + LOAD_ARGS 0 /* else restore clobbered regs */ + jmp system_call_fastpath /* and return to the fast path */ + +tracesys_phase2: SAVE_REST - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ FIXUP_TOP_OF_STACK %rdi - movq %rsp,%rdi - call syscall_trace_enter + movq %rsp, %rdi + movq $AUDIT_ARCH_X86_64, %rsi + movq %rax,%rdx + call syscall_trace_enter_phase2 + /* * Reload arg registers from stack in case ptrace changed them. - * We don't reload %rax because syscall_trace_enter() returned + * We don't reload %rax because syscall_trace_enter_phase2() returned * the value it wants us to use in the table lookup. 
*/ LOAD_ARGS ARGOFFSET, 1 @@ -536,7 +523,7 @@ tracesys: andl $__SYSCALL_MASK,%eax cmpl $__NR_syscall_max,%eax #endif - ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ + ja int_ret_from_sys_call /* RAX(%rsp) is already set */ movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 5f9cf20cdb68..3d5fb509bdeb 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -108,7 +108,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) int i; for (i = 0; i < HBP_NUM; i++) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); if (!*slot) { *slot = bp; @@ -122,7 +122,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) set_debugreg(info->address, i); __this_cpu_write(cpu_debugreg[i], info->address); - dr7 = &__get_cpu_var(cpu_dr7); + dr7 = this_cpu_ptr(&cpu_dr7); *dr7 |= encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); @@ -146,7 +146,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) int i; for (i = 0; i < HBP_NUM; i++) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); if (*slot == bp) { *slot = NULL; @@ -157,7 +157,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) return; - dr7 = &__get_cpu_var(cpu_dr7); + dr7 = this_cpu_ptr(&cpu_dr7); *dr7 &= ~__encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index d5dd80814419..a9a4229f6161 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -375,7 +375,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, /* * These bits must be zero. 
*/ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + memset(xsave_hdr->reserved, 0, 48); return ret; } diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 8af817105e29..e7cc5370cd2f 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -111,8 +111,7 @@ static void make_8259A_irq(unsigned int irq) { disable_irq_nosync(irq); io_apic_irqs &= ~(1<<irq); - irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, - i8259A_chip.name); + irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq); enable_irq(irq); } diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c index d30acdc1229d..82f8d02f0df2 100644 --- a/arch/x86/kernel/iosf_mbi.c +++ b/arch/x86/kernel/iosf_mbi.c @@ -22,10 +22,13 @@ #include <linux/init.h> #include <linux/spinlock.h> #include <linux/pci.h> +#include <linux/debugfs.h> +#include <linux/capability.h> #include <asm/iosf_mbi.h> #define PCI_DEVICE_ID_BAYTRAIL 0x0F00 +#define PCI_DEVICE_ID_BRASWELL 0x2280 #define PCI_DEVICE_ID_QUARK_X1000 0x0958 static DEFINE_SPINLOCK(iosf_mbi_lock); @@ -187,6 +190,89 @@ bool iosf_mbi_available(void) } EXPORT_SYMBOL(iosf_mbi_available); +#ifdef CONFIG_IOSF_MBI_DEBUG +static u32 dbg_mdr; +static u32 dbg_mcr; +static u32 dbg_mcrx; + +static int mcr_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} + +static int mcr_set(void *data, u64 val) +{ + u8 command = ((u32)val & 0xFF000000) >> 24, + port = ((u32)val & 0x00FF0000) >> 16, + offset = ((u32)val & 0x0000FF00) >> 8; + int err; + + *(u32 *)data = val; + + if (!capable(CAP_SYS_RAWIO)) + return -EACCES; + + if (command & 1u) + err = iosf_mbi_write(port, + command, + dbg_mcrx | offset, + dbg_mdr); + else + err = iosf_mbi_read(port, + command, + dbg_mcrx | offset, + &dbg_mdr); + + return err; +} +DEFINE_SIMPLE_ATTRIBUTE(iosf_mcr_fops, mcr_get, mcr_set , "%llx\n"); + +static struct dentry *iosf_dbg; + +static void iosf_sideband_debug_init(void) +{ + struct dentry *d; + + iosf_dbg = 
debugfs_create_dir("iosf_sb", NULL); + if (IS_ERR_OR_NULL(iosf_dbg)) + return; + + /* mdr */ + d = debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr); + if (IS_ERR_OR_NULL(d)) + goto cleanup; + + /* mcrx */ + d = debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx); + if (IS_ERR_OR_NULL(d)) + goto cleanup; + + /* mcr - initiates mailbox transaction */ + d = debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops); + if (IS_ERR_OR_NULL(d)) + goto cleanup; + + return; +cleanup: + debugfs_remove_recursive(iosf_dbg); /* drop the whole iosf_sb directory */ + iosf_dbg = NULL; /* iosf_debugfs_remove() must not free it again */ +} + +static void iosf_debugfs_init(void) +{ + iosf_sideband_debug_init(); +} + +static void iosf_debugfs_remove(void) +{ + debugfs_remove_recursive(iosf_dbg); +} +#else +static inline void iosf_debugfs_init(void) { } +static inline void iosf_debugfs_remove(void) { } +#endif /* CONFIG_IOSF_MBI_DEBUG */ + static int iosf_mbi_probe(struct pci_dev *pdev, const struct pci_device_id *unused) { @@ -202,8 +288,9 @@ static int iosf_mbi_probe(struct pci_dev *pdev, return 0; } -static DEFINE_PCI_DEVICE_TABLE(iosf_mbi_pci_ids) = { +static const struct pci_device_id iosf_mbi_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BAYTRAIL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BRASWELL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_QUARK_X1000) }, { 0, }, }; @@ -217,11 +304,15 @@ static struct pci_driver iosf_mbi_pci_driver = { static int __init iosf_mbi_init(void) { + iosf_debugfs_init(); + return pci_register_driver(&iosf_mbi_pci_driver); } static void __exit iosf_mbi_exit(void) { + iosf_debugfs_remove(); + pci_unregister_driver(&iosf_mbi_pci_driver); if (mbi_pdev) { pci_dev_put(mbi_pdev); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4d1c746892eb..e4b503d5558c 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -52,13 +52,13 @@ static inline void stack_overflow_check(struct pt_regs *regs) regs->sp <= curbase + THREAD_SIZE) return; - irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) + 
+ irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) + STACK_TOP_MARGIN; - irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr); + irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr); if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) return; - oist = &__get_cpu_var(orig_ist); + oist = this_cpu_ptr(&orig_ist); estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; if (regs->sp >= estack_top && regs->sp <= estack_bottom) diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 1de84e3ab4e0..15d741ddfeeb 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -41,7 +41,7 @@ __visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) void arch_irq_work_raise(void) { #ifdef CONFIG_X86_LOCAL_APIC - if (!cpu_has_apic) + if (!arch_irq_work_has_interrupt()) return; apic->send_IPI_self(IRQ_WORK_VECTOR); diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7f50156542fb..4de73ee78361 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -70,7 +70,6 @@ int vector_used_by_percpu_irq(unsigned int vector) void __init init_ISA_irqs(void) { struct irq_chip *chip = legacy_pic->chip; - const char *name = chip->name; int i; #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) @@ -78,8 +77,8 @@ void __init init_ISA_irqs(void) #endif legacy_pic->init(0); - for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) - irq_set_chip_and_handler_name(i, chip, handle_level_irq, name); + for (i = 0; i < nr_legacy_irqs(); i++) + irq_set_chip_and_handler(i, chip, handle_level_irq); } void __init init_IRQ(void) @@ -87,12 +86,6 @@ void __init init_IRQ(void) int i; /* - * We probably need a better place for this, but it works for - * now ... - */ - x86_add_irq_domains(); - - /* * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. 
* If these IRQ's are handled by legacy interrupt-controllers like PIC, * then this configuration will likely be static after the boot. If @@ -100,7 +93,7 @@ void __init init_IRQ(void) * then this vector space can be freed and re-used dynamically as the * irq's migrate etc. */ - for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) + for (i = 0; i < nr_legacy_irqs(); i++) per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; x86_init.irqs.intr_init(); @@ -121,7 +114,7 @@ void setup_vector_irq(int cpu) * legacy PIC, for the new cpu that is coming online, setup the static * legacy vector to irq mapping: */ - for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++) + for (irq = 0; irq < nr_legacy_irqs(); irq++) per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; #endif @@ -209,7 +202,7 @@ void __init native_init_IRQ(void) set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } - if (!acpi_ioapic && !of_ioapic) + if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c new file mode 100644 index 000000000000..ca05f86481aa --- /dev/null +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -0,0 +1,554 @@ +/* + * Kexec bzImage loader + * + * Copyright (C) 2014 Red Hat Inc. + * Authors: + * Vivek Goyal <vgoyal@redhat.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#define pr_fmt(fmt) "kexec-bzImage64: " fmt + +#include <linux/string.h> +#include <linux/printk.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/efi.h> +#include <linux/verify_pefile.h> +#include <keys/system_keyring.h> + +#include <asm/bootparam.h> +#include <asm/setup.h> +#include <asm/crash.h> +#include <asm/efi.h> +#include <asm/kexec-bzimage64.h> + +#define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */ + +/* + * Defines lowest physical address for various segments. Not sure where + * exactly these limits came from. Current bzimage64 loader in kexec-tools + * uses these so I am retaining it. It can be changed over time as we gain + * more insight. + */ +#define MIN_PURGATORY_ADDR 0x3000 +#define MIN_BOOTPARAM_ADDR 0x3000 +#define MIN_KERNEL_LOAD_ADDR 0x100000 +#define MIN_INITRD_LOAD_ADDR 0x1000000 + +/* + * This is a place holder for all boot loader specific data structure which + * gets allocated in one call but gets freed much later during cleanup + * time. Right now there is only one field but it can grow as need be. + */ +struct bzimage64_data { + /* + * Temporary buffer to hold bootparams buffer. This should be + * freed once the bootparam segment has been loaded. 
+ */ + void *bootparams_buf; +}; + +static int setup_initrd(struct boot_params *params, + unsigned long initrd_load_addr, unsigned long initrd_len) +{ + params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL; + params->hdr.ramdisk_size = initrd_len & 0xffffffffUL; + + params->ext_ramdisk_image = initrd_load_addr >> 32; + params->ext_ramdisk_size = initrd_len >> 32; + + return 0; +} + +static int setup_cmdline(struct kimage *image, struct boot_params *params, + unsigned long bootparams_load_addr, + unsigned long cmdline_offset, char *cmdline, + unsigned long cmdline_len) +{ + char *cmdline_ptr = ((char *)params) + cmdline_offset; + unsigned long cmdline_ptr_phys, len; + uint32_t cmdline_low_32, cmdline_ext_32; + + memcpy(cmdline_ptr, cmdline, cmdline_len); + if (image->type == KEXEC_TYPE_CRASH) { + len = sprintf(cmdline_ptr + cmdline_len - 1, + " elfcorehdr=0x%lx", image->arch.elf_load_addr); + cmdline_len += len; + } + cmdline_ptr[cmdline_len - 1] = '\0'; + + pr_debug("Final command line is: %s\n", cmdline_ptr); + cmdline_ptr_phys = bootparams_load_addr + cmdline_offset; + cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL; + cmdline_ext_32 = cmdline_ptr_phys >> 32; + + params->hdr.cmd_line_ptr = cmdline_low_32; + if (cmdline_ext_32) + params->ext_cmd_line_ptr = cmdline_ext_32; + + return 0; +} + +static int setup_e820_entries(struct boot_params *params) +{ + unsigned int nr_e820_entries; + + nr_e820_entries = e820_saved.nr_map; + + /* TODO: Pass entries more than E820MAX in bootparams setup data */ + if (nr_e820_entries > E820MAX) + nr_e820_entries = E820MAX; + + params->e820_entries = nr_e820_entries; + memcpy(&params->e820_map, &e820_saved.map, + nr_e820_entries * sizeof(struct e820entry)); + + return 0; +} + +#ifdef CONFIG_EFI +static int setup_efi_info_memmap(struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_map_offset, + unsigned int efi_map_sz) +{ + void *efi_map = (void *)params + efi_map_offset; + unsigned long 
efi_map_phys_addr = params_load_addr + efi_map_offset; + struct efi_info *ei = &params->efi_info; + + if (!efi_map_sz) + return 0; + + efi_runtime_map_copy(efi_map, efi_map_sz); + + ei->efi_memmap = efi_map_phys_addr & 0xffffffff; + ei->efi_memmap_hi = efi_map_phys_addr >> 32; + ei->efi_memmap_size = efi_map_sz; + + return 0; +} + +static int +prepare_add_efi_setup_data(struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_setup_data_offset) +{ + unsigned long setup_data_phys; + struct setup_data *sd = (void *)params + efi_setup_data_offset; + struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data); + + esd->fw_vendor = efi.fw_vendor; + esd->runtime = efi.runtime; + esd->tables = efi.config_table; + esd->smbios = efi.smbios; + + sd->type = SETUP_EFI; + sd->len = sizeof(struct efi_setup_data); + + /* Add setup data */ + setup_data_phys = params_load_addr + efi_setup_data_offset; + sd->next = params->hdr.setup_data; + params->hdr.setup_data = setup_data_phys; + + return 0; +} + +static int +setup_efi_state(struct boot_params *params, unsigned long params_load_addr, + unsigned int efi_map_offset, unsigned int efi_map_sz, + unsigned int efi_setup_data_offset) +{ + struct efi_info *current_ei = &boot_params.efi_info; + struct efi_info *ei = &params->efi_info; + + if (!current_ei->efi_memmap_size) + return 0; + + /* + * If 1:1 mapping is not enabled, second kernel can not setup EFI + * and use EFI run time services. User space will have to pass + * acpi_rsdp=<addr> on kernel command line to make second kernel boot + * without efi. 
+ */ + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + ei->efi_loader_signature = current_ei->efi_loader_signature; + ei->efi_systab = current_ei->efi_systab; + ei->efi_systab_hi = current_ei->efi_systab_hi; + + ei->efi_memdesc_version = current_ei->efi_memdesc_version; + ei->efi_memdesc_size = efi_get_runtime_map_desc_size(); + + setup_efi_info_memmap(params, params_load_addr, efi_map_offset, + efi_map_sz); + prepare_add_efi_setup_data(params, params_load_addr, + efi_setup_data_offset); + return 0; +} +#endif /* CONFIG_EFI */ + +static int +setup_boot_parameters(struct kimage *image, struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_map_offset, unsigned int efi_map_sz, + unsigned int efi_setup_data_offset) +{ + unsigned int nr_e820_entries; + unsigned long long mem_k, start, end; + int i, ret = 0; + + /* Get subarch from existing bootparams */ + params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; + + /* Copying screen_info will do? */ + memcpy(&params->screen_info, &boot_params.screen_info, + sizeof(struct screen_info)); + + /* Fill in memsize later */ + params->screen_info.ext_mem_k = 0; + params->alt_mem_k = 0; + + /* Default APM info */ + memset(&params->apm_bios_info, 0, sizeof(params->apm_bios_info)); + + /* Default drive info */ + memset(&params->hd0_info, 0, sizeof(params->hd0_info)); + memset(&params->hd1_info, 0, sizeof(params->hd1_info)); + + /* Default sysdesc table */ + params->sys_desc_table.length = 0; + + if (image->type == KEXEC_TYPE_CRASH) { + ret = crash_setup_memmap_entries(image, params); + if (ret) + return ret; + } else + setup_e820_entries(params); + + nr_e820_entries = params->e820_entries; + + for (i = 0; i < nr_e820_entries; i++) { + if (params->e820_map[i].type != E820_RAM) + continue; + start = params->e820_map[i].addr; + end = params->e820_map[i].addr + params->e820_map[i].size - 1; + + if ((start <= 0x100000) && end > 0x100000) { + mem_k = (end >> 10) - (0x100000 >> 10); + params->screen_info.ext_mem_k = 
mem_k; + params->alt_mem_k = mem_k; + if (mem_k > 0xfc00) + params->screen_info.ext_mem_k = 0xfc00; /* 64M*/ + if (mem_k > 0xffffffff) + params->alt_mem_k = 0xffffffff; + } + } + +#ifdef CONFIG_EFI + /* Setup EFI state */ + setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz, + efi_setup_data_offset); +#endif + + /* Setup EDD info */ + memcpy(params->eddbuf, boot_params.eddbuf, + EDDMAXNR * sizeof(struct edd_info)); + params->eddbuf_entries = boot_params.eddbuf_entries; + + memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer, + EDD_MBR_SIG_MAX * sizeof(unsigned int)); + + return ret; +} + +static int bzImage64_probe(const char *buf, unsigned long len) +{ + int ret = -ENOEXEC; + struct setup_header *header; + + /* kernel should be atleast two sectors long */ + if (len < 2 * 512) { + pr_err("File is too short to be a bzImage\n"); + return ret; + } + + header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr)); + if (memcmp((char *)&header->header, "HdrS", 4) != 0) { + pr_err("Not a bzImage\n"); + return ret; + } + + if (header->boot_flag != 0xAA55) { + pr_err("No x86 boot sector present\n"); + return ret; + } + + if (header->version < 0x020C) { + pr_err("Must be at least protocol version 2.12\n"); + return ret; + } + + if (!(header->loadflags & LOADED_HIGH)) { + pr_err("zImage not a bzImage\n"); + return ret; + } + + if (!(header->xloadflags & XLF_KERNEL_64)) { + pr_err("Not a bzImage64. XLF_KERNEL_64 is not set.\n"); + return ret; + } + + if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) { + pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n"); + return ret; + } + + /* + * Can't handle 32bit EFI as it does not allow loading kernel + * above 4G. This should be handled by 32bit bzImage loader + */ + if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) { + pr_debug("EFI is 32 bit. 
Can't load kernel above 4G.\n"); + return ret; + } + + /* I've got a bzImage */ + pr_debug("It's a relocatable bzImage64\n"); + ret = 0; + + return ret; +} + +static void *bzImage64_load(struct kimage *image, char *kernel, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + + struct setup_header *header; + int setup_sects, kern16_size, ret = 0; + unsigned long setup_header_size, params_cmdline_sz, params_misc_sz; + struct boot_params *params; + unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr; + unsigned long purgatory_load_addr; + unsigned long kernel_bufsz, kernel_memsz, kernel_align; + char *kernel_buf; + struct bzimage64_data *ldata; + struct kexec_entry64_regs regs64; + void *stack; + unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr); + unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset; + + header = (struct setup_header *)(kernel + setup_hdr_offset); + setup_sects = header->setup_sects; + if (setup_sects == 0) + setup_sects = 4; + + kern16_size = (setup_sects + 1) * 512; + if (kernel_len < kern16_size) { + pr_err("bzImage truncated\n"); + return ERR_PTR(-ENOEXEC); + } + + if (cmdline_len > header->cmdline_size) { + pr_err("Kernel command line too long\n"); + return ERR_PTR(-EINVAL); + } + + /* + * In case of crash dump, we will append elfcorehdr=<addr> to + * command line. Make sure it does not overflow + */ + if (cmdline_len + MAX_ELFCOREHDR_STR_LEN > header->cmdline_size) { + pr_debug("Appending elfcorehdr=<addr> to command line exceeds maximum allowed length\n"); + return ERR_PTR(-EINVAL); + } + + /* Allocate and load backup region */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = crash_load_segments(image); + if (ret) + return ERR_PTR(ret); + } + + /* + * Load purgatory. For 64bit entry point, purgatory code can be + * anywhere. 
+ */ + ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1, + &purgatory_load_addr); + if (ret) { + pr_err("Loading purgatory failed\n"); + return ERR_PTR(ret); + } + + pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); + + + /* + * Load Bootparams and cmdline and space for efi stuff. + * + * Allocate memory together for multiple data structures so + * that they all can go in single area/segment and we don't + * have to create separate segment for each. Keeps things + * little bit simple + */ + efi_map_sz = efi_get_runtime_map_size(); + efi_map_sz = ALIGN(efi_map_sz, 16); + params_cmdline_sz = sizeof(struct boot_params) + cmdline_len + + MAX_ELFCOREHDR_STR_LEN; + params_cmdline_sz = ALIGN(params_cmdline_sz, 16); + params_misc_sz = params_cmdline_sz + efi_map_sz + + sizeof(struct setup_data) + + sizeof(struct efi_setup_data); + + params = kzalloc(params_misc_sz, GFP_KERNEL); + if (!params) + return ERR_PTR(-ENOMEM); + efi_map_offset = params_cmdline_sz; + efi_setup_data_offset = efi_map_offset + efi_map_sz; + + /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ + setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; + + /* Is there a limit on setup header size? 
*/ + memcpy(¶ms->hdr, (kernel + setup_hdr_offset), setup_header_size); + + ret = kexec_add_buffer(image, (char *)params, params_misc_sz, + params_misc_sz, 16, MIN_BOOTPARAM_ADDR, + ULONG_MAX, 1, &bootparam_load_addr); + if (ret) + goto out_free_params; + pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + bootparam_load_addr, params_misc_sz, params_misc_sz); + + /* Load kernel */ + kernel_buf = kernel + kern16_size; + kernel_bufsz = kernel_len - kern16_size; + kernel_memsz = PAGE_ALIGN(header->init_size); + kernel_align = header->kernel_alignment; + + ret = kexec_add_buffer(image, kernel_buf, + kernel_bufsz, kernel_memsz, kernel_align, + MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1, + &kernel_load_addr); + if (ret) + goto out_free_params; + + pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + kernel_load_addr, kernel_memsz, kernel_memsz); + + /* Load initrd high */ + if (initrd) { + ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len, + PAGE_SIZE, MIN_INITRD_LOAD_ADDR, + ULONG_MAX, 1, &initrd_load_addr); + if (ret) + goto out_free_params; + + pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + initrd_load_addr, initrd_len, initrd_len); + + setup_initrd(params, initrd_load_addr, initrd_len); + } + + setup_cmdline(image, params, bootparam_load_addr, + sizeof(struct boot_params), cmdline, cmdline_len); + + /* bootloader info. Do we need a separate ID for kexec kernel loader? 
*/ + params->hdr.type_of_loader = 0x0D << 4; + params->hdr.loadflags = 0; + + /* Setup purgatory regs for entry */ + ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64, + sizeof(regs64), 1); + if (ret) + goto out_free_params; + + regs64.rbx = 0; /* Bootstrap Processor */ + regs64.rsi = bootparam_load_addr; + regs64.rip = kernel_load_addr + 0x200; + stack = kexec_purgatory_get_symbol_addr(image, "stack_end"); + if (IS_ERR(stack)) { + pr_err("Could not find address of symbol stack_end\n"); + ret = -EINVAL; + goto out_free_params; + } + + regs64.rsp = (unsigned long)stack; + ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64, + sizeof(regs64), 0); + if (ret) + goto out_free_params; + + ret = setup_boot_parameters(image, params, bootparam_load_addr, + efi_map_offset, efi_map_sz, + efi_setup_data_offset); + if (ret) + goto out_free_params; + + /* Allocate loader specific data */ + ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL); + if (!ldata) { + ret = -ENOMEM; + goto out_free_params; + } + + /* + * Store pointer to params so that it could be freed after loading + * params segment has been loaded and contents have been copied + * somewhere else. 
+ */ + ldata->bootparams_buf = params; + return ldata; + +out_free_params: + kfree(params); + return ERR_PTR(ret); +} + +/* This cleanup function is called after various segments have been loaded */ +static int bzImage64_cleanup(void *loader_data) +{ + struct bzimage64_data *ldata = loader_data; + + if (!ldata) + return 0; + + kfree(ldata->bootparams_buf); + ldata->bootparams_buf = NULL; + + return 0; +} + +#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG +static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) +{ + bool trusted; + int ret; + + ret = verify_pefile_signature(kernel, kernel_len, + system_trusted_keyring, &trusted); + if (ret < 0) + return ret; + if (!trusted) + return -EKEYREJECTED; + return 0; +} +#endif + +struct kexec_file_ops kexec_bzImage64_ops = { + .probe = bzImage64_probe, + .load = bzImage64_load, + .cleanup = bzImage64_cleanup, +#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG + .verify_sig = bzImage64_verify_sig, +#endif +}; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index f304773285ae..f1314d0bcf0a 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -338,8 +338,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) * a relative jump. 
*/ rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; - if (abs(rel) > 0x7fffffff) + if (abs(rel) > 0x7fffffff) { + __arch_remove_optimized_kprobe(op, 0); return -ERANGE; + } buf = (u8 *)op->optinsn.insn; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 3dd8e2c4d74a..f6945bef2cd1 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -35,6 +35,7 @@ #include <linux/slab.h> #include <linux/kprobes.h> #include <linux/debugfs.h> +#include <linux/nmi.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> @@ -243,9 +244,9 @@ u32 kvm_read_and_reset_pf_reason(void) { u32 reason = 0; - if (__get_cpu_var(apf_reason).enabled) { - reason = __get_cpu_var(apf_reason).reason; - __get_cpu_var(apf_reason).reason = 0; + if (__this_cpu_read(apf_reason.enabled)) { + reason = __this_cpu_read(apf_reason.reason); + __this_cpu_write(apf_reason.reason, 0); } return reason; @@ -318,7 +319,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val) * there's no need for lock or memory barriers. * An optimization barrier is implied in apic write. */ - if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi))) + if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi))) return; apic_write(APIC_EOI, APIC_EOI_ACK); } @@ -329,13 +330,13 @@ void kvm_guest_cpu_init(void) return; if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { - u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason)); + u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); #ifdef CONFIG_PREEMPT pa |= KVM_ASYNC_PF_SEND_ALWAYS; #endif wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); - __get_cpu_var(apf_reason).enabled = 1; + __this_cpu_write(apf_reason.enabled, 1); printk(KERN_INFO"KVM setup async PF for cpu %d\n", smp_processor_id()); } @@ -344,8 +345,8 @@ void kvm_guest_cpu_init(void) unsigned long pa; /* Size alignment is implied but just to make it explicit. 
*/ BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); - __get_cpu_var(kvm_apic_eoi) = 0; - pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi)) + __this_cpu_write(kvm_apic_eoi, 0); + pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi)) | KVM_MSR_ENABLED; wrmsrl(MSR_KVM_PV_EOI_EN, pa); } @@ -356,11 +357,11 @@ void kvm_guest_cpu_init(void) static void kvm_pv_disable_apf(void) { - if (!__get_cpu_var(apf_reason).enabled) + if (!__this_cpu_read(apf_reason.enabled)) return; wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); - __get_cpu_var(apf_reason).enabled = 0; + __this_cpu_write(apf_reason.enabled, 0); printk(KERN_INFO"Unregister pv shared memory for cpu %d\n", smp_processor_id()); @@ -499,6 +500,13 @@ void __init kvm_guest_init(void) #else kvm_guest_cpu_init(); #endif + + /* + * Hard lockup detection is enabled by default. Disable it, as guests + * can get false positives too easily, for example if the host is + * overcommitted. + */ + watchdog_enable_hardlockup_detector(false); } static noinline uint32_t __kvm_cpuid_base(void) @@ -716,7 +724,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) if (in_nmi()) return; - w = &__get_cpu_var(klock_waiting); + w = this_cpu_ptr(&klock_waiting); cpu = smp_processor_id(); start = spin_time_start(); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 1667b1de8d5d..72e8e310258d 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -247,7 +247,8 @@ void machine_kexec(struct kimage *image) /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, (unsigned long)page_list, - image->start, cpu_has_pae, + image->start, + boot_cpu_has(X86_FEATURE_PAE), image->preserve_context); #ifdef CONFIG_KEXEC_JUMP diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 679cef0791cd..485981059a40 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -6,6 +6,8 @@ * Version 
2. See the file COPYING for more details. */ +#define pr_fmt(fmt) "kexec: " fmt + #include <linux/mm.h> #include <linux/kexec.h> #include <linux/string.h> @@ -21,6 +23,13 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/debugreg.h> +#include <asm/kexec-bzimage64.h> + +#ifdef CONFIG_KEXEC_FILE +static struct kexec_file_ops *kexec_file_loaders[] = { + &kexec_bzImage64_ops, +}; +#endif static void free_transition_pgtable(struct kimage *image) { @@ -171,6 +180,45 @@ static void load_segments(void) ); } +#ifdef CONFIG_KEXEC_FILE +/* Update purgatory as needed after various image segments have been prepared */ +static int arch_update_purgatory(struct kimage *image) +{ + int ret = 0; + + if (!image->file_mode) + return 0; + + /* Setup copying of backup region */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = kexec_purgatory_get_set_symbol(image, "backup_dest", + &image->arch.backup_load_addr, + sizeof(image->arch.backup_load_addr), 0); + if (ret) + return ret; + + ret = kexec_purgatory_get_set_symbol(image, "backup_src", + &image->arch.backup_src_start, + sizeof(image->arch.backup_src_start), 0); + if (ret) + return ret; + + ret = kexec_purgatory_get_set_symbol(image, "backup_sz", + &image->arch.backup_src_sz, + sizeof(image->arch.backup_src_sz), 0); + if (ret) + return ret; + } + + return ret; +} +#else /* !CONFIG_KEXEC_FILE */ +static inline int arch_update_purgatory(struct kimage *image) +{ + return 0; +} +#endif /* CONFIG_KEXEC_FILE */ + int machine_kexec_prepare(struct kimage *image) { unsigned long start_pgtable; @@ -184,6 +232,11 @@ int machine_kexec_prepare(struct kimage *image) if (result) return result; + /* update purgatory as needed */ + result = arch_update_purgatory(image); + if (result) + return result; + return 0; } @@ -283,3 +336,200 @@ void arch_crash_save_vmcoreinfo(void) (unsigned long)&_text - __START_KERNEL); } +/* arch-dependent functionality related to kexec file-based syscall */ + +#ifdef CONFIG_KEXEC_FILE +int 
arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int i, ret = -ENOEXEC; + struct kexec_file_ops *fops; + + for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { + fops = kexec_file_loaders[i]; + if (!fops || !fops->probe) + continue; + + ret = fops->probe(buf, buf_len); + if (!ret) { + image->fops = fops; + return ret; + } + } + + return ret; +} + +void *arch_kexec_kernel_image_load(struct kimage *image) +{ + vfree(image->arch.elf_headers); + image->arch.elf_headers = NULL; + + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + + return image->fops->load(image, image->kernel_buf, + image->kernel_buf_len, image->initrd_buf, + image->initrd_buf_len, image->cmdline_buf, + image->cmdline_buf_len); +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + if (!image->fops || !image->fops->cleanup) + return 0; + + return image->fops->cleanup(image->image_loader_data); +} + +int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel, + unsigned long kernel_len) +{ + if (!image->fops || !image->fops->verify_sig) { + pr_debug("kernel loader does not support signature verification."); + return -EKEYREJECTED; + } + + return image->fops->verify_sig(kernel, kernel_len); +} + +/* + * Apply purgatory relocations. + * + * ehdr: Pointer to elf headers + * sechdrs: Pointer to section headers. + * relsec: section index of SHT_RELA section. + * + * TODO: Some of the code belongs to generic code. Move that in kexec.c. 
+ */ +int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, + Elf64_Shdr *sechdrs, unsigned int relsec) +{ + unsigned int i; + Elf64_Rela *rel; + Elf64_Sym *sym; + void *location; + Elf64_Shdr *section, *symtabsec; + unsigned long address, sec_base, value; + const char *strtab, *name, *shstrtab; + + /* + * ->sh_offset has been modified to keep the pointer to section + * contents in memory + */ + rel = (void *)sechdrs[relsec].sh_offset; + + /* Section to which relocations apply */ + section = &sechdrs[sechdrs[relsec].sh_info]; + + pr_debug("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + + /* Associated symbol table */ + symtabsec = &sechdrs[sechdrs[relsec].sh_link]; + + /* String table */ + if (symtabsec->sh_link >= ehdr->e_shnum) { + /* Invalid strtab section number */ + pr_err("Invalid string table section index %d\n", + symtabsec->sh_link); + return -ENOEXEC; + } + + strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset; + + /* section header string table */ + shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset; + + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + + /* + * rel[i].r_offset contains byte offset from beginning + * of section to the storage unit affected. + * + * This is location to update (->sh_offset). This is temporary + * buffer where section is currently loaded. This will finally + * be loaded to a different address later, pointed to by + * ->sh_addr. kexec takes care of moving it + * (kexec_load_segment()). + */ + location = (void *)(section->sh_offset + rel[i].r_offset); + + /* Final address of the location */ + address = section->sh_addr + rel[i].r_offset; + + /* + * rel[i].r_info contains information about symbol table index + * w.r.t which relocation must be made and type of relocation + * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get + * these respectively. 
+ */ + sym = (Elf64_Sym *)symtabsec->sh_offset + + ELF64_R_SYM(rel[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n", + name, sym->st_info, sym->st_shndx, sym->st_value, + sym->st_size); + + if (sym->st_shndx == SHN_UNDEF) { + pr_err("Undefined symbol: %s\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx == SHN_COMMON) { + pr_err("symbol '%s' in common section\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx == SHN_ABS) + sec_base = 0; + else if (sym->st_shndx >= ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else + sec_base = sechdrs[sym->st_shndx].sh_addr; + + value = sym->st_value; + value += sec_base; + value += rel[i].r_addend; + + switch (ELF64_R_TYPE(rel[i].r_info)) { + case R_X86_64_NONE: + break; + case R_X86_64_64: + *(u64 *)location = value; + break; + case R_X86_64_32: + *(u32 *)location = value; + if (value != *(u32 *)location) + goto overflow; + break; + case R_X86_64_32S: + *(s32 *)location = value; + if ((s64)value != *(s32 *)location) + goto overflow; + break; + case R_X86_64_PC32: + value -= (u64)address; + *(u32 *)location = value; + break; + default: + pr_err("Unknown rela relocation: %llu\n", + ELF64_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + } + return 0; + +overflow: + pr_err("Overflow in relocation type %d value 0x%lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), value); + return -ENOEXEC; +} +#endif /* CONFIG_KEXEC_FILE */ diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d2b56489d70f..2d2a237f2c73 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/smp.h> #include <linux/pci.h> +#include <linux/irqdomain.h> #include <asm/mtrr.h> #include <asm/mpspec.h> @@ -67,7 +68,7 @@ static void __init MP_processor_info(struct 
mpc_cpu *m) boot_cpu_physical_apicid = m->apicid; } - printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu); + pr_info("Processor #%d%s\n", m->apicid, bootup_cpu); generic_processor_info(apicid, m->apicver); } @@ -87,9 +88,8 @@ static void __init MP_bus_info(struct mpc_bus *m) #if MAX_MP_BUSSES < 256 if (m->busid >= MAX_MP_BUSSES) { - printk(KERN_WARNING "MP table busid value (%d) for bustype %s " - " is too large, max. supported is %d\n", - m->busid, str, MAX_MP_BUSSES - 1); + pr_warn("MP table busid value (%d) for bustype %s is too large, max. supported is %d\n", + m->busid, str, MAX_MP_BUSSES - 1); return; } #endif @@ -110,19 +110,29 @@ static void __init MP_bus_info(struct mpc_bus *m) mp_bus_id_to_type[m->busid] = MP_BUS_EISA; #endif } else - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); + pr_warn("Unknown bustype %s - ignoring\n", str); } +static struct irq_domain_ops mp_ioapic_irqdomain_ops = { + .map = mp_irqdomain_map, + .unmap = mp_irqdomain_unmap, +}; + static void __init MP_ioapic_info(struct mpc_ioapic *m) { + struct ioapic_domain_cfg cfg = { + .type = IOAPIC_DOMAIN_LEGACY, + .ops = &mp_ioapic_irqdomain_ops, + }; + if (m->flags & MPC_APIC_USABLE) - mp_register_ioapic(m->apicid, m->apicaddr, gsi_top); + mp_register_ioapic(m->apicid, m->apicaddr, gsi_top, &cfg); } static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) { - apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", + apic_printk(APIC_VERBOSE, + "Int: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC INT %02x\n", mp_irq->irqtype, mp_irq->irqflag & 3, (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); @@ -135,8 +145,8 @@ static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {} static void __init MP_lintsrc_info(struct mpc_lintsrc *m) { - apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," - " IRQ %02x, APIC ID %x, APIC LINT 
%02x\n", + apic_printk(APIC_VERBOSE, + "Lint: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC LINT %02x\n", m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid, m->srcbusirq, m->destapic, m->destapiclint); } @@ -148,34 +158,33 @@ static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str) { if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) { - printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", + pr_err("MPTABLE: bad signature [%c%c%c%c]!\n", mpc->signature[0], mpc->signature[1], mpc->signature[2], mpc->signature[3]); return 0; } if (mpf_checksum((unsigned char *)mpc, mpc->length)) { - printk(KERN_ERR "MPTABLE: checksum error!\n"); + pr_err("MPTABLE: checksum error!\n"); return 0; } if (mpc->spec != 0x01 && mpc->spec != 0x04) { - printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", - mpc->spec); + pr_err("MPTABLE: bad table version (%d)!!\n", mpc->spec); return 0; } if (!mpc->lapic) { - printk(KERN_ERR "MPTABLE: null local APIC address!\n"); + pr_err("MPTABLE: null local APIC address!\n"); return 0; } memcpy(oem, mpc->oem, 8); oem[8] = 0; - printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); + pr_info("MPTABLE: OEM ID: %s\n", oem); memcpy(str, mpc->productid, 12); str[12] = 0; - printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); + pr_info("MPTABLE: Product ID: %s\n", str); - printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic); + pr_info("MPTABLE: APIC at: 0x%X\n", mpc->lapic); return 1; } @@ -188,8 +197,8 @@ static void skip_entry(unsigned char **ptr, int *count, int size) static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) { - printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n" - "type %x\n", *mpt); + pr_err("Your mptable is wrong, contact your HW vendor!\n"); + pr_cont("type %x\n", *mpt); print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, 1, mpc, mpc->length, 1); } @@ -207,9 +216,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) if 
(!smp_check_mpc(mpc, oem, str)) return 0; -#ifdef CONFIG_X86_32 - generic_mps_oem_check(mpc, oem, str); -#endif /* Initialize the lapic mapping */ if (!acpi_lapic) register_lapic_address(mpc->lapic); @@ -259,7 +265,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) } if (!num_processors) - printk(KERN_ERR "MPTABLE: no processors registered!\n"); + pr_err("MPTABLE: no processors registered!\n"); return num_processors; } @@ -295,16 +301,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) * If it does, we assume it's valid. */ if (mpc_default_type == 5) { - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... " - "falling back to ELCR\n"); + pr_info("ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) - printk(KERN_ERR "ELCR contains invalid data... " - "not using ELCR\n"); + pr_err("ELCR contains invalid data... not using ELCR\n"); else { - printk(KERN_INFO - "Using ELCR to identify PCI interrupts\n"); + pr_info("Using ELCR to identify PCI interrupts\n"); ELCR_fallback = 1; } } @@ -353,7 +356,7 @@ static void __init construct_ioapic_table(int mpc_default_type) bus.busid = 0; switch (mpc_default_type) { default: - printk(KERN_ERR "???\nUnknown standard configuration %d\n", + pr_err("???\nUnknown standard configuration %d\n", mpc_default_type); /* fall through */ case 1: @@ -462,8 +465,8 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; #endif - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n" - "... disabling SMP support. (tell your hw vendor)\n"); + pr_err("BIOS bug, MP table errors detected!...\n"); + pr_cont("... disabling SMP support. 
(tell your hw vendor)\n"); early_iounmap(mpc, size); return -1; } @@ -481,8 +484,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) if (!mp_irq_entries) { struct mpc_bus bus; - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " - "using default mptable. (tell your hw vendor)\n"); + pr_err("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); bus.type = MP_BUS; bus.busid = 0; @@ -516,14 +518,14 @@ void __init default_get_smp_config(unsigned int early) if (acpi_lapic && acpi_ioapic) return; - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->specification); + pr_info("Intel MultiProcessor Specification v1.%d\n", + mpf->specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) if (mpf->feature2 & (1 << 7)) { - printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); + pr_info(" IMCR and PIC compatibility mode.\n"); pic_mode = 1; } else { - printk(KERN_INFO " Virtual Wire compatibility mode.\n"); + pr_info(" Virtual Wire compatibility mode.\n"); pic_mode = 0; } #endif @@ -539,8 +541,7 @@ void __init default_get_smp_config(unsigned int early) return; } - printk(KERN_INFO "Default MP configuration #%d\n", - mpf->feature1); + pr_info("Default MP configuration #%d\n", mpf->feature1); construct_default_ISA_mptable(mpf->feature1); } else if (mpf->physptr) { @@ -550,7 +551,7 @@ void __init default_get_smp_config(unsigned int early) BUG(); if (!early) - printk(KERN_INFO "Processors: %d\n", num_processors); + pr_info("Processors: %d\n", num_processors); /* * Only use the first configuration found. 
*/ @@ -583,10 +584,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) #endif mpf_found = mpf; - printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n", - (unsigned long long) virt_to_phys(mpf), - (unsigned long long) virt_to_phys(mpf) + - sizeof(*mpf) - 1, mpf); + pr_info("found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n", + (unsigned long long) virt_to_phys(mpf), + (unsigned long long) virt_to_phys(mpf) + + sizeof(*mpf) - 1, mpf); mem = virt_to_phys(mpf); memblock_reserve(mem, sizeof(*mpf)); @@ -735,7 +736,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, int nr_m_spare = 0; unsigned char *mpt = ((unsigned char *)mpc) + count; - printk(KERN_INFO "mpc_length %x\n", mpc->length); + pr_info("mpc_length %x\n", mpc->length); while (count < mpc->length) { switch (*mpt) { case MP_PROCESSOR: @@ -862,13 +863,13 @@ static int __init update_mp_table(void) if (!smp_check_mpc(mpc, oem, str)) return 0; - printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf)); - printk(KERN_INFO "physptr: %x\n", mpf->physptr); + pr_info("mpf: %llx\n", (u64)virt_to_phys(mpf)); + pr_info("physptr: %x\n", mpf->physptr); if (mpc_new_phys && mpc->length > mpc_new_length) { mpc_new_phys = 0; - printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", - mpc_new_length); + pr_info("mpc_new_length is %ld, please use alloc_mptable=8k\n", + mpc_new_length); } if (!mpc_new_phys) { @@ -879,10 +880,10 @@ static int __init update_mp_table(void) mpc->checksum = 0xff; new = mpf_checksum((unsigned char *)mpc, mpc->length); if (old == new) { - printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); + pr_info("mpc is readonly, please try alloc_mptable instead\n"); return 0; } - printk(KERN_INFO "use in-position replacing\n"); + pr_info("use in-position replacing\n"); } else { mpf->physptr = mpc_new_phys; mpc_new = phys_to_virt(mpc_new_phys); @@ -892,7 +893,7 @@ static int __init 
update_mp_table(void) if (mpc_new_phys - mpf->physptr) { struct mpf_intel *mpf_new; /* steal 16 bytes from [0, 1k) */ - printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); + pr_info("mpf new: %x\n", 0x400 - 16); mpf_new = phys_to_virt(0x400 - 16); memcpy(mpf_new, mpf, 16); mpf = mpf_new; @@ -900,7 +901,7 @@ static int __init update_mp_table(void) } mpf->checksum = 0; mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); - printk(KERN_INFO "physptr new: %x\n", mpf->physptr); + pr_info("physptr new: %x\n", mpf->physptr); } /* diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c index 0c424a67985d..0ee5025e0fa4 100644 --- a/arch/x86/kernel/pmc_atom.c +++ b/arch/x86/kernel/pmc_atom.c @@ -235,6 +235,11 @@ err: pmc_dbgfs_unregister(pmc); return -ENODEV; } +#else +static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) +{ + return 0; +} #endif /* CONFIG_DEBUG_FS */ static int pmc_setup_dev(struct pci_dev *pdev) @@ -262,14 +267,12 @@ static int pmc_setup_dev(struct pci_dev *pdev) /* PMC hardware registers setup */ pmc_hw_reg_setup(pmc); -#ifdef CONFIG_DEBUG_FS ret = pmc_dbgfs_register(pmc, pdev); if (ret) { iounmap(pmc->regmap); - return ret; } -#endif /* CONFIG_DEBUG_FS */ - return 0; + + return ret; } /* diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S deleted file mode 100644 index ca7f0d58a87d..000000000000 --- a/arch/x86/kernel/preempt.S +++ /dev/null @@ -1,25 +0,0 @@ - -#include <linux/linkage.h> -#include <asm/dwarf2.h> -#include <asm/asm.h> -#include <asm/calling.h> - -ENTRY(___preempt_schedule) - CFI_STARTPROC - SAVE_ALL - call preempt_schedule - RESTORE_ALL - ret - CFI_ENDPROC - -#ifdef CONFIG_CONTEXT_TRACKING - -ENTRY(___preempt_schedule_context) - CFI_STARTPROC - SAVE_ALL - call preempt_schedule_context - RESTORE_ALL - ret - CFI_ENDPROC - -#endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4505e2a950d8..e127ddaa2d5a 100644 --- a/arch/x86/kernel/process.c +++ 
b/arch/x86/kernel/process.c @@ -64,14 +64,16 @@ EXPORT_SYMBOL_GPL(task_xstate_cachep); */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - int ret; - *dst = *src; - if (fpu_allocated(&src->thread.fpu)) { - memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); - ret = fpu_alloc(&dst->thread.fpu); - if (ret) - return ret; + + dst->thread.fpu_counter = 0; + dst->thread.fpu.has_fpu = 0; + dst->thread.fpu.last_cpu = ~0; + dst->thread.fpu.state = NULL; + if (tsk_used_math(src)) { + int err = fpu_alloc(&dst->thread.fpu); + if (err) + return err; fpu_copy(dst, src); } return 0; @@ -93,6 +95,7 @@ void arch_task_cache_init(void) kmem_cache_create("task_xstate", xstate_size, __alignof__(union thread_xstate), SLAB_PANIC | SLAB_NOTRACK, NULL); + setup_xstate_comp(); } /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 7bc86bbe7485..8f3ebfe710d0 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -138,6 +138,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) childregs; p->thread.sp0 = (unsigned long) (childregs+1); + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ @@ -152,9 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->orig_ax = -1; childregs->cs = __KERNEL_CS | get_kernel_rpl(); childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; - p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); return 0; } *childregs = *current_pt_regs(); @@ -165,13 +164,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.ip = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(current_pt_regs()); - p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - if 
(unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ca5b02d405c3..3ed4a68d4013 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -163,7 +163,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) childregs; p->thread.usersp = me->thread.usersp; set_tsk_thread_flag(p, TIF_FORK); - p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); @@ -193,8 +192,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->sp = sp; err = -ENOMEM; - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 678c0ada3b3c..749b0e423419 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1441,24 +1441,126 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, force_sig_info(SIGTRAP, &info, tsk); } - -#ifdef CONFIG_X86_32 -# define IS_IA32 1 -#elif defined CONFIG_IA32_EMULATION -# define IS_IA32 is_compat_task() -#else -# define IS_IA32 0 +static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) +{ +#ifdef CONFIG_X86_64 + if (arch == AUDIT_ARCH_X86_64) { + audit_syscall_entry(regs->orig_ax, regs->di, + regs->si, regs->dx, regs->r10); + } else #endif + { + audit_syscall_entry(regs->orig_ax, regs->bx, + regs->cx, regs->dx, regs->si); + } +} /* - * We must return the syscall number to actually look up in the table. - * This can be -1L to skip running any syscall at all. + * We can return 0 to resume the syscall or anything else to go to phase + * 2. 
If we resume the syscall, we need to put something appropriate in + * regs->orig_ax. + * + * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax + * are fully functional. + * + * For phase 2's benefit, our return value is: + * 0: resume the syscall + * 1: go to phase 2; no seccomp phase 2 needed + * anything else: go to phase 2; pass return value to seccomp */ -long syscall_trace_enter(struct pt_regs *regs) +unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) +{ + unsigned long ret = 0; + u32 work; + + BUG_ON(regs != task_pt_regs(current)); + + work = ACCESS_ONCE(current_thread_info()->flags) & + _TIF_WORK_SYSCALL_ENTRY; + + /* + * If TIF_NOHZ is set, we are required to call user_exit() before + * doing anything that could touch RCU. + */ + if (work & _TIF_NOHZ) { + user_exit(); + work &= ~TIF_NOHZ; + } + +#ifdef CONFIG_SECCOMP + /* + * Do seccomp first -- it should minimize exposure of other + * code, and keeping seccomp fast is probably more valuable + * than the rest of this. + */ + if (work & _TIF_SECCOMP) { + struct seccomp_data sd; + + sd.arch = arch; + sd.nr = regs->orig_ax; + sd.instruction_pointer = regs->ip; +#ifdef CONFIG_X86_64 + if (arch == AUDIT_ARCH_X86_64) { + sd.args[0] = regs->di; + sd.args[1] = regs->si; + sd.args[2] = regs->dx; + sd.args[3] = regs->r10; + sd.args[4] = regs->r8; + sd.args[5] = regs->r9; + } else +#endif + { + sd.args[0] = regs->bx; + sd.args[1] = regs->cx; + sd.args[2] = regs->dx; + sd.args[3] = regs->si; + sd.args[4] = regs->di; + sd.args[5] = regs->bp; + } + + BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); + BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); + + ret = seccomp_phase1(&sd); + if (ret == SECCOMP_PHASE1_SKIP) { + regs->orig_ax = -1; + ret = 0; + } else if (ret != SECCOMP_PHASE1_OK) { + return ret; /* Go directly to phase 2 */ + } + + work &= ~_TIF_SECCOMP; + } +#endif + + /* Do our best to finish without phase 2. 
*/ + if (work == 0) + return ret; /* seccomp and/or nohz only (ret == 0 here) */ + +#ifdef CONFIG_AUDITSYSCALL + if (work == _TIF_SYSCALL_AUDIT) { + /* + * If there is no more work to be done except auditing, + * then audit in phase 1. Phase 2 always audits, so, if + * we audit here, then we can't go on to phase 2. + */ + do_audit_syscall_entry(regs, arch); + return 0; + } +#endif + + return 1; /* Something is enabled that we can't handle in phase 1 */ +} + +/* Returns the syscall nr to run (which should match regs->orig_ax). */ +long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, + unsigned long phase1_result) { long ret = 0; + u32 work = ACCESS_ONCE(current_thread_info()->flags) & + _TIF_WORK_SYSCALL_ENTRY; - user_exit(); + BUG_ON(regs != task_pt_regs(current)); /* * If we stepped into a sysenter/syscall insn, it trapped in @@ -1467,17 +1569,21 @@ long syscall_trace_enter(struct pt_regs *regs) * do_debug() and we need to set it again to restore the user * state. If we entered on the slow path, TF was already set. */ - if (test_thread_flag(TIF_SINGLESTEP)) + if (work & _TIF_SINGLESTEP) regs->flags |= X86_EFLAGS_TF; - /* do the secure computing check first */ - if (secure_computing(regs->orig_ax)) { +#ifdef CONFIG_SECCOMP + /* + * Call seccomp_phase2 before running the other hooks so that + * they can see any changes made by a seccomp tracer. + */ + if (phase1_result > 1 && seccomp_phase2(phase1_result)) { /* seccomp failures shouldn't expose any additional code. 
*/ - ret = -1L; - goto out; + return -1; } +#endif - if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) + if (unlikely(work & _TIF_SYSCALL_EMU)) ret = -1L; if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && @@ -1487,23 +1593,22 @@ long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->orig_ax); - if (IS_IA32) - audit_syscall_entry(AUDIT_ARCH_I386, - regs->orig_ax, - regs->bx, regs->cx, - regs->dx, regs->si); -#ifdef CONFIG_X86_64 - else - audit_syscall_entry(AUDIT_ARCH_X86_64, - regs->orig_ax, - regs->di, regs->si, - regs->dx, regs->r10); -#endif + do_audit_syscall_entry(regs, arch); -out: return ret ?: regs->orig_ax; } +long syscall_trace_enter(struct pt_regs *regs) +{ + u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); + + if (phase1_result == 0) + return regs->orig_ax; + else + return syscall_trace_enter_phase2(regs, arch, phase1_result); +} + void syscall_trace_leave(struct pt_regs *regs) { bool step; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index ff898bbf579d..176a0f99d4da 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -498,6 +498,24 @@ void force_hpet_resume(void) } /* + * According to the datasheet e6xx systems have the HPET hardwired to + * 0xfed00000 + */ +static void e6xx_force_enable_hpet(struct pci_dev *dev) +{ + if (hpet_address || force_hpet_address) + return; + + force_hpet_address = 0xFED00000; + force_hpet_resume_type = NONE_FORCE_HPET_RESUME; + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " + "0x%lx\n", force_hpet_address); + return; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU, + e6xx_force_enable_hpet); + +/* * HPET MSI on some boards (ATI SB700/SB800) has side effect on * floppy DMA. Disable HPET MSI on such platforms. 
* See erratum #27 (Misinterpreted MSI Requests May Result in diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 41ead8d3bc0b..ab08aa2276fb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -879,6 +879,15 @@ void __init setup_arch(char **cmdline_p) KERNEL_PGD_PTRS); load_cr3(swapper_pg_dir); + /* + * Note: Quark X1000 CPUs advertise PGE incorrectly and require + * a cr3 based tlb flush, so the following __flush_tlb_all() + * will not flush anything because the cpu quirk which clears + * X86_FEATURE_PGE has not been invoked yet. Though due to the + * load_cr3() above the TLB has been flushed already. The + * quirk is invoked before subsequent calls to __flush_tlb_all() + * so proper operation is guaranteed. + */ __flush_tlb_all(); #else printk(KERN_INFO "Command line: %s\n", boot_command_line); @@ -1119,7 +1128,6 @@ void __init setup_arch(char **cmdline_p) setup_real_mode(); memblock_set_current_limit(get_max_mapped()); - dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); /* * NOTE: On x86-32, only from this point on, fixmaps are ready for use. @@ -1150,6 +1158,7 @@ void __init setup_arch(char **cmdline_p) early_acpi_boot_init(); initmem_init(); + dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); /* * Reserve memory for crash kernel after SRAT is parsed so that it diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 2851d63c1202..ed37a768d0fc 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -675,6 +675,11 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * handler too. */ regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF); + /* + * Ensure the signal handler starts with the new fpu state. 
+ */ + if (used_math()) + drop_init_fpu(current); } signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5492798930ef..4d2128ac70bd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -111,7 +111,6 @@ atomic_t init_deasserted; static void smp_callin(void) { int cpuid, phys_id; - unsigned long timeout; /* * If waken up by an INIT in an 82489DX configuration @@ -130,37 +129,6 @@ static void smp_callin(void) * (This works even if the APIC is not enabled.) */ phys_id = read_apic_id(); - if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { - panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, - phys_id, cpuid); - } - pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); - - /* - * STARTUP IPIs are fragile beasts as they might sometimes - * trigger some glue motherboard logic. Complete APIC bus - * silence for 1 second, this overestimates the time the - * boot CPU is spending to send the up to 2 STARTUP IPIs - * by a factor of two. This should be enough. - */ - - /* - * Waiting 2s total for startup (udelay is not yet working) - */ - timeout = jiffies + 2*HZ; - while (time_before(jiffies, timeout)) { - /* - * Has the boot CPU finished it's STARTUP sequence? - */ - if (cpumask_test_cpu(cpuid, cpu_callout_mask)) - break; - cpu_relax(); - } - - if (!time_before(jiffies, timeout)) { - panic("%s: CPU%d started up but did not get a callout!\n", - __func__, cpuid); - } /* * the boot CPU has finished the init stage and is spinning @@ -168,10 +136,6 @@ static void smp_callin(void) * CPU, first the APIC. 
(this is probably redundant on most * boards) */ - - pr_debug("CALLIN, before setup_local_APIC()\n"); - if (apic->smp_callin_clear_local_apic) - apic->smp_callin_clear_local_apic(); setup_local_APIC(); end_local_APIC_setup(); @@ -300,11 +264,19 @@ void smp_store_cpu_info(int id) } static bool +topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) +{ + int cpu1 = c->cpu_index, cpu2 = o->cpu_index; + + return (cpu_to_node(cpu1) == cpu_to_node(cpu2)); +} + +static bool topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; - return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2), + return !WARN_ONCE(!topology_same_node(c, o), "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! " "[node: %d != %d]. Ignoring dependency.\n", cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2)); @@ -345,17 +317,44 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) return false; } -static bool match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) +/* + * Unlike the other levels, we do not enforce keeping a + * multicore group inside a NUMA node. If this happens, we will + * discard the MC level of the topology later. + */ +static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { - if (c->phys_proc_id == o->phys_proc_id) { - if (cpu_has(c, X86_FEATURE_AMD_DCM)) - return true; - - return topology_sane(c, o, "mc"); - } + if (c->phys_proc_id == o->phys_proc_id) + return true; return false; } +static struct sched_domain_topology_level numa_inside_package_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, +#endif +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, +#endif + { NULL, }, +}; +/* + * set_sched_topology() sets the topology internal to a CPU. The + * NUMA topologies are layered on top of it to build the full + * system topology. 
+ * + * If NUMA nodes are observed to occur within a CPU package, this + * function should be called. It forces the sched domain code to + * only use the SMT level for the CPU portion of the topology. + * This essentially falls back to relying on NUMA information + * from the SRAT table to describe the entire system topology + * (except for hyperthreads). + */ +static void primarily_use_numa_for_topology(void) +{ + set_sched_topology(numa_inside_package_topology); +} + void set_cpu_sibling_map(int cpu) { bool has_smt = smp_num_siblings > 1; @@ -392,7 +391,7 @@ void set_cpu_sibling_map(int cpu) for_each_cpu(i, cpu_sibling_setup_mask) { o = &cpu_data(i); - if ((i == cpu) || (has_mp && match_mc(c, o))) { + if ((i == cpu) || (has_mp && match_die(c, o))) { link_mask(core, cpu, i); /* @@ -414,6 +413,8 @@ void set_cpu_sibling_map(int cpu) } else if (i != cpu && !c->booted_cores) c->booted_cores = cpu_data(i).booted_cores; } + if (match_die(c, o) && !topology_same_node(c, o)) + primarily_use_numa_for_topology(); } } @@ -757,8 +758,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) unsigned long start_ip = real_mode_header->trampoline_start; unsigned long boot_error = 0; - int timeout; int cpu0_nmi_registered = 0; + unsigned long timeout; /* Just in case we booted with a single CPU. */ alternatives_enable_smp(); @@ -806,6 +807,15 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) } /* + * AP might wait on cpu_callout_mask in cpu_init() with + * cpu_initialized_mask set if previous attempt to online + * it timed-out. Clear cpu_initialized_mask so that after + * INIT/SIPI it could start with a clean state. + */ + cpumask_clear_cpu(cpu, cpu_initialized_mask); + smp_mb(); + + /* * Wake up a CPU in difference cases: * - Use the method in the APIC driver if it's defined * Otherwise, @@ -819,53 +829,38 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) if (!boot_error) { /* - * allow APs to start initializing. 
+ * Wait 10s total for a response from AP */ - pr_debug("Before Callout %d\n", cpu); - cpumask_set_cpu(cpu, cpu_callout_mask); - pr_debug("After Callout %d\n", cpu); + boot_error = -1; + timeout = jiffies + 10*HZ; + while (time_before(jiffies, timeout)) { + if (cpumask_test_cpu(cpu, cpu_initialized_mask)) { + /* + * Tell AP to proceed with initialization + */ + cpumask_set_cpu(cpu, cpu_callout_mask); + boot_error = 0; + break; + } + udelay(100); + schedule(); + } + } + if (!boot_error) { /* - * Wait 5s total for a response + * Wait till AP completes initial initialization */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpumask_test_cpu(cpu, cpu_callin_mask)) - break; /* It has booted */ - udelay(100); + while (!cpumask_test_cpu(cpu, cpu_callin_mask)) { /* * Allow other tasks to run while we wait for the * AP to come online. This also gives a chance * for the MTRR work(triggered by the AP coming online) * to be completed in the stop machine context. */ + udelay(100); schedule(); } - - if (cpumask_test_cpu(cpu, cpu_callin_mask)) { - print_cpu_msr(&cpu_data(cpu)); - pr_debug("CPU%d: has booted.\n", cpu); - } else { - boot_error = 1; - if (*trampoline_status == 0xA5A5A5A5) - /* trampoline started but...? */ - pr_err("CPU%d: Stuck ??\n", cpu); - else - /* trampoline code not run */ - pr_err("CPU%d: Not responding\n", cpu); - if (apic->inquire_remote_apic) - apic->inquire_remote_apic(apicid); - } - } - - if (boot_error) { - /* Try to put things back the way they were before ... 
*/ - numa_remove_cpu(cpu); /* was set by numa_add_cpu */ - - /* was set by do_boot_cpu() */ - cpumask_clear_cpu(cpu, cpu_callout_mask); - - /* was set by cpu_init() */ - cpumask_clear_cpu(cpu, cpu_initialized_mask); } /* mark "stuck" area as not stuck */ @@ -1143,10 +1138,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) enable_IO_APIC(); bsp_end_local_APIC_setup(); - - if (apic->setup_portio_remap) - apic->setup_portio_remap(); - smpboot_setup_io_apic(); /* * Set up local APIC timer on boot CPU. @@ -1292,6 +1283,9 @@ static void remove_siblinginfo(int cpu) for_each_cpu(sibling, cpu_sibling_mask(cpu)) cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); + for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) + cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); + cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(cpu_sibling_mask(cpu)); cpumask_clear(cpu_core_mask(cpu)); c->phys_proc_id = 0; @@ -1322,6 +1316,8 @@ void cpu_disable_common(void) fixup_irqs(); } +static DEFINE_PER_CPU(struct completion, die_complete); + int native_cpu_disable(void) { int ret; @@ -1331,26 +1327,24 @@ int native_cpu_disable(void) return ret; clear_local_APIC(); - + init_completion(&per_cpu(die_complete, smp_processor_id())); cpu_disable_common(); + return 0; } void native_cpu_die(unsigned int cpu) { /* We don't do anything here: idle task is faking death itself. 
*/ - unsigned int i; + wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ); - for (i = 0; i < 10; i++) { - /* They ack this in play_dead by setting CPU_DEAD */ - if (per_cpu(cpu_state, cpu) == CPU_DEAD) { - if (system_state == SYSTEM_RUNNING) - pr_info("CPU %u is now offline\n", cpu); - return; - } - msleep(100); + /* They ack this in play_dead() by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + if (system_state == SYSTEM_RUNNING) + pr_info("CPU %u is now offline\n", cpu); + } else { + pr_err("CPU %u didn't die...\n", cpu); } - pr_err("CPU %u didn't die...\n", cpu); } void play_dead_common(void) @@ -1362,6 +1356,7 @@ void play_dead_common(void) mb(); /* Ack it */ __this_cpu_write(cpu_state, CPU_DEAD); + complete(&per_cpu(die_complete, smp_processor_id())); /* * With physical CPU hotplug, we should halt the cpu diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index bf7ef5ce29df..0fa29609b2c4 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -68,6 +68,8 @@ static struct irqaction irq0 = { void __init setup_default_timer_irq(void) { + if (!nr_legacy_irqs()) + return; setup_irq(0, &irq0); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 56b0c338061e..b7e50bba3bbb 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -950,7 +950,7 @@ core_initcall(cpufreq_tsc); static struct clocksource clocksource_tsc; /* - * We compare the TSC to the cycle_last value in the clocksource + * We used to compare the TSC to the cycle_last value in the clocksource * structure to avoid a nasty time-warp. This can be observed in a * very small window right after one CPU updated cycle_last under * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which @@ -960,26 +960,23 @@ static struct clocksource clocksource_tsc; * due to the unsigned delta calculation of the time keeping core * code, which is necessary to support wrapping clocksources like pm * timer. 
+ * + * This sanity check is now done in the core timekeeping code. + * checking the result of read_tsc() - cycle_last for being negative. + * That works because CLOCKSOURCE_MASK(64) does not mask out any bit. */ static cycle_t read_tsc(struct clocksource *cs) { - cycle_t ret = (cycle_t)get_cycles(); - - return ret >= clocksource_tsc.cycle_last ? - ret : clocksource_tsc.cycle_last; -} - -static void resume_tsc(struct clocksource *cs) -{ - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) - clocksource_tsc.cycle_last = 0; + return (cycle_t)get_cycles(); } +/* + * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() + */ static struct clocksource clocksource_tsc = { .name = "tsc", .rating = 300, .read = read_tsc, - .resume = resume_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_MUST_VERIFY, @@ -1169,14 +1166,17 @@ void __init tsc_init(void) x86_init.timers.tsc_pre_init(); - if (!cpu_has_tsc) + if (!cpu_has_tsc) { + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; + } tsc_khz = x86_platform.calibrate_tsc(); cpu_khz = tsc_khz; if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; } diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index b99b9ad8540c..ee22c1d93ae5 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -152,7 +152,7 @@ static void __init detect_vsmp_box(void) is_vsmp = 1; } -int is_vsmp_box(void) +static int is_vsmp_box(void) { if (is_vsmp != -1) return is_vsmp; @@ -166,7 +166,7 @@ int is_vsmp_box(void) static void __init detect_vsmp_box(void) { } -int is_vsmp_box(void) +static int is_vsmp_box(void) { return 0; } diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e1e1e80fc6a6..957779f4eb40 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -216,7 +216,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) */ 
regs->orig_ax = syscall_nr; regs->ax = -ENOSYS; - tmp = secure_computing(syscall_nr); + tmp = secure_computing(); if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index 9531fbb123ba..c7d791f32b98 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -31,29 +31,30 @@ void update_vsyscall(struct timekeeper *tk) gtod_write_begin(vdata); /* copy vsyscall data */ - vdata->vclock_mode = tk->clock->archdata.vclock_mode; - vdata->cycle_last = tk->clock->cycle_last; - vdata->mask = tk->clock->mask; - vdata->mult = tk->mult; - vdata->shift = tk->shift; + vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; + vdata->cycle_last = tk->tkr.cycle_last; + vdata->mask = tk->tkr.mask; + vdata->mult = tk->tkr.mult; + vdata->shift = tk->tkr.shift; vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->xtime_nsec; + vdata->wall_time_snsec = tk->tkr.xtime_nsec; vdata->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->xtime_nsec + vdata->monotonic_time_snsec = tk->tkr.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->shift); + << tk->tkr.shift); while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->shift)) { + (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->shift; + ((u64)NSEC_PER_SEC) << tk->tkr.shift; vdata->monotonic_time_sec++; } vdata->wall_time_coarse_sec = tk->xtime_sec; - vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift); + vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> + tk->tkr.shift); vdata->monotonic_time_coarse_sec = vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index a4b451c6addf..4c540c4719d8 100644 --- 
a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -8,6 +8,7 @@ #include <linux/bootmem.h> #include <linux/compat.h> +#include <linux/cpu.h> #include <asm/i387.h> #include <asm/fpu-internal.h> #include <asm/sigframe.h> @@ -24,7 +25,9 @@ u64 pcntxt_mask; struct xsave_struct *init_xstate_buf; static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; -static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; +static unsigned int *xstate_offsets, *xstate_sizes; +static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8]; +static unsigned int xstate_features; /* * If a processor implementation discern that a processor state component is @@ -268,8 +271,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) return -1; - drop_init_fpu(tsk); /* trigger finit */ - return 0; } @@ -283,7 +284,7 @@ sanitize_restored_xstate(struct task_struct *tsk, if (use_xsave()) { /* These bits must be zero. */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + memset(xsave_hdr->reserved, 0, 48); /* * Init the state that is not present in the memory @@ -399,8 +400,11 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) set_used_math(); } - if (use_eager_fpu()) + if (use_eager_fpu()) { + preempt_disable(); math_state_restore(); + preempt_enable(); + } return err; } else { @@ -479,6 +483,52 @@ static void __init setup_xstate_features(void) } /* + * This function sets up offsets and sizes of all extended states in + * xsave area. This supports both standard format and compacted format + * of the xsave area. + * + * Input: void + * Output: void + */ +void setup_xstate_comp(void) +{ + unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8]; + int i; + + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form.
+ */ + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); + + if (!cpu_has_xsaves) { + for (i = 2; i < xstate_features; i++) { + if (test_bit(i, (unsigned long *)&pcntxt_mask)) { + xstate_comp_offsets[i] = xstate_offsets[i]; + xstate_comp_sizes[i] = xstate_sizes[i]; + } + } + return; + } + + xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; + + for (i = 2; i < xstate_features; i++) { + if (test_bit(i, (unsigned long *)&pcntxt_mask)) + xstate_comp_sizes[i] = xstate_sizes[i]; + else + xstate_comp_sizes[i] = 0; + + if (i > 2) + xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + + xstate_comp_sizes[i-1]; + + } +} + +/* * setup the xstate image representing the init state */ static void __init setup_init_fpu_buf(void) @@ -496,15 +546,21 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); + if (cpu_has_xsaves) { + init_xstate_buf->xsave_hdr.xcomp_bv = + (u64)1 << 63 | pcntxt_mask; + init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask; + } + /* * Init all the features state with header_bv being 0x0 */ - xrstor_state(init_xstate_buf, -1); + xrstor_state_booting(init_xstate_buf, -1); /* * Dump the init state again. This is to identify the init state * of any feature which is not represented by all zero's. */ - xsave_state(init_xstate_buf, -1); + xsave_state_booting(init_xstate_buf, -1); } static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; @@ -520,6 +576,30 @@ static int __init eager_fpu_setup(char *s) } __setup("eagerfpu=", eager_fpu_setup); + +/* + * Calculate total size of enabled xstates in XCR0/pcntxt_mask. 
+ */ +static void __init init_xstate_size(void) +{ + unsigned int eax, ebx, ecx, edx; + int i; + + if (!cpu_has_xsaves) { + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + xstate_size = ebx; + return; + } + + xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + for (i = 2; i < 64; i++) { + if (test_bit(i, (unsigned long *)&pcntxt_mask)) { + cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); + xstate_size += eax; + } + } +} + /* * Enable and initialize the xsave feature. */ @@ -551,8 +631,7 @@ static void __init xstate_enable_boot_cpu(void) /* * Recompute the context size for enabled features */ - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; + init_xstate_size(); update_regset_xstate_info(xstate_size, pcntxt_mask); prepare_fx_sw_frame(); @@ -572,8 +651,9 @@ static void __init xstate_enable_boot_cpu(void) } } - pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", - pcntxt_mask, xstate_size); + pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n", + pcntxt_mask, xstate_size, + cpu_has_xsaves ? "compacted form" : "standard form"); } /* @@ -635,3 +715,26 @@ void eager_fpu_init(void) else fxrstor_checking(&init_xstate_buf->i387); } + +/* + * Given the xsave area and a state inside, this function returns the + * address of the state. + * + * This is the API that is called to get xstate address in either + * standard format or compacted format of xsave area. + * + * Inputs: + * xsave: base address of the xsave area; + * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, + * etc.) + * Output: + * address of the state in the xsave area. + */ +void *get_xsave_addr(struct xsave_struct *xsave, int xstate) +{ + int feature = fls64(xstate) - 1; + if (!test_bit(feature, (unsigned long *)&pcntxt_mask)) + return NULL; + + return (void *)xsave + xstate_comp_offsets[feature]; +} |