From 159a80b2142df709416ab369113de7d511c48331 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Tue, 11 Oct 2011 19:39:16 +0200
Subject: oprofile, x86: Add kernel parameter oprofile.cpu_type=timer

We need this to better test the x86 NMI timer mode. Otherwise it is
very hard to set up systems with the NMI timer enabled, yet that is
exactly what we have e.g. in virtual machine environments.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/nmi_int.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 68894fdc034b..7ca4d43e8988 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -613,24 +613,36 @@ static int __init p4_init(char **cpu_type)
 	return 0;
 }

-static int force_arch_perfmon;
-static int force_cpu_type(const char *str, struct kernel_param *kp)
+enum __force_cpu_type {
+	reserved = 0,		/* do not force */
+	timer,
+	arch_perfmon,
+};
+
+static int force_cpu_type;
+
+static int set_cpu_type(const char *str, struct kernel_param *kp)
 {
-	if (!strcmp(str, "arch_perfmon")) {
-		force_arch_perfmon = 1;
+	if (!strcmp(str, "timer")) {
+		force_cpu_type = timer;
+		printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
+	} else if (!strcmp(str, "arch_perfmon")) {
+		force_cpu_type = arch_perfmon;
 		printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
+	} else {
+		force_cpu_type = 0;
 	}

 	return 0;
 }
-module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);

 static int __init ppro_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 	struct op_x86_model_spec *spec = &op_ppro_spec;	/* default */

-	if (force_arch_perfmon && cpu_has_arch_perfmon)
+	if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
 		return 0;

 	/*
@@ -697,6 +709,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 	if (!cpu_has_apic)
 		return -ENODEV;

+	if (force_cpu_type == timer)
+		return -ENODEV;
+
 	switch (vendor) {
 	case X86_VENDOR_AMD:
 		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
-- cgit v1.2.3

From dcfce4a095932e6e95d83ad982be3609947963bc Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Tue, 11 Oct 2011 17:11:08 +0200
Subject: oprofile, x86: Reimplement nmi timer mode using perf event

The legacy x86 nmi watchdog code was removed with the implementation
of the perf-based nmi watchdog. This broke Oprofile's nmi timer mode.
To run nmi timer mode we relied on a continuously ticking nmi source,
which the nmi watchdog provided. That nmi tick is no longer available,
and the current watchdog cannot be used anymore since it runs with
very long periods, in the range of seconds. This patch reimplements
the nmi timer mode using a perf counter nmi source.
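In outline, the new mode creates one pinned hardware-cycles perf event
per CPU, programmed to overflow once per timer tick, and feeds each
NMI-context overflow into oprofile. The condensed sketch below shows
just that core path, with error handling and the CPU-hotplug notifier
trimmed; the complete version is in the patch that follows.

static struct perf_event_attr nmi_timer_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,		/* must stay scheduled on the PMU */
	.disabled	= 1,		/* enabled explicitly on start */
};

static void nmi_timer_callback(struct perf_event *event,
			       struct perf_sample_data *data,
			       struct pt_regs *regs)
{
	event->hw.interrupts = 0;	/* don't throttle interrupts */
	oprofile_add_sample(regs, 0);
}

static int nmi_timer_start_cpu(int cpu)
{
	struct perf_event *event;
	u64 period;

	/* clock cycles per tick; do_div() keeps 32-bit builds linking */
	period = (u64)cpu_khz * 1000;
	do_div(period, HZ);
	nmi_timer_attr.sample_period = period;

	event = perf_event_create_kernel_counter(&nmi_timer_attr, cpu, NULL,
						 nmi_timer_callback, NULL);
	if (IS_ERR(event))
		return PTR_ERR(event);

	perf_event_enable(event);
	return 0;
}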
V2: * removing pr_info() * fix undefined reference to `__udivdi3' for 32 bit build * fix section mismatch of .cpuinit.data:nmi_timer_cpu_nb * removed nmi timer setup in arch/x86 * implemented function stubs for op_nmi_init/exit() * made code more readable in oprofile_init() V3: * fix architectural initialization in oprofile_init() * fix CONFIG_OPROFILE_NMI_TIMER dependencies Acked-by: Peter Zijlstra Signed-off-by: Robert Richter --- arch/Kconfig | 4 + arch/x86/oprofile/Makefile | 3 +- arch/x86/oprofile/init.c | 30 ++----- arch/x86/oprofile/nmi_timer_int.c | 66 --------------- drivers/oprofile/nmi_timer_int.c | 173 ++++++++++++++++++++++++++++++++++++++ drivers/oprofile/oprof.c | 24 +++--- drivers/oprofile/oprof.h | 9 ++ kernel/events/core.c | 2 + 8 files changed, 208 insertions(+), 103 deletions(-) delete mode 100644 arch/x86/oprofile/nmi_timer_int.c create mode 100644 drivers/oprofile/nmi_timer_int.c (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 4b0669cbb3b0..2505740b81d2 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX config HAVE_OPROFILE bool +config OPROFILE_NMI_TIMER + def_bool y + depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI + config KPROBES bool "Kprobes" depends on MODULES diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile index 446902b2a6b6..1599f568f0e2 100644 --- a/arch/x86/oprofile/Makefile +++ b/arch/x86/oprofile/Makefile @@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprof.o cpu_buffer.o buffer_sync.o \ event_buffer.o oprofile_files.o \ oprofilefs.o oprofile_stats.o \ - timer_int.o ) + timer_int.o nmi_timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o backtrace.o oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ op_model_ppro.o op_model_p4.o -oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index f148cf652678..9e138d00ad36 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c @@ -16,37 +16,23 @@ * with the NMI mode driver. 
*/ +#ifdef CONFIG_X86_LOCAL_APIC extern int op_nmi_init(struct oprofile_operations *ops); -extern int op_nmi_timer_init(struct oprofile_operations *ops); extern void op_nmi_exit(void); -extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); +#else +static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; } +static void op_nmi_exit(void) { } +#endif -static int nmi_timer; +extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); int __init oprofile_arch_init(struct oprofile_operations *ops) { - int ret; - - ret = -ENODEV; - -#ifdef CONFIG_X86_LOCAL_APIC - ret = op_nmi_init(ops); -#endif - nmi_timer = (ret != 0); -#ifdef CONFIG_X86_IO_APIC - if (nmi_timer) - ret = op_nmi_timer_init(ops); -#endif ops->backtrace = x86_backtrace; - - return ret; + return op_nmi_init(ops); } - void oprofile_arch_exit(void) { -#ifdef CONFIG_X86_LOCAL_APIC - if (!nmi_timer) - op_nmi_exit(); -#endif + op_nmi_exit(); } diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c deleted file mode 100644 index 720bf5a53c51..000000000000 --- a/arch/x86/oprofile/nmi_timer_int.c +++ /dev/null @@ -1,66 +0,0 @@ -/** - * @file nmi_timer_int.c - * - * @remark Copyright 2003 OProfile authors - * @remark Read the file COPYING - * - * @author Zwane Mwaikambo - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int profile_timer_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - switch (val) { - case DIE_NMI: - oprofile_add_sample(args->regs, 0); - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; -} - -static struct notifier_block profile_timer_exceptions_nb = { - .notifier_call = profile_timer_exceptions_notify, - .next = NULL, - .priority = NMI_LOW_PRIOR, -}; - -static int timer_start(void) -{ - if (register_die_notifier(&profile_timer_exceptions_nb)) - return 1; - return 0; -} - - -static void timer_stop(void) -{ - unregister_die_notifier(&profile_timer_exceptions_nb); - synchronize_sched(); /* Allow already-started NMIs to complete. */ -} - - -int __init op_nmi_timer_init(struct oprofile_operations *ops) -{ - ops->start = timer_start; - ops->stop = timer_stop; - ops->cpu_type = "timer"; - printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); - return 0; -} diff --git a/drivers/oprofile/nmi_timer_int.c b/drivers/oprofile/nmi_timer_int.c new file mode 100644 index 000000000000..76f1c9357f39 --- /dev/null +++ b/drivers/oprofile/nmi_timer_int.c @@ -0,0 +1,173 @@ +/** + * @file nmi_timer_int.c + * + * @remark Copyright 2011 Advanced Micro Devices, Inc. 
+ * + * @author Robert Richter + */ + +#include +#include +#include +#include +#include + +#ifdef CONFIG_OPROFILE_NMI_TIMER + +static DEFINE_PER_CPU(struct perf_event *, nmi_timer_events); +static int ctr_running; + +static struct perf_event_attr nmi_timer_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + +static void nmi_timer_callback(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + event->hw.interrupts = 0; /* don't throttle interrupts */ + oprofile_add_sample(regs, 0); +} + +static int nmi_timer_start_cpu(int cpu) +{ + struct perf_event *event = per_cpu(nmi_timer_events, cpu); + + if (!event) { + event = perf_event_create_kernel_counter(&nmi_timer_attr, cpu, NULL, + nmi_timer_callback, NULL); + if (IS_ERR(event)) + return PTR_ERR(event); + per_cpu(nmi_timer_events, cpu) = event; + } + + if (event && ctr_running) + perf_event_enable(event); + + return 0; +} + +static void nmi_timer_stop_cpu(int cpu) +{ + struct perf_event *event = per_cpu(nmi_timer_events, cpu); + + if (event && ctr_running) + perf_event_disable(event); +} + +static int nmi_timer_cpu_notifier(struct notifier_block *b, unsigned long action, + void *data) +{ + int cpu = (unsigned long)data; + switch (action) { + case CPU_DOWN_FAILED: + case CPU_ONLINE: + nmi_timer_start_cpu(cpu); + break; + case CPU_DOWN_PREPARE: + nmi_timer_stop_cpu(cpu); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block nmi_timer_cpu_nb = { + .notifier_call = nmi_timer_cpu_notifier +}; + +static int nmi_timer_start(void) +{ + int cpu; + + get_online_cpus(); + ctr_running = 1; + for_each_online_cpu(cpu) + nmi_timer_start_cpu(cpu); + put_online_cpus(); + + return 0; +} + +static void nmi_timer_stop(void) +{ + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) + nmi_timer_stop_cpu(cpu); + ctr_running = 0; + put_online_cpus(); +} + +static void nmi_timer_shutdown(void) +{ + struct perf_event *event; + int cpu; + + get_online_cpus(); + unregister_cpu_notifier(&nmi_timer_cpu_nb); + for_each_possible_cpu(cpu) { + event = per_cpu(nmi_timer_events, cpu); + if (!event) + continue; + perf_event_disable(event); + per_cpu(nmi_timer_events, cpu) = NULL; + perf_event_release_kernel(event); + } + + put_online_cpus(); +} + +static int nmi_timer_setup(void) +{ + int cpu, err; + u64 period; + + /* clock cycles per tick: */ + period = (u64)cpu_khz * 1000; + do_div(period, HZ); + nmi_timer_attr.sample_period = period; + + get_online_cpus(); + err = register_cpu_notifier(&nmi_timer_cpu_nb); + if (err) + goto out; + /* can't attach events to offline cpus: */ + for_each_online_cpu(cpu) { + err = nmi_timer_start_cpu(cpu); + if (err) + break; + } + if (err) + nmi_timer_shutdown(); +out: + put_online_cpus(); + return err; +} + +int __init op_nmi_timer_init(struct oprofile_operations *ops) +{ + int err = 0; + + err = nmi_timer_setup(); + if (err) + return err; + nmi_timer_shutdown(); /* only check, don't alloc */ + + ops->create_files = NULL; + ops->setup = nmi_timer_setup; + ops->shutdown = nmi_timer_shutdown; + ops->start = nmi_timer_start; + ops->stop = nmi_timer_stop; + ops->cpu_type = "timer"; + + printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); + + return 0; +} + +#endif diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index f7cd06967aed..ed2c3ec07024 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -246,26 +246,24 @@ static int __init oprofile_init(void) 
int err; /* always init architecture to setup backtrace support */ + timer_mode = 0; err = oprofile_arch_init(&oprofile_ops); + if (!err) { + if (!timer && !oprofilefs_register()) + return 0; + oprofile_arch_exit(); + } - timer_mode = err || timer; /* fall back to timer mode on errors */ - if (timer_mode) { - if (!err) - oprofile_arch_exit(); + /* setup timer mode: */ + timer_mode = 1; + /* no nmi timer mode if oprofile.timer is set */ + if (timer || op_nmi_timer_init(&oprofile_ops)) { err = oprofile_timer_init(&oprofile_ops); if (err) return err; } - err = oprofilefs_register(); - if (!err) - return 0; - - /* failed */ - if (!timer_mode) - oprofile_arch_exit(); - - return err; + return oprofilefs_register(); } diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 177b73de5e5f..769fb0fcac44 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -36,6 +36,15 @@ struct dentry; void oprofile_create_files(struct super_block *sb, struct dentry *root); int oprofile_timer_init(struct oprofile_operations *ops); void oprofile_timer_exit(void); +#ifdef CONFIG_OPROFILE_NMI_TIMER +int op_nmi_timer_init(struct oprofile_operations *ops); +#else +static inline int op_nmi_timer_init(struct oprofile_operations *ops) +{ + return -ENODEV; +} +#endif + int oprofile_set_ulong(unsigned long *addr, unsigned long val); int oprofile_set_timeout(unsigned long time); diff --git a/kernel/events/core.c b/kernel/events/core.c index d1a1bee35228..d2e28bdd523a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1322,6 +1322,7 @@ retry: } raw_spin_unlock_irq(&ctx->lock); } +EXPORT_SYMBOL_GPL(perf_event_disable); static void perf_set_shadow_time(struct perf_event *event, struct perf_event_context *ctx, @@ -1806,6 +1807,7 @@ retry: out: raw_spin_unlock_irq(&ctx->lock); } +EXPORT_SYMBOL_GPL(perf_event_enable); int perf_event_refresh(struct perf_event *event, int refresh) { -- cgit v1.2.3 From 1ec454baf1245df4fdb5dae728da3363630ce6de Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Oct 2011 23:01:09 +0900 Subject: x86, perf: Add a build-time sanity test to the x86 decoder Add a sanity test of x86 insn decoder against a stream of randomly generated input, at build time. This test is also able to reproduce any bug that might trigger by allowing the passing of random-seed and iteration-number to the test, or by passing input which has invalid byte code. Changes in V2: - Code cleanup. - Show how to reproduce the error by insn_sanity test. 
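The invariant the test enforces is simple: after decoding, the
decoder's read cursor must still lie within the fixed-size input
buffer. Condensed from insn_sanity.c below, the main loop looks
roughly like this; a failure can be replayed from just the seed and
iteration number, since the random input stream is then identical:

	srand(seed);			/* same seed => same input stream */
	for (i = 0; i < iter_end; i++) {
		generate_insn(insn_buf);	/* random bytes, or bytes from -i file */
		if (i < iter_start)
			continue;	/* fast-forward to a reported iteration */

		insn_init(&insn, insn_buf, x86_64);
		insn_get_length(&insn);

		/* the decoder must never read outside the buffer */
		if (insn.next_byte <= insn.kaddr ||
		    insn.kaddr + MAX_INSN_SIZE < insn.next_byte)
			errors++;	/* access violation: dump and report */
	}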
Signed-off-by: Masami Hiramatsu Cc: acme@redhat.com Cc: ming.m.lin@intel.com Cc: robert.richter@amd.com Cc: ravitillo@lbl.gov Cc: yrl.pp-manager.tt@hitachi.com Cc: Andi Kleen Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20111020140109.20938.92572.stgit@localhost.localdomain Signed-off-by: Ingo Molnar --- arch/x86/include/asm/insn.h | 7 ++ arch/x86/tools/Makefile | 10 +- arch/x86/tools/insn_sanity.c | 275 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 arch/x86/tools/insn_sanity.c (limited to 'arch') diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 88c765e16410..74df3f1eddfd 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn) return (insn->vex_prefix.value != 0); } +/* Ensure this instruction is decoded completely */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + static inline insn_byte_t insn_vex_m_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index f82082677337..3255c3df67f4 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -18,14 +18,22 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk quiet_cmd_posttest = TEST $@ cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) -posttest: $(obj)/test_get_len vmlinux +quiet_cmd_sanitytest = TEST $@ + cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000 + +posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity $(call cmd,posttest) + $(call cmd,sanitytest) hostprogs-y := test_get_len +hostprogs-y := insn_sanity # -I needed for generated C source and C source which in the kernel tree. HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ +HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ + # Dependencies are also needed. $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c +$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c new file mode 100644 index 000000000000..334d9de7d0ca --- /dev/null +++ b/arch/x86/tools/insn_sanity.c @@ -0,0 +1,275 @@ +/* + * x86 decoder sanity test - based on test_get_insn.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + * Copyright (C) Hitachi, Ltd., 2011 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define unlikely(cond) (cond) +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + +#include +#include +#include + +/* + * Test of instruction analysis against tampering. + * Feed random binary to instruction decoder and ensure not to + * access out-of-instruction-buffer. + */ + +#define DEFAULT_MAX_ITER 10000 +#define INSN_NOP 0x90 + +static const char *prog; /* Program name */ +static int verbose; /* Verbosity */ +static int x86_64; /* x86-64 bit mode flag */ +static unsigned int seed; /* Random seed */ +static unsigned long iter_start; /* Start of iteration number */ +static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */ +static FILE *input_file; /* Input file name */ + +static void usage(const char *err) +{ + if (err) + fprintf(stderr, "Error: %s\n\n", err); + fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog); + fprintf(stderr, "\t-y 64bit mode\n"); + fprintf(stderr, "\t-n 32bit mode\n"); + fprintf(stderr, "\t-v Verbose mode\n"); + fprintf(stderr, "\t-s Give a random seed (and iteration number)\n"); + fprintf(stderr, "\t-m Give a maximum iteration number\n"); + fprintf(stderr, "\t-i Give an input file with decoded binary\n"); + exit(1); +} + +static void dump_field(FILE *fp, const char *name, const char *indent, + struct insn_field *field) +{ + fprintf(fp, "%s.%s = {\n", indent, name); + fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", + indent, field->value, field->bytes[0], field->bytes[1], + field->bytes[2], field->bytes[3]); + fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, + field->got, field->nbytes); +} + +static void dump_insn(FILE *fp, struct insn *insn) +{ + fprintf(fp, "Instruction = {\n"); + dump_field(fp, "prefixes", "\t", &insn->prefixes); + dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); + dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); + dump_field(fp, "opcode", "\t", &insn->opcode); + dump_field(fp, "modrm", "\t", &insn->modrm); + dump_field(fp, "sib", "\t", &insn->sib); + dump_field(fp, "displacement", "\t", &insn->displacement); + dump_field(fp, "immediate1", "\t", &insn->immediate1); + dump_field(fp, "immediate2", "\t", &insn->immediate2); + fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", + insn->attr, insn->opnd_bytes, insn->addr_bytes); + fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", + insn->length, insn->x86_64, insn->kaddr); +} + +static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter, + unsigned char *insn_buf, struct insn *insn) +{ + int i; + + fprintf(fp, "%s:\n", msg); + + dump_insn(stderr, insn); + + fprintf(fp, "You can reproduce this with below command(s);\n"); + + /* Input a decoded instruction sequence directly */ + fprintf(fp, " $ echo "); + for (i = 0; i < MAX_INSN_SIZE; i++) + fprintf(fp, " %02x", insn_buf[i]); + fprintf(fp, " | %s -i -\n", prog); + + if (!input_file) { + 
fprintf(fp, "Or \n"); + /* Give a seed and iteration number */ + fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter); + } +} + +static void init_random_seed(void) +{ + int fd; + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + goto fail; + + if (read(fd, &seed, sizeof(seed)) != sizeof(seed)) + goto fail; + + close(fd); + return; +fail: + usage("Failed to open /dev/urandom"); +} + +/* Read given instruction sequence from the input file */ +static int read_next_insn(unsigned char *insn_buf) +{ + char buf[256] = "", *tmp; + int i; + + tmp = fgets(buf, ARRAY_SIZE(buf), input_file); + if (tmp == NULL || feof(input_file)) + return 0; + + for (i = 0; i < MAX_INSN_SIZE; i++) { + insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16); + if (*tmp != ' ') + break; + } + + return i; +} + +static int generate_insn(unsigned char *insn_buf) +{ + int i; + + if (input_file) + return read_next_insn(insn_buf); + + /* Fills buffer with random binary up to MAX_INSN_SIZE */ + for (i = 0; i < MAX_INSN_SIZE - 1; i += 2) + *(unsigned short *)(&insn_buf[i]) = random() & 0xffff; + + while (i < MAX_INSN_SIZE) + insn_buf[i++] = random() & 0xff; + + return i; +} + +static void parse_args(int argc, char **argv) +{ + int c; + char *tmp = NULL; + int set_seed = 0; + + prog = argv[0]; + while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) { + switch (c) { + case 'y': + x86_64 = 1; + break; + case 'n': + x86_64 = 0; + break; + case 'v': + verbose = 1; + break; + case 'i': + if (strcmp("-", optarg) == 0) + input_file = stdin; + else + input_file = fopen(optarg, "r"); + if (!input_file) + usage("Failed to open input file"); + break; + case 's': + seed = (unsigned int)strtoul(optarg, &tmp, 0); + if (*tmp == ',') { + optarg = tmp + 1; + iter_start = strtoul(optarg, &tmp, 0); + } + if (*tmp != '\0' || tmp == optarg) + usage("Failed to parse seed"); + set_seed = 1; + break; + case 'm': + iter_end = strtoul(optarg, &tmp, 0); + if (*tmp != '\0' || tmp == optarg) + usage("Failed to parse max_iter"); + break; + default: + usage(NULL); + } + } + + /* Check errors */ + if (iter_end < iter_start) + usage("Max iteration number must be bigger than iter-num"); + + if (set_seed && input_file) + usage("Don't use input file (-i) with random seed (-s)"); + + /* Initialize random seed */ + if (!input_file) { + if (!set_seed) /* No seed is given */ + init_random_seed(); + srand(seed); + } +} + +int main(int argc, char **argv) +{ + struct insn insn; + int insns = 0; + int errors = 0; + unsigned long i; + unsigned char insn_buf[MAX_INSN_SIZE * 2]; + + parse_args(argc, argv); + + /* Prepare stop bytes with NOPs */ + memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE); + + for (i = 0; i < iter_end; i++) { + if (generate_insn(insn_buf) <= 0) + break; + + if (i < iter_start) /* Skip to given iteration number */ + continue; + + /* Decode an instruction */ + insn_init(&insn, insn_buf, x86_64); + insn_get_length(&insn); + + if (verbose && !insn_complete(&insn)) + dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn); + + if (insn.next_byte <= insn.kaddr || + insn.kaddr + MAX_INSN_SIZE < insn.next_byte) { + /* Access out-of-range memory */ + dump_stream(stdout, "Error: Found an access violation", i, insn_buf, &insn); + errors++; + } + insns++; + } + + fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed); + + return errors ? 
1 : 0;
+}
-- cgit v1.2.3

From 1056c3e916f12cdd8042ab27dfccbb3a9e871df0 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu
Date: Mon, 5 Dec 2011 21:05:33 +0900
Subject: x86/tools: Fix Makefile to build all test tools

Fix arch/x86/tools/Makefile to compile both test tools correctly.
This bug leads to a build error.

Signed-off-by: Masami Hiramatsu
Cc: "H. Peter Anvin"
Cc: yrl.pp-manager.tt@hitachi.com
Link: http://lkml.kernel.org/r/20111205120533.15475.62047.stgit@cloud
Signed-off-by: Ingo Molnar
---
 arch/x86/tools/Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index 3255c3df67f4..d511aa97533a 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -25,8 +25,7 @@ posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
 	$(call cmd,posttest)
 	$(call cmd,sanitytest)

-hostprogs-y := test_get_len
-hostprogs-y := insn_sanity
+hostprogs-y += test_get_len insn_sanity

 # -I needed for generated C source and C source which in the kernel tree.
 HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
-- cgit v1.2.3

From 130b78b2bf16d5d89091db38374faef896360cf9 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu
Date: Mon, 5 Dec 2011 21:05:39 +0900
Subject: x86: Fix instruction decoder to handle grouped AVX instructions

To reduce the memory usage of the attribute table, the x86
instruction decoder puts the "Group" attribute only on the
"no-last-prefix" attribute table (the same as the vex_p == 0 case).
Thus, the decoder should look at the no-last-prefix table first and
then, only if it is not a group, move on to the "with-last-prefix"
table (vex_p != 0).

However, in the current implementation, inat_get_avx_attribute()
looks at the with-last-prefix table directly. So, when decoding a
grouped AVX instruction, the decoder fails to find the correct group
because there is no "Group" attribute in that table. This ends up
with the mis-decoding of instructions, as Ingo reported in
http://thread.gmane.org/gmane.linux.kernel/1214103

This patch fixes it to check the no-last-prefix table first even if
that is an AVX instruction, and to get an attribute from the
with-last-prefix table only if that is not a group.

Reported-by: Ingo Molnar
Signed-off-by: Masami Hiramatsu
Cc: "H.
Peter Anvin" Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20111205120539.15475.91428.stgit@cloud Signed-off-by: Ingo Molnar --- arch/x86/lib/inat.c | 9 ++++++++- arch/x86/lib/insn.c | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 46fc4ee09fc4..88ad5fbda6e1 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, const insn_attr_t *table; if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) return 0; - table = inat_avx_tables[vex_m][vex_p]; + /* At first, this checks the master table */ + table = inat_avx_tables[vex_m][0]; if (!table) return 0; + if (!inat_is_group(table[opcode]) && vex_p) { + /* If this is not a group, get attribute directly */ + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + } return table[opcode]; } diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 374562ed6704..5a1f9f3e3fbb 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn) m = insn_vex_m_bits(insn); p = insn_vex_p_bits(insn); insn->attr = inat_get_avx_attribute(op, m, p); - if (!inat_accept_vex(insn->attr)) + if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) insn->attr = 0; /* This instruction is bad */ goto end; /* VEX has only 1 byte for opcode */ } @@ -249,6 +249,8 @@ void insn_get_modrm(struct insn *insn) pfx = insn_last_prefix(insn); insn->attr = inat_get_group_attribute(mod, pfx, insn->attr); + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) + insn->attr = 0; /* This is bad */ } } -- cgit v1.2.3 From bfbe9015de5c78d1808cd09526b9166b2e6aa440 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 5 Dec 2011 21:05:45 +0900 Subject: x86/tools: Fix instruction decoder message output Fix instruction decoder test (insn_sanity), so that it doesn't show both info and error messages twice on same instruction. (In that case, show only error message) Signed-off-by: Masami Hiramatsu Cc: "H. Peter Anvin" Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20111205120545.15475.7928.stgit@cloud Signed-off-by: Ingo Molnar --- arch/x86/tools/insn_sanity.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c index 334d9de7d0ca..20256037ac7d 100644 --- a/arch/x86/tools/insn_sanity.c +++ b/arch/x86/tools/insn_sanity.c @@ -257,15 +257,14 @@ int main(int argc, char **argv) insn_init(&insn, insn_buf, x86_64); insn_get_length(&insn); - if (verbose && !insn_complete(&insn)) - dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn); - if (insn.next_byte <= insn.kaddr || insn.kaddr + MAX_INSN_SIZE < insn.next_byte) { /* Access out-of-range memory */ dump_stream(stdout, "Error: Found an access violation", i, insn_buf, &insn); errors++; - } + } else if (verbose && !insn_complete(&insn)) + dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn); + insns++; } -- cgit v1.2.3 From e70825fc51e149366ab5659bd36beb73aad187a0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 5 Dec 2011 21:05:50 +0900 Subject: x86/tools: Fix insn_sanity message outputs Fix x86 instruction decoder test to dump all error messages to stderr and others to stdout. Signed-off-by: Masami Hiramatsu Cc: "H. 
Peter Anvin" Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20111205120550.15475.70149.stgit@cloud Signed-off-by: Ingo Molnar --- arch/x86/tools/insn_sanity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c index 20256037ac7d..b6720d6b38cb 100644 --- a/arch/x86/tools/insn_sanity.c +++ b/arch/x86/tools/insn_sanity.c @@ -102,7 +102,7 @@ static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter, fprintf(fp, "%s:\n", msg); - dump_insn(stderr, insn); + dump_insn(fp, insn); fprintf(fp, "You can reproduce this with below command(s);\n"); @@ -260,7 +260,7 @@ int main(int argc, char **argv) if (insn.next_byte <= insn.kaddr || insn.kaddr + MAX_INSN_SIZE < insn.next_byte) { /* Access out-of-range memory */ - dump_stream(stdout, "Error: Found an access violation", i, insn_buf, &insn); + dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn); errors++; } else if (verbose && !insn_complete(&insn)) dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn); -- cgit v1.2.3 From a9c373d03326e98c5f05ca64a1108790d25e28a9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 5 Dec 2011 21:05:57 +0900 Subject: x86: Update instruction decoder to support new AVX formats Since new Intel software developers manual introduces new format for AVX instruction set (including AVX2), it is important to update x86-opcode-map.txt to fit those changes. Signed-off-by: Masami Hiramatsu Cc: "H. Peter Anvin" Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20111205120557.15475.13236.stgit@cloud Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 606 +++++++++++++++++++---------------- arch/x86/tools/gen-insn-attr-x86.awk | 21 +- 2 files changed, 345 insertions(+), 282 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index a793da5e560e..5b83c51c12e0 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -1,5 +1,11 @@ # x86 Opcode Maps # +# This is (mostly) based on following documentations. +# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2 +# (#325383-040US, October 2011) +# - Intel(R) Advanced Vector Extensions Programming Reference +# (#319433-011,JUNE 2011). +# # # Table: table-name # Referrer: escaped-name @@ -15,10 +21,13 @@ # EndTable # # AVX Superscripts -# (VEX): this opcode can accept VEX prefix. -# (oVEX): this opcode requires VEX prefix. -# (o128): this opcode only supports 128bit VEX. -# (o256): this opcode only supports 256bit VEX. +# (v): this opcode requires VEX prefix. +# (v1): this opcode only supports 128bit VEX. +# +# Last Prefix Superscripts +# - (66): the last prefix is 0x66 +# - (F3): the last prefix is 0xF3 +# - (F2): the last prefix is 0xF2 # Table: one byte opcode @@ -199,8 +208,8 @@ a0: MOV AL,Ob a1: MOV rAX,Ov a2: MOV Ob,AL a3: MOV Ov,rAX -a4: MOVS/B Xb,Yb -a5: MOVS/W/D/Q Xv,Yv +a4: MOVS/B Yb,Xb +a5: MOVS/W/D/Q Yv,Xv a6: CMPS/B Xb,Yb a7: CMPS/W/D Xv,Yv a8: TEST AL,Ib @@ -233,8 +242,8 @@ c0: Grp2 Eb,Ib (1A) c1: Grp2 Ev,Ib (1A) c2: RETN Iw (f64) c3: RETN -c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) -c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) c6: Grp11 Eb,Ib (1A) c7: Grp11 Ev,Iz (1A) c8: ENTER Iw,Ib @@ -320,14 +329,19 @@ AVXcode: 1 # 3DNow! uses the last imm byte as opcode extension. 0f: 3DNow! 
Pq,Qq,Ib # 0x0f 0x10-0x1f -10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) -11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) -12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) -13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) -14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) -15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) -16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) -17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) +# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands +# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. +# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. +# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming +# Reference A.1 +10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) +11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) +12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) +13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) +14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) +15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) +16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) +17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) 18: Grp16 (1A) 19: 1a: @@ -345,14 +359,14 @@ AVXcode: 1 25: 26: 27: -28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) -29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) -2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) -2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) -2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) -2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) -2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) -2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) +28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) +29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) +2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) +2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) +2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) +2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) +2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) # 0x0f 0x30-0x3f 30: WRMSR 31: RDTSC @@ -388,65 +402,66 @@ AVXcode: 1 4e: CMOVLE/NG Gv,Ev 4f: CMOVNLE/G Gv,Ev # 0x0f 0x50-0x5f -50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) -51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) -52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) -53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) -54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) -55: 
andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) -56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) -57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) -58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) -59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) -5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) -5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) -5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) -5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) -5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) -5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) +50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) +51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) +52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) +53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) +54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) +55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) +56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) +57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) +58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) +59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) +5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) +5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) +5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) +5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) +5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) # 0x0f 0x60-0x6f -60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) -61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) -62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) -63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) -64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) -65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) -66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) -67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) -68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) -69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) -6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) -6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) -6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) -6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) -6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) -6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) +60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) +61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) +62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) +63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) +64: pcmpgtb 
Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) +65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) +66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) +67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) +68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) +69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) +6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) +6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) +6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) +6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) +6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) # 0x0f 0x70-0x7f -70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) +70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 71: Grp12 (1A) 72: Grp13 (1A) 73: Grp14 (1A) -74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) -75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) -76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) -77: emms/vzeroupper/vzeroall (VEX) -78: VMREAD Ed/q,Gd/q -79: VMWRITE Gd/q,Ed/q +74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) +75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) +76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) +# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. +77: emms | vzeroupper | vzeroall +78: VMREAD Ey,Gy +79: VMWRITE Gy,Ey 7a: 7b: -7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) -7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) -7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) -7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) +7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) +7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) +7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) # 0x0f 0x80-0x8f 80: JO Jz (f64) 81: JNO Jz (f64) -82: JB/JNAE/JC Jz (f64) -83: JNB/JAE/JNC Jz (f64) -84: JZ/JE Jz (f64) -85: JNZ/JNE Jz (f64) +82: JB/JC/JNAE Jz (f64) +83: JAE/JNB/JNC Jz (f64) +84: JE/JZ Jz (f64) +85: JNE/JNZ Jz (f64) 86: JBE/JNA Jz (f64) -87: JNBE/JA Jz (f64) +87: JA/JNBE Jz (f64) 88: JS Jz (f64) 89: JNS Jz (f64) 8a: JP/JPE Jz (f64) @@ -502,18 +517,18 @@ b8: JMPE | POPCNT Gv,Ev (F3) b9: Grp10 (1A) ba: Grp8 Ev,Ib (1A) bb: BTC Ev,Gv -bc: BSF Gv,Ev -bd: BSR Gv,Ev +bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) be: MOVSX Gv,Eb bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf c0: XADD Eb,Gb c1: XADD Ev,Gv -c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) -c3: movnti Md/q,Gd/q -c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) -c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) -c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) +c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) +c3: movnti My,Gy +c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) +c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) +c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) c7: Grp9 (1A) c8: BSWAP RAX/EAX/R8/R8D c9: BSWAP RCX/ECX/R9/R9D @@ -524,55 +539,55 @@ cd: BSWAP RBP/EBP/R13/R13D ce: BSWAP RSI/ESI/R14/R14D cf: BSWAP RDI/EDI/R15/R15D # 0x0f 0xd0-0xdf -d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) -d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) -d2: psrld Pq,Qq | psrld Vdq,Wdq 
(66),(VEX),(o128) -d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) -d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) -d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) -d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) -d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) -d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) -d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) -da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) -db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) -dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) -dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) -de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) -df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) +d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) +d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) +d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) +d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) +d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) +d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) +d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) +d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) +d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) +da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) +dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) +de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) # 0x0f 0xe0-0xef -e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) -e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) -e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) -e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) -e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) -e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) -e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) -e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) -e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) -e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) -ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) -eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) -ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) -ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) -ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) -ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) +e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) +e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) +e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) +e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) +e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) +e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) +e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) +e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) +ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) +ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) +ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) # 0x0f 0xf0-0xff -f0: lddqu Vdq,Mdq (F2),(VEX) -f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) -f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) -f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) -f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) -f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) -f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) -f7: maskmovq Pq,Nq | maskmovdqu 
Vdq,Udq (66),(VEX),(o128) -f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) -f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) -fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) -fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) -fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) -fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) -fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) +f0: vlddqu Vx,Mx (F2) +f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) +f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) +f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) +f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) +f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) +f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) +f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) +f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) +f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) +fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) +fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) +fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) +fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) +fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) ff: EndTable @@ -580,155 +595,193 @@ Table: 3-byte opcode 1 (0x0f 0x38) Referrer: 3-byte escape 1 AVXcode: 2 # 0x0f 0x38 0x00-0x0f -00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) -01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) -02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) -03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) -04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) -05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) -06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) -07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) -08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) -09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) -0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) -0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) -0c: Vpermilps /r (66),(oVEX) -0d: Vpermilpd /r (66),(oVEX) -0e: vtestps /r (66),(oVEX) -0f: vtestpd /r (66),(oVEX) +00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) +01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) +02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) +03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) +04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) +05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) +06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) +07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) +08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) +09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) +0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) +0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) +0c: vpermilps Vx,Hx,Wx (66),(v) +0d: vpermilpd Vx,Hx,Wx (66),(v) +0e: vtestps Vx,Wx (66),(v) +0f: vtestpd Vx,Wx (66),(v) # 0x0f 0x38 0x10-0x1f 10: pblendvb Vdq,Wdq (66) 11: 12: -13: +13: vcvtph2ps Vx,Wx,Ib (66),(v) 14: blendvps Vdq,Wdq (66) 15: blendvpd Vdq,Wdq (66) -16: -17: ptest Vdq,Wdq (66),(VEX) -18: vbroadcastss /r (66),(oVEX) -19: vbroadcastsd /r (66),(oVEX),(o256) -1a: vbroadcastf128 /r (66),(oVEX),(o256) +16: vpermps Vqq,Hqq,Wqq (66),(v) +17: vptest Vx,Wx (66) +18: vbroadcastss Vx,Wd (66),(v) +19: vbroadcastsd Vqq,Wq (66),(v) +1a: vbroadcastf128 Vqq,Mdq (66),(v) 1b: -1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) -1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) -1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) +1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) +1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) +1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) 1f: # 0x0f 0x38 0x20-0x2f -20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) -21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) -22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) -23: 
pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) -24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) -25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) +20: vpmovsxbw Vx,Ux/Mq (66),(v1) +21: vpmovsxbd Vx,Ux/Md (66),(v1) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) +24: vpmovsxwq Vx,Ux/Md (66),(v1) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) 26: 27: -28: pmuldq Vdq,Wdq (66),(VEX),(o128) -29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) -2a: movntdqa Vdq,Mdq (66),(VEX),(o128) -2b: packusdw Vdq,Wdq (66),(VEX),(o128) -2c: vmaskmovps(ld) /r (66),(oVEX) -2d: vmaskmovpd(ld) /r (66),(oVEX) -2e: vmaskmovps(st) /r (66),(oVEX) -2f: vmaskmovpd(st) /r (66),(oVEX) +28: vpmuldq Vx,Hx,Wx (66),(v1) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) +2a: vmovntdqa Vx,Mx (66),(v1) +2b: vpackusdw Vx,Hx,Wx (66),(v1) +2c: vmaskmovps Vx,Hx,Mx (66),(v) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2e: vmaskmovps Mx,Hx,Vx (66),(v) +2f: vmaskmovpd Mx,Hx,Vx (66),(v) # 0x0f 0x38 0x30-0x3f -30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) -31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) -32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) -33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) -34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) -35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) -36: -37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) -38: pminsb Vdq,Wdq (66),(VEX),(o128) -39: pminsd Vdq,Wdq (66),(VEX),(o128) -3a: pminuw Vdq,Wdq (66),(VEX),(o128) -3b: pminud Vdq,Wdq (66),(VEX),(o128) -3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) -3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) -3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) -3f: pmaxud Vdq,Wdq (66),(VEX),(o128) +30: vpmovzxbw Vx,Ux/Mq (66),(v1) +31: vpmovzxbd Vx,Ux/Md (66),(v1) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) +34: vpmovzxwq Vx,Ux/Md (66),(v1) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) +36: vpermd Vqq,Hqq,Wqq (66),(v) +37: vpcmpgtq Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) +39: vpminsd Vx,Hx,Wx (66),(v1) +3a: vpminuw Vx,Hx,Wx (66),(v1) +3b: vpminud Vx,Hx,Wx (66),(v1) +3c: vpmaxsb Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3e: vpmaxuw Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) # 0x0f 0x38 0x40-0x8f -40: pmulld Vdq,Wdq (66),(VEX),(o128) -41: phminposuw Vdq,Wdq (66),(VEX),(o128) -80: INVEPT Gd/q,Mdq (66) -81: INVPID Gd/q,Mdq (66) +40: vpmulld Vx,Hx,Wx (66),(v1) +41: vphminposuw Vdq,Wdq (66),(v1) +42: +43: +44: +45: vpsrlvd/q Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) +47: vpsllvd/q Vx,Hx,Wx (66),(v) +# Skip 0x48-0x57 +58: vpbroadcastd Vx,Wx (66),(v) +59: vpbroadcastq Vx,Wx (66),(v) +5a: vbroadcasti128 Vqq,Mdq (66),(v) +# Skip 0x5b-0x77 +78: vpbroadcastb Vx,Wx (66),(v) +79: vpbroadcastw Vx,Wx (66),(v) +# Skip 0x7a-0x7f +80: INVEPT Gy,Mdq (66) +81: INVPID Gy,Mdq (66) +82: INVPCID Gy,Mdq (66) +8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) # 0x0f 0x38 0x90-0xbf (FMA) -96: vfmaddsub132pd/ps /r (66),(VEX) -97: vfmsubadd132pd/ps /r (66),(VEX) -98: vfmadd132pd/ps /r (66),(VEX) -99: vfmadd132sd/ss /r (66),(VEX),(o128) -9a: vfmsub132pd/ps /r (66),(VEX) -9b: vfmsub132sd/ss /r (66),(VEX),(o128) -9c: vfnmadd132pd/ps /r (66),(VEX) -9d: vfnmadd132sd/ss /r (66),(VEX),(o128) -9e: vfnmsub132pd/ps /r (66),(VEX) -9f: vfnmsub132sd/ss /r (66),(VEX),(o128) -a6: vfmaddsub213pd/ps /r (66),(VEX) -a7: vfmsubadd213pd/ps /r (66),(VEX) -a8: vfmadd213pd/ps /r (66),(VEX) -a9: vfmadd213sd/ss /r (66),(VEX),(o128) -aa: vfmsub213pd/ps /r (66),(VEX) -ab: vfmsub213sd/ss /r (66),(VEX),(o128) -ac: vfnmadd213pd/ps /r (66),(VEX) -ad: vfnmadd213sd/ss /r (66),(VEX),(o128) -ae: vfnmsub213pd/ps /r (66),(VEX) -af: vfnmsub213sd/ss /r (66),(VEX),(o128) -b6: 
vfmaddsub231pd/ps /r (66),(VEX) -b7: vfmsubadd231pd/ps /r (66),(VEX) -b8: vfmadd231pd/ps /r (66),(VEX) -b9: vfmadd231sd/ss /r (66),(VEX),(o128) -ba: vfmsub231pd/ps /r (66),(VEX) -bb: vfmsub231sd/ss /r (66),(VEX),(o128) -bc: vfnmadd231pd/ps /r (66),(VEX) -bd: vfnmadd231sd/ss /r (66),(VEX),(o128) -be: vfnmsub231pd/ps /r (66),(VEX) -bf: vfnmsub231sd/ss /r (66),(VEX),(o128) +90: vgatherdd/q Vx,Hx,Wx (66),(v) +91: vgatherqd/q Vx,Hx,Wx (66),(v) +92: vgatherdps/d Vx,Hx,Wx (66),(v) +93: vgatherqps/d Vx,Hx,Wx (66),(v) +94: +95: +96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) +97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) +98: vfmadd132ps/d Vx,Hx,Wx (66),(v) +99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) +9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) +9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) +9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) +a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) +a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) +a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) +ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) +ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) +af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) +b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) +b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) +b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) +bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) +bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) +bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff -db: aesimc Vdq,Wdq (66),(VEX),(o128) -dc: aesenc Vdq,Wdq (66),(VEX),(o128) -dd: aesenclast Vdq,Wdq (66),(VEX),(o128) -de: aesdec Vdq,Wdq (66),(VEX),(o128) -df: aesdeclast Vdq,Wdq (66),(VEX),(o128) -f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) -f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) +db: VAESIMC Vdq,Wdq (66),(v1) +dc: VAESENC Vdq,Hdq,Wdq (66),(v1) +dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) +de: VAESDEC Vdq,Hdq,Wdq (66),(v1) +df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) +f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) +f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) +f3: ANDN Gy,By,Ey (v) +f4: Grp17 (1A) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) +f6: MULX By,Gy,rDX,Ey (F2),(v) +f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) EndTable Table: 3-byte opcode 2 (0x0f 0x3a) Referrer: 3-byte escape 2 AVXcode: 3 # 0x0f 0x3a 0x00-0xff -04: vpermilps /r,Ib (66),(oVEX) -05: vpermilpd /r,Ib (66),(oVEX) -06: vperm2f128 /r,Ib (66),(oVEX),(o256) -08: roundps Vdq,Wdq,Ib (66),(VEX) -09: roundpd Vdq,Wdq,Ib (66),(VEX) -0a: roundss Vss,Wss,Ib (66),(VEX),(o128) -0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) -0c: blendps Vdq,Wdq,Ib (66),(VEX) -0d: blendpd Vdq,Wdq,Ib (66),(VEX) -0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) -0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) -14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) -15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) -16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) -17: extractps Ed,Vdq,Ib (66),(VEX),(o128) -18: vinsertf128 /r,Ib (66),(oVEX),(o256) -19: vextractf128 /r,Ib (66),(oVEX),(o256) -20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) -21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) -22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) -40: dpps Vdq,Wdq,Ib (66),(VEX) -41: dppd Vdq,Wdq,Ib 
(66),(VEX),(o128) -42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) -44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) -4a: vblendvps /r,Ib (66),(oVEX) -4b: vblendvpd /r,Ib (66),(oVEX) -4c: vpblendvb /r,Ib (66),(oVEX),(o128) -60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) -61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) -62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) -63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) -df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) +00: vpermq Vqq,Wqq,Ib (66),(v) +01: vpermpd Vqq,Wqq,Ib (66),(v) +02: vpblendd Vx,Hx,Wx,Ib (66),(v) +03: +04: vpermilps Vx,Wx,Ib (66),(v) +05: vpermilpd Vx,Wx,Ib (66),(v) +06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) +07: +08: vroundps Vx,Wx,Ib (66) +09: vroundpd Vx,Wx,Ib (66) +0a: vroundss Vss,Wss,Ib (66),(v1) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) +0c: vblendps Vx,Hx,Wx,Ib (66) +0d: vblendpd Vx,Hx,Wx,Ib (66) +0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) +0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) +14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) +15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) +16: vpextrd/q Ey,Vdq,Ib (66),(v1) +17: vextractps Ed,Vdq,Ib (66),(v1) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) +19: vextractf128 Wdq,Vqq,Ib (66),(v) +1d: vcvtps2ph Wx,Vx,Ib (66),(v) +20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) +21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) +22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) +39: vextracti128 Wdq,Vqq,Ib (66),(v) +40: vdpps Vx,Hx,Wx,Ib (66) +41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) +46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) +4a: vblendvps Vx,Hx,Wx,Lx (66),(v) +4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) +4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) +61: vpcmpestri Vdq,Wdq,Ib (66),(v1) +62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) +63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) +f0: RORX Gy,Ey,Ib (F2),(v) EndTable GrpTable: Grp1 @@ -790,7 +843,7 @@ GrpTable: Grp5 2: CALLN Ev (f64) 3: CALLF Ep 4: JMPN Ev (f64) -5: JMPF Ep +5: JMPF Mp 6: PUSH Ev (d64) 7: EndTable @@ -807,7 +860,7 @@ EndTable GrpTable: Grp7 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) -2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) 3: LIDT Ms 4: SMSW Mw/Rv 5: @@ -824,44 +877,45 @@ EndTable GrpTable: Grp9 1: CMPXCHG8B/16B Mq/Mdq -6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) -7: VMPTRST Mq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +7: VMPTRST Mq | VMPTRST Mq (F3) EndTable GrpTable: Grp10 EndTable GrpTable: Grp11 +# Note: the operands are given by group opcode 0: MOV EndTable GrpTable: Grp12 -2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) -4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) -6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) +2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) +4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) +6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) EndTable GrpTable: Grp13 -2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) -4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) -6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) +2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) EndTable GrpTable: Grp14 -2: psrlq Nq,Ib (11B) | psrlq Udq,Ib 
(66),(11B),(VEX),(o128)
-3: psrldq Udq,Ib (66),(11B),(VEX),(o128)
-6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128)
-7: pslldq Udq,Ib (66),(11B),(VEX),(o128)
+2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
+3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
+6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
+7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
 EndTable

 GrpTable: Grp15
-0: fxsave
-1: fxstor
-2: ldmxcsr (VEX)
-3: stmxcsr (VEX)
+0: fxsave | RDFSBASE Ry (F3),(11B)
+1: fxstor | RDGSBASE Ry (F3),(11B)
+2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
+3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
 4: XSAVE
 5: XRSTOR | lfence (11B)
-6: mfence (11B)
+6: XSAVEOPT | mfence (11B)
 7: clflush | sfence (11B)
 EndTable

@@ -872,6 +926,12 @@ GrpTable: Grp16
 3: prefetch T2
 EndTable

+GrpTable: Grp17
+1: BLSR By,Ey (v)
+2: BLSMSK By,Ey (v)
+3: BLSI By,Ey (v)
+EndTable
+
 # AMD's Prefetch Group
 GrpTable: GrpP
 0: PREFETCH
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index eaf11f52fc0b..5f6a5b6c3a15 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -47,7 +47,7 @@ BEGIN {
 	sep_expr = "^\\|$"
 	group_expr = "^Grp[0-9A-Za-z]+"

-	imm_expr = "^[IJAO][a-z]"
+	imm_expr = "^[IJAOL][a-z]"
 	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
 	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
 	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
@@ -59,6 +59,7 @@ BEGIN {
 	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
 	imm_flag["Ob"] = "INAT_MOFFSET"
 	imm_flag["Ov"] = "INAT_MOFFSET"
+	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"

 	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
 	force64_expr = "\\([df]64\\)"
@@ -70,8 +71,12 @@ BEGIN {
 	lprefix3_expr = "\\(F2\\)"
 	max_lprefix = 4

-	vexok_expr = "\\(VEX\\)"
-	vexonly_expr = "\\(oVEX\\)"
+	# All opcodes starting with lower-case 'v' or with (v1) superscript
+	# accepts VEX prefix
+	vexok_opcode_expr = "^v.*"
+	vexok_expr = "\\(v1\\)"
+	# All opcodes with (v) superscript supports *only* VEX prefix
+	vexonly_expr = "\\(v\\)"

 	prefix_expr = "\\(Prefix\\)"
 	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -85,8 +90,8 @@ BEGIN {
 	prefix_num["SEG=GS"] = "INAT_PFX_GS"
 	prefix_num["SEG=SS"] = "INAT_PFX_SS"
 	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
-	prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2"
-	prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3"
+	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
+	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"

 	clear_vars()
 }
@@ -310,12 +315,10 @@ function convert_operands(count,opnd, i,j,imm,mod)
 	if (match(opcode, fpu_expr))
 		flags = add_flags(flags, "INAT_MODRM")

-	# check VEX only code
+	# check VEX codes
 	if (match(ext, vexonly_expr))
 		flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
-
-	# check VEX only code
-	if (match(ext, vexok_expr))
+	else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
 		flags = add_flags(flags, "INAT_VEXOK")

 	# check prefixes
--
cgit v1.2.3

From 9dde9dc0a81c7aeb863b35121d09011f09b4897c Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu
Date: Mon, 5 Dec 2011 21:06:03 +0900
Subject: x86/tools: Add decoded instruction dump mode

Add an instruction dump mode to the insn_sanity tool for checking
whether the decoder really decoded the given instructions. This mode
is enabled by passing -v twice (-vv) to insn_sanity. It is useful for
anyone who wants to check whether the decoder can decode some
instructions correctly, e.g.
$ echo 0f 73 10 11 | ./insn_sanity -y -vv -i -
Instruction = {
	.prefixes = { .value = 0, bytes[] = {0, 0, 0, 0}, .got = 1, .nbytes = 0},
	.rex_prefix = { .value = 0, bytes[] = {0, 0, 0, 0}, .got = 1, .nbytes = 0},
	.vex_prefix = { .value = 0, bytes[] = {0, 0, 0, 0}, .got = 1, .nbytes = 0},
	.opcode = { .value = 29455, bytes[] = {f, 73, 0, 0}, .got = 1, .nbytes = 2},
	.modrm = { .value = 16, bytes[] = {10, 0, 0, 0}, .got = 1, .nbytes = 1},
	.sib = { .value = 0, bytes[] = {0, 0, 0, 0}, .got = 1, .nbytes = 0},
	.displacement = { .value = 0, bytes[] = {0, 0, 0, 0}, .got = 1, .nbytes = 0},
	.immediate1 = { .value = 17, bytes[] = {11, 0, 0, 0}, .got = 1, .nbytes = 1},
	.immediate2 = { .value = 0, bytes[] = {0, 0, 0, 0}, .got = 0, .nbytes = 0},
	.attr = 44800, .opnd_bytes = 4, .addr_bytes = 8,
	.length = 4, .x86_64 = 1, .kaddr = 0x7fff0f7d9430}
Success: decoded and checked 1 given instructions with 0 errors (seed:0x0)

Signed-off-by: Masami Hiramatsu
Cc: "H. Peter Anvin"
Cc: yrl.pp-manager.tt@hitachi.com
Link: http://lkml.kernel.org/r/20111205120603.15475.91192.stgit@cloud
Signed-off-by: Ingo Molnar
---
 arch/x86/tools/insn_sanity.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index b6720d6b38cb..cc2f8c131286 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -59,7 +59,7 @@ static void usage(const char *err)
 	fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
 	fprintf(stderr, "\t-y	64bit mode\n");
 	fprintf(stderr, "\t-n	32bit mode\n");
-	fprintf(stderr, "\t-v	Verbose mode\n");
+	fprintf(stderr, "\t-v	Verbosity(-vv dumps any decoded result)\n");
 	fprintf(stderr, "\t-s	Give a random seed (and iteration number)\n");
 	fprintf(stderr, "\t-m	Give a maximum iteration number\n");
 	fprintf(stderr, "\t-i	Give an input file with decoded binary\n");
@@ -188,7 +188,7 @@ static void parse_args(int argc, char **argv)
 			x86_64 = 0;
 			break;
 		case 'v':
-			verbose = 1;
+			verbose++;
 			break;
 		case 'i':
 			if (strcmp("-", optarg) == 0)
@@ -264,7 +264,8 @@ int main(int argc, char **argv)
 			errors++;
 		} else if (verbose && !insn_complete(&insn))
 			dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
-
+		else if (verbose >= 2)
+			dump_insn(stdout, &insn);
 		insns++;
 	}
--
cgit v1.2.3

From 1e2ad28f80b4e155678259238f51edebc19e4014 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Fri, 18 Nov 2011 12:35:21 +0100
Subject: perf, x86: Implement event scheduler helper functions

This patch introduces helper functions for the x86 perf scheduler
code. We need them to later add more complex functionality that
supports overlapping counter constraints (next patch).

The algorithm is modified so that the range of weight values is now
generated from the constraints; there should be no other functional
changes.

The scheduler is now driven through these helper functions: there are
functions to initialize the scheduler, traverse the event list, find
unused counters, etc. The scheduler keeps its own state.

V3:
* Added macro for_each_set_bit_cont().
* Changed the function interfaces of perf_sched_find_counter() and
  perf_sched_next_event() to use bool as return value.
* Added some comments to make the code easier to understand.

V4:
* Fix broken event assignment if the weight of the first event is not
  wmin (perf_sched_init()).
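For reference, here is a minimal sketch of how the helpers fit
together. It is a simplified rendering of the perf_assign_events()
loop added by this patch (see the diff below), not additional code:

	struct perf_sched sched;

	perf_sched_init(&sched, constraints, n, wmin, wmax);

	do {
		/* Find a free counter the current event may run on: */
		if (!perf_sched_find_counter(&sched))
			break;			/* failed */
		if (assign)
			assign[sched.state.event] = sched.state.counter;
		/* Pick the next unassigned event, least weight first: */
	} while (perf_sched_next_event(&sched));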
Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1321616122-1533-2-git-send-email-robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 185 ++++++++++++++++++++++++++++----------- include/linux/bitops.h | 10 ++- 2 files changed, 140 insertions(+), 55 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bda212a0010..5a469d3d0c66 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -484,18 +484,145 @@ static inline int is_x86_event(struct perf_event *event) return event->pmu == &pmu; } +/* + * Event scheduler state: + * + * Assign events iterating over all events and counters, beginning + * with events with least weights first. Keep the current iterator + * state in struct sched_state. + */ +struct sched_state { + int weight; + int event; /* event index */ + int counter; /* counter index */ + int unassigned; /* number of events to be assigned left */ + unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; +}; + +struct perf_sched { + int max_weight; + int max_events; + struct event_constraint **constraints; + struct sched_state state; +}; + +/* + * Initialize interator that runs through all events and counters. + */ +static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, + int num, int wmin, int wmax) +{ + int idx; + + memset(sched, 0, sizeof(*sched)); + sched->max_events = num; + sched->max_weight = wmax; + sched->constraints = c; + + for (idx = 0; idx < num; idx++) { + if (c[idx]->weight == wmin) + break; + } + + sched->state.event = idx; /* start with min weight */ + sched->state.weight = wmin; + sched->state.unassigned = num; +} + +/* + * Select a counter for the current event to schedule. Return true on + * success. + */ +static bool perf_sched_find_counter(struct perf_sched *sched) +{ + struct event_constraint *c; + int idx; + + if (!sched->state.unassigned) + return false; + + if (sched->state.event >= sched->max_events) + return false; + + c = sched->constraints[sched->state.event]; + + /* Grab the first unused counter starting with idx */ + idx = sched->state.counter; + for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { + if (!__test_and_set_bit(idx, sched->state.used)) + break; + } + sched->state.counter = idx; + + if (idx >= X86_PMC_IDX_MAX) + return false; + + return true; +} + +/* + * Go through all unassigned events and find the next one to schedule. + * Take events with the least weight first. Return true on success. + */ +static bool perf_sched_next_event(struct perf_sched *sched) +{ + struct event_constraint *c; + + if (!sched->state.unassigned || !--sched->state.unassigned) + return false; + + do { + /* next event */ + sched->state.event++; + if (sched->state.event >= sched->max_events) { + /* next weight */ + sched->state.event = 0; + sched->state.weight++; + if (sched->state.weight > sched->max_weight) + return false; + } + c = sched->constraints[sched->state.event]; + } while (c->weight != sched->state.weight); + + sched->state.counter = 0; /* start with first counter */ + + return true; +} + +/* + * Assign a counter for each event. 
+ */ +static int perf_assign_events(struct event_constraint **constraints, int n, + int wmin, int wmax, int *assign) +{ + struct perf_sched sched; + + perf_sched_init(&sched, constraints, n, wmin, wmax); + + do { + if (!perf_sched_find_counter(&sched)) + break; /* failed */ + if (assign) + assign[sched.state.event] = sched.state.counter; + } while (perf_sched_next_event(&sched)); + + return sched.state.unassigned; +} + int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - int i, j, w, wmax, num = 0; + int i, wmin, wmax, num = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); - for (i = 0; i < n; i++) { + for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); constraints[i] = c; + wmin = min(wmin, c->weight); + wmax = max(wmax, c->weight); } /* @@ -521,59 +648,11 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (assign) assign[i] = hwc->idx; } - if (i == n) - goto done; - /* - * begin slow path - */ + /* slow path */ + if (i != n) + num = perf_assign_events(constraints, n, wmin, wmax, assign); - bitmap_zero(used_mask, X86_PMC_IDX_MAX); - - /* - * weight = number of possible counters - * - * 1 = most constrained, only works on one counter - * wmax = least constrained, works on any counter - * - * assign events to counters starting with most - * constrained events. - */ - wmax = x86_pmu.num_counters; - - /* - * when fixed event counters are present, - * wmax is incremented by 1 to account - * for one more choice - */ - if (x86_pmu.num_counters_fixed) - wmax++; - - for (w = 1, num = n; num && w <= wmax; w++) { - /* for each event */ - for (i = 0; num && i < n; i++) { - c = constraints[i]; - hwc = &cpuc->event_list[i]->hw; - - if (c->weight != w) - continue; - - for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { - if (!test_bit(j, used_mask)) - break; - } - - if (j == X86_PMC_IDX_MAX) - break; - - __set_bit(j, used_mask); - - if (assign) - assign[i] = j; - num--; - } - } -done: /* * scheduling failed or is just a simulation, * free resources if necessary diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a3ef66a2a083..3c1063acb2ab 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -22,8 +22,14 @@ extern unsigned long __sw_hweight64(__u64 w); #include #define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ - (bit) < (size); \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_cont(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) static __inline__ int get_bitmask_order(unsigned int count) -- cgit v1.2.3 From bc1738f6ee83015f090867813dcca4d690e7917c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 18 Nov 2011 12:35:22 +0100 Subject: perf, x86: Fix event scheduler for constraints with overlapping counters The current x86 event scheduler fails to resolve scheduling problems of certain combinations of events and constraints. This happens if the counter mask of such an event is not a subset of any other counter mask of a constraint with an equal or higher weight, e.g. 
constraints of the AMD family 15h pmu:

                        counter mask    weight
 amd_f15_PMC30          0x09            2       <--- overlapping counters
 amd_f15_PMC20          0x07            3
 amd_f15_PMC53          0x38            3

The scheduler then fails to find an existing solution. Here is an
example:

 event code     counter         failure         possible solution
 0x02E          PMC[3,0]        0               3
 0x043          PMC[2:0]        1               0
 0x045          PMC[2:0]        2               1
 0x046          PMC[2:0]        FAIL            2

The event scheduler may not select the correct counter in the first
cycle because it needs to know which subsequent events will be
scheduled. It may then fail to schedule the events.

To solve this, we now save the scheduler state of events with
overlapping counter constraints. If we fail to schedule the events,
we roll back to those states and try to use another free counter.

Constraints with overlapping counters are marked with a newly
introduced overlap flag. We set the overlap flag for such constraints
to give the scheduler a hint which events to select for counter
rescheduling. The EVENT_CONSTRAINT_OVERLAP() macro can be used for
this.

Care must be taken as the rescheduling algorithm is O(n!), which will
increase scheduling cycles dramatically on an over-committed system.
The number of such EVENT_CONSTRAINT_OVERLAP() macros and their counter
masks must therefore be kept at a minimum. Thus, the current stack is
limited to 2 states to limit the number of loops the algorithm takes
in the worst case.

On systems with no overlapping-counter constraints, this
implementation does not increase the loop count compared to the
previous algorithm.

V2:
* Renamed redo -> overlap.
* Reimplementation using perf scheduling helper functions.

V3:
* Added WARN_ON_ONCE() if out of save states.
* Changed the function interface of perf_sched_restore_state() to use
  bool as return value.

Signed-off-by: Robert Richter
Signed-off-by: Peter Zijlstra
Cc: Stephane Eranian
Link: http://lkml.kernel.org/r/1321616122-1533-3-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event.c     | 45 ++++++++++++++++++++++++++++++++++--
 arch/x86/kernel/cpu/perf_event.h     | 30 ++++++++++++++++++++++--
 arch/x86/kernel/cpu/perf_event_amd.c |  2 +-
 3 files changed, 72 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5a469d3d0c66..fa6fdec5afbc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -499,11 +499,16 @@ struct sched_state {
 	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 };

+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define	SCHED_STATES_MAX	2
+
 struct perf_sched {
 	int			max_weight;
 	int			max_events;
 	struct event_constraint	**constraints;
 	struct sched_state	state;
+	int			saved_states;
+	struct sched_state	saved[SCHED_STATES_MAX];
 };

 /*
@@ -529,11 +534,34 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
 	sched->state.unassigned	= num;
 }

+static void perf_sched_save_state(struct perf_sched *sched)
+{
+	if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+		return;
+
+	sched->saved[sched->saved_states] = sched->state;
+	sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+	if (!sched->saved_states)
+		return false;
+
+	sched->saved_states--;
+	sched->state = sched->saved[sched->saved_states];
+
+	/* continue with next counter: */
+	clear_bit(sched->state.counter++, sched->state.used);
+
+	return true;
+}
+
 /*
  * Select a counter for the current event to schedule. Return true on
  * success.
*/ -static bool perf_sched_find_counter(struct perf_sched *sched) +static bool __perf_sched_find_counter(struct perf_sched *sched) { struct event_constraint *c; int idx; @@ -557,6 +585,19 @@ static bool perf_sched_find_counter(struct perf_sched *sched) if (idx >= X86_PMC_IDX_MAX) return false; + if (c->overlap) + perf_sched_save_state(sched); + + return true; +} + +static bool perf_sched_find_counter(struct perf_sched *sched) +{ + while (!__perf_sched_find_counter(sched)) { + if (!perf_sched_restore_state(sched)) + return false; + } + return true; } @@ -1250,7 +1291,7 @@ static int __init init_hw_perf_events(void) unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, - 0, x86_pmu.num_counters); + 0, x86_pmu.num_counters, 0); if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index b9698d40ac4b..51a985cbc12f 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -45,6 +45,7 @@ struct event_constraint { u64 code; u64 cmask; int weight; + int overlap; }; struct amd_nb { @@ -151,15 +152,40 @@ struct cpu_hw_events { void *kfree_on_online; }; -#define __EVENT_CONSTRAINT(c, n, m, w) {\ +#define __EVENT_CONSTRAINT(c, n, m, w, o) {\ { .idxmsk64 = (n) }, \ .code = (c), \ .cmask = (m), \ .weight = (w), \ + .overlap = (o), \ } #define EVENT_CONSTRAINT(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) + +/* + * The overlap flag marks event constraints with overlapping counter + * masks. This is the case if the counter mask of such an event is not + * a subset of any other counter mask of a constraint with an equal or + * higher weight, e.g.: + * + * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); + * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0); + * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0); + * + * The event scheduler may not select the correct counter in the first + * cycle because it needs to know which subsequent events will be + * scheduled. It may fail to schedule the events then. So we set the + * overlap flag for such constraints to give the scheduler a hint which + * events to select for counter rescheduling. + * + * Care must be taken as the rescheduling algorithm is O(n!) which + * will increase scheduling cycles for an over-commited system + * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros + * and its counter masks must be kept at a minimum. + */ +#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) /* * Constraint on the Event code. 
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index aeefd45697a2..0397b23be8e9 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = { static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); -static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); +static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); -- cgit v1.2.3 From 4defea8559bc0f97a899d94c8d19d3b8bb802bc4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Nov 2011 15:15:42 +0100 Subject: perf, x86: Prefer fixed-purpose counters when scheduling This avoids a scheduling failure for cases like: cycles, cycles, instructions, instructions (on Core2) Which would end up being programmed like: PMC0, PMC1, FP-instructions, fail Because all events will have the same weight. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-8tnwb92asqj7xajqqoty4gel@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fa6fdec5afbc..66f8ba9a67f9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -574,16 +574,25 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) c = sched->constraints[sched->state.event]; + /* Prefer fixed purpose counters */ + if (x86_pmu.num_counters_fixed) { + idx = X86_PMC_IDX_FIXED; + for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { + if (!__test_and_set_bit(idx, sched->state.used)) + goto done; + } + } /* Grab the first unused counter starting with idx */ idx = sched->state.counter; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { + for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { if (!__test_and_set_bit(idx, sched->state.used)) - break; + goto done; } - sched->state.counter = idx; - if (idx >= X86_PMC_IDX_MAX) - return false; + return false; + +done: + sched->state.counter = idx; if (c->overlap) perf_sched_save_state(sched); -- cgit v1.2.3 From 9cdbe1cbac4ec318037297175587a0080acc9d11 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 6 Dec 2011 17:27:29 +0100 Subject: jump_label, x86: Fix section mismatch WARNING: arch/x86/kernel/built-in.o(.text+0x4c71): Section mismatch in reference from the function arch_jump_label_transform_static() to the function .init.text:text_poke_early() The function arch_jump_label_transform_static() references the function __init text_poke_early(). This is often because arch_jump_label_transform_static lacks a __init annotation or the annotation of text_poke_early is wrong. 
Signed-off-by: Peter Zijlstra Cc: Jason Baron Link: http://lkml.kernel.org/n/tip-9lefe89mrvurrwpqw5h8xm8z@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/jump_label.c | 2 +- kernel/jump_label.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index ea9d5f2f13ef..2889b3d43882 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry, put_online_cpus(); } -void arch_jump_label_transform_static(struct jump_entry *entry, +__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { __jump_label_transform(entry, type, text_poke_early); diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 51a175ab0a03..3fb7b79c86fd 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -142,7 +142,7 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start, * running code can override this to make the non-live update case * cheaper. */ -void __weak arch_jump_label_transform_static(struct jump_entry *entry, +void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { arch_jump_label_transform(entry, type); -- cgit v1.2.3 From ffb871bc9156ee2e5cf442f61250c5bd6aad17e3 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 10 Nov 2011 14:57:26 +0200 Subject: x86, perf: Disable non available architectural events Intel CPUs report non-available architectural events in cpuid leaf 0AH.EBX. Use it to disable events that are not available according to CPU. Signed-off-by: Gleb Natapov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1320929850-10480-7-git-send-email-gleb@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 14 ++++++++++++++ arch/x86/kernel/cpu/perf_event.h | 5 +++++ arch/x86/kernel/cpu/perf_event_intel.c | 28 +++++++++++++++++++++++----- 3 files changed, 42 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index f61c62f7d5d8..c6998bc75456 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -57,6 +57,7 @@ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +#define ARCH_PERFMON_EVENTS_COUNT 7 /* * Intel "Architectural Performance Monitoring" CPUID @@ -72,6 +73,19 @@ union cpuid10_eax { unsigned int full; }; +union cpuid10_ebx { + struct { + unsigned int no_unhalted_core_cycles:1; + unsigned int no_instructions_retired:1; + unsigned int no_unhalted_reference_cycles:1; + unsigned int no_llc_reference:1; + unsigned int no_llc_misses:1; + unsigned int no_branch_instruction_retired:1; + unsigned int no_branch_misses_retired:1; + } split; + unsigned int full; +}; + union cpuid10_edx { struct { unsigned int num_counters_fixed:5; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 51a985cbc12f..f49c5c21085c 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -285,6 +285,11 @@ struct x86_pmu { int num_counters_fixed; int cntval_bits; u64 cntval_mask; + union { + unsigned long events_maskl; + unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)]; + }; + int events_mask_len; int apic; u64 max_period; struct event_constraint * diff --git a/arch/x86/kernel/cpu/perf_event_intel.c 
b/arch/x86/kernel/cpu/perf_event_intel.c
index 8d601b18bf9f..201156b80a37 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1552,13 +1552,23 @@ static void intel_sandybridge_quirks(void)
 	x86_pmu.pebs_constraints = NULL;
 }

+static const int intel_event_id_to_hw_id[] __initconst = {
+	PERF_COUNT_HW_CPU_CYCLES,
+	PERF_COUNT_HW_INSTRUCTIONS,
+	PERF_COUNT_HW_BUS_CYCLES,
+	PERF_COUNT_HW_CACHE_REFERENCES,
+	PERF_COUNT_HW_CACHE_MISSES,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+	PERF_COUNT_HW_BRANCH_MISSES,
+};
+
 __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
+	union cpuid10_ebx ebx;
 	unsigned int unused;
-	unsigned int ebx;
-	int version;
+	int version, bit;

 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
 		switch (boot_cpu_data.x86) {
@@ -1574,8 +1584,8 @@ __init int intel_pmu_init(void)
 	 * Check whether the Architectural PerfMon supports
 	 * Branch Misses Retired hw_event or not.
 	 */
-	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
-	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
 		return -ENODEV;

 	version = eax.split.version_id;
@@ -1651,7 +1661,7 @@ __init int intel_pmu_init(void)
 		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;

-		if (ebx & 0x40) {
+		if (ebx.split.no_branch_misses_retired) {
 			/*
 			 * Erratum AAJ80 detected, we work it around by using
 			 * the BR_MISP_EXEC.ANY event. This will over-count
 			 * branch-misses, but it's still much better than the
 			 * architectural event which is often completely bogus:
 			 */
 			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+			ebx.split.no_branch_misses_retired = 0;

 			pr_cont("erratum AAJ80 worked around, ");
 		}
@@ -1738,5 +1749,12 @@ __init int intel_pmu_init(void)
 			break;
 		}
 	}
+	x86_pmu.events_maskl = ebx.full;
+	x86_pmu.events_mask_len = eax.split.mask_length;
+
+	/* disable event that reported as not presend by cpuid */
+	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_event_id_to_hw_id))
+		intel_perfmon_event_map[intel_event_id_to_hw_id[bit]] = 0;
+
 	return 0;
 }
--
cgit v1.2.3

From c1d6f42f1a42c721513e2f388c208e5348004f64 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 6 Dec 2011 14:07:15 +0100
Subject: perf, x86: Implement arch event mask as quirk

Implement the disabling of arch events as a quirk so that we can print
a message along with it. This creates some visibility into the problem
space and could allow us to work on adding more work-arounds like the
AAJ80 one.
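To illustrate the resulting pattern, a hedged sketch only, where
my_model_quirk() is a made-up example and not part of the patch: a
quirk is now a small function queued via x86_add_quirk() onto a linked
list that init_hw_perf_events() walks:

	/* Hypothetical quirk; the real ones are e.g. intel_clovertown_quirk(). */
	static __init void my_model_quirk(void)
	{
		printk(KERN_WARNING "PMU quirk applied\n");
		/* ... adjust x86_pmu or the event maps here ... */
	}

	x86_add_quirk(my_model_quirk);	/* prepends to x86_pmu.quirks */

	/* later, in init_hw_perf_events(): */
	for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
		quirk->func();

Because the macro prepends, a quirk installed first runs last, which
is exactly what the "Install first, so it runs last" comment in the
patch relies on.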
Requested-by: Ingo Molnar Cc: Gleb Natapov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-wcja2z48wklzu1b0nkz0a5y7@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 5 ++- arch/x86/kernel/cpu/perf_event.h | 16 ++++++- arch/x86/kernel/cpu/perf_event_intel.c | 80 +++++++++++++++++++++------------- 3 files changed, 68 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 66f8ba9a67f9..55889e0b1452 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1248,6 +1248,7 @@ static void __init pmu_check_apic(void) static int __init init_hw_perf_events(void) { + struct x86_pmu_quirk *quirk; struct event_constraint *c; int err; @@ -1276,8 +1277,8 @@ static int __init init_hw_perf_events(void) pr_cont("%s PMU driver.\n", x86_pmu.name); - if (x86_pmu.quirks) - x86_pmu.quirks(); + for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) + quirk->func(); if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index f49c5c21085c..8944062f46e2 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -261,6 +261,11 @@ union perf_capabilities { u64 capabilities; }; +struct x86_pmu_quirk { + struct x86_pmu_quirk *next; + void (*func)(void); +}; + /* * struct x86_pmu - generic x86 pmu */ @@ -299,7 +304,7 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; - void (*quirks)(void); + struct x86_pmu_quirk *quirks; int perfctr_second_write; int (*cpu_prepare)(int cpu); @@ -340,6 +345,15 @@ struct x86_pmu { struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); }; +#define x86_add_quirk(func_) \ +do { \ + static struct x86_pmu_quirk __quirk __initdata = { \ + .func = func_, \ + }; \ + __quirk.next = x86_pmu.quirks; \ + x86_pmu.quirks = &__quirk; \ +} while (0) + #define ERF_NO_HT_SHARING 1 #define ERF_HAS_RSP_1 2 diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 201156b80a37..2c3bf53d0302 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1519,7 +1519,7 @@ static __initconst const struct x86_pmu intel_pmu = { .guest_get_msrs = intel_guest_get_msrs, }; -static void intel_clovertown_quirks(void) +static __init void intel_clovertown_quirk(void) { /* * PEBS is unreliable due to: @@ -1545,30 +1545,61 @@ static void intel_clovertown_quirks(void) x86_pmu.pebs_constraints = NULL; } -static void intel_sandybridge_quirks(void) +static __init void intel_sandybridge_quirk(void) { printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); x86_pmu.pebs = 0; x86_pmu.pebs_constraints = NULL; } -static const int intel_event_id_to_hw_id[] __initconst = { - PERF_COUNT_HW_CPU_CYCLES, - PERF_COUNT_HW_INSTRUCTIONS, - PERF_COUNT_HW_BUS_CYCLES, - PERF_COUNT_HW_CACHE_REFERENCES, - PERF_COUNT_HW_CACHE_MISSES, - PERF_COUNT_HW_BRANCH_INSTRUCTIONS, - PERF_COUNT_HW_BRANCH_MISSES, +static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { + { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, + { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, + { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, + { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, + { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, + { 
PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, + { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, }; +static __init void intel_arch_events_quirk(void) +{ + int bit; + + /* disable event that reported as not presend by cpuid */ + for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { + intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; + printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", + intel_arch_events_map[bit].name); + } +} + +static __init void intel_nehalem_quirk(void) +{ + union cpuid10_ebx ebx; + + ebx.full = x86_pmu.events_maskl; + if (ebx.split.no_branch_misses_retired) { + /* + * Erratum AAJ80 detected, we work it around by using + * the BR_MISP_EXEC.ANY event. This will over-count + * branch-misses, but it's still much better than the + * architectural event which is often completely bogus: + */ + intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; + ebx.split.no_branch_misses_retired = 0; + x86_pmu.events_maskl = ebx.full; + printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); + } +} + __init int intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; union cpuid10_ebx ebx; unsigned int unused; - int version, bit; + int version; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -1599,6 +1630,9 @@ __init int intel_pmu_init(void) x86_pmu.cntval_bits = eax.split.bit_width; x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; + x86_pmu.events_maskl = ebx.full; + x86_pmu.events_mask_len = eax.split.mask_length; + /* * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events: @@ -1618,6 +1652,8 @@ __init int intel_pmu_init(void) intel_ds_init(); + x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ + /* * Install the hw-cache-events table: */ @@ -1627,7 +1663,7 @@ __init int intel_pmu_init(void) break; case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - x86_pmu.quirks = intel_clovertown_quirks; + x86_add_quirk(intel_clovertown_quirk); case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ case 29: /* six-core 45 nm xeon "Dunnington" */ @@ -1661,18 +1697,8 @@ __init int intel_pmu_init(void) /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; - if (ebx.split.no_branch_misses_retired) { - /* - * Erratum AAJ80 detected, we work it around by using - * the BR_MISP_EXEC.ANY event. 
This will over-count - * branch-misses, but it's still much better than the - * architectural event which is often completely bogus: - */ - intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; - ebx.split.no_branch_misses_retired = 0; + x86_add_quirk(intel_nehalem_quirk); - pr_cont("erratum AAJ80 worked around, "); - } pr_cont("Nehalem events, "); break; @@ -1712,7 +1738,7 @@ __init int intel_pmu_init(void) break; case 42: /* SandyBridge */ - x86_pmu.quirks = intel_sandybridge_quirks; + x86_add_quirk(intel_sandybridge_quirk); case 45: /* SandyBridge, "Romely-EP" */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -1749,12 +1775,6 @@ __init int intel_pmu_init(void) break; } } - x86_pmu.events_maskl = ebx.full; - x86_pmu.events_mask_len = eax.split.mask_length; - - /* disable event that reported as not presend by cpuid */ - for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_event_id_to_hw_id)) - intel_perfmon_event_map[intel_event_id_to_hw_id[bit]] = 0; return 0; } -- cgit v1.2.3 From b3d9468a8bd218a695e3a0ff112cd4efd27b670a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 10 Nov 2011 14:57:27 +0200 Subject: perf, x86: Expose perf capability to other modules KVM needs to know perf capability to decide which PMU it can expose to a guest. Signed-off-by: Gleb Natapov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1320929850-10480-8-git-send-email-gleb@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 15 +++++++++++++++ arch/x86/kernel/cpu/perf_event.c | 12 ++++++++++++ 2 files changed, 27 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index c6998bc75456..b50e9d15aae0 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -95,6 +95,15 @@ union cpuid10_edx { unsigned int full; }; +struct x86_pmu_capability { + int version; + int num_counters_gp; + int num_counters_fixed; + int bit_width_gp; + int bit_width_fixed; + unsigned int events_mask; + int events_mask_len; +}; /* * Fixed-purpose performance events: @@ -216,6 +225,7 @@ struct perf_guest_switch_msr { }; extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); +extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); #else static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) { @@ -223,6 +233,11 @@ static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) return NULL; } +static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) +{ + memset(cap, 0, sizeof(*cap)); +} + static inline void perf_events_lapic_init(void) { } #endif diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 55889e0b1452..930fe4879542 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1696,3 +1696,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs) return misc; } + +void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) +{ + cap->version = x86_pmu.version; + cap->num_counters_gp = x86_pmu.num_counters; + cap->num_counters_fixed = x86_pmu.num_counters_fixed; + cap->bit_width_gp = x86_pmu.cntval_bits; + cap->bit_width_fixed = x86_pmu.cntval_bits; + cap->events_mask = (unsigned int)x86_pmu.events_maskl; + cap->events_mask_len = x86_pmu.events_mask_len; +} +EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); -- cgit v1.2.3 From f8c852031a383ac260ae37df7ad063d42d0ed271 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 7 
Dec 2011 11:16:38 +0100
Subject: oprofile: Fix oprofile_timer_exit() breakage

Remove the remaining traces of oprofile_timer_exit() completely.

Signed-off-by: Robert Richter
---
 arch/s390/oprofile/init.c | 1 -
 drivers/oprofile/oprof.h  | 1 -
 2 files changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 6efc18b5e60a..113d7cbbc065 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -171,7 +171,6 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)

 static void oprofile_hwsampler_exit(void)
 {
-	oprofile_timer_exit();
 	hwsampler_shutdown();
 }

diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h
index 769fb0fcac44..d32ef816337c 100644
--- a/drivers/oprofile/oprof.h
+++ b/drivers/oprofile/oprof.h
@@ -35,7 +35,6 @@ struct dentry;
 void oprofile_create_files(struct super_block *sb, struct dentry *root);

 int oprofile_timer_init(struct oprofile_operations *ops);
-void oprofile_timer_exit(void);
 #ifdef CONFIG_OPROFILE_NMI_TIMER
 int op_nmi_timer_init(struct oprofile_operations *ops);
 #else
--
cgit v1.2.3

From dd3c4670d7fafeb18bf7542fbcfd2606fb06a4a1 Mon Sep 17 00:00:00 2001
From: Andreas Krebbel
Date: Fri, 25 Nov 2011 20:03:05 +0100
Subject: oprofile, s390: Add event interface to the System z hardware sampling module

With this patch the OProfile Basic Mode Sampling support for System z
is enhanced with a counter file system. That way hardware sampling can
be configured using the user space tools with only minor
modifications.

With this patch, the new cpu_types (s390/z10, s390/z196) are returned
by default in order to indicate that we are running on a CPU which
provides the hardware sampling facility. Existing user space tools
will complain about an unknown cpu type.

In order to stay compatible with existing user space tools the
`cpu_type' module parameter has been added. Setting the parameter to
`timer' forces the module to return `timer' as cpu_type. The module
will still try to use hardware sampling if available, and the
hwsampling virtual filesystem will also be available for
configuration. So this has a different effect than using the generic
oprofile module parameter `timer=1'.

If basic mode sampling is enabled on the machine and the
cpu_type=timer parameter is not used, the kernel module provides the
following virtual filesystem:

 /dev/oprofile/0/enabled
 /dev/oprofile/0/event
 /dev/oprofile/0/count
 /dev/oprofile/0/unit_mask
 /dev/oprofile/0/kernel
 /dev/oprofile/0/user

In the counter file system only the values of 'enabled', 'count',
'kernel', and 'user' are evaluated by the kernel module. Everything
else must contain fixed values. The 'event' value supports only a
single event - HWSAMPLING with value 0. The 'count' value specifies
the hardware sampling rate as it is passed to the CPU measurement
facility. The 'kernel' and 'user' flags can now be used to filter for
samples when using hardware sampling.

Additionally, the following file will be created:

 /dev/oprofile/timer/enabled

This will always be the inverted value of /dev/oprofile/0/enabled;
0 is not accepted without hardware sampling.
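As a usage illustration only, a hedged sketch that is not part of the
patch: with the counter file system in place, the virtual counter can
be configured with plain file writes. The interval value below is just
the driver's DEFAULT_INTERVAL; any value is clamped to the hardware
range:

	#include <stdio.h>
	#include <stdlib.h>

	/* Write one value to a file of the /dev/oprofile layout above. */
	static void write_val(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");

		if (!f || fputs(val, f) == EOF || fclose(f) == EOF) {
			perror(path);
			exit(1);
		}
	}

	int main(void)
	{
		write_val("/dev/oprofile/0/event", "0");       /* only HWSAMPLING (0) */
		write_val("/dev/oprofile/0/unit_mask", "0");   /* dummy, must be 0 */
		write_val("/dev/oprofile/0/count", "4127518"); /* sampling interval */
		write_val("/dev/oprofile/0/kernel", "1");      /* sample kernel space */
		write_val("/dev/oprofile/0/user", "1");        /* sample user space */
		write_val("/dev/oprofile/0/enabled", "1");     /* enable the counter */
		return 0;
	}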
Signed-off-by: Andreas Krebbel Signed-off-by: Robert Richter --- Documentation/kernel-parameters.txt | 2 + arch/s390/oprofile/hwsampler.c | 7 +- arch/s390/oprofile/init.c | 372 +++++++++++++++++++++++++++++++++--- arch/s390/oprofile/op_counter.h | 23 +++ 4 files changed, 374 insertions(+), 30 deletions(-) create mode 100644 arch/s390/oprofile/op_counter.h (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f7735a125f4b..60b98cea06fc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1851,6 +1851,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. timer: [X86] Force use of architectural NMI timer mode (see also oprofile.timer for generic hr timer mode) + [s390] Force legacy basic mode sampling + (report cpu_type "timer") oops=panic Always panic on oopses. Default is to just kill the process, but there is a small probability of diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 4552ce40c81a..cab0fe932c08 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -22,6 +22,7 @@ #include #include "hwsampler.h" +#include "op_counter.h" #define MAX_NUM_SDB 511 #define MIN_NUM_SDB 1 @@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, if (sample_data_ptr->P == 1) { /* userspace sample */ unsigned int pid = sample_data_ptr->prim_asn; + if (!counter_config.user) + goto skip_sample; rcu_read_lock(); tsk = pid_task(find_vpid(pid), PIDTYPE_PID); if (tsk) @@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, rcu_read_unlock(); } else { /* kernelspace sample */ + if (!counter_config.kernel) + goto skip_sample; regs = task_pt_regs(current); } @@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, !sample_data_ptr->P, tsk); mutex_unlock(&hws_sem); - + skip_sample: sample_data_ptr++; } } diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 113d7cbbc065..6cf2286d0405 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -2,10 +2,11 @@ * arch/s390/oprofile/init.c * * S390 Version - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Thomas Spatzier (tspat@de.ibm.com) * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) * * @remark Copyright 2002-2011 OProfile authors */ @@ -14,6 +15,8 @@ #include #include #include +#include +#include #include "../../../drivers/oprofile/oprof.h" @@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); #ifdef CONFIG_64BIT #include "hwsampler.h" +#include "op_counter.h" #define DEFAULT_INTERVAL 4127518 @@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval; static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; -static int hwsampler_file; +static int hwsampler_enabled; static int hwsampler_running; /* start_mutex must be held to change */ +static int hwsampler_available; static struct oprofile_operations timer_ops; +struct op_counter_config counter_config; + +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, +}; 
+static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing timer to be returned " + "as cpu type\n"); + } else { + force_cpu_type = 0; + } + + return 0; +} +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); +MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" + "(report cpu_type \"timer\""); + static int oprofile_hwsampler_start(void) { int retval; - hwsampler_running = hwsampler_file; + hwsampler_running = hwsampler_enabled; if (!hwsampler_running) return timer_ops.start(); @@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void) return; } +/* + * File ops used for: + * /dev/oprofile/0/enabled + * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) + */ + static ssize_t hwsampler_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { - return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); + return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); } static ssize_t hwsampler_write(struct file *file, char const __user *buf, @@ -91,6 +126,9 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, if (retval) return retval; + if (val != 0 && val != 1) + return -EINVAL; + if (oprofile_started) /* * save to do without locking as we set @@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, */ return -EBUSY; - hwsampler_file = val; + hwsampler_enabled = val; return count; } @@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = { .write = hwsampler_write, }; +/* + * File ops used for: + * /dev/oprofile/0/count + * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) + * + * Make sure that the value is within the hardware range. + */ + +static ssize_t hw_interval_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, + count, offset); +} + +static ssize_t hw_interval_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + else if (val > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + else + oprofile_hw_interval = val; + + return count; +} + +static const struct file_operations hw_interval_fops = { + .read = hw_interval_read, + .write = hw_interval_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/event + * Only a single event with number 0 is supported with this counter. + * + * /dev/oprofile/0/unit_mask + * This is a dummy file needed by the user space tools. + * No value other than 0 is accepted or returned. 
+ */ + +static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(0, buf, count, offset); +} + +static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val != 0) + return -EINVAL; + return count; +} + +static const struct file_operations zero_fops = { + .read = hwsampler_zero_read, + .write = hwsampler_zero_write, +}; + +/* /dev/oprofile/0/kernel file ops. */ + +static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.kernel, + buf, count, offset); +} + +static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.kernel = val; + + return count; +} + +static const struct file_operations kernel_fops = { + .read = hwsampler_kernel_read, + .write = hwsampler_kernel_write, +}; + +/* /dev/oprofile/0/user file ops. */ + +static ssize_t hwsampler_user_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.user, + buf, count, offset); +} + +static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.user = val; + + return count; +} + +static const struct file_operations user_fops = { + .read = hwsampler_user_read, + .write = hwsampler_user_write, +}; + + +/* + * File ops used for: /dev/oprofile/timer/enabled + * The value always has to be the inverted value of hwsampler_enabled. So + * no separate variable is created. That way we do not need locking. + */ + +static ssize_t timer_enabled_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); +} + +static ssize_t timer_enabled_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + /* Timer cannot be disabled without having hardware sampling. 
*/ + if (val == 0 && !hwsampler_available) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = !val; + + return count; +} + +static const struct file_operations timer_enabled_fops = { + .read = timer_enabled_read, + .write = timer_enabled_write, +}; + + static int oprofile_create_hwsampling_files(struct super_block *sb, - struct dentry *root) + struct dentry *root) { - struct dentry *hw_dir; + struct dentry *dir; + + dir = oprofilefs_mkdir(sb, root, "timer"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops); + + if (!hwsampler_available) + return 0; /* reinitialize default values */ - hwsampler_file = 1; + hwsampler_enabled = 1; + counter_config.kernel = 1; + counter_config.user = 1; - hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); - if (!hw_dir) - return -EINVAL; + if (!force_cpu_type) { + /* + * Create the counter file system. A single virtual + * counter is created which can be used to + * enable/disable hardware sampling dynamically from + * user space. The user space will configure a single + * counter with a single event. The value of 'event' + * and 'unit_mask' are not evaluated by the kernel code + * and can only be set to 0. + */ + + dir = oprofilefs_mkdir(sb, root, "0"); + if (!dir) + return -EINVAL; - oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); - oprofilefs_create_ulong(sb, hw_dir, "hw_interval", - &oprofile_hw_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", - &oprofile_min_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", - &oprofile_max_interval); - oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); + oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops); + oprofilefs_create_file(sb, dir, "event", &zero_fops); + oprofilefs_create_file(sb, dir, "count", &hw_interval_fops); + oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops); + oprofilefs_create_file(sb, dir, "kernel", &kernel_fops); + oprofilefs_create_file(sb, dir, "user", &user_fops); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } else { + /* + * Hardware sampling can be used but the cpu_type is + * forced to timer in order to deal with legacy user + * space tools. The /dev/oprofile/hwsampling fs is + * provided in that case. + */ + dir = oprofilefs_mkdir(sb, root, "hwsampling"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "hwsampler", + &hwsampler_fops); + oprofilefs_create_file(sb, dir, "hw_interval", + &hw_interval_fops); + oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } return 0; } static int oprofile_hwsampler_init(struct oprofile_operations *ops) { + /* + * Initialize the timer mode infrastructure as well in order + * to be able to switch back dynamically. oprofile_timer_init + * is not supposed to fail. + */ + if (oprofile_timer_init(ops)) + BUG(); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + ops->create_files = oprofile_create_hwsampling_files; + + /* + * If the user space tools do not support newer cpu types, + * the force_cpu_type module parameter + * can be used to always return \"timer\" as cpu type. 
+         */
+        if (force_cpu_type != timer) {
+                struct cpuid id;
+
+                get_cpu_id(&id);
+
+                switch (id.machine) {
+                case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
+                case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+                default: return -ENODEV;
+                }
+        }
+
         if (hwsampler_setup())
                 return -ENODEV;

         /*
-         * create hwsampler files only if hwsampler_setup() succeeds.
+         * Query the range for the sampling interval from the
+         * hardware.
          */
         oprofile_min_interval = hwsampler_query_min_interval();
         if (oprofile_min_interval == 0)
@@ -155,16 +466,11 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
         if (oprofile_hw_interval > oprofile_max_interval)
                 oprofile_hw_interval = oprofile_max_interval;

-        if (oprofile_timer_init(ops))
-                return -ENODEV;
-
-        printk(KERN_INFO "oprofile: using hardware sampling\n");
-
-        memcpy(&timer_ops, ops, sizeof(timer_ops));
+        printk(KERN_INFO "oprofile: System z hardware sampling "
+               "facility found.\n");

         ops->start = oprofile_hwsampler_start;
         ops->stop = oprofile_hwsampler_stop;
-        ops->create_files = oprofile_create_hwsampling_files;

         return 0;
 }
@@ -181,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
         ops->backtrace = s390_backtrace;

 #ifdef CONFIG_64BIT
-        return oprofile_hwsampler_init(ops);
+
+        /*
+         * -ENODEV is not reported to the caller. The module itself
+         * falls back to timer mode sampling, which is always
+         * available.
+         */
+        hwsampler_available = oprofile_hwsampler_init(ops) == 0;
+
+        return 0;
 #else
         return -ENODEV;
 #endif
diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h
new file mode 100644
index 000000000000..1a8d3ca09014
--- /dev/null
+++ b/arch/s390/oprofile/op_counter.h
@@ -0,0 +1,23 @@
+/**
+ * arch/s390/oprofile/op_counter.h
+ *
+ * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
+ *
+ * @remark Copyright 2011 OProfile authors
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+struct op_counter_config {
+        /* `enabled' maps to the hwsampler_file variable. */
+        /* `count' maps to the oprofile_hw_interval variable. */
+        /* `event' and `unit_mask' are unused. */
+        unsigned long kernel;
+        unsigned long user;
+};
+
+extern struct op_counter_config counter_config;
+
+#endif /* OP_COUNTER_H */
--
cgit v1.2.3

From cd09c0c40a971549800ce6a7e53c63f5139dd175 Mon Sep 17 00:00:00 2001
From: Stephane Eranian
Date: Sun, 11 Dec 2011 00:28:51 +0100
Subject: perf events: Enable raw event support for Intel unhalted_reference_cycles event

This patch adds the encoding and definitions necessary for the
unhalted_reference_cycles event, available since Intel Core 2 processors.
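For illustration only (not part of this patch): user space can request the
new raw encoding through the perf_event_open() syscall. The sketch below is
ours, assuming a Linux system with this series applied; the workload
placeholder and all variable names are illustrative, not from the patch.

        #include <linux/perf_event.h>
        #include <sys/ioctl.h>
        #include <sys/syscall.h>
        #include <unistd.h>
        #include <string.h>
        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                struct perf_event_attr attr;
                uint64_t count;
                int fd;

                memset(&attr, 0, sizeof(attr));
                attr.type     = PERF_TYPE_RAW;
                attr.size     = sizeof(attr);
                attr.config   = 0x0300;  /* pseudo-encoding for
                                            unhalted_reference_cycles */
                attr.disabled = 1;

                /* count for the calling thread, on any CPU */
                fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
                if (fd < 0)
                        return 1;

                ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
                /* ... workload to be measured ... */
                ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

                if (read(fd, &count, sizeof(count)) == sizeof(count))
                        printf("unhalted reference cycles: %llu\n",
                               (unsigned long long)count);

                close(fd);
                return 0;
        }

The FIXED_EVENT_CONSTRAINT(0x0300, 2) entries added below ensure that such
an event is scheduled only on fixed counter 2.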
Signed-off-by: Stephane Eranian
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/1323559734-3488-2-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/perf_event.h      | 15 ++++++++-------
 arch/x86/kernel/cpu/perf_event.c       |  8 +++++++-
 arch/x86/kernel/cpu/perf_event_intel.c | 15 +++++----------
 3 files changed, 20 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index b50e9d15aae0..096c975e099f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -112,23 +112,24 @@ struct x86_pmu_capability {
 /*
  * All 3 fixed-mode PMCs are configured via this single MSR:
  */
-#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
+#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL         0x38d

 /*
  * The counts are available in three separate MSRs:
  */

 /* Instr_Retired.Any: */
-#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
-#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
+#define MSR_ARCH_PERFMON_FIXED_CTR0             0x309
+#define X86_PMC_IDX_FIXED_INSTRUCTIONS          (X86_PMC_IDX_FIXED + 0)

 /* CPU_CLK_Unhalted.Core: */
-#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
-#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
+#define MSR_ARCH_PERFMON_FIXED_CTR1             0x30a
+#define X86_PMC_IDX_FIXED_CPU_CYCLES            (X86_PMC_IDX_FIXED + 1)

 /* CPU_CLK_Unhalted.Ref: */
-#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
-#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
+#define MSR_ARCH_PERFMON_FIXED_CTR2             0x30b
+#define X86_PMC_IDX_FIXED_REF_CYCLES            (X86_PMC_IDX_FIXED + 2)
+#define X86_PMC_MSK_FIXED_REF_CYCLES            (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)

 /*
  * We model BTS tracing as another fixed-mode PMC.
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 930fe4879542..5adce1040b11 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1304,9 +1304,15 @@ static int __init init_hw_perf_events(void)
                            0, x86_pmu.num_counters, 0);

         if (x86_pmu.event_constraints) {
+                /*
+                 * event on fixed counter2 (REF_CYCLES) only works on this
+                 * counter, so do not extend mask to generic counters
+                 */
                 for_each_event_constraint(c, x86_pmu.event_constraints) {
-                        if (c->cmask != X86_RAW_EVENT_MASK)
+                        if (c->cmask != X86_RAW_EVENT_MASK
+                            || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
                                 continue;
+                        }

                         c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
                         c->weight += x86_pmu.num_counters;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2c3bf53d0302..61f865f947b3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -45,12 +45,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
 {
         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-        /*
-         * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
-         * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
-         * ratio between these counters.
-         */
-        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
         INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -68,7 +63,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 {
         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
         INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
         INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
         INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -90,7 +85,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
 {
         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
         INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -102,7 +97,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 {
         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
         INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
         INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
         INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -125,7 +120,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
         EVENT_CONSTRAINT_END
 };
--
cgit v1.2.3

From 9c1497ea591b25d491f8e795f90a1405100b75ef Mon Sep 17 00:00:00 2001
From: Stephane Eranian
Date: Sun, 11 Dec 2011 00:28:53 +0100
Subject: perf events: Add Intel x86 mapping for PERF_COUNT_HW_REF_CPU_CYCLES

Add event maps for Intel x86 processors (with architected PMU v2 or later).

On AMD, there is frequency scaling but no Turbo. Since there is no core
cycle event that is not subject to frequency scaling, we do not provide
a mapping.
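For illustration (not part of this patch): with the mapping in place, tools
can request reference cycles via the generic hardware event instead of the
model-specific raw encoding. A minimal sketch, under the same assumptions as
the earlier example; the helper name is ours:

        #include <linux/perf_event.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        int open_ref_cycles(void)
        {
                struct perf_event_attr attr = {
                        .type   = PERF_TYPE_HARDWARE,
                        .size   = sizeof(attr),
                        .config = PERF_COUNT_HW_REF_CPU_CYCLES,
                };

                /* mapped by the kernel to the 0x0300 pseudo-encoding
                   on Intel, per the event map change below */
                return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        }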
Signed-off-by: Stephane Eranian
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/1323559734-3488-4-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event_intel.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 61f865f947b3..cbfaaa2475ea 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c4,
         [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c5,
         [PERF_COUNT_HW_BUS_CYCLES]              = 0x013c,
+        [PERF_COUNT_HW_REF_CPU_CYCLES]          = 0x0300, /* pseudo-encoding */
 };

 static struct event_constraint intel_core_event_constraints[] __read_mostly =
--
cgit v1.2.3