diff options
author | Ingo Molnar | 2019-11-29 06:56:05 +0100 |
---|---|---|
committer | Ingo Molnar | 2019-11-29 06:56:05 +0100 |
commit | e680a41fcaf07ccac8817c589fc4824988b48eac (patch) | |
tree | 8993cdfbd30e3d95162c53bc6af4e410360f99a8 /tools | |
parent | 405b45376de90b3027aaf8c4de035c6bb721fa7e (diff) | |
parent | 5172672da02e483d9b3c4d814c3482d0c8ffb1a6 (diff) |
Merge tag 'perf-core-for-mingo-5.5-20191128' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
perf script:
Adrian Hunter:
- Fix brstackinsn for AUXTRACE.
- Fix invalid LBR/binary mismatch error.
perf diff:
Arnaldo Carvalho de Melo:
- Use llabs() with 64-bit values, fixing the build in some 32-bit
architectures.
perf pmu:
Andi Kleen:
- Use file system cache to optimize sysfs access.
x86:
Adrian Hunter:
- Add some more Intel instructions to the opcode map and to the perf
test entry:
gf2p8affineinvqb, gf2p8affineqb, gf2p8mulb, v4fmaddps,
v4fmaddss, v4fnmaddps, v4fnmaddss, vaesdec, vaesdeclast, vaesenc,
vaesenclast, vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps,
vgf2p8affineinvqb, vgf2p8affineqb, vgf2p8mulb, vp2intersectd,
vp2intersectq, vp4dpwssd, vp4dpwssds, vpclmulqdq, vpcompressb,
vpcompressw, vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds, vpexpandb,
vpexpandw, vpopcntb, vpopcntd, vpopcntq, vpopcntw, vpshldd, vpshldq,
vpshldvd, vpshldvq, vpshldvw, vpshldw, vpshrdd, vpshrdq, vpshrdvd,
vpshrdvq, vpshrdvw, vpshrdw, vpshufbitqmb.
perf affinity:
Andi Kleen:
- Add infrastructure to save/restore affinity
perf maps:
Arnaldo Carvalho de Melo:
- Merge 'struct maps' with 'struct map_groups', as there is a
1x1 relationship, simplifying code overal.
perf build:
Jiri Olsa:
- Allow to link with libbpf dynamicaly.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
65 files changed, 2198 insertions, 606 deletions
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index 0a0e9112f284..8908c58bd6cd 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -695,16 +695,28 @@ AVXcode: 2 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) 4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) -# Skip 0x50-0x57 +50: vpdpbusd Vx,Hx,Wx (66),(ev) +51: vpdpbusds Vx,Hx,Wx (66),(ev) +52: vdpbf16ps Vx,Hx,Wx (F3),(ev) | vpdpwssd Vx,Hx,Wx (66),(ev) | vp4dpwssd Vdqq,Hdqq,Wdq (F2),(ev) +53: vpdpwssds Vx,Hx,Wx (66),(ev) | vp4dpwssds Vdqq,Hdqq,Wdq (F2),(ev) +54: vpopcntb/w Vx,Wx (66),(ev) +55: vpopcntd/q Vx,Wx (66),(ev) 58: vpbroadcastd Vx,Wx (66),(v) 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) -# Skip 0x5c-0x63 +# Skip 0x5c-0x61 +62: vpexpandb/w Vx,Wx (66),(ev) +63: vpcompressb/w Wx,Vx (66),(ev) 64: vpblendmd/q Vx,Hx,Wx (66),(ev) 65: vblendmps/d Vx,Hx,Wx (66),(ev) 66: vpblendmb/w Vx,Hx,Wx (66),(ev) -# Skip 0x67-0x74 +68: vp2intersectd/q Kx,Hx,Wx (F2),(ev) +# Skip 0x69-0x6f +70: vpshldvw Vx,Hx,Wx (66),(ev) +71: vpshldvd/q Vx,Hx,Wx (66),(ev) +72: vcvtne2ps2bf16 Vx,Hx,Wx (F2),(ev) | vcvtneps2bf16 Vx,Wx (F3),(ev) | vpshrdvw Vx,Hx,Wx (66),(ev) +73: vpshrdvd/q Vx,Hx,Wx (66),(ev) 75: vpermi2b/w Vx,Hx,Wx (66),(ev) 76: vpermi2d/q Vx,Hx,Wx (66),(ev) 77: vpermi2ps/d Vx,Hx,Wx (66),(ev) @@ -727,6 +739,7 @@ AVXcode: 2 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) 8d: vpermb/w Vx,Hx,Wx (66),(ev) 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) +8f: vpshufbitqmb Kx,Hx,Wx (66),(ev) # 0x0f 0x38 0x90-0xbf (FMA) 90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) 91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) @@ -738,8 +751,8 @@ AVXcode: 2 97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) 98: vfmadd132ps/d Vx,Hx,Wx (66),(v) 99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) -9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) -9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) | v4fmaddps Vdqq,Hdqq,Wdq (F2),(ev) +9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) | v4fmaddss Vdq,Hdq,Wdq (F2),(ev) 9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) @@ -752,8 +765,8 @@ a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) -aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) -ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) | v4fnmaddps Vdqq,Hdqq,Wdq (F2),(ev) +ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) | v4fnmaddss Vdq,Hdq,Wdq (F2),(ev) ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) @@ -780,11 +793,12 @@ ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) +cf: vgf2p8mulb Vx,Wx (66) db: VAESIMC Vdq,Wdq (66),(v1) -dc: VAESENC Vdq,Hdq,Wdq (66),(v1) -dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) -de: VAESDEC Vdq,Hdq,Wdq (66),(v1) -df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) +dc: vaesenc Vx,Hx,Wx (66) +dd: vaesenclast Vx,Hx,Wx (66) +de: vaesdec Vx,Hx,Wx (66) +df: vaesdeclast Vx,Hx,Wx (66) f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) f2: ANDN Gy,By,Ey (v) @@ -848,7 +862,7 @@ AVXcode: 3 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) 42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) 43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) -44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) +44: vpclmulqdq Vx,Hx,Wx,Ib (66) 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) @@ -865,7 +879,13 @@ AVXcode: 3 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) 66: vfpclassps/d Vk,Wx,Ib (66),(ev) 67: vfpclassss/d Vk,Wx,Ib (66),(ev) +70: vpshldw Vx,Hx,Wx,Ib (66),(ev) +71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev) +72: vpshrdw Vx,Hx,Wx,Ib (66),(ev) +73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev) cc: sha1rnds4 Vdq,Wdq,Ib +ce: vgf2p8affineqb Vx,Wx,Ib (66) +cf: vgf2p8affineinvqb Vx,Wx,Ib (66) df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) EndTable diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 8a19753cc26a..574c2e0b9d20 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -96,7 +96,8 @@ FEATURE_TESTS_EXTRA := \ cxx \ llvm \ llvm-version \ - clang + clang \ + libbpf FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 8499385365c0..f30a89046aa3 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -53,6 +53,7 @@ FILES= \ test-zlib.bin \ test-lzma.bin \ test-bpf.bin \ + test-libbpf.bin \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ @@ -270,6 +271,9 @@ $(OUTPUT)test-get_cpuid.bin: $(OUTPUT)test-bpf.bin: $(BUILD) +$(OUTPUT)test-libbpf.bin: + $(BUILD) -lbpf + $(OUTPUT)test-sdt.bin: $(BUILD) diff --git a/tools/build/feature/test-libbpf.c b/tools/build/feature/test-libbpf.c new file mode 100644 index 000000000000..a508756cf4cc --- /dev/null +++ b/tools/build/feature/test-libbpf.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/libbpf.h> + +int main(void) +{ + return bpf_object__open("test") ? 0 : -1; +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 1783427da9b0..c90f4146e5a2 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -483,6 +483,16 @@ ifndef NO_LIBELF ifeq ($(feature-bpf), 1) CFLAGS += -DHAVE_LIBBPF_SUPPORT $(call detected,CONFIG_LIBBPF) + + # detecting libbpf without LIBBPF_DYNAMIC, so make VF=1 shows libbpf detection status + $(call feature_check,libbpf) + ifdef LIBBPF_DYNAMIC + ifeq ($(feature-libbpf), 1) + EXTLIBS += -lbpf + else + dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); + endif + endif endif ifndef NO_DWARF diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1cd294468a1f..eae5d5e95952 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -116,6 +116,8 @@ include ../scripts/utilities.mak # # Define TCMALLOC to enable tcmalloc heap profiling. # +# Define LIBBPF_DYNAMIC to enable libbpf dynamic linking. +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -360,7 +362,9 @@ export PERL_PATH PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) $(LIBPERF) ifndef NO_LIBBPF - PERFLIBS += $(LIBBPF) + ifndef LIBBPF_DYNAMIC + PERFLIBS += $(LIBBPF) + endif endif # We choose to avoid "if .. else if .. else .. endif endif" diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c index 2c35e532bc9a..ccfa87055c4a 100644 --- a/tools/perf/arch/arm/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm/tests/dwarf-unwind.c @@ -3,7 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" -#include "map_groups.h" +#include "maps.h" #include "event.h" #include "debug.h" #include "tests/tests.h" @@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_ARM_SP]; - map = map_groups__find(thread->mg, (u64)sp); + map = maps__find(thread->maps, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c index a6a407fa1b8b..46147a483049 100644 --- a/tools/perf/arch/arm64/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c @@ -3,7 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" -#include "map_groups.h" +#include "maps.h" #include "event.h" #include "debug.h" #include "tests/tests.h" @@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_ARM64_SP]; - map = map_groups__find(thread->mg, (u64)sp); + map = maps__find(thread->maps, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c index 5c178e4a1995..8efd9ed9e9db 100644 --- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c +++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c @@ -3,7 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" -#include "map_groups.h" +#include "maps.h" #include "event.h" #include "debug.h" #include "tests/tests.h" @@ -27,7 +27,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_POWERPC_R1]; - map = map_groups__find(thread->mg, (u64)sp); + map = maps__find(thread->maps, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index 2a6662e42f89..0e136630659e 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -38,7 +38,7 @@ static int s390_call__parse(struct arch *arch, struct ins_operands *ops, return -1; target.addr = map__objdump_2mem(map, ops->target.addr); - if (map_groups__find_ams(ms->mg, &target) == 0 && + if (maps__find_ams(ms->maps, &target) == 0 && map__rip_2objdump(target.ms.map, map->map_ip(target.ms.map, target.addr)) == ops->target.addr) ops->target.sym = target.ms.sym; diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index 6ad0a1cedb13..ef43be9b6ec2 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -3,7 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" -#include "map_groups.h" +#include "maps.h" #include "event.h" #include "debug.h" #include "tests/tests.h" @@ -27,7 +27,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_X86_SP]; - map = map_groups__find(thread->mg, (u64)sp); + map = maps__find(thread->maps, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index 58f8f2a095c4..e6461abc9e7b 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -667,6 +667,86 @@ "62 f2 55 0f 4f f4 \tvrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}",}, {{0x62, 0xf2, 0xd5, 0x0f, 0x4f, 0xf4, }, 6, 0, "", "", "62 f2 d5 0f 4f f4 \tvrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x50, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 50 d9 \tvpdpbusd %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x50, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 50 d9 \tvpdpbusd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x50, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 50 d9 \tvpdpbusd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x50, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 50 9c c8 78 56 34 12 \tvpdpbusd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x51, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 51 d9 \tvpdpbusds %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x51, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 51 d9 \tvpdpbusds %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x51, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 51 d9 \tvpdpbusds %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x51, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 51 9c c8 78 56 34 12 \tvpdpbusds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6e, 0x08, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6e 08 52 d9 \tvdpbf16ps %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6e, 0x28, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6e 28 52 d9 \tvdpbf16ps %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6e, 0x48, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6e 48 52 d9 \tvdpbf16ps %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6e, 0x48, 0x52, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6e 48 52 9c c8 78 56 34 12 \tvdpbf16ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 52 d9 \tvpdpwssd %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 52 d9 \tvpdpwssd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 52 d9 \tvpdpwssd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x52, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 52 9c c8 78 56 34 12 \tvpdpwssd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x52, 0x20, }, 6, 0, "", "", +"62 f2 7f 48 52 20 \tvp4dpwssd (%eax),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x52, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 48 52 a4 c8 78 56 34 12 \tvp4dpwssd 0x12345678(%eax,%ecx,8),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x53, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 53 d9 \tvpdpwssds %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x53, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 53 d9 \tvpdpwssds %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x53, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 53 d9 \tvpdpwssds %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x53, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 53 9c c8 78 56 34 12 \tvpdpwssds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x53, 0x20, }, 6, 0, "", "", +"62 f2 7f 48 53 20 \tvp4dpwssds (%eax),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x53, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 48 53 a4 c8 78 56 34 12 \tvp4dpwssds 0x12345678(%eax,%ecx,8),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 7d 08 54 d1 \tvpopcntb %xmm1,%xmm2",}, +{{0x62, 0xf2, 0x7d, 0x28, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 7d 28 54 d1 \tvpopcntb %ymm1,%ymm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 7d 48 54 d1 \tvpopcntb %zmm1,%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x54, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 54 94 c8 78 56 34 12 \tvpopcntb 0x12345678(%eax,%ecx,8),%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x08, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 fd 08 54 d1 \tvpopcntw %xmm1,%xmm2",}, +{{0x62, 0xf2, 0xfd, 0x28, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 fd 28 54 d1 \tvpopcntw %ymm1,%ymm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 fd 48 54 d1 \tvpopcntw %zmm1,%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x54, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 fd 48 54 94 c8 78 56 34 12 \tvpopcntw 0x12345678(%eax,%ecx,8),%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 7d 08 55 d1 \tvpopcntd %xmm1,%xmm2",}, +{{0x62, 0xf2, 0x7d, 0x28, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 7d 28 55 d1 \tvpopcntd %ymm1,%ymm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 7d 48 55 d1 \tvpopcntd %zmm1,%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x55, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 55 94 c8 78 56 34 12 \tvpopcntd 0x12345678(%eax,%ecx,8),%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x08, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 fd 08 55 d1 \tvpopcntq %xmm1,%xmm2",}, +{{0x62, 0xf2, 0xfd, 0x28, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 fd 28 55 d1 \tvpopcntq %ymm1,%ymm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 fd 48 55 d1 \tvpopcntq %zmm1,%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x55, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 fd 48 55 94 c8 78 56 34 12 \tvpopcntq 0x12345678(%eax,%ecx,8),%zmm2",}, {{0xc4, 0xe2, 0x79, 0x59, 0xf4, }, 5, 0, "", "", "c4 e2 79 59 f4 \tvpbroadcastq %xmm4,%xmm6",}, {{0x62, 0xf2, 0x7d, 0x48, 0x59, 0xf7, }, 6, 0, "", "", @@ -681,6 +761,38 @@ "62 f2 7d 48 5b 31 \tvbroadcasti32x8 (%ecx),%zmm6",}, {{0x62, 0xf2, 0xfd, 0x48, 0x5b, 0x31, }, 6, 0, "", "", "62 f2 fd 48 5b 31 \tvbroadcasti64x4 (%ecx),%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 7d 08 62 d1 \tvpexpandb %xmm1,%xmm2",}, +{{0x62, 0xf2, 0x7d, 0x28, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 7d 28 62 d1 \tvpexpandb %ymm1,%ymm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 7d 48 62 d1 \tvpexpandb %zmm1,%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 62 94 c8 78 56 34 12 \tvpexpandb 0x12345678(%eax,%ecx,8),%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x08, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 fd 08 62 d1 \tvpexpandw %xmm1,%xmm2",}, +{{0x62, 0xf2, 0xfd, 0x28, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 fd 28 62 d1 \tvpexpandw %ymm1,%ymm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 fd 48 62 d1 \tvpexpandw %zmm1,%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 fd 48 62 94 c8 78 56 34 12 \tvpexpandw 0x12345678(%eax,%ecx,8),%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 7d 08 63 ca \tvpcompressb %xmm1,%xmm2",}, +{{0x62, 0xf2, 0x7d, 0x28, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 7d 28 63 ca \tvpcompressb %ymm1,%ymm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 7d 48 63 ca \tvpcompressb %zmm1,%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x63, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 63 94 c8 78 56 34 12 \tvpcompressb %zmm2,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf2, 0xfd, 0x08, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 fd 08 63 ca \tvpcompressw %xmm1,%xmm2",}, +{{0x62, 0xf2, 0xfd, 0x28, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 fd 28 63 ca \tvpcompressw %ymm1,%ymm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 fd 48 63 ca \tvpcompressw %zmm1,%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x63, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 fd 48 63 94 c8 78 56 34 12 \tvpcompressw %zmm2,0x12345678(%eax,%ecx,8)",}, {{0x62, 0xf2, 0x55, 0x48, 0x64, 0xf4, }, 6, 0, "", "", "62 f2 55 48 64 f4 \tvpblendmd %zmm4,%zmm5,%zmm6",}, {{0x62, 0xf2, 0xd5, 0x48, 0x64, 0xf4, }, 6, 0, "", "", @@ -693,6 +805,86 @@ "62 f2 55 48 66 f4 \tvpblendmb %zmm4,%zmm5,%zmm6",}, {{0x62, 0xf2, 0xd5, 0x48, 0x66, 0xf4, }, 6, 0, "", "", "62 f2 d5 48 66 f4 \tvpblendmw %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x6f, 0x08, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 6f 08 68 d9 \tvp2intersectd %xmm1,%xmm2,%k3",}, +{{0x62, 0xf2, 0x6f, 0x28, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 6f 28 68 d9 \tvp2intersectd %ymm1,%ymm2,%k3",}, +{{0x62, 0xf2, 0x6f, 0x48, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 6f 48 68 d9 \tvp2intersectd %zmm1,%zmm2,%k3",}, +{{0x62, 0xf2, 0x6f, 0x48, 0x68, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6f 48 68 9c c8 78 56 34 12 \tvp2intersectd 0x12345678(%eax,%ecx,8),%zmm2,%k3",}, +{{0x62, 0xf2, 0xef, 0x08, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 ef 08 68 d9 \tvp2intersectq %xmm1,%xmm2,%k3",}, +{{0x62, 0xf2, 0xef, 0x28, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 ef 28 68 d9 \tvp2intersectq %ymm1,%ymm2,%k3",}, +{{0x62, 0xf2, 0xef, 0x48, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 ef 48 68 d9 \tvp2intersectq %zmm1,%zmm2,%k3",}, +{{0x62, 0xf2, 0xef, 0x48, 0x68, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ef 48 68 9c c8 78 56 34 12 \tvp2intersectq 0x12345678(%eax,%ecx,8),%zmm2,%k3",}, +{{0x62, 0xf2, 0xed, 0x08, 0x70, 0xd9, }, 6, 0, "", "", +"62 f2 ed 08 70 d9 \tvpshldvw %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0xed, 0x28, 0x70, 0xd9, }, 6, 0, "", "", +"62 f2 ed 28 70 d9 \tvpshldvw %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x70, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 70 d9 \tvpshldvw %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x70, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 70 9c c8 78 56 34 12 \tvpshldvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 71 d9 \tvpshldvd %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 71 d9 \tvpshldvd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 71 d9 \tvpshldvd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x71, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 71 9c c8 78 56 34 12 \tvpshldvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x08, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 ed 08 71 d9 \tvpshldvq %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0xed, 0x28, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 ed 28 71 d9 \tvpshldvq %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 71 d9 \tvpshldvq %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x71, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 71 9c c8 78 56 34 12 \tvpshldvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6f, 0x08, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 6f 08 72 d9 \tvcvtne2ps2bf16 %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6f, 0x28, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 6f 28 72 d9 \tvcvtne2ps2bf16 %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6f, 0x48, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 6f 48 72 d9 \tvcvtne2ps2bf16 %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6f, 0x48, 0x72, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6f 48 72 9c c8 78 56 34 12 \tvcvtne2ps2bf16 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x7e, 0x08, 0x72, 0xd1, }, 6, 0, "", "", +"62 f2 7e 08 72 d1 \tvcvtneps2bf16 %xmm1,%xmm2",}, +{{0x62, 0xf2, 0x7e, 0x28, 0x72, 0xd1, }, 6, 0, "", "", +"62 f2 7e 28 72 d1 \tvcvtneps2bf16 %ymm1,%xmm2",}, +{{0x62, 0xf2, 0x7e, 0x48, 0x72, 0xd1, }, 6, 0, "", "", +"62 f2 7e 48 72 d1 \tvcvtneps2bf16 %zmm1,%ymm2",}, +{{0x62, 0xf2, 0x7e, 0x48, 0x72, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7e 48 72 94 c8 78 56 34 12 \tvcvtneps2bf16 0x12345678(%eax,%ecx,8),%ymm2",}, +{{0x62, 0xf2, 0xed, 0x08, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 ed 08 72 d9 \tvpshrdvw %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0xed, 0x28, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 ed 28 72 d9 \tvpshrdvw %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 72 d9 \tvpshrdvw %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x72, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 72 9c c8 78 56 34 12 \tvpshrdvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 73 d9 \tvpshrdvd %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 73 d9 \tvpshrdvd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 73 d9 \tvpshrdvd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x73, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 73 9c c8 78 56 34 12 \tvpshrdvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x08, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 ed 08 73 d9 \tvpshrdvq %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0xed, 0x28, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 ed 28 73 d9 \tvpshrdvq %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 73 d9 \tvpshrdvq %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x73, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 73 9c c8 78 56 34 12 \tvpshrdvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, {{0x62, 0xf2, 0x55, 0x48, 0x75, 0xf4, }, 6, 0, "", "", "62 f2 55 48 75 f4 \tvpermi2b %zmm4,%zmm5,%zmm6",}, {{0x62, 0xf2, 0xd5, 0x48, 0x75, 0xf4, }, 6, 0, "", "", @@ -745,6 +937,14 @@ "62 f2 55 48 8d f4 \tvpermb %zmm4,%zmm5,%zmm6",}, {{0x62, 0xf2, 0xd5, 0x48, 0x8d, 0xf4, }, 6, 0, "", "", "62 f2 d5 48 8d f4 \tvpermw %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x8f, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 8f d9 \tvpshufbitqmb %xmm1,%xmm2,%k3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x8f, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 8f d9 \tvpshufbitqmb %ymm1,%ymm2,%k3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x8f, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 8f d9 \tvpshufbitqmb %zmm1,%zmm2,%k3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x8f, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 8f 9c c8 78 56 34 12 \tvpshufbitqmb 0x12345678(%eax,%ecx,8),%zmm2,%k3",}, {{0xc4, 0xe2, 0x69, 0x90, 0x4c, 0x7d, 0x02, }, 7, 0, "", "", "c4 e2 69 90 4c 7d 02 \tvpgatherdd %xmm2,0x2(%ebp,%xmm7,2),%xmm1",}, {{0xc4, 0xe2, 0xe9, 0x90, 0x4c, 0x7d, 0x04, }, 7, 0, "", "", @@ -761,6 +961,38 @@ "62 f2 7d 49 91 b4 fd 7b 00 00 00 \tvpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}",}, {{0x62, 0xf2, 0xfd, 0x49, 0x91, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", "62 f2 fd 49 91 b4 fd 7b 00 00 00 \tvpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}",}, +{{0xc4, 0xe2, 0x69, 0x9a, 0xd9, }, 5, 0, "", "", +"c4 e2 69 9a d9 \tvfmsub132ps %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0x9a, 0xd9, }, 5, 0, "", "", +"c4 e2 6d 9a d9 \tvfmsub132ps %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x9a, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 9a d9 \tvfmsub132ps %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x9a, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 9a 9c c8 78 56 34 12 \tvfmsub132ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0xe9, 0x9a, 0xd9, }, 5, 0, "", "", +"c4 e2 e9 9a d9 \tvfmsub132pd %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xed, 0x9a, 0xd9, }, 5, 0, "", "", +"c4 e2 ed 9a d9 \tvfmsub132pd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x9a, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 9a d9 \tvfmsub132pd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x9a, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 9a 9c c8 78 56 34 12 \tvfmsub132pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x9a, 0x20, }, 6, 0, "", "", +"62 f2 7f 48 9a 20 \tv4fmaddps (%eax),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x9a, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 48 9a a4 c8 78 56 34 12 \tv4fmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4",}, +{{0xc4, 0xe2, 0x69, 0x9b, 0xd9, }, 5, 0, "", "", +"c4 e2 69 9b d9 \tvfmsub132ss %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x69, 0x9b, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 69 9b 9c c8 78 56 34 12 \tvfmsub132ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xe9, 0x9b, 0xd9, }, 5, 0, "", "", +"c4 e2 e9 9b d9 \tvfmsub132sd %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xe9, 0x9b, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 e9 9b 9c c8 78 56 34 12 \tvfmsub132sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x7f, 0x08, 0x9b, 0x20, }, 6, 0, "", "", +"62 f2 7f 08 9b 20 \tv4fmaddss (%eax),%xmm0,%xmm4",}, +{{0x62, 0xf2, 0x7f, 0x08, 0x9b, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 08 9b a4 c8 78 56 34 12 \tv4fmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4",}, {{0x62, 0xf2, 0x7d, 0x49, 0xa0, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", "62 f2 7d 49 a0 b4 fd 7b 00 00 00 \tvpscatterdd %zmm6,0x7b(%ebp,%zmm7,8){%k1}",}, {{0x62, 0xf2, 0xfd, 0x49, 0xa0, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", @@ -777,6 +1009,38 @@ "62 f2 7d 49 a3 b4 fd 7b 00 00 00 \tvscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}",}, {{0x62, 0xf2, 0xfd, 0x49, 0xa3, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", "62 f2 fd 49 a3 b4 fd 7b 00 00 00 \tvscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}",}, +{{0xc4, 0xe2, 0x69, 0xaa, 0xd9, }, 5, 0, "", "", +"c4 e2 69 aa d9 \tvfmsub213ps %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xaa, 0xd9, }, 5, 0, "", "", +"c4 e2 6d aa d9 \tvfmsub213ps %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xaa, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 aa d9 \tvfmsub213ps %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xaa, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 aa 9c c8 78 56 34 12 \tvfmsub213ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0xe9, 0xaa, 0xd9, }, 5, 0, "", "", +"c4 e2 e9 aa d9 \tvfmsub213pd %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xed, 0xaa, 0xd9, }, 5, 0, "", "", +"c4 e2 ed aa d9 \tvfmsub213pd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0xaa, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 aa d9 \tvfmsub213pd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0xaa, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 aa 9c c8 78 56 34 12 \tvfmsub213pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x7f, 0x48, 0xaa, 0x20, }, 6, 0, "", "", +"62 f2 7f 48 aa 20 \tv4fnmaddps (%eax),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x7f, 0x48, 0xaa, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 48 aa a4 c8 78 56 34 12 \tv4fnmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4",}, +{{0xc4, 0xe2, 0x69, 0xab, 0xd9, }, 5, 0, "", "", +"c4 e2 69 ab d9 \tvfmsub213ss %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x69, 0xab, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 69 ab 9c c8 78 56 34 12 \tvfmsub213ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xe9, 0xab, 0xd9, }, 5, 0, "", "", +"c4 e2 e9 ab d9 \tvfmsub213sd %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xe9, 0xab, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 e9 ab 9c c8 78 56 34 12 \tvfmsub213sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x7f, 0x08, 0xab, 0x20, }, 6, 0, "", "", +"62 f2 7f 08 ab 20 \tv4fnmaddss (%eax),%xmm0,%xmm4",}, +{{0x62, 0xf2, 0x7f, 0x08, 0xab, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 08 ab a4 c8 78 56 34 12 \tv4fnmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4",}, {{0x62, 0xf2, 0xd5, 0x48, 0xb4, 0xf4, }, 6, 0, "", "", "62 f2 d5 48 b4 f4 \tvpmadd52luq %zmm4,%zmm5,%zmm6",}, {{0x62, 0xf2, 0xd5, 0x48, 0xb5, 0xf4, }, 6, 0, "", "", @@ -805,6 +1069,50 @@ "62 f2 4d 0f cd fd \tvrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}",}, {{0x62, 0xf2, 0xcd, 0x0f, 0xcd, 0xfd, }, 6, 0, "", "", "62 f2 cd 0f cd fd \tvrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}",}, +{{0x66, 0x0f, 0x38, 0xcf, 0xd9, }, 5, 0, "", "", +"66 0f 38 cf d9 \tgf2p8mulb %xmm1,%xmm3",}, +{{0x66, 0x0f, 0x38, 0xcf, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 cf 9c c8 78 56 34 12 \tgf2p8mulb 0x12345678(%eax,%ecx,8),%xmm3",}, +{{0xc4, 0xe2, 0x69, 0xcf, 0xd9, }, 5, 0, "", "", +"c4 e2 69 cf d9 \tvgf2p8mulb %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xcf, 0xd9, }, 5, 0, "", "", +"c4 e2 6d cf d9 \tvgf2p8mulb %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xcf, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 cf d9 \tvgf2p8mulb %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xcf, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 cf 9c c8 78 56 34 12 \tvgf2p8mulb 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0x69, 0xdc, 0xd9, }, 5, 0, "", "", +"c4 e2 69 dc d9 \tvaesenc %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xdc, 0xd9, }, 5, 0, "", "", +"c4 e2 6d dc d9 \tvaesenc %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdc, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 dc d9 \tvaesenc %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdc, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 dc 9c c8 78 56 34 12 \tvaesenc 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0x69, 0xdd, 0xd9, }, 5, 0, "", "", +"c4 e2 69 dd d9 \tvaesenclast %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xdd, 0xd9, }, 5, 0, "", "", +"c4 e2 6d dd d9 \tvaesenclast %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdd, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 dd d9 \tvaesenclast %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdd, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 dd 9c c8 78 56 34 12 \tvaesenclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0x69, 0xde, 0xd9, }, 5, 0, "", "", +"c4 e2 69 de d9 \tvaesdec %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xde, 0xd9, }, 5, 0, "", "", +"c4 e2 6d de d9 \tvaesdec %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xde, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 de d9 \tvaesdec %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xde, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 de 9c c8 78 56 34 12 \tvaesdec 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0x69, 0xdf, 0xd9, }, 5, 0, "", "", +"c4 e2 69 df d9 \tvaesdeclast %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xdf, 0xd9, }, 5, 0, "", "", +"c4 e2 6d df d9 \tvaesdeclast %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdf, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 df d9 \tvaesdeclast %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdf, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 df 9c c8 78 56 34 12 \tvaesdeclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, {{0x62, 0xf3, 0x4d, 0x48, 0x03, 0xfd, 0x12, }, 7, 0, "", "", "62 f3 4d 48 03 fd 12 \tvalignd $0x12,%zmm5,%zmm6,%zmm7",}, {{0x62, 0xf3, 0xcd, 0x48, 0x03, 0xfd, 0x12, }, 7, 0, "", "", @@ -905,6 +1213,12 @@ "62 f3 4d 48 43 fd 12 \tvshufi32x4 $0x12,%zmm5,%zmm6,%zmm7",}, {{0x62, 0xf3, 0xcd, 0x48, 0x43, 0xfd, 0x12, }, 7, 0, "", "", "62 f3 cd 48 43 fd 12 \tvshufi64x2 $0x12,%zmm5,%zmm6,%zmm7",}, +{{0xc4, 0xe3, 0x69, 0x44, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 69 44 d9 12 \tvpclmulqdq $0x12,%xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe3, 0x6d, 0x44, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 6d 44 d9 12 \tvpclmulqdq $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0x6d, 0x48, 0x44, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 48 44 d9 12 \tvpclmulqdq $0x12,%zmm1,%zmm2,%zmm3",}, {{0x62, 0xf3, 0x4d, 0x48, 0x50, 0xfd, 0x12, }, 7, 0, "", "", "62 f3 4d 48 50 fd 12 \tvrangeps $0x12,%zmm5,%zmm6,%zmm7",}, {{0x62, 0xf3, 0xcd, 0x48, 0x50, 0xfd, 0x12, }, 7, 0, "", "", @@ -937,6 +1251,58 @@ "62 f3 7d 08 67 ef 12 \tvfpclassss $0x12,%xmm7,%k5",}, {{0x62, 0xf3, 0xfd, 0x08, 0x67, 0xef, 0x12, }, 7, 0, "", "", "62 f3 fd 08 67 ef 12 \tvfpclasssd $0x12,%xmm7,%k5",}, +{{0x62, 0xf3, 0xed, 0x08, 0x70, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 08 70 d9 12 \tvpshldw $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0xed, 0x28, 0x70, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 28 70 d9 12 \tvpshldw $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0x70, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 70 d9 12 \tvpshldw $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf3, 0x6d, 0x08, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 08 71 d9 12 \tvpshldd $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0x6d, 0x28, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 28 71 d9 12 \tvpshldd $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0x6d, 0x48, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 48 71 d9 12 \tvpshldd $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf3, 0xed, 0x08, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 08 71 d9 12 \tvpshldq $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0xed, 0x28, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 28 71 d9 12 \tvpshldq $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 71 d9 12 \tvpshldq $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf3, 0xed, 0x08, 0x72, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 08 72 d9 12 \tvpshrdw $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0xed, 0x28, 0x72, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 28 72 d9 12 \tvpshrdw $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0x72, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 72 d9 12 \tvpshrdw $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf3, 0x6d, 0x08, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 08 73 d9 12 \tvpshrdd $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0x6d, 0x28, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 28 73 d9 12 \tvpshrdd $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0x6d, 0x48, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 48 73 d9 12 \tvpshrdd $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf3, 0xed, 0x08, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 08 73 d9 12 \tvpshrdq $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0xed, 0x28, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 28 73 d9 12 \tvpshrdq $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 73 d9 12 \tvpshrdq $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x66, 0x0f, 0x3a, 0xce, 0xd9, 0x12, }, 6, 0, "", "", +"66 0f 3a ce d9 12 \tgf2p8affineqb $0x12,%xmm1,%xmm3",}, +{{0xc4, 0xe3, 0xe9, 0xce, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 e9 ce d9 12 \tvgf2p8affineqb $0x12,%xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe3, 0xed, 0xce, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 ed ce d9 12 \tvgf2p8affineqb $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0xce, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 ce d9 12 \tvgf2p8affineqb $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x66, 0x0f, 0x3a, 0xcf, 0xd9, 0x12, }, 6, 0, "", "", +"66 0f 3a cf d9 12 \tgf2p8affineinvqb $0x12,%xmm1,%xmm3",}, +{{0xc4, 0xe3, 0xe9, 0xcf, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 e9 cf d9 12 \tvgf2p8affineinvqb $0x12,%xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe3, 0xed, 0xcf, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 ed cf d9 12 \tvgf2p8affineinvqb $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0xcf, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 cf d9 12 \tvgf2p8affineinvqb $0x12,%zmm1,%zmm2,%zmm3",}, {{0x62, 0xf1, 0x4d, 0x48, 0x72, 0xc5, 0x12, }, 7, 0, "", "", "62 f1 4d 48 72 c5 12 \tvprord $0x12,%zmm5,%zmm6",}, {{0x62, 0xf1, 0xcd, 0x48, 0x72, 0xc5, 0x12, }, 7, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 656f8aed31de..567ecccfad7c 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -587,6 +587,112 @@ "62 02 35 07 4f d0 \tvrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}",}, {{0x62, 0x02, 0xb5, 0x07, 0x4f, 0xd0, }, 6, 0, "", "", "62 02 b5 07 4f d0 \tvrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x50, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 50 d9 \tvpdpbusd %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x50, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 50 d9 \tvpdpbusd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x50, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 50 d9 \tvpdpbusd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x50, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 50 9c c8 78 56 34 12 \tvpdpbusd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0x50, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 50 9c c8 78 56 34 12 \tvpdpbusd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x51, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 51 d9 \tvpdpbusds %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x51, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 51 d9 \tvpdpbusds %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x51, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 51 d9 \tvpdpbusds %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x51, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 51 9c c8 78 56 34 12 \tvpdpbusds 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0x51, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 51 9c c8 78 56 34 12 \tvpdpbusds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6e, 0x08, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6e 08 52 d9 \tvdpbf16ps %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6e, 0x28, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6e 28 52 d9 \tvdpbf16ps %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6e, 0x48, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6e 48 52 d9 \tvdpbf16ps %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6e, 0x48, 0x52, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6e 48 52 9c c8 78 56 34 12 \tvdpbf16ps 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6e, 0x48, 0x52, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6e 48 52 9c c8 78 56 34 12 \tvdpbf16ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 52 d9 \tvpdpwssd %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 52 d9 \tvpdpwssd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x52, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 52 d9 \tvpdpwssd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x52, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 52 9c c8 78 56 34 12 \tvpdpwssd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0x52, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 52 9c c8 78 56 34 12 \tvpdpwssd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x52, 0x20, }, 6, 0, "", "", +"62 f2 7f 48 52 20 \tvp4dpwssd (%rax),%zmm0,%zmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x48, 0x52, 0x20, }, 7, 0, "", "", +"67 62 f2 7f 48 52 20 \tvp4dpwssd (%eax),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x52, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 48 52 a4 c8 78 56 34 12 \tvp4dpwssd 0x12345678(%rax,%rcx,8),%zmm0,%zmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x48, 0x52, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7f 48 52 a4 c8 78 56 34 12 \tvp4dpwssd 0x12345678(%eax,%ecx,8),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x53, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 53 d9 \tvpdpwssds %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x53, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 53 d9 \tvpdpwssds %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x53, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 53 d9 \tvpdpwssds %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x53, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 53 9c c8 78 56 34 12 \tvpdpwssds 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0x53, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 53 9c c8 78 56 34 12 \tvpdpwssds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x53, 0x20, }, 6, 0, "", "", +"62 f2 7f 48 53 20 \tvp4dpwssds (%rax),%zmm0,%zmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x48, 0x53, 0x20, }, 7, 0, "", "", +"67 62 f2 7f 48 53 20 \tvp4dpwssds (%eax),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x53, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 48 53 a4 c8 78 56 34 12 \tvp4dpwssds 0x12345678(%rax,%rcx,8),%zmm0,%zmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x48, 0x53, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7f 48 53 a4 c8 78 56 34 12 \tvp4dpwssds 0x12345678(%eax,%ecx,8),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 7d 08 54 d1 \tvpopcntb %xmm1,%xmm2",}, +{{0x62, 0xf2, 0x7d, 0x28, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 7d 28 54 d1 \tvpopcntb %ymm1,%ymm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 7d 48 54 d1 \tvpopcntb %zmm1,%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x54, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 54 94 c8 78 56 34 12 \tvpopcntb 0x12345678(%rax,%rcx,8),%zmm2",}, +{{0x67, 0x62, 0xf2, 0x7d, 0x48, 0x54, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7d 48 54 94 c8 78 56 34 12 \tvpopcntb 0x12345678(%eax,%ecx,8),%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x08, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 fd 08 54 d1 \tvpopcntw %xmm1,%xmm2",}, +{{0x62, 0xf2, 0xfd, 0x28, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 fd 28 54 d1 \tvpopcntw %ymm1,%ymm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x54, 0xd1, }, 6, 0, "", "", +"62 f2 fd 48 54 d1 \tvpopcntw %zmm1,%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x54, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 fd 48 54 94 c8 78 56 34 12 \tvpopcntw 0x12345678(%rax,%rcx,8),%zmm2",}, +{{0x67, 0x62, 0xf2, 0xfd, 0x48, 0x54, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 fd 48 54 94 c8 78 56 34 12 \tvpopcntw 0x12345678(%eax,%ecx,8),%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 7d 08 55 d1 \tvpopcntd %xmm1,%xmm2",}, +{{0x62, 0xf2, 0x7d, 0x28, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 7d 28 55 d1 \tvpopcntd %ymm1,%ymm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 7d 48 55 d1 \tvpopcntd %zmm1,%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x55, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 55 94 c8 78 56 34 12 \tvpopcntd 0x12345678(%rax,%rcx,8),%zmm2",}, +{{0x67, 0x62, 0xf2, 0x7d, 0x48, 0x55, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7d 48 55 94 c8 78 56 34 12 \tvpopcntd 0x12345678(%eax,%ecx,8),%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x08, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 fd 08 55 d1 \tvpopcntq %xmm1,%xmm2",}, +{{0x62, 0xf2, 0xfd, 0x28, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 fd 28 55 d1 \tvpopcntq %ymm1,%ymm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x55, 0xd1, }, 6, 0, "", "", +"62 f2 fd 48 55 d1 \tvpopcntq %zmm1,%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x55, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 fd 48 55 94 c8 78 56 34 12 \tvpopcntq 0x12345678(%rax,%rcx,8),%zmm2",}, +{{0x67, 0x62, 0xf2, 0xfd, 0x48, 0x55, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 fd 48 55 94 c8 78 56 34 12 \tvpopcntq 0x12345678(%eax,%ecx,8),%zmm2",}, {{0xc4, 0xe2, 0x79, 0x59, 0xf4, }, 5, 0, "", "", "c4 e2 79 59 f4 \tvpbroadcastq %xmm4,%xmm6",}, {{0x62, 0x02, 0x7d, 0x48, 0x59, 0xd3, }, 6, 0, "", "", @@ -601,6 +707,46 @@ "62 62 7d 48 5b 21 \tvbroadcasti32x8 (%rcx),%zmm28",}, {{0x62, 0x62, 0xfd, 0x48, 0x5b, 0x11, }, 6, 0, "", "", "62 62 fd 48 5b 11 \tvbroadcasti64x4 (%rcx),%zmm26",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 7d 08 62 d1 \tvpexpandb %xmm1,%xmm2",}, +{{0x62, 0xf2, 0x7d, 0x28, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 7d 28 62 d1 \tvpexpandb %ymm1,%ymm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 7d 48 62 d1 \tvpexpandb %zmm1,%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 62 94 c8 78 56 34 12 \tvpexpandb 0x12345678(%rax,%rcx,8),%zmm2",}, +{{0x67, 0x62, 0xf2, 0x7d, 0x48, 0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7d 48 62 94 c8 78 56 34 12 \tvpexpandb 0x12345678(%eax,%ecx,8),%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x08, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 fd 08 62 d1 \tvpexpandw %xmm1,%xmm2",}, +{{0x62, 0xf2, 0xfd, 0x28, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 fd 28 62 d1 \tvpexpandw %ymm1,%ymm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x62, 0xd1, }, 6, 0, "", "", +"62 f2 fd 48 62 d1 \tvpexpandw %zmm1,%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 fd 48 62 94 c8 78 56 34 12 \tvpexpandw 0x12345678(%rax,%rcx,8),%zmm2",}, +{{0x67, 0x62, 0xf2, 0xfd, 0x48, 0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 fd 48 62 94 c8 78 56 34 12 \tvpexpandw 0x12345678(%eax,%ecx,8),%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 7d 08 63 ca \tvpcompressb %xmm1,%xmm2",}, +{{0x62, 0xf2, 0x7d, 0x28, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 7d 28 63 ca \tvpcompressb %ymm1,%ymm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 7d 48 63 ca \tvpcompressb %zmm1,%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x63, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 63 94 c8 78 56 34 12 \tvpcompressb %zmm2,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf2, 0x7d, 0x48, 0x63, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7d 48 63 94 c8 78 56 34 12 \tvpcompressb %zmm2,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf2, 0xfd, 0x08, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 fd 08 63 ca \tvpcompressw %xmm1,%xmm2",}, +{{0x62, 0xf2, 0xfd, 0x28, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 fd 28 63 ca \tvpcompressw %ymm1,%ymm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x63, 0xca, }, 6, 0, "", "", +"62 f2 fd 48 63 ca \tvpcompressw %zmm1,%zmm2",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x63, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 fd 48 63 94 c8 78 56 34 12 \tvpcompressw %zmm2,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf2, 0xfd, 0x48, 0x63, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 fd 48 63 94 c8 78 56 34 12 \tvpcompressw %zmm2,0x12345678(%eax,%ecx,8)",}, {{0x62, 0x02, 0x25, 0x40, 0x64, 0xe2, }, 6, 0, "", "", "62 02 25 40 64 e2 \tvpblendmd %zmm26,%zmm27,%zmm28",}, {{0x62, 0x02, 0xa5, 0x40, 0x64, 0xe2, }, 6, 0, "", "", @@ -613,6 +759,106 @@ "62 02 25 40 66 e2 \tvpblendmb %zmm26,%zmm27,%zmm28",}, {{0x62, 0x02, 0xa5, 0x40, 0x66, 0xe2, }, 6, 0, "", "", "62 02 a5 40 66 e2 \tvpblendmw %zmm26,%zmm27,%zmm28",}, +{{0x62, 0xf2, 0x6f, 0x08, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 6f 08 68 d9 \tvp2intersectd %xmm1,%xmm2,%k3",}, +{{0x62, 0xf2, 0x6f, 0x28, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 6f 28 68 d9 \tvp2intersectd %ymm1,%ymm2,%k3",}, +{{0x62, 0xf2, 0x6f, 0x48, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 6f 48 68 d9 \tvp2intersectd %zmm1,%zmm2,%k3",}, +{{0x62, 0xf2, 0x6f, 0x48, 0x68, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6f 48 68 9c c8 78 56 34 12 \tvp2intersectd 0x12345678(%rax,%rcx,8),%zmm2,%k3",}, +{{0x67, 0x62, 0xf2, 0x6f, 0x48, 0x68, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6f 48 68 9c c8 78 56 34 12 \tvp2intersectd 0x12345678(%eax,%ecx,8),%zmm2,%k3",}, +{{0x62, 0xf2, 0xef, 0x08, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 ef 08 68 d9 \tvp2intersectq %xmm1,%xmm2,%k3",}, +{{0x62, 0xf2, 0xef, 0x28, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 ef 28 68 d9 \tvp2intersectq %ymm1,%ymm2,%k3",}, +{{0x62, 0xf2, 0xef, 0x48, 0x68, 0xd9, }, 6, 0, "", "", +"62 f2 ef 48 68 d9 \tvp2intersectq %zmm1,%zmm2,%k3",}, +{{0x62, 0xf2, 0xef, 0x48, 0x68, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ef 48 68 9c c8 78 56 34 12 \tvp2intersectq 0x12345678(%rax,%rcx,8),%zmm2,%k3",}, +{{0x67, 0x62, 0xf2, 0xef, 0x48, 0x68, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 ef 48 68 9c c8 78 56 34 12 \tvp2intersectq 0x12345678(%eax,%ecx,8),%zmm2,%k3",}, +{{0x62, 0xf2, 0xed, 0x08, 0x70, 0xd9, }, 6, 0, "", "", +"62 f2 ed 08 70 d9 \tvpshldvw %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0xed, 0x28, 0x70, 0xd9, }, 6, 0, "", "", +"62 f2 ed 28 70 d9 \tvpshldvw %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x70, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 70 d9 \tvpshldvw %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x70, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 70 9c c8 78 56 34 12 \tvpshldvw 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0xed, 0x48, 0x70, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 ed 48 70 9c c8 78 56 34 12 \tvpshldvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 71 d9 \tvpshldvd %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 71 d9 \tvpshldvd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 71 d9 \tvpshldvd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x71, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 71 9c c8 78 56 34 12 \tvpshldvd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0x71, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 71 9c c8 78 56 34 12 \tvpshldvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x08, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 ed 08 71 d9 \tvpshldvq %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0xed, 0x28, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 ed 28 71 d9 \tvpshldvq %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x71, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 71 d9 \tvpshldvq %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x71, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 71 9c c8 78 56 34 12 \tvpshldvq 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0xed, 0x48, 0x71, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 ed 48 71 9c c8 78 56 34 12 \tvpshldvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6f, 0x08, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 6f 08 72 d9 \tvcvtne2ps2bf16 %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6f, 0x28, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 6f 28 72 d9 \tvcvtne2ps2bf16 %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6f, 0x48, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 6f 48 72 d9 \tvcvtne2ps2bf16 %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6f, 0x48, 0x72, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6f 48 72 9c c8 78 56 34 12 \tvcvtne2ps2bf16 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6f, 0x48, 0x72, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6f 48 72 9c c8 78 56 34 12 \tvcvtne2ps2bf16 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x7e, 0x08, 0x72, 0xd1, }, 6, 0, "", "", +"62 f2 7e 08 72 d1 \tvcvtneps2bf16 %xmm1,%xmm2",}, +{{0x62, 0xf2, 0x7e, 0x28, 0x72, 0xd1, }, 6, 0, "", "", +"62 f2 7e 28 72 d1 \tvcvtneps2bf16 %ymm1,%xmm2",}, +{{0x62, 0xf2, 0x7e, 0x48, 0x72, 0xd1, }, 6, 0, "", "", +"62 f2 7e 48 72 d1 \tvcvtneps2bf16 %zmm1,%ymm2",}, +{{0x62, 0xf2, 0x7e, 0x48, 0x72, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7e 48 72 94 c8 78 56 34 12 \tvcvtneps2bf16 0x12345678(%rax,%rcx,8),%ymm2",}, +{{0x67, 0x62, 0xf2, 0x7e, 0x48, 0x72, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7e 48 72 94 c8 78 56 34 12 \tvcvtneps2bf16 0x12345678(%eax,%ecx,8),%ymm2",}, +{{0x62, 0xf2, 0xed, 0x08, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 ed 08 72 d9 \tvpshrdvw %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0xed, 0x28, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 ed 28 72 d9 \tvpshrdvw %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x72, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 72 d9 \tvpshrdvw %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x72, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 72 9c c8 78 56 34 12 \tvpshrdvw 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0xed, 0x48, 0x72, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 ed 48 72 9c c8 78 56 34 12 \tvpshrdvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 73 d9 \tvpshrdvd %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 73 d9 \tvpshrdvd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 73 d9 \tvpshrdvd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x73, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 73 9c c8 78 56 34 12 \tvpshrdvd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0x73, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 73 9c c8 78 56 34 12 \tvpshrdvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x08, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 ed 08 73 d9 \tvpshrdvq %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0xed, 0x28, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 ed 28 73 d9 \tvpshrdvq %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x73, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 73 d9 \tvpshrdvq %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x73, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 73 9c c8 78 56 34 12 \tvpshrdvq 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0xed, 0x48, 0x73, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 ed 48 73 9c c8 78 56 34 12 \tvpshrdvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, {{0x62, 0x02, 0x35, 0x40, 0x75, 0xd0, }, 6, 0, "", "", "62 02 35 40 75 d0 \tvpermi2b %zmm24,%zmm25,%zmm26",}, {{0x62, 0x02, 0xa5, 0x40, 0x75, 0xe2, }, 6, 0, "", "", @@ -667,6 +913,16 @@ "62 02 25 40 8d e2 \tvpermb %zmm26,%zmm27,%zmm28",}, {{0x62, 0x02, 0xa5, 0x40, 0x8d, 0xe2, }, 6, 0, "", "", "62 02 a5 40 8d e2 \tvpermw %zmm26,%zmm27,%zmm28",}, +{{0x62, 0xf2, 0x6d, 0x08, 0x8f, 0xd9, }, 6, 0, "", "", +"62 f2 6d 08 8f d9 \tvpshufbitqmb %xmm1,%xmm2,%k3",}, +{{0x62, 0xf2, 0x6d, 0x28, 0x8f, 0xd9, }, 6, 0, "", "", +"62 f2 6d 28 8f d9 \tvpshufbitqmb %ymm1,%ymm2,%k3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x8f, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 8f d9 \tvpshufbitqmb %zmm1,%zmm2,%k3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x8f, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 8f 9c c8 78 56 34 12 \tvpshufbitqmb 0x12345678(%rax,%rcx,8),%zmm2,%k3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0x8f, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 8f 9c c8 78 56 34 12 \tvpshufbitqmb 0x12345678(%eax,%ecx,8),%zmm2,%k3",}, {{0xc4, 0xe2, 0x69, 0x90, 0x4c, 0x7d, 0x02, }, 7, 0, "", "", "c4 e2 69 90 4c 7d 02 \tvpgatherdd %xmm2,0x2(%rbp,%xmm7,2),%xmm1",}, {{0xc4, 0xe2, 0xe9, 0x90, 0x4c, 0x7d, 0x04, }, 7, 0, "", "", @@ -683,6 +939,54 @@ "62 22 7d 41 91 94 dd 7b 00 00 00 \tvpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}",}, {{0x62, 0x22, 0xfd, 0x41, 0x91, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", "62 22 fd 41 91 94 dd 7b 00 00 00 \tvpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}",}, +{{0xc4, 0xe2, 0x69, 0x9a, 0xd9, }, 5, 0, "", "", +"c4 e2 69 9a d9 \tvfmsub132ps %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0x9a, 0xd9, }, 5, 0, "", "", +"c4 e2 6d 9a d9 \tvfmsub132ps %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x9a, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 9a d9 \tvfmsub132ps %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0x9a, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 9a 9c c8 78 56 34 12 \tvfmsub132ps 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0x9a, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 9a 9c c8 78 56 34 12 \tvfmsub132ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0xe9, 0x9a, 0xd9, }, 5, 0, "", "", +"c4 e2 e9 9a d9 \tvfmsub132pd %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xed, 0x9a, 0xd9, }, 5, 0, "", "", +"c4 e2 ed 9a d9 \tvfmsub132pd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x9a, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 9a d9 \tvfmsub132pd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0x9a, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 9a 9c c8 78 56 34 12 \tvfmsub132pd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0xed, 0x48, 0x9a, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 ed 48 9a 9c c8 78 56 34 12 \tvfmsub132pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x9a, 0x20, }, 6, 0, "", "", +"62 f2 7f 48 9a 20 \tv4fmaddps (%rax),%zmm0,%zmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x48, 0x9a, 0x20, }, 7, 0, "", "", +"67 62 f2 7f 48 9a 20 \tv4fmaddps (%eax),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x7f, 0x48, 0x9a, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 48 9a a4 c8 78 56 34 12 \tv4fmaddps 0x12345678(%rax,%rcx,8),%zmm0,%zmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x48, 0x9a, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7f 48 9a a4 c8 78 56 34 12 \tv4fmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4",}, +{{0xc4, 0xe2, 0x69, 0x9b, 0xd9, }, 5, 0, "", "", +"c4 e2 69 9b d9 \tvfmsub132ss %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x69, 0x9b, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 69 9b 9c c8 78 56 34 12 \tvfmsub132ss 0x12345678(%rax,%rcx,8),%xmm2,%xmm3",}, +{{0x67, 0xc4, 0xe2, 0x69, 0x9b, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 69 9b 9c c8 78 56 34 12 \tvfmsub132ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xe9, 0x9b, 0xd9, }, 5, 0, "", "", +"c4 e2 e9 9b d9 \tvfmsub132sd %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xe9, 0x9b, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 e9 9b 9c c8 78 56 34 12 \tvfmsub132sd 0x12345678(%rax,%rcx,8),%xmm2,%xmm3",}, +{{0x67, 0xc4, 0xe2, 0xe9, 0x9b, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 e9 9b 9c c8 78 56 34 12 \tvfmsub132sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x7f, 0x08, 0x9b, 0x20, }, 6, 0, "", "", +"62 f2 7f 08 9b 20 \tv4fmaddss (%rax),%xmm0,%xmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x08, 0x9b, 0x20, }, 7, 0, "", "", +"67 62 f2 7f 08 9b 20 \tv4fmaddss (%eax),%xmm0,%xmm4",}, +{{0x62, 0xf2, 0x7f, 0x08, 0x9b, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 08 9b a4 c8 78 56 34 12 \tv4fmaddss 0x12345678(%rax,%rcx,8),%xmm0,%xmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x08, 0x9b, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7f 08 9b a4 c8 78 56 34 12 \tv4fmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4",}, {{0x62, 0x22, 0x7d, 0x41, 0xa0, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", "62 22 7d 41 a0 a4 ed 7b 00 00 00 \tvpscatterdd %zmm28,0x7b(%rbp,%zmm29,8){%k1}",}, {{0x62, 0x22, 0xfd, 0x41, 0xa0, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", @@ -699,6 +1003,54 @@ "62 b2 7d 41 a3 b4 ed 7b 00 00 00 \tvscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}",}, {{0x62, 0x22, 0xfd, 0x41, 0xa3, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", "62 22 fd 41 a3 a4 ed 7b 00 00 00 \tvscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}",}, +{{0xc4, 0xe2, 0x69, 0xaa, 0xd9, }, 5, 0, "", "", +"c4 e2 69 aa d9 \tvfmsub213ps %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xaa, 0xd9, }, 5, 0, "", "", +"c4 e2 6d aa d9 \tvfmsub213ps %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xaa, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 aa d9 \tvfmsub213ps %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xaa, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 aa 9c c8 78 56 34 12 \tvfmsub213ps 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0xaa, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 aa 9c c8 78 56 34 12 \tvfmsub213ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0xe9, 0xaa, 0xd9, }, 5, 0, "", "", +"c4 e2 e9 aa d9 \tvfmsub213pd %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xed, 0xaa, 0xd9, }, 5, 0, "", "", +"c4 e2 ed aa d9 \tvfmsub213pd %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0xaa, 0xd9, }, 6, 0, "", "", +"62 f2 ed 48 aa d9 \tvfmsub213pd %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0xed, 0x48, 0xaa, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 ed 48 aa 9c c8 78 56 34 12 \tvfmsub213pd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0xed, 0x48, 0xaa, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 ed 48 aa 9c c8 78 56 34 12 \tvfmsub213pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x7f, 0x48, 0xaa, 0x20, }, 6, 0, "", "", +"62 f2 7f 48 aa 20 \tv4fnmaddps (%rax),%zmm0,%zmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x48, 0xaa, 0x20, }, 7, 0, "", "", +"67 62 f2 7f 48 aa 20 \tv4fnmaddps (%eax),%zmm0,%zmm4",}, +{{0x62, 0xf2, 0x7f, 0x48, 0xaa, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 48 aa a4 c8 78 56 34 12 \tv4fnmaddps 0x12345678(%rax,%rcx,8),%zmm0,%zmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x48, 0xaa, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7f 48 aa a4 c8 78 56 34 12 \tv4fnmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4",}, +{{0xc4, 0xe2, 0x69, 0xab, 0xd9, }, 5, 0, "", "", +"c4 e2 69 ab d9 \tvfmsub213ss %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x69, 0xab, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 69 ab 9c c8 78 56 34 12 \tvfmsub213ss 0x12345678(%rax,%rcx,8),%xmm2,%xmm3",}, +{{0x67, 0xc4, 0xe2, 0x69, 0xab, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 69 ab 9c c8 78 56 34 12 \tvfmsub213ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xe9, 0xab, 0xd9, }, 5, 0, "", "", +"c4 e2 e9 ab d9 \tvfmsub213sd %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0xe9, 0xab, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 e9 ab 9c c8 78 56 34 12 \tvfmsub213sd 0x12345678(%rax,%rcx,8),%xmm2,%xmm3",}, +{{0x67, 0xc4, 0xe2, 0xe9, 0xab, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 e9 ab 9c c8 78 56 34 12 \tvfmsub213sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x7f, 0x08, 0xab, 0x20, }, 6, 0, "", "", +"62 f2 7f 08 ab 20 \tv4fnmaddss (%rax),%xmm0,%xmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x08, 0xab, 0x20, }, 7, 0, "", "", +"67 62 f2 7f 08 ab 20 \tv4fnmaddss (%eax),%xmm0,%xmm4",}, +{{0x62, 0xf2, 0x7f, 0x08, 0xab, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7f 08 ab a4 c8 78 56 34 12 \tv4fnmaddss 0x12345678(%rax,%rcx,8),%xmm0,%xmm4",}, +{{0x67, 0x62, 0xf2, 0x7f, 0x08, 0xab, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7f 08 ab a4 c8 78 56 34 12 \tv4fnmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4",}, {{0x62, 0x02, 0xa5, 0x40, 0xb4, 0xe2, }, 6, 0, "", "", "62 02 a5 40 b4 e2 \tvpmadd52luq %zmm26,%zmm27,%zmm28",}, {{0x62, 0x02, 0xa5, 0x40, 0xb5, 0xe2, }, 6, 0, "", "", @@ -727,6 +1079,62 @@ "62 02 15 07 cd f4 \tvrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}",}, {{0x62, 0x02, 0xad, 0x07, 0xcd, 0xd9, }, 6, 0, "", "", "62 02 ad 07 cd d9 \tvrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}",}, +{{0x66, 0x0f, 0x38, 0xcf, 0xd9, }, 5, 0, "", "", +"66 0f 38 cf d9 \tgf2p8mulb %xmm1,%xmm3",}, +{{0x66, 0x0f, 0x38, 0xcf, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 cf 9c c8 78 56 34 12 \tgf2p8mulb 0x12345678(%rax,%rcx,8),%xmm3",}, +{{0x67, 0x66, 0x0f, 0x38, 0xcf, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 66 0f 38 cf 9c c8 78 56 34 12 \tgf2p8mulb 0x12345678(%eax,%ecx,8),%xmm3",}, +{{0xc4, 0xe2, 0x69, 0xcf, 0xd9, }, 5, 0, "", "", +"c4 e2 69 cf d9 \tvgf2p8mulb %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xcf, 0xd9, }, 5, 0, "", "", +"c4 e2 6d cf d9 \tvgf2p8mulb %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xcf, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 cf d9 \tvgf2p8mulb %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xcf, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 cf 9c c8 78 56 34 12 \tvgf2p8mulb 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0xcf, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 cf 9c c8 78 56 34 12 \tvgf2p8mulb 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0x69, 0xdc, 0xd9, }, 5, 0, "", "", +"c4 e2 69 dc d9 \tvaesenc %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xdc, 0xd9, }, 5, 0, "", "", +"c4 e2 6d dc d9 \tvaesenc %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdc, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 dc d9 \tvaesenc %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdc, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 dc 9c c8 78 56 34 12 \tvaesenc 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0xdc, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 dc 9c c8 78 56 34 12 \tvaesenc 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0x69, 0xdd, 0xd9, }, 5, 0, "", "", +"c4 e2 69 dd d9 \tvaesenclast %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xdd, 0xd9, }, 5, 0, "", "", +"c4 e2 6d dd d9 \tvaesenclast %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdd, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 dd d9 \tvaesenclast %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdd, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 dd 9c c8 78 56 34 12 \tvaesenclast 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0xdd, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 dd 9c c8 78 56 34 12 \tvaesenclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0x69, 0xde, 0xd9, }, 5, 0, "", "", +"c4 e2 69 de d9 \tvaesdec %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xde, 0xd9, }, 5, 0, "", "", +"c4 e2 6d de d9 \tvaesdec %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xde, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 de d9 \tvaesdec %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xde, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 de 9c c8 78 56 34 12 \tvaesdec 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0xde, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 de 9c c8 78 56 34 12 \tvaesdec 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, +{{0xc4, 0xe2, 0x69, 0xdf, 0xd9, }, 5, 0, "", "", +"c4 e2 69 df d9 \tvaesdeclast %xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe2, 0x6d, 0xdf, 0xd9, }, 5, 0, "", "", +"c4 e2 6d df d9 \tvaesdeclast %ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdf, 0xd9, }, 6, 0, "", "", +"62 f2 6d 48 df d9 \tvaesdeclast %zmm1,%zmm2,%zmm3",}, +{{0x62, 0xf2, 0x6d, 0x48, 0xdf, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 6d 48 df 9c c8 78 56 34 12 \tvaesdeclast 0x12345678(%rax,%rcx,8),%zmm2,%zmm3",}, +{{0x67, 0x62, 0xf2, 0x6d, 0x48, 0xdf, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 6d 48 df 9c c8 78 56 34 12 \tvaesdeclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3",}, {{0x62, 0x03, 0x15, 0x40, 0x03, 0xf4, 0x12, }, 7, 0, "", "", "62 03 15 40 03 f4 12 \tvalignd $0x12,%zmm28,%zmm29,%zmm30",}, {{0x62, 0x03, 0xad, 0x40, 0x03, 0xd9, 0x12, }, 7, 0, "", "", @@ -827,6 +1235,14 @@ "62 03 2d 40 43 d9 12 \tvshufi32x4 $0x12,%zmm25,%zmm26,%zmm27",}, {{0x62, 0x03, 0x95, 0x40, 0x43, 0xf4, 0x12, }, 7, 0, "", "", "62 03 95 40 43 f4 12 \tvshufi64x2 $0x12,%zmm28,%zmm29,%zmm30",}, +{{0xc4, 0xe3, 0x69, 0x44, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 69 44 d9 12 \tvpclmulqdq $0x12,%xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe3, 0x6d, 0x44, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 6d 44 d9 12 \tvpclmulqdq $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0x6d, 0x48, 0x44, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 48 44 d9 12 \tvpclmulqdq $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0x03, 0x2d, 0x40, 0x44, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 2d 40 44 d9 12 \tvpclmulqdq $0x12,%zmm25,%zmm26,%zmm27",}, {{0x62, 0x03, 0x2d, 0x40, 0x50, 0xd9, 0x12, }, 7, 0, "", "", "62 03 2d 40 50 d9 12 \tvrangeps $0x12,%zmm25,%zmm26,%zmm27",}, {{0x62, 0x03, 0x95, 0x40, 0x50, 0xf4, 0x12, }, 7, 0, "", "", @@ -859,6 +1275,74 @@ "62 93 7d 08 67 eb 12 \tvfpclassss $0x12,%xmm27,%k5",}, {{0x62, 0x93, 0xfd, 0x08, 0x67, 0xee, 0x12, }, 7, 0, "", "", "62 93 fd 08 67 ee 12 \tvfpclasssd $0x12,%xmm30,%k5",}, +{{0x62, 0xf3, 0xed, 0x08, 0x70, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 08 70 d9 12 \tvpshldw $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0xed, 0x28, 0x70, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 28 70 d9 12 \tvpshldw $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0x70, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 70 d9 12 \tvpshldw $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0x03, 0xad, 0x40, 0x70, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 40 70 d9 12 \tvpshldw $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x62, 0xf3, 0x6d, 0x08, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 08 71 d9 12 \tvpshldd $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0x6d, 0x28, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 28 71 d9 12 \tvpshldd $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0x6d, 0x48, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 48 71 d9 12 \tvpshldd $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0x03, 0x2d, 0x40, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 2d 40 71 d9 12 \tvpshldd $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x62, 0xf3, 0xed, 0x08, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 08 71 d9 12 \tvpshldq $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0xed, 0x28, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 28 71 d9 12 \tvpshldq $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 71 d9 12 \tvpshldq $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0x03, 0xad, 0x40, 0x71, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 40 71 d9 12 \tvpshldq $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x62, 0xf3, 0xed, 0x08, 0x72, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 08 72 d9 12 \tvpshrdw $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0xed, 0x28, 0x72, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 28 72 d9 12 \tvpshrdw $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0x72, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 72 d9 12 \tvpshrdw $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0x03, 0xad, 0x40, 0x72, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 40 72 d9 12 \tvpshrdw $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x62, 0xf3, 0x6d, 0x08, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 08 73 d9 12 \tvpshrdd $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0x6d, 0x28, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 28 73 d9 12 \tvpshrdd $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0x6d, 0x48, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 6d 48 73 d9 12 \tvpshrdd $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0x03, 0x2d, 0x40, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 2d 40 73 d9 12 \tvpshrdd $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x62, 0xf3, 0xed, 0x08, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 08 73 d9 12 \tvpshrdq $0x12,%xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf3, 0xed, 0x28, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 28 73 d9 12 \tvpshrdq $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 73 d9 12 \tvpshrdq $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0x03, 0xad, 0x40, 0x73, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 40 73 d9 12 \tvpshrdq $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x66, 0x0f, 0x3a, 0xce, 0xd9, 0x12, }, 6, 0, "", "", +"66 0f 3a ce d9 12 \tgf2p8affineqb $0x12,%xmm1,%xmm3",}, +{{0xc4, 0xe3, 0xe9, 0xce, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 e9 ce d9 12 \tvgf2p8affineqb $0x12,%xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe3, 0xed, 0xce, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 ed ce d9 12 \tvgf2p8affineqb $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0xce, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 ce d9 12 \tvgf2p8affineqb $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0x03, 0xad, 0x40, 0xce, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 40 ce d9 12 \tvgf2p8affineqb $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x66, 0x0f, 0x3a, 0xcf, 0xd9, 0x12, }, 6, 0, "", "", +"66 0f 3a cf d9 12 \tgf2p8affineinvqb $0x12,%xmm1,%xmm3",}, +{{0xc4, 0xe3, 0xe9, 0xcf, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 e9 cf d9 12 \tvgf2p8affineinvqb $0x12,%xmm1,%xmm2,%xmm3",}, +{{0xc4, 0xe3, 0xed, 0xcf, 0xd9, 0x12, }, 6, 0, "", "", +"c4 e3 ed cf d9 12 \tvgf2p8affineinvqb $0x12,%ymm1,%ymm2,%ymm3",}, +{{0x62, 0xf3, 0xed, 0x48, 0xcf, 0xd9, 0x12, }, 7, 0, "", "", +"62 f3 ed 48 cf d9 12 \tvgf2p8affineinvqb $0x12,%zmm1,%zmm2,%zmm3",}, +{{0x62, 0x03, 0xad, 0x40, 0xcf, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 40 cf d9 12 \tvgf2p8affineinvqb $0x12,%zmm25,%zmm26,%zmm27",}, {{0x62, 0x91, 0x2d, 0x40, 0x72, 0xc1, 0x12, }, 7, 0, "", "", "62 91 2d 40 72 c1 12 \tvprord $0x12,%zmm25,%zmm26",}, {{0x62, 0x91, 0xad, 0x40, 0x72, 0xc1, 0x12, }, 7, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index dd85a3afd9ce..ddbf07c50bb8 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -510,6 +510,82 @@ int main(void) asm volatile("vrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}"); asm volatile("vrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}"); + /* AVX-512: Op code 0f 38 50 */ + + asm volatile("vpdpbusd %xmm1, %xmm2, %xmm3"); + asm volatile("vpdpbusd %ymm1, %ymm2, %ymm3"); + asm volatile("vpdpbusd %zmm1, %zmm2, %zmm3"); + asm volatile("vpdpbusd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vpdpbusd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 51 */ + + asm volatile("vpdpbusds %xmm1, %xmm2, %xmm3"); + asm volatile("vpdpbusds %ymm1, %ymm2, %ymm3"); + asm volatile("vpdpbusds %zmm1, %zmm2, %zmm3"); + asm volatile("vpdpbusds 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vpdpbusds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 52 */ + + asm volatile("vdpbf16ps %xmm1, %xmm2, %xmm3"); + asm volatile("vdpbf16ps %ymm1, %ymm2, %ymm3"); + asm volatile("vdpbf16ps %zmm1, %zmm2, %zmm3"); + asm volatile("vdpbf16ps 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vdpbf16ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vpdpwssd %xmm1, %xmm2, %xmm3"); + asm volatile("vpdpwssd %ymm1, %ymm2, %ymm3"); + asm volatile("vpdpwssd %zmm1, %zmm2, %zmm3"); + asm volatile("vpdpwssd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vpdpwssd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vp4dpwssd (%rax), %zmm0, %zmm4"); + asm volatile("vp4dpwssd (%eax), %zmm0, %zmm4"); + asm volatile("vp4dpwssd 0x12345678(%rax,%rcx,8),%zmm0,%zmm4"); + asm volatile("vp4dpwssd 0x12345678(%eax,%ecx,8),%zmm0,%zmm4"); + + /* AVX-512: Op code 0f 38 53 */ + + asm volatile("vpdpwssds %xmm1, %xmm2, %xmm3"); + asm volatile("vpdpwssds %ymm1, %ymm2, %ymm3"); + asm volatile("vpdpwssds %zmm1, %zmm2, %zmm3"); + asm volatile("vpdpwssds 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vpdpwssds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vp4dpwssds (%rax), %zmm0, %zmm4"); + asm volatile("vp4dpwssds (%eax), %zmm0, %zmm4"); + asm volatile("vp4dpwssds 0x12345678(%rax,%rcx,8),%zmm0,%zmm4"); + asm volatile("vp4dpwssds 0x12345678(%eax,%ecx,8),%zmm0,%zmm4"); + + /* AVX-512: Op code 0f 38 54 */ + + asm volatile("vpopcntb %xmm1, %xmm2"); + asm volatile("vpopcntb %ymm1, %ymm2"); + asm volatile("vpopcntb %zmm1, %zmm2"); + asm volatile("vpopcntb 0x12345678(%rax,%rcx,8),%zmm2"); + asm volatile("vpopcntb 0x12345678(%eax,%ecx,8),%zmm2"); + + asm volatile("vpopcntw %xmm1, %xmm2"); + asm volatile("vpopcntw %ymm1, %ymm2"); + asm volatile("vpopcntw %zmm1, %zmm2"); + asm volatile("vpopcntw 0x12345678(%rax,%rcx,8),%zmm2"); + asm volatile("vpopcntw 0x12345678(%eax,%ecx,8),%zmm2"); + + /* AVX-512: Op code 0f 38 55 */ + + asm volatile("vpopcntd %xmm1, %xmm2"); + asm volatile("vpopcntd %ymm1, %ymm2"); + asm volatile("vpopcntd %zmm1, %zmm2"); + asm volatile("vpopcntd 0x12345678(%rax,%rcx,8),%zmm2"); + asm volatile("vpopcntd 0x12345678(%eax,%ecx,8),%zmm2"); + + asm volatile("vpopcntq %xmm1, %xmm2"); + asm volatile("vpopcntq %ymm1, %ymm2"); + asm volatile("vpopcntq %zmm1, %zmm2"); + asm volatile("vpopcntq 0x12345678(%rax,%rcx,8),%zmm2"); + asm volatile("vpopcntq 0x12345678(%eax,%ecx,8),%zmm2"); + /* AVX-512: Op code 0f 38 59 */ asm volatile("vpbroadcastq %xmm4,%xmm6"); @@ -526,6 +602,34 @@ int main(void) asm volatile("vbroadcasti32x8 (%rcx),%zmm28"); asm volatile("vbroadcasti64x4 (%rcx),%zmm26"); + /* AVX-512: Op code 0f 38 62 */ + + asm volatile("vpexpandb %xmm1, %xmm2"); + asm volatile("vpexpandb %ymm1, %ymm2"); + asm volatile("vpexpandb %zmm1, %zmm2"); + asm volatile("vpexpandb 0x12345678(%rax,%rcx,8),%zmm2"); + asm volatile("vpexpandb 0x12345678(%eax,%ecx,8),%zmm2"); + + asm volatile("vpexpandw %xmm1, %xmm2"); + asm volatile("vpexpandw %ymm1, %ymm2"); + asm volatile("vpexpandw %zmm1, %zmm2"); + asm volatile("vpexpandw 0x12345678(%rax,%rcx,8),%zmm2"); + asm volatile("vpexpandw 0x12345678(%eax,%ecx,8),%zmm2"); + + /* AVX-512: Op code 0f 38 63 */ + + asm volatile("vpcompressb %xmm1, %xmm2"); + asm volatile("vpcompressb %ymm1, %ymm2"); + asm volatile("vpcompressb %zmm1, %zmm2"); + asm volatile("vpcompressb %zmm2,0x12345678(%rax,%rcx,8)"); + asm volatile("vpcompressb %zmm2,0x12345678(%eax,%ecx,8)"); + + asm volatile("vpcompressw %xmm1, %xmm2"); + asm volatile("vpcompressw %ymm1, %ymm2"); + asm volatile("vpcompressw %zmm1, %zmm2"); + asm volatile("vpcompressw %zmm2,0x12345678(%rax,%rcx,8)"); + asm volatile("vpcompressw %zmm2,0x12345678(%eax,%ecx,8)"); + /* AVX-512: Op code 0f 38 64 */ asm volatile("vpblendmd %zmm26,%zmm27,%zmm28"); @@ -541,6 +645,76 @@ int main(void) asm volatile("vpblendmb %zmm26,%zmm27,%zmm28"); asm volatile("vpblendmw %zmm26,%zmm27,%zmm28"); + /* AVX-512: Op code 0f 38 68 */ + + asm volatile("vp2intersectd %xmm1, %xmm2, %k3"); + asm volatile("vp2intersectd %ymm1, %ymm2, %k3"); + asm volatile("vp2intersectd %zmm1, %zmm2, %k3"); + asm volatile("vp2intersectd 0x12345678(%rax,%rcx,8),%zmm2,%k3"); + asm volatile("vp2intersectd 0x12345678(%eax,%ecx,8),%zmm2,%k3"); + + asm volatile("vp2intersectq %xmm1, %xmm2, %k3"); + asm volatile("vp2intersectq %ymm1, %ymm2, %k3"); + asm volatile("vp2intersectq %zmm1, %zmm2, %k3"); + asm volatile("vp2intersectq 0x12345678(%rax,%rcx,8),%zmm2,%k3"); + asm volatile("vp2intersectq 0x12345678(%eax,%ecx,8),%zmm2,%k3"); + + /* AVX-512: Op code 0f 38 70 */ + + asm volatile("vpshldvw %xmm1, %xmm2, %xmm3"); + asm volatile("vpshldvw %ymm1, %ymm2, %ymm3"); + asm volatile("vpshldvw %zmm1, %zmm2, %zmm3"); + asm volatile("vpshldvw 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vpshldvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 71 */ + + asm volatile("vpshldvd %xmm1, %xmm2, %xmm3"); + asm volatile("vpshldvd %ymm1, %ymm2, %ymm3"); + asm volatile("vpshldvd %zmm1, %zmm2, %zmm3"); + asm volatile("vpshldvd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vpshldvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vpshldvq %xmm1, %xmm2, %xmm3"); + asm volatile("vpshldvq %ymm1, %ymm2, %ymm3"); + asm volatile("vpshldvq %zmm1, %zmm2, %zmm3"); + asm volatile("vpshldvq 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vpshldvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 72 */ + + asm volatile("vcvtne2ps2bf16 %xmm1, %xmm2, %xmm3"); + asm volatile("vcvtne2ps2bf16 %ymm1, %ymm2, %ymm3"); + asm volatile("vcvtne2ps2bf16 %zmm1, %zmm2, %zmm3"); + asm volatile("vcvtne2ps2bf16 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vcvtne2ps2bf16 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vcvtneps2bf16 %xmm1, %xmm2"); + asm volatile("vcvtneps2bf16 %ymm1, %xmm2"); + asm volatile("vcvtneps2bf16 %zmm1, %ymm2"); + asm volatile("vcvtneps2bf16 0x12345678(%rax,%rcx,8),%ymm2"); + asm volatile("vcvtneps2bf16 0x12345678(%eax,%ecx,8),%ymm2"); + + asm volatile("vpshrdvw %xmm1, %xmm2, %xmm3"); + asm volatile("vpshrdvw %ymm1, %ymm2, %ymm3"); + asm volatile("vpshrdvw %zmm1, %zmm2, %zmm3"); + asm volatile("vpshrdvw 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vpshrdvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 73 */ + + asm volatile("vpshrdvd %xmm1, %xmm2, %xmm3"); + asm volatile("vpshrdvd %ymm1, %ymm2, %ymm3"); + asm volatile("vpshrdvd %zmm1, %zmm2, %zmm3"); + asm volatile("vpshrdvd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vpshrdvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vpshrdvq %xmm1, %xmm2, %xmm3"); + asm volatile("vpshrdvq %ymm1, %ymm2, %ymm3"); + asm volatile("vpshrdvq %zmm1, %zmm2, %zmm3"); + asm volatile("vpshrdvq 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vpshrdvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + /* AVX-512: Op code 0f 38 75 */ asm volatile("vpermi2b %zmm24,%zmm25,%zmm26"); @@ -613,6 +787,14 @@ int main(void) asm volatile("vpermb %zmm26,%zmm27,%zmm28"); asm volatile("vpermw %zmm26,%zmm27,%zmm28"); + /* AVX-512: Op code 0f 38 8f */ + + asm volatile("vpshufbitqmb %xmm1, %xmm2, %k3"); + asm volatile("vpshufbitqmb %ymm1, %ymm2, %k3"); + asm volatile("vpshufbitqmb %zmm1, %zmm2, %k3"); + asm volatile("vpshufbitqmb 0x12345678(%rax,%rcx,8),%zmm2,%k3"); + asm volatile("vpshufbitqmb 0x12345678(%eax,%ecx,8),%zmm2,%k3"); + /* AVX-512: Op code 0f 38 90 */ asm volatile("vpgatherdd %xmm2,0x02(%rbp,%xmm7,2),%xmm1"); @@ -627,6 +809,40 @@ int main(void) asm volatile("vpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}"); asm volatile("vpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}"); + /* AVX-512: Op code 0f 38 9a */ + + asm volatile("vfmsub132ps %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub132ps %ymm1, %ymm2, %ymm3"); + asm volatile("vfmsub132ps %zmm1, %zmm2, %zmm3"); + asm volatile("vfmsub132ps 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vfmsub132ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vfmsub132pd %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub132pd %ymm1, %ymm2, %ymm3"); + asm volatile("vfmsub132pd %zmm1, %zmm2, %zmm3"); + asm volatile("vfmsub132pd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vfmsub132pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("v4fmaddps (%rax), %zmm0, %zmm4"); + asm volatile("v4fmaddps (%eax), %zmm0, %zmm4"); + asm volatile("v4fmaddps 0x12345678(%rax,%rcx,8),%zmm0,%zmm4"); + asm volatile("v4fmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4"); + + /* AVX-512: Op code 0f 38 9b */ + + asm volatile("vfmsub132ss %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub132ss 0x12345678(%rax,%rcx,8),%xmm2,%xmm3"); + asm volatile("vfmsub132ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3"); + + asm volatile("vfmsub132sd %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub132sd 0x12345678(%rax,%rcx,8),%xmm2,%xmm3"); + asm volatile("vfmsub132sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3"); + + asm volatile("v4fmaddss (%rax), %xmm0, %xmm4"); + asm volatile("v4fmaddss (%eax), %xmm0, %xmm4"); + asm volatile("v4fmaddss 0x12345678(%rax,%rcx,8),%xmm0,%xmm4"); + asm volatile("v4fmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4"); + /* AVX-512: Op code 0f 38 a0 */ asm volatile("vpscatterdd %zmm28,0x7b(%rbp,%zmm29,8){%k1}"); @@ -647,6 +863,40 @@ int main(void) asm volatile("vscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}"); asm volatile("vscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}"); + /* AVX-512: Op code 0f 38 aa */ + + asm volatile("vfmsub213ps %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub213ps %ymm1, %ymm2, %ymm3"); + asm volatile("vfmsub213ps %zmm1, %zmm2, %zmm3"); + asm volatile("vfmsub213ps 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vfmsub213ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vfmsub213pd %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub213pd %ymm1, %ymm2, %ymm3"); + asm volatile("vfmsub213pd %zmm1, %zmm2, %zmm3"); + asm volatile("vfmsub213pd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vfmsub213pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("v4fnmaddps (%rax), %zmm0, %zmm4"); + asm volatile("v4fnmaddps (%eax), %zmm0, %zmm4"); + asm volatile("v4fnmaddps 0x12345678(%rax,%rcx,8),%zmm0,%zmm4"); + asm volatile("v4fnmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4"); + + /* AVX-512: Op code 0f 38 ab */ + + asm volatile("vfmsub213ss %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub213ss 0x12345678(%rax,%rcx,8),%xmm2,%xmm3"); + asm volatile("vfmsub213ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3"); + + asm volatile("vfmsub213sd %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub213sd 0x12345678(%rax,%rcx,8),%xmm2,%xmm3"); + asm volatile("vfmsub213sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3"); + + asm volatile("v4fnmaddss (%rax), %xmm0, %xmm4"); + asm volatile("v4fnmaddss (%eax), %xmm0, %xmm4"); + asm volatile("v4fnmaddss 0x12345678(%rax,%rcx,8),%xmm0,%xmm4"); + asm volatile("v4fnmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4"); + /* AVX-512: Op code 0f 38 b4 */ asm volatile("vpmadd52luq %zmm26,%zmm27,%zmm28"); @@ -685,6 +935,50 @@ int main(void) asm volatile("vrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}"); asm volatile("vrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}"); + /* AVX-512: Op code 0f 38 cf */ + + asm volatile("gf2p8mulb %xmm1, %xmm3"); + asm volatile("gf2p8mulb 0x12345678(%rax,%rcx,8),%xmm3"); + asm volatile("gf2p8mulb 0x12345678(%eax,%ecx,8),%xmm3"); + + asm volatile("vgf2p8mulb %xmm1, %xmm2, %xmm3"); + asm volatile("vgf2p8mulb %ymm1, %ymm2, %ymm3"); + asm volatile("vgf2p8mulb %zmm1, %zmm2, %zmm3"); + asm volatile("vgf2p8mulb 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vgf2p8mulb 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 dc */ + + asm volatile("vaesenc %xmm1, %xmm2, %xmm3"); + asm volatile("vaesenc %ymm1, %ymm2, %ymm3"); + asm volatile("vaesenc %zmm1, %zmm2, %zmm3"); + asm volatile("vaesenc 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vaesenc 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 dd */ + + asm volatile("vaesenclast %xmm1, %xmm2, %xmm3"); + asm volatile("vaesenclast %ymm1, %ymm2, %ymm3"); + asm volatile("vaesenclast %zmm1, %zmm2, %zmm3"); + asm volatile("vaesenclast 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vaesenclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 de */ + + asm volatile("vaesdec %xmm1, %xmm2, %xmm3"); + asm volatile("vaesdec %ymm1, %ymm2, %ymm3"); + asm volatile("vaesdec %zmm1, %zmm2, %zmm3"); + asm volatile("vaesdec 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vaesdec 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 df */ + + asm volatile("vaesdeclast %xmm1, %xmm2, %xmm3"); + asm volatile("vaesdeclast %ymm1, %ymm2, %ymm3"); + asm volatile("vaesdeclast %zmm1, %zmm2, %zmm3"); + asm volatile("vaesdeclast 0x12345678(%rax,%rcx,8),%zmm2,%zmm3"); + asm volatile("vaesdeclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + /* AVX-512: Op code 0f 3a 03 */ asm volatile("valignd $0x12,%zmm28,%zmm29,%zmm30"); @@ -804,6 +1098,13 @@ int main(void) asm volatile("vshufi32x4 $0x12,%zmm25,%zmm26,%zmm27"); asm volatile("vshufi64x2 $0x12,%zmm28,%zmm29,%zmm30"); + /* AVX-512: Op code 0f 3a 44 */ + + asm volatile("vpclmulqdq $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpclmulqdq $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpclmulqdq $0x12,%zmm1,%zmm2,%zmm3"); + asm volatile("vpclmulqdq $0x12,%zmm25,%zmm26,%zmm27"); + /* AVX-512: Op code 0f 3a 50 */ asm volatile("vrangeps $0x12,%zmm25,%zmm26,%zmm27"); @@ -844,6 +1145,62 @@ int main(void) asm volatile("vfpclassss $0x12,%xmm27,%k5"); asm volatile("vfpclasssd $0x12,%xmm30,%k5"); + /* AVX-512: Op code 0f 3a 70 */ + + asm volatile("vpshldw $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshldw $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshldw $0x12,%zmm1,%zmm2,%zmm3"); + asm volatile("vpshldw $0x12,%zmm25,%zmm26,%zmm27"); + + /* AVX-512: Op code 0f 3a 71 */ + + asm volatile("vpshldd $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshldd $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshldd $0x12,%zmm1,%zmm2,%zmm3"); + asm volatile("vpshldd $0x12,%zmm25,%zmm26,%zmm27"); + + asm volatile("vpshldq $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshldq $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshldq $0x12,%zmm1,%zmm2,%zmm3"); + asm volatile("vpshldq $0x12,%zmm25,%zmm26,%zmm27"); + + /* AVX-512: Op code 0f 3a 72 */ + + asm volatile("vpshrdw $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshrdw $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshrdw $0x12,%zmm1,%zmm2,%zmm3"); + asm volatile("vpshrdw $0x12,%zmm25,%zmm26,%zmm27"); + + /* AVX-512: Op code 0f 3a 73 */ + + asm volatile("vpshrdd $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshrdd $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshrdd $0x12,%zmm1,%zmm2,%zmm3"); + asm volatile("vpshrdd $0x12,%zmm25,%zmm26,%zmm27"); + + asm volatile("vpshrdq $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshrdq $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshrdq $0x12,%zmm1,%zmm2,%zmm3"); + asm volatile("vpshrdq $0x12,%zmm25,%zmm26,%zmm27"); + + /* AVX-512: Op code 0f 3a ce */ + + asm volatile("gf2p8affineqb $0x12,%xmm1,%xmm3"); + + asm volatile("vgf2p8affineqb $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vgf2p8affineqb $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vgf2p8affineqb $0x12,%zmm1,%zmm2,%zmm3"); + asm volatile("vgf2p8affineqb $0x12,%zmm25,%zmm26,%zmm27"); + + /* AVX-512: Op code 0f 3a cf */ + + asm volatile("gf2p8affineinvqb $0x12,%xmm1,%xmm3"); + + asm volatile("vgf2p8affineinvqb $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vgf2p8affineinvqb $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vgf2p8affineinvqb $0x12,%zmm1,%zmm2,%zmm3"); + asm volatile("vgf2p8affineinvqb $0x12,%zmm25,%zmm26,%zmm27"); + /* AVX-512: Op code 0f 72 (Grp13) */ asm volatile("vprord $0x12,%zmm25,%zmm26"); @@ -1946,6 +2303,69 @@ int main(void) asm volatile("vrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}"); asm volatile("vrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}"); + /* AVX-512: Op code 0f 38 50 */ + + asm volatile("vpdpbusd %xmm1, %xmm2, %xmm3"); + asm volatile("vpdpbusd %ymm1, %ymm2, %ymm3"); + asm volatile("vpdpbusd %zmm1, %zmm2, %zmm3"); + asm volatile("vpdpbusd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 51 */ + + asm volatile("vpdpbusds %xmm1, %xmm2, %xmm3"); + asm volatile("vpdpbusds %ymm1, %ymm2, %ymm3"); + asm volatile("vpdpbusds %zmm1, %zmm2, %zmm3"); + asm volatile("vpdpbusds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 52 */ + + asm volatile("vdpbf16ps %xmm1, %xmm2, %xmm3"); + asm volatile("vdpbf16ps %ymm1, %ymm2, %ymm3"); + asm volatile("vdpbf16ps %zmm1, %zmm2, %zmm3"); + asm volatile("vdpbf16ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vpdpwssd %xmm1, %xmm2, %xmm3"); + asm volatile("vpdpwssd %ymm1, %ymm2, %ymm3"); + asm volatile("vpdpwssd %zmm1, %zmm2, %zmm3"); + asm volatile("vpdpwssd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vp4dpwssd (%eax), %zmm0, %zmm4"); + asm volatile("vp4dpwssd 0x12345678(%eax,%ecx,8),%zmm0,%zmm4"); + + /* AVX-512: Op code 0f 38 53 */ + + asm volatile("vpdpwssds %xmm1, %xmm2, %xmm3"); + asm volatile("vpdpwssds %ymm1, %ymm2, %ymm3"); + asm volatile("vpdpwssds %zmm1, %zmm2, %zmm3"); + asm volatile("vpdpwssds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vp4dpwssds (%eax), %zmm0, %zmm4"); + asm volatile("vp4dpwssds 0x12345678(%eax,%ecx,8),%zmm0,%zmm4"); + + /* AVX-512: Op code 0f 38 54 */ + + asm volatile("vpopcntb %xmm1, %xmm2"); + asm volatile("vpopcntb %ymm1, %ymm2"); + asm volatile("vpopcntb %zmm1, %zmm2"); + asm volatile("vpopcntb 0x12345678(%eax,%ecx,8),%zmm2"); + + asm volatile("vpopcntw %xmm1, %xmm2"); + asm volatile("vpopcntw %ymm1, %ymm2"); + asm volatile("vpopcntw %zmm1, %zmm2"); + asm volatile("vpopcntw 0x12345678(%eax,%ecx,8),%zmm2"); + + /* AVX-512: Op code 0f 38 55 */ + + asm volatile("vpopcntd %xmm1, %xmm2"); + asm volatile("vpopcntd %ymm1, %ymm2"); + asm volatile("vpopcntd %zmm1, %zmm2"); + asm volatile("vpopcntd 0x12345678(%eax,%ecx,8),%zmm2"); + + asm volatile("vpopcntq %xmm1, %xmm2"); + asm volatile("vpopcntq %ymm1, %ymm2"); + asm volatile("vpopcntq %zmm1, %zmm2"); + asm volatile("vpopcntq 0x12345678(%eax,%ecx,8),%zmm2"); + /* AVX-512: Op code 0f 38 59 */ asm volatile("vpbroadcastq %xmm4,%xmm6"); @@ -1962,6 +2382,30 @@ int main(void) asm volatile("vbroadcasti32x8 (%ecx),%zmm6"); asm volatile("vbroadcasti64x4 (%ecx),%zmm6"); + /* AVX-512: Op code 0f 38 62 */ + + asm volatile("vpexpandb %xmm1, %xmm2"); + asm volatile("vpexpandb %ymm1, %ymm2"); + asm volatile("vpexpandb %zmm1, %zmm2"); + asm volatile("vpexpandb 0x12345678(%eax,%ecx,8),%zmm2"); + + asm volatile("vpexpandw %xmm1, %xmm2"); + asm volatile("vpexpandw %ymm1, %ymm2"); + asm volatile("vpexpandw %zmm1, %zmm2"); + asm volatile("vpexpandw 0x12345678(%eax,%ecx,8),%zmm2"); + + /* AVX-512: Op code 0f 38 63 */ + + asm volatile("vpcompressb %xmm1, %xmm2"); + asm volatile("vpcompressb %ymm1, %ymm2"); + asm volatile("vpcompressb %zmm1, %zmm2"); + asm volatile("vpcompressb %zmm2,0x12345678(%eax,%ecx,8)"); + + asm volatile("vpcompressw %xmm1, %xmm2"); + asm volatile("vpcompressw %ymm1, %ymm2"); + asm volatile("vpcompressw %zmm1, %zmm2"); + asm volatile("vpcompressw %zmm2,0x12345678(%eax,%ecx,8)"); + /* AVX-512: Op code 0f 38 64 */ asm volatile("vpblendmd %zmm4,%zmm5,%zmm6"); @@ -1977,6 +2421,66 @@ int main(void) asm volatile("vpblendmb %zmm4,%zmm5,%zmm6"); asm volatile("vpblendmw %zmm4,%zmm5,%zmm6"); + /* AVX-512: Op code 0f 38 68 */ + + asm volatile("vp2intersectd %xmm1, %xmm2, %k3"); + asm volatile("vp2intersectd %ymm1, %ymm2, %k3"); + asm volatile("vp2intersectd %zmm1, %zmm2, %k3"); + asm volatile("vp2intersectd 0x12345678(%eax,%ecx,8),%zmm2,%k3"); + + asm volatile("vp2intersectq %xmm1, %xmm2, %k3"); + asm volatile("vp2intersectq %ymm1, %ymm2, %k3"); + asm volatile("vp2intersectq %zmm1, %zmm2, %k3"); + asm volatile("vp2intersectq 0x12345678(%eax,%ecx,8),%zmm2,%k3"); + + /* AVX-512: Op code 0f 38 70 */ + + asm volatile("vpshldvw %xmm1, %xmm2, %xmm3"); + asm volatile("vpshldvw %ymm1, %ymm2, %ymm3"); + asm volatile("vpshldvw %zmm1, %zmm2, %zmm3"); + asm volatile("vpshldvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 71 */ + + asm volatile("vpshldvd %xmm1, %xmm2, %xmm3"); + asm volatile("vpshldvd %ymm1, %ymm2, %ymm3"); + asm volatile("vpshldvd %zmm1, %zmm2, %zmm3"); + asm volatile("vpshldvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vpshldvq %xmm1, %xmm2, %xmm3"); + asm volatile("vpshldvq %ymm1, %ymm2, %ymm3"); + asm volatile("vpshldvq %zmm1, %zmm2, %zmm3"); + asm volatile("vpshldvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 72 */ + + asm volatile("vcvtne2ps2bf16 %xmm1, %xmm2, %xmm3"); + asm volatile("vcvtne2ps2bf16 %ymm1, %ymm2, %ymm3"); + asm volatile("vcvtne2ps2bf16 %zmm1, %zmm2, %zmm3"); + asm volatile("vcvtne2ps2bf16 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vcvtneps2bf16 %xmm1, %xmm2"); + asm volatile("vcvtneps2bf16 %ymm1, %xmm2"); + asm volatile("vcvtneps2bf16 %zmm1, %ymm2"); + asm volatile("vcvtneps2bf16 0x12345678(%eax,%ecx,8),%ymm2"); + + asm volatile("vpshrdvw %xmm1, %xmm2, %xmm3"); + asm volatile("vpshrdvw %ymm1, %ymm2, %ymm3"); + asm volatile("vpshrdvw %zmm1, %zmm2, %zmm3"); + asm volatile("vpshrdvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 73 */ + + asm volatile("vpshrdvd %xmm1, %xmm2, %xmm3"); + asm volatile("vpshrdvd %ymm1, %ymm2, %ymm3"); + asm volatile("vpshrdvd %zmm1, %zmm2, %zmm3"); + asm volatile("vpshrdvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vpshrdvq %xmm1, %xmm2, %xmm3"); + asm volatile("vpshrdvq %ymm1, %ymm2, %ymm3"); + asm volatile("vpshrdvq %zmm1, %zmm2, %zmm3"); + asm volatile("vpshrdvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + /* AVX-512: Op code 0f 38 75 */ asm volatile("vpermi2b %zmm4,%zmm5,%zmm6"); @@ -2048,6 +2552,13 @@ int main(void) asm volatile("vpermb %zmm4,%zmm5,%zmm6"); asm volatile("vpermw %zmm4,%zmm5,%zmm6"); + /* AVX-512: Op code 0f 38 8f */ + + asm volatile("vpshufbitqmb %xmm1, %xmm2, %k3"); + asm volatile("vpshufbitqmb %ymm1, %ymm2, %k3"); + asm volatile("vpshufbitqmb %zmm1, %zmm2, %k3"); + asm volatile("vpshufbitqmb 0x12345678(%eax,%ecx,8),%zmm2,%k3"); + /* AVX-512: Op code 0f 38 90 */ asm volatile("vpgatherdd %xmm2,0x02(%ebp,%xmm7,2),%xmm1"); @@ -2062,6 +2573,32 @@ int main(void) asm volatile("vpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}"); asm volatile("vpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}"); + /* AVX-512: Op code 0f 38 9a */ + + asm volatile("vfmsub132ps %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub132ps %ymm1, %ymm2, %ymm3"); + asm volatile("vfmsub132ps %zmm1, %zmm2, %zmm3"); + asm volatile("vfmsub132ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vfmsub132pd %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub132pd %ymm1, %ymm2, %ymm3"); + asm volatile("vfmsub132pd %zmm1, %zmm2, %zmm3"); + asm volatile("vfmsub132pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("v4fmaddps (%eax), %zmm0, %zmm4"); + asm volatile("v4fmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4"); + + /* AVX-512: Op code 0f 38 9b */ + + asm volatile("vfmsub132ss %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub132ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3"); + + asm volatile("vfmsub132sd %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub132sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3"); + + asm volatile("v4fmaddss (%eax), %xmm0, %xmm4"); + asm volatile("v4fmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4"); + /* AVX-512: Op code 0f 38 a0 */ asm volatile("vpscatterdd %zmm6,0x7b(%ebp,%zmm7,8){%k1}"); @@ -2082,6 +2619,32 @@ int main(void) asm volatile("vscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}"); asm volatile("vscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}"); + /* AVX-512: Op code 0f 38 aa */ + + asm volatile("vfmsub213ps %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub213ps %ymm1, %ymm2, %ymm3"); + asm volatile("vfmsub213ps %zmm1, %zmm2, %zmm3"); + asm volatile("vfmsub213ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("vfmsub213pd %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub213pd %ymm1, %ymm2, %ymm3"); + asm volatile("vfmsub213pd %zmm1, %zmm2, %zmm3"); + asm volatile("vfmsub213pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + asm volatile("v4fnmaddps (%eax), %zmm0, %zmm4"); + asm volatile("v4fnmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4"); + + /* AVX-512: Op code 0f 38 ab */ + + asm volatile("vfmsub213ss %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub213ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3"); + + asm volatile("vfmsub213sd %xmm1, %xmm2, %xmm3"); + asm volatile("vfmsub213sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3"); + + asm volatile("v4fnmaddss (%eax), %xmm0, %xmm4"); + asm volatile("v4fnmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4"); + /* AVX-512: Op code 0f 38 b4 */ asm volatile("vpmadd52luq %zmm4,%zmm5,%zmm6"); @@ -2120,6 +2683,44 @@ int main(void) asm volatile("vrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}"); asm volatile("vrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}"); + /* AVX-512: Op code 0f 38 cf */ + + asm volatile("gf2p8mulb %xmm1, %xmm3"); + asm volatile("gf2p8mulb 0x12345678(%eax,%ecx,8),%xmm3"); + + asm volatile("vgf2p8mulb %xmm1, %xmm2, %xmm3"); + asm volatile("vgf2p8mulb %ymm1, %ymm2, %ymm3"); + asm volatile("vgf2p8mulb %zmm1, %zmm2, %zmm3"); + asm volatile("vgf2p8mulb 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 dc */ + + asm volatile("vaesenc %xmm1, %xmm2, %xmm3"); + asm volatile("vaesenc %ymm1, %ymm2, %ymm3"); + asm volatile("vaesenc %zmm1, %zmm2, %zmm3"); + asm volatile("vaesenc 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 dd */ + + asm volatile("vaesenclast %xmm1, %xmm2, %xmm3"); + asm volatile("vaesenclast %ymm1, %ymm2, %ymm3"); + asm volatile("vaesenclast %zmm1, %zmm2, %zmm3"); + asm volatile("vaesenclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 de */ + + asm volatile("vaesdec %xmm1, %xmm2, %xmm3"); + asm volatile("vaesdec %ymm1, %ymm2, %ymm3"); + asm volatile("vaesdec %zmm1, %zmm2, %zmm3"); + asm volatile("vaesdec 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 38 df */ + + asm volatile("vaesdeclast %xmm1, %xmm2, %xmm3"); + asm volatile("vaesdeclast %ymm1, %ymm2, %ymm3"); + asm volatile("vaesdeclast %zmm1, %zmm2, %zmm3"); + asm volatile("vaesdeclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3"); + /* AVX-512: Op code 0f 3a 03 */ asm volatile("valignd $0x12,%zmm5,%zmm6,%zmm7"); @@ -2239,6 +2840,12 @@ int main(void) asm volatile("vshufi32x4 $0x12,%zmm5,%zmm6,%zmm7"); asm volatile("vshufi64x2 $0x12,%zmm5,%zmm6,%zmm7"); + /* AVX-512: Op code 0f 3a 44 */ + + asm volatile("vpclmulqdq $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpclmulqdq $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpclmulqdq $0x12,%zmm1,%zmm2,%zmm3"); + /* AVX-512: Op code 0f 3a 50 */ asm volatile("vrangeps $0x12,%zmm5,%zmm6,%zmm7"); @@ -2279,6 +2886,54 @@ int main(void) asm volatile("vfpclassss $0x12,%xmm7,%k5"); asm volatile("vfpclasssd $0x12,%xmm7,%k5"); + /* AVX-512: Op code 0f 3a 70 */ + + asm volatile("vpshldw $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshldw $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshldw $0x12,%zmm1,%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 3a 71 */ + + asm volatile("vpshldd $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshldd $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshldd $0x12,%zmm1,%zmm2,%zmm3"); + + asm volatile("vpshldq $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshldq $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshldq $0x12,%zmm1,%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 3a 72 */ + + asm volatile("vpshrdw $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshrdw $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshrdw $0x12,%zmm1,%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 3a 73 */ + + asm volatile("vpshrdd $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshrdd $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshrdd $0x12,%zmm1,%zmm2,%zmm3"); + + asm volatile("vpshrdq $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vpshrdq $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vpshrdq $0x12,%zmm1,%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 3a ce */ + + asm volatile("gf2p8affineqb $0x12,%xmm1,%xmm3"); + + asm volatile("vgf2p8affineqb $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vgf2p8affineqb $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vgf2p8affineqb $0x12,%zmm1,%zmm2,%zmm3"); + + /* AVX-512: Op code 0f 3a cf */ + + asm volatile("gf2p8affineinvqb $0x12,%xmm1,%xmm3"); + + asm volatile("vgf2p8affineinvqb $0x12,%xmm1,%xmm2,%xmm3"); + asm volatile("vgf2p8affineinvqb $0x12,%ymm1,%ymm2,%ymm3"); + asm volatile("vgf2p8affineinvqb $0x12,%zmm1,%zmm2,%zmm3"); + /* AVX-512: Op code 0f 72 (Grp13) */ asm volatile("vprord $0x12,%zmm5,%zmm6"); diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index d1044df7c0d7..ac45015cc6ba 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -18,8 +18,7 @@ int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, { int rc = 0; struct map *pos; - struct map_groups *kmaps = &machine->kmaps; - struct maps *maps = &kmaps->maps; + struct maps *kmaps = &machine->kmaps; union perf_event *event = zalloc(sizeof(event->mmap) + machine->id_hdr_size); @@ -29,7 +28,7 @@ int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, return -1; } - maps__for_each_entry(maps, pos) { + maps__for_each_entry(kmaps, pos) { struct kmap *kmap; size_t size; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 376dbf10ad64..f8b6ae557d8b 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -547,8 +547,8 @@ static int64_t block_cycles_diff_cmp(struct hist_entry *left, if (!pairs_left && !pairs_right) return 0; - l = labs(left->diff.cycles); - r = labs(right->diff.cycles); + l = llabs(left->diff.cycles); + r = llabs(right->diff.cycles); return r - l; } @@ -646,7 +646,7 @@ static void compute_cycles_diff(struct hist_entry *he, if (i >= he->block_info->num || i >= NUM_SPARKS) break; - val = labs(pair->block_info->cycles_spark[i] - + val = llabs(pair->block_info->cycles_spark[i] - he->block_info->cycles_spark[i]); update_spark_value(pair->diff.svals, NUM_SPARKS, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ab0f6e516b03..830d563de889 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -780,11 +780,6 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) return printed; } -static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp) -{ - return maps__fprintf_task(&mg->maps, indent, fp); -} - static void task__print_level(struct task *task, FILE *fp, int level) { struct thread *thread = task->thread; @@ -795,7 +790,7 @@ static void task__print_level(struct task *task, FILE *fp, int level) fprintf(fp, "%s\n", thread__comm_str(thread)); - map_groups__fprintf_task(thread->mg, comm_indent, fp); + maps__fprintf_task(thread->maps, comm_indent, fp); if (!list_empty(&task->children)) { list_for_each_entry(child, &task->children, list) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f86c5cce5b2c..e2406b291c1c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -448,7 +448,7 @@ static int perf_evsel__check_attr(struct evsel *evsel, "selected. Hence, no address to lookup the source line number.\n"); return -EINVAL; } - if (PRINT_FIELD(BRSTACKINSN) && + if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set && !(perf_evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" @@ -932,6 +932,48 @@ static int grab_bb(u8 *buffer, u64 start, u64 end, return len; } +static int map__fprintf_srccode(struct map *map, u64 addr, FILE *fp, struct srccode_state *state) +{ + char *srcfile; + int ret = 0; + unsigned line; + int len; + char *srccode; + + if (!map || !map->dso) + return 0; + srcfile = get_srcline_split(map->dso, + map__rip_2objdump(map, addr), + &line); + if (!srcfile) + return 0; + + /* Avoid redundant printing */ + if (state && + state->srcfile && + !strcmp(state->srcfile, srcfile) && + state->line == line) { + free(srcfile); + return 0; + } + + srccode = find_sourceline(srcfile, line, &len); + if (!srccode) + goto out_free_line; + + ret = fprintf(fp, "|%-8d %.*s", line, len, srccode); + + if (state) { + state->srcfile = srcfile; + state->line = line; + } + return ret; + +out_free_line: + free(srcfile); + return ret; +} + static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr) { struct addr_location al; @@ -1084,7 +1126,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, insn++; } } - if (off != (unsigned)len) + if (off != end - start) printed += fprintf(fp, "\tmismatch of LBR data and executable\n"); } diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index e72accefd669..a3c595fba943 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -27,7 +27,7 @@ perf-y += wp.o perf-y += task-exit.o perf-y += sw-clock.o perf-y += mmap-thread-lookup.o -perf-y += thread-mg-share.o +perf-y += thread-maps-share.o perf-y += switch-tracking.o perf-y += keep-tracking.o perf-y += code-reading.o @@ -52,7 +52,7 @@ perf-y += perf-hooks.o perf-y += clang.o perf-y += unit_number__scnprintf.o perf-y += mem2node.o -perf-y += map_groups.o +perf-y += maps.o perf-y += time-utils-test.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 8b286e9b7549..7115aa32a51e 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -166,8 +166,8 @@ static struct test generic_tests[] = { .func = test__mmap_thread_lookup, }, { - .desc = "Share thread mg", - .func = test__thread_mg_share, + .desc = "Share thread maps", + .func = test__thread_maps_share, }, { .desc = "Sort output of hist entries", @@ -297,8 +297,8 @@ static struct test generic_tests[] = { .func = test__time_utils, }, { - .desc = "map_groups__merge_in", - .func = test__map_groups__merge_in, + .desc = "maps__merge_in", + .func = test__maps__merge_in, }, { .func = NULL, diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 1f017e1b2a55..6fe221d31f07 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -276,7 +276,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, len = al.map->end - addr; /* Read the object code using perf */ - ret_len = dso__data_read_offset(al.map->dso, thread->mg->machine, + ret_len = dso__data_read_offset(al.map->dso, thread->maps->machine, al.addr, buf1, len); if (ret_len != len) { pr_debug("dso__data_read_offset failed\n"); diff --git a/tools/perf/tests/map_groups.c b/tools/perf/tests/maps.c index 6b9f1cdcbe5b..edcbc70ff9d6 100644 --- a/tools/perf/tests/map_groups.c +++ b/tools/perf/tests/maps.c @@ -3,7 +3,7 @@ #include <linux/kernel.h> #include "tests.h" #include "map.h" -#include "map_groups.h" +#include "maps.h" #include "dso.h" #include "debug.h" @@ -13,12 +13,12 @@ struct map_def { u64 end; }; -static int check_maps(struct map_def *merged, unsigned int size, struct map_groups *mg) +static int check_maps(struct map_def *merged, unsigned int size, struct maps *maps) { struct map *map; unsigned int i = 0; - map_groups__for_each_entry(mg, map) { + maps__for_each_entry(maps, map) { if (i > 0) TEST_ASSERT_VAL("less maps expected", (map && i < size) || (!map && i == size)); @@ -33,9 +33,9 @@ static int check_maps(struct map_def *merged, unsigned int size, struct map_grou return TEST_OK; } -int test__map_groups__merge_in(struct test *t __maybe_unused, int subtest __maybe_unused) +int test__maps__merge_in(struct test *t __maybe_unused, int subtest __maybe_unused) { - struct map_groups mg; + struct maps maps; unsigned int i; struct map_def bpf_progs[] = { { "bpf_prog_1", 200, 300 }, @@ -64,7 +64,7 @@ int test__map_groups__merge_in(struct test *t __maybe_unused, int subtest __mayb struct map *map_kcore1, *map_kcore2, *map_kcore3; int ret; - map_groups__init(&mg, NULL); + maps__init(&maps, NULL); for (i = 0; i < ARRAY_SIZE(bpf_progs); i++) { struct map *map; @@ -74,7 +74,7 @@ int test__map_groups__merge_in(struct test *t __maybe_unused, int subtest __mayb map->start = bpf_progs[i].start; map->end = bpf_progs[i].end; - map_groups__insert(&mg, map); + maps__insert(&maps, map); map__put(map); } @@ -99,22 +99,22 @@ int test__map_groups__merge_in(struct test *t __maybe_unused, int subtest __mayb map_kcore3->start = 880; map_kcore3->end = 1100; - ret = map_groups__merge_in(&mg, map_kcore1); + ret = maps__merge_in(&maps, map_kcore1); TEST_ASSERT_VAL("failed to merge map", !ret); - ret = check_maps(merged12, ARRAY_SIZE(merged12), &mg); + ret = check_maps(merged12, ARRAY_SIZE(merged12), &maps); TEST_ASSERT_VAL("merge check failed", !ret); - ret = map_groups__merge_in(&mg, map_kcore2); + ret = maps__merge_in(&maps, map_kcore2); TEST_ASSERT_VAL("failed to merge map", !ret); - ret = check_maps(merged12, ARRAY_SIZE(merged12), &mg); + ret = check_maps(merged12, ARRAY_SIZE(merged12), &maps); TEST_ASSERT_VAL("merge check failed", !ret); - ret = map_groups__merge_in(&mg, map_kcore3); + ret = maps__merge_in(&maps, map_kcore3); TEST_ASSERT_VAL("failed to merge map", !ret); - ret = check_maps(merged3, ARRAY_SIZE(merged3), &mg); + ret = check_maps(merged3, ARRAY_SIZE(merged3), &maps); TEST_ASSERT_VAL("merge check failed", !ret); return TEST_OK; } diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 9837b6e93023..25aea387e2bf 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -73,7 +73,7 @@ int test__dwarf_unwind(struct test *test, int subtest); int test__expr(struct test *test, int subtest); int test__hists_filter(struct test *test, int subtest); int test__mmap_thread_lookup(struct test *test, int subtest); -int test__thread_mg_share(struct test *test, int subtest); +int test__thread_maps_share(struct test *test, int subtest); int test__hists_output(struct test *test, int subtest); int test__hists_cumulate(struct test *test, int subtest); int test__switch_tracking(struct test *test, int subtest); @@ -107,7 +107,7 @@ const char *test__clang_subtest_get_desc(int subtest); int test__clang_subtest_get_nr(void); int test__unit_number__scnprint(struct test *test, int subtest); int test__mem2node(struct test *t, int subtest); -int test__map_groups__merge_in(struct test *t, int subtest); +int test__maps__merge_in(struct test *t, int subtest); int test__time_utils(struct test *t, int subtest); bool test__bp_signal_is_supported(void); diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-maps-share.c index cbac71716dec..9371484973f2 100644 --- a/tools/perf/tests/thread-mg-share.c +++ b/tools/perf/tests/thread-maps-share.c @@ -4,7 +4,7 @@ #include "thread.h" #include "debug.h" -int test__thread_mg_share(struct test *test __maybe_unused, int subtest __maybe_unused) +int test__thread_maps_share(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machines machines; struct machine *machine; @@ -12,16 +12,16 @@ int test__thread_mg_share(struct test *test __maybe_unused, int subtest __maybe_ /* thread group */ struct thread *leader; struct thread *t1, *t2, *t3; - struct map_groups *mg; + struct maps *maps; /* other process */ struct thread *other, *other_leader; - struct map_groups *other_mg; + struct maps *other_maps; /* * This test create 2 processes abstractions (struct thread) * with several threads and checks they properly share and - * maintain map groups info (struct map_groups). + * maintain maps info (struct maps). * * thread group (pid: 0, tids: 0, 1, 2, 3) * other group (pid: 4, tids: 4, 5) @@ -42,17 +42,17 @@ int test__thread_mg_share(struct test *test __maybe_unused, int subtest __maybe_ TEST_ASSERT_VAL("failed to create threads", leader && t1 && t2 && t3 && other); - mg = leader->mg; - TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 4); + maps = leader->maps; + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&maps->refcnt), 4); - /* test the map groups pointer is shared */ - TEST_ASSERT_VAL("map groups don't match", mg == t1->mg); - TEST_ASSERT_VAL("map groups don't match", mg == t2->mg); - TEST_ASSERT_VAL("map groups don't match", mg == t3->mg); + /* test the maps pointer is shared */ + TEST_ASSERT_VAL("maps don't match", maps == t1->maps); + TEST_ASSERT_VAL("maps don't match", maps == t2->maps); + TEST_ASSERT_VAL("maps don't match", maps == t3->maps); /* * Verify the other leader was created by previous call. - * It should have shared map groups with no change in + * It should have shared maps with no change in * refcnt. */ other_leader = machine__find_thread(machine, 4, 4); @@ -70,26 +70,26 @@ int test__thread_mg_share(struct test *test __maybe_unused, int subtest __maybe_ machine__remove_thread(machine, other); machine__remove_thread(machine, other_leader); - other_mg = other->mg; - TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 2); + other_maps = other->maps; + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_maps->refcnt), 2); - TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg); + TEST_ASSERT_VAL("maps don't match", other_maps == other_leader->maps); /* release thread group */ thread__put(leader); - TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 3); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&maps->refcnt), 3); thread__put(t1); - TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 2); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&maps->refcnt), 2); thread__put(t2); - TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 1); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&maps->refcnt), 1); thread__put(t3); /* release other group */ thread__put(other_leader); - TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 1); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_maps->refcnt), 1); thread__put(other); diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index ff649078da9a..193b7c91b4e2 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -190,10 +190,9 @@ next_pair: * so use the short name, less descriptive but the same ("[kernel]" in * both cases. */ - pair = map_groups__find_by_name(&kallsyms.kmaps, - (map->dso->kernel ? - map->dso->short_name : - map->dso->name)); + pair = maps__find_by_name(&kallsyms.kmaps, (map->dso->kernel ? + map->dso->short_name : + map->dso->name)); if (pair) { pair->priv = 1; } else { @@ -213,7 +212,7 @@ next_pair: mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start); mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end); - pair = map_groups__find(&kallsyms.kmaps, mem_start); + pair = maps__find(&kallsyms.kmaps, mem_start); if (pair == NULL || pair->priv) continue; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 992705c78bd0..badbddbb30f8 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -430,7 +430,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, return true; } - target_ms.mg = ms->mg; + target_ms.maps = ms->maps; target_ms.map = ms->map; target_ms.sym = dl->ops.target.sym; pthread_mutex_unlock(¬es->lock); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 132056c7d5b7..2ab2af4d4849 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -8,7 +8,7 @@ #include "../../util/event.h" #include "../../util/hist.h" #include "../../util/map.h" -#include "../../util/map_groups.h" +#include "../../util/maps.h" #include "../../util/symbol.h" #include "../../util/sort.h" #include "../../util/evsel.h" @@ -885,7 +885,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, } if (h->ms.map == NULL && verbose > 1) { - map_groups__fprintf(h->thread->mg, fp); + maps__fprintf(h->thread->maps, fp); fprintf(fp, "%.10s end\n", graph_dotted_line); } } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index b8e05a147b2b..07da6c790b63 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -49,6 +49,7 @@ perf-y += header.o perf-y += callchain.o perf-y += values.o perf-y += debug.o +perf-y += fncache.o perf-y += machine.o perf-y += map.o perf-y += pstack.o @@ -76,6 +77,7 @@ perf-y += sort.o perf-y += hist.o perf-y += util.o perf-y += cpumap.o +perf-y += affinity.o perf-y += cputopo.o perf-y += cgroup.o perf-y += target.o diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c new file mode 100644 index 000000000000..a5e31f826828 --- /dev/null +++ b/tools/perf/util/affinity.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Manage affinity to optimize IPIs inside the kernel perf API. */ +#define _GNU_SOURCE 1 +#include <sched.h> +#include <stdlib.h> +#include <linux/bitmap.h> +#include <linux/zalloc.h> +#include "perf.h" +#include "cpumap.h" +#include "affinity.h" + +static int get_cpu_set_size(void) +{ + int sz = cpu__max_cpu() + 8 - 1; + /* + * sched_getaffinity doesn't like masks smaller than the kernel. + * Hopefully that's big enough. + */ + if (sz < 4096) + sz = 4096; + return sz / 8; +} + +int affinity__setup(struct affinity *a) +{ + int cpu_set_size = get_cpu_set_size(); + + a->orig_cpus = bitmap_alloc(cpu_set_size * 8); + if (!a->orig_cpus) + return -1; + sched_getaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus); + a->sched_cpus = bitmap_alloc(cpu_set_size * 8); + if (!a->sched_cpus) { + zfree(&a->orig_cpus); + return -1; + } + bitmap_zero((unsigned long *)a->sched_cpus, cpu_set_size); + a->changed = false; + return 0; +} + +/* + * perf_event_open does an IPI internally to the target CPU. + * It is more efficient to change perf's affinity to the target + * CPU and then set up all events on that CPU, so we amortize + * CPU communication. + */ +void affinity__set(struct affinity *a, int cpu) +{ + int cpu_set_size = get_cpu_set_size(); + + if (cpu == -1) + return; + a->changed = true; + set_bit(cpu, a->sched_cpus); + /* + * We ignore errors because affinity is just an optimization. + * This could happen for example with isolated CPUs or cpusets. + * In this case the IPIs inside the kernel's perf API still work. + */ + sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus); + clear_bit(cpu, a->sched_cpus); +} + +void affinity__cleanup(struct affinity *a) +{ + int cpu_set_size = get_cpu_set_size(); + + if (a->changed) + sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus); + zfree(&a->sched_cpus); + zfree(&a->orig_cpus); +} diff --git a/tools/perf/util/affinity.h b/tools/perf/util/affinity.h new file mode 100644 index 000000000000..0ad6a18ef20c --- /dev/null +++ b/tools/perf/util/affinity.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef PERF_AFFINITY_H +#define PERF_AFFINITY_H 1 + +#include <stdbool.h> + +struct affinity { + unsigned long *orig_cpus; + unsigned long *sched_cpus; + bool changed; +}; + +void affinity__cleanup(struct affinity *a); +void affinity__set(struct affinity *a, int cpu); +int affinity__setup(struct affinity *a); + +#endif // PERF_AFFINITY_H diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5ea9a4534848..f5e77ed237e8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -23,7 +23,7 @@ #include "dso.h" #include "env.h" #include "map.h" -#include "map_groups.h" +#include "maps.h" #include "symbol.h" #include "srcline.h" #include "units.h" @@ -271,7 +271,7 @@ static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_s find_target: target.addr = map__objdump_2mem(map, ops->target.addr); - if (map_groups__find_ams(ms->mg, &target) == 0 && + if (maps__find_ams(ms->maps, &target) == 0 && map__rip_2objdump(target.ms.map, map->map_ip(target.ms.map, target.addr)) == ops->target.addr) ops->target.sym = target.ms.sym; @@ -391,7 +391,7 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s * Actual navigation will come next, with further understanding of how * the symbol searching and disassembly should be done. */ - if (map_groups__find_ams(ms->mg, &target) == 0 && + if (maps__find_ams(ms->maps, &target) == 0 && map__rip_2objdump(target.ms.map, map->map_ip(target.ms.map, target.addr)) == ops->target.addr) ops->target.sym = target.ms.sym; @@ -1545,7 +1545,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, .ms = { .map = map, }, }; - if (!map_groups__find_ams(args->ms.mg, &target) && + if (!maps__find_ams(args->ms.maps, &target) && target.ms.sym->start == target.al_addr) dl->ops.target.sym = target.ms.sym; } diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index f7ed5d122e22..a3207d900339 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -52,9 +52,7 @@ static int machine__process_bpf_event_load(struct machine *machine, for (i = 0; i < info_linear->info.nr_jited_ksyms; i++) { u64 *addrs = (u64 *)(uintptr_t)(info_linear->info.jited_ksyms); u64 addr = addrs[i]; - struct map *map; - - map = map_groups__find(&machine->kmaps, addr); + struct map *map = maps__find(&machine->kmaps, addr); if (map) { map->dso->binary_type = DSO_BINARY_TYPE__BPF_PROG_INFO; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 5cefce33b66b..818aa4efd386 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1106,7 +1106,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, bool hide_unresolved) { - al->mg = node->ms.mg; + al->maps = node->ms.maps; al->map = node->ms.map; al->sym = node->ms.sym; al->srcline = node->srcline; @@ -1119,8 +1119,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * goto out; } - if (al->mg == &al->mg->machine->kmaps) { - if (machine__is_host(al->mg->machine)) { + if (al->maps == &al->maps->machine->kmaps) { + if (machine__is_host(al->maps->machine)) { al->cpumode = PERF_RECORD_MISC_KERNEL; al->level = 'k'; } else { @@ -1128,7 +1128,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * al->level = 'g'; } } else { - if (machine__is_host(al->mg->machine)) { + if (machine__is_host(al->maps->machine)) { al->cpumode = PERF_RECORD_MISC_USER; al->level = '.'; } else if (perf_guest) { diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f5f855fff412..5471045ebf5c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2569,7 +2569,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (err) goto err_delete_thread; - if (thread__init_map_groups(etm->unknown_thread, etm->machine)) { + if (thread__init_maps(etm->unknown_thread, etm->machine)) { err = -ENOMEM; goto err_delete_thread; } diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index d029faf9fc9f..db7447154622 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -181,7 +181,7 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, if (al->map) { struct dso *dso = al->map->dso; - err = db_export__dso(dbe, dso, al->mg->machine); + err = db_export__dso(dbe, dso, al->maps->machine); if (err) return err; *dso_db_id = dso->db_id; @@ -251,7 +251,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, */ al.sym = node->ms.sym; al.map = node->ms.map; - al.mg = thread->mg; + al.maps = thread->maps; al.addr = node->ip; if (al.map && !al.sym) @@ -360,13 +360,13 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, if (err) return err; - err = db_export__machine(dbe, al->mg->machine); + err = db_export__machine(dbe, al->maps->machine); if (err) return err; - main_thread = thread__main_thread(al->mg->machine, thread); + main_thread = thread__main_thread(al->maps->machine, thread); - err = db_export__threads(dbe, thread, main_thread, al->mg->machine, &comm); + err = db_export__threads(dbe, thread, main_thread, al->maps->machine, &comm); if (err) goto out_put; @@ -380,7 +380,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, goto out_put; if (dbe->cpr) { - struct call_path *cp = call_path_from_sample(dbe, al->mg->machine, + struct call_path *cp = call_path_from_sample(dbe, al->maps->machine, thread, sample, evsel); if (cp) { diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 0141b26bae47..c5447ff516a2 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -457,11 +457,11 @@ int perf_event__process(struct perf_tool *tool __maybe_unused, struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { - struct map_groups *mg = thread->mg; - struct machine *machine = mg->machine; + struct maps *maps = thread->maps; + struct machine *machine = maps->machine; bool load_map = false; - al->mg = mg; + al->maps = maps; al->thread = thread; al->addr = addr; al->cpumode = cpumode; @@ -474,13 +474,13 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; - al->mg = mg = &machine->kmaps; + al->maps = maps = &machine->kmaps; load_map = true; } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { al->level = '.'; } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { al->level = 'g'; - al->mg = mg = &machine->kmaps; + al->maps = maps = &machine->kmaps; load_map = true; } else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) { al->level = 'u'; @@ -500,7 +500,7 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return NULL; } - al->map = map_groups__find(mg, al->addr); + al->map = maps__find(maps, al->addr); if (al->map != NULL) { /* * Kernel maps might be changed when loading symbols so loading @@ -523,7 +523,7 @@ struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { struct map *map = thread__find_map(thread, cpumode, addr, al); - struct machine *machine = thread->mg->machine; + struct machine *machine = thread->maps->machine; u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr); if (map || addr_cpumode == cpumode) diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c new file mode 100644 index 000000000000..6225cbc52310 --- /dev/null +++ b/tools/perf/util/fncache.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Manage a cache of file names' existence */ +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <linux/list.h> +#include "fncache.h" + +struct fncache { + struct hlist_node nd; + bool res; + char name[]; +}; + +#define FNHSIZE 61 + +static struct hlist_head fncache_hash[FNHSIZE]; + +unsigned shash(const unsigned char *s) +{ + unsigned h = 0; + while (*s) + h = 65599 * h + *s++; + return h ^ (h >> 16); +} + +static bool lookup_fncache(const char *name, bool *res) +{ + int h = shash((const unsigned char *)name) % FNHSIZE; + struct fncache *n; + + hlist_for_each_entry(n, &fncache_hash[h], nd) { + if (!strcmp(n->name, name)) { + *res = n->res; + return true; + } + } + return false; +} + +static void update_fncache(const char *name, bool res) +{ + struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1); + int h = shash((const unsigned char *)name) % FNHSIZE; + + if (!n) + return; + strcpy(n->name, name); + n->res = res; + hlist_add_head(&n->nd, &fncache_hash[h]); +} + +/* No LRU, only use when bounded in some other way. */ +bool file_available(const char *name) +{ + bool res; + + if (lookup_fncache(name, &res)) + return res; + res = access(name, R_OK) == 0; + update_fncache(name, res); + return res; +} diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h new file mode 100644 index 000000000000..fe020beaefb1 --- /dev/null +++ b/tools/perf/util/fncache.h @@ -0,0 +1,7 @@ +#ifndef _FCACHE_H +#define _FCACHE_H 1 + +unsigned shash(const unsigned char *s); +bool file_available(const char *name); + +#endif diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0a8d72ae93ca..ca5a8f4d007e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -692,7 +692,7 @@ __hists__add_entry(struct hists *hists, .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0, }, .ms = { - .mg = al->mg, + .maps = al->maps, .map = al->map, .sym = al->sym, }, @@ -760,7 +760,7 @@ struct hist_entry *hists__add_entry_block(struct hists *hists, .block_info = block_info, .hists = hists, .ms = { - .mg = al->mg, + .maps = al->maps, .map = al->map, .sym = al->sym, }, @@ -895,7 +895,7 @@ iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) if (iter->curr >= iter->total) return 0; - al->mg = bi[i].to.ms.mg; + al->maps = bi[i].to.ms.maps; al->map = bi[i].to.ms.map; al->sym = bi[i].to.ms.sym; al->addr = bi[i].to.addr; @@ -1072,7 +1072,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, .comm = thread__comm(al->thread), .ip = al->addr, .ms = { - .mg = al->mg, + .maps = al->maps, .map = al->map, .sym = al->sym, }, diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 409afc611be9..33cf8928cf05 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3296,7 +3296,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, err = thread__set_comm(pt->unknown_thread, "unknown", 0); if (err) goto err_delete_thread; - if (thread__init_map_groups(pt->unknown_thread, pt->machine)) { + if (thread__init_maps(pt->unknown_thread, pt->machine)) { err = -ENOMEM; goto err_delete_thread; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index e2a312c649f0..416d174d223c 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -86,7 +86,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) int err = -ENOMEM; memset(machine, 0, sizeof(*machine)); - map_groups__init(&machine->kmaps, machine); + maps__init(&machine->kmaps, machine); RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->dsos); @@ -217,7 +217,7 @@ void machine__exit(struct machine *machine) return; machine__destroy_kernel_maps(machine); - map_groups__exit(&machine->kmaps); + maps__exit(&machine->kmaps); dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); @@ -412,28 +412,28 @@ static void machine__update_thread_pid(struct machine *machine, if (!leader) goto out_err; - if (!leader->mg) - leader->mg = map_groups__new(machine); + if (!leader->maps) + leader->maps = maps__new(machine); - if (!leader->mg) + if (!leader->maps) goto out_err; - if (th->mg == leader->mg) + if (th->maps == leader->maps) return; - if (th->mg) { + if (th->maps) { /* * Maps are created from MMAP events which provide the pid and * tid. Consequently there never should be any maps on a thread * with an unknown pid. Just print an error if there are. */ - if (!map_groups__empty(th->mg)) + if (!maps__empty(th->maps)) pr_err("Discarding thread maps for %d:%d\n", th->pid_, th->tid); - map_groups__put(th->mg); + maps__put(th->maps); } - th->mg = map_groups__get(leader->mg); + th->maps = maps__get(leader->maps); out_put: thread__put(leader); return; @@ -536,14 +536,13 @@ static struct thread *____machine__findnew_thread(struct machine *machine, rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost); /* - * We have to initialize map_groups separately - * after rb tree is updated. + * We have to initialize maps separately after rb tree is updated. * * The reason is that we call machine__findnew_thread - * within thread__init_map_groups to find the thread + * within thread__init_maps to find the thread * leader and that would screwed the rb tree. */ - if (thread__init_map_groups(th, machine)) { + if (thread__init_maps(th, machine)) { rb_erase_cached(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); thread__put(th); @@ -724,9 +723,8 @@ static int machine__process_ksymbol_register(struct machine *machine, struct perf_sample *sample __maybe_unused) { struct symbol *sym; - struct map *map; + struct map *map = maps__find(&machine->kmaps, event->ksymbol.addr); - map = map_groups__find(&machine->kmaps, event->ksymbol.addr); if (!map) { map = dso__new_map(event->ksymbol.name); if (!map) @@ -734,7 +732,7 @@ static int machine__process_ksymbol_register(struct machine *machine, map->start = event->ksymbol.addr; map->end = map->start + event->ksymbol.len; - map_groups__insert(&machine->kmaps, map); + maps__insert(&machine->kmaps, map); } sym = symbol__new(map->map_ip(map, map->start), @@ -752,9 +750,9 @@ static int machine__process_ksymbol_unregister(struct machine *machine, { struct map *map; - map = map_groups__find(&machine->kmaps, event->ksymbol.addr); + map = maps__find(&machine->kmaps, event->ksymbol.addr); if (map) - map_groups__remove(&machine->kmaps, map); + maps__remove(&machine->kmaps, map); return 0; } @@ -790,9 +788,9 @@ static struct map *machine__addnew_module_map(struct machine *machine, u64 start if (map == NULL) goto out; - map_groups__insert(&machine->kmaps, map); + maps__insert(&machine->kmaps, map); - /* Put the map here because map_groups__insert alread got it */ + /* Put the map here because maps__insert alread got it */ map__put(map); out: /* put the dso here, corresponding to machine__findnew_module_dso */ @@ -977,7 +975,7 @@ int machine__create_extra_kernel_map(struct machine *machine, kmap->kmaps = &machine->kmaps; strlcpy(kmap->name, xm->name, KMAP_NAME_LEN); - map_groups__insert(&machine->kmaps, map); + maps__insert(&machine->kmaps, map); pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n", kmap->name, map->start, map->end); @@ -1022,8 +1020,7 @@ static u64 find_entry_trampoline(struct dso *dso) int machine__map_x86_64_entry_trampolines(struct machine *machine, struct dso *kernel) { - struct map_groups *kmaps = &machine->kmaps; - struct maps *maps = &kmaps->maps; + struct maps *kmaps = &machine->kmaps; int nr_cpus_avail, cpu; bool found = false; struct map *map; @@ -1033,14 +1030,14 @@ int machine__map_x86_64_entry_trampolines(struct machine *machine, * In the vmlinux case, pgoff is a virtual address which must now be * mapped to a vmlinux offset. */ - maps__for_each_entry(maps, map) { + maps__for_each_entry(kmaps, map) { struct kmap *kmap = __map__kmap(map); struct map *dest_map; if (!kmap || !is_entry_trampoline(kmap->name)) continue; - dest_map = map_groups__find(kmaps, map->pgoff); + dest_map = maps__find(kmaps, map->pgoff); if (dest_map != map) map->pgoff = dest_map->map_ip(dest_map, map->pgoff); found = true; @@ -1102,7 +1099,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) return -1; kmap->kmaps = &machine->kmaps; - map_groups__insert(&machine->kmaps, map); + maps__insert(&machine->kmaps, map); return 0; } @@ -1116,7 +1113,7 @@ void machine__destroy_kernel_maps(struct machine *machine) return; kmap = map__kmap(map); - map_groups__remove(&machine->kmaps, map); + maps__remove(&machine->kmaps, map); if (kmap && kmap->ref_reloc_sym) { zfree((char **)&kmap->ref_reloc_sym->name); zfree(&kmap->ref_reloc_sym); @@ -1211,7 +1208,7 @@ int machine__load_kallsyms(struct machine *machine, const char *filename) * kernel, with modules between them, fixup the end of all * sections. */ - map_groups__fixup_end(&machine->kmaps); + maps__fixup_end(&machine->kmaps); } return ret; @@ -1262,11 +1259,10 @@ static bool is_kmod_dso(struct dso *dso) dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE; } -static int map_groups__set_module_path(struct map_groups *mg, const char *path, - struct kmod_path *m) +static int maps__set_module_path(struct maps *maps, const char *path, struct kmod_path *m) { char *long_name; - struct map *map = map_groups__find_by_name(mg, m->name); + struct map *map = maps__find_by_name(maps, m->name); if (map == NULL) return 0; @@ -1290,8 +1286,7 @@ static int map_groups__set_module_path(struct map_groups *mg, const char *path, return 0; } -static int map_groups__set_modules_path_dir(struct map_groups *mg, - const char *dir_name, int depth) +static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, int depth) { struct dirent *dent; DIR *dir = opendir(dir_name); @@ -1323,8 +1318,7 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, continue; } - ret = map_groups__set_modules_path_dir(mg, path, - depth + 1); + ret = maps__set_modules_path_dir(maps, path, depth + 1); if (ret < 0) goto out; } else { @@ -1335,7 +1329,7 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, goto out; if (m.kmod) - ret = map_groups__set_module_path(mg, path, &m); + ret = maps__set_module_path(maps, path, &m); zfree(&m.name); @@ -1362,7 +1356,7 @@ static int machine__set_modules_path(struct machine *machine) machine->root_dir, version); free(version); - return map_groups__set_modules_path_dir(&machine->kmaps, modules_path, 0); + return maps__set_modules_path_dir(&machine->kmaps, modules_path, 0); } int __weak arch__fix_module_text_start(u64 *start __maybe_unused, u64 *size __maybe_unused, @@ -1435,11 +1429,11 @@ static void machine__update_kernel_mmap(struct machine *machine, struct map *map = machine__kernel_map(machine); map__get(map); - map_groups__remove(&machine->kmaps, map); + maps__remove(&machine->kmaps, map); machine__set_kernel_mmap(machine, start, end); - map_groups__insert(&machine->kmaps, map); + maps__insert(&machine->kmaps, map); map__put(map); } @@ -1940,7 +1934,7 @@ static void ip__resolve_ams(struct thread *thread, ams->addr = ip; ams->al_addr = al.addr; - ams->ms.mg = al.mg; + ams->ms.maps = al.maps; ams->ms.sym = al.sym; ams->ms.map = al.map; ams->phys_addr = 0; @@ -1958,7 +1952,7 @@ static void ip__resolve_data(struct thread *thread, ams->addr = addr; ams->al_addr = al.addr; - ams->ms.mg = al.mg; + ams->ms.maps = al.maps; ams->ms.sym = al.sym; ams->ms.map = al.map; ams->phys_addr = phys_addr; @@ -2075,7 +2069,7 @@ static int add_callchain_ip(struct thread *thread, iter_cycles = iter->cycles; } - ms.mg = al.mg; + ms.maps = al.maps; ms.map = al.map; ms.sym = al.sym; srcline = callchain_srcline(&ms, al.addr); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 499be204830d..be0a930eca89 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -4,7 +4,7 @@ #include <sys/types.h> #include <linux/rbtree.h> -#include "map_groups.h" +#include "maps.h" #include "dsos.h" #include "rwsem.h" @@ -51,7 +51,7 @@ struct machine { struct vdso_info *vdso_info; struct perf_env *env; struct dsos dsos; - struct map_groups kmaps; + struct maps kmaps; struct map *vmlinux_map; u64 kernel_start; pid_t *current_tid; @@ -83,7 +83,7 @@ struct map *machine__kernel_map(struct machine *machine) static inline struct maps *machine__kernel_maps(struct machine *machine) { - return &machine->kmaps.maps; + return &machine->kmaps; } int machine__get_kernel_start(struct machine *machine); @@ -212,7 +212,7 @@ static inline struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr, struct map **mapp) { - return map_groups__find_symbol(&machine->kmaps, addr, mapp); + return maps__find_symbol(&machine->kmaps, addr, mapp); } static inline @@ -220,7 +220,7 @@ struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine, const char *name, struct map **mapp) { - return map_groups__find_symbol_by_name(&machine->kmaps, name, mapp); + return maps__find_symbol_by_name(&machine->kmaps, name, mapp); } int arch__fix_module_text_start(u64 *start, u64 *size, const char *name); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 744bfbaf35cf..fdd5bddb3075 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -433,51 +433,6 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, return ret; } -int map__fprintf_srccode(struct map *map, u64 addr, - FILE *fp, - struct srccode_state *state) -{ - char *srcfile; - int ret = 0; - unsigned line; - int len; - char *srccode; - - if (!map || !map->dso) - return 0; - srcfile = get_srcline_split(map->dso, - map__rip_2objdump(map, addr), - &line); - if (!srcfile) - return 0; - - /* Avoid redundant printing */ - if (state && - state->srcfile && - !strcmp(state->srcfile, srcfile) && - state->line == line) { - free(srcfile); - return 0; - } - - srccode = find_sourceline(srcfile, line, &len); - if (!srccode) - goto out_free_line; - - ret = fprintf(fp, "|%-8d %.*s", line, len, srccode); - - if (state) { - state->srcfile = srcfile; - state->line = line; - } - return ret; - -out_free_line: - free(srcfile); - return ret; -} - - void srccode_state_free(struct srccode_state *state) { zfree(&state->srcfile); @@ -557,73 +512,71 @@ u64 map__objdump_2mem(struct map *map, u64 ip) return ip + map->reloc; } -static void maps__init(struct maps *maps) +void maps__init(struct maps *maps, struct machine *machine) { maps->entries = RB_ROOT; init_rwsem(&maps->lock); + maps->machine = machine; + maps->last_search_by_name = NULL; + maps->nr_maps = 0; + maps->maps_by_name = NULL; + refcount_set(&maps->refcnt, 1); } -void map_groups__init(struct map_groups *mg, struct machine *machine) -{ - maps__init(&mg->maps); - mg->machine = machine; - mg->last_search_by_name = NULL; - mg->nr_maps = 0; - mg->maps_by_name = NULL; - refcount_set(&mg->refcnt, 1); -} - -static void __map_groups__free_maps_by_name(struct map_groups *mg) +static void __maps__free_maps_by_name(struct maps *maps) { /* * Free everything to try to do it from the rbtree in the next search */ - zfree(&mg->maps_by_name); - mg->nr_maps_allocated = 0; + zfree(&maps->maps_by_name); + maps->nr_maps_allocated = 0; } -void map_groups__insert(struct map_groups *mg, struct map *map) +void maps__insert(struct maps *maps, struct map *map) { - struct maps *maps = &mg->maps; - down_write(&maps->lock); __maps__insert(maps, map); - ++mg->nr_maps; + ++maps->nr_maps; /* * If we already performed some search by name, then we need to add the just * inserted map and resort. */ - if (mg->maps_by_name) { - if (mg->nr_maps > mg->nr_maps_allocated) { - int nr_allocate = mg->nr_maps * 2; - struct map **maps_by_name = realloc(mg->maps_by_name, nr_allocate * sizeof(map)); + if (maps->maps_by_name) { + if (maps->nr_maps > maps->nr_maps_allocated) { + int nr_allocate = maps->nr_maps * 2; + struct map **maps_by_name = realloc(maps->maps_by_name, nr_allocate * sizeof(map)); if (maps_by_name == NULL) { - __map_groups__free_maps_by_name(mg); + __maps__free_maps_by_name(maps); return; } - mg->maps_by_name = maps_by_name; - mg->nr_maps_allocated = nr_allocate; + maps->maps_by_name = maps_by_name; + maps->nr_maps_allocated = nr_allocate; } - mg->maps_by_name[mg->nr_maps - 1] = map; - __map_groups__sort_by_name(mg); + maps->maps_by_name[maps->nr_maps - 1] = map; + __maps__sort_by_name(maps); } up_write(&maps->lock); } -void map_groups__remove(struct map_groups *mg, struct map *map) +static void __maps__remove(struct maps *maps, struct map *map) +{ + rb_erase_init(&map->rb_node, &maps->entries); + map__put(map); +} + +void maps__remove(struct maps *maps, struct map *map) { - struct maps *maps = &mg->maps; down_write(&maps->lock); - if (mg->last_search_by_name == map) - mg->last_search_by_name = NULL; + if (maps->last_search_by_name == map) + maps->last_search_by_name = NULL; __maps__remove(maps, map); - --mg->nr_maps; - if (mg->maps_by_name) - __map_groups__free_maps_by_name(mg); + --maps->nr_maps; + if (maps->maps_by_name) + __maps__free_maps_by_name(maps); up_write(&maps->lock); } @@ -637,50 +590,44 @@ static void __maps__purge(struct maps *maps) } } -static void maps__exit(struct maps *maps) +void maps__exit(struct maps *maps) { down_write(&maps->lock); __maps__purge(maps); up_write(&maps->lock); } -void map_groups__exit(struct map_groups *mg) -{ - maps__exit(&mg->maps); -} - -bool map_groups__empty(struct map_groups *mg) +bool maps__empty(struct maps *maps) { - return !maps__first(&mg->maps); + return !maps__first(maps); } -struct map_groups *map_groups__new(struct machine *machine) +struct maps *maps__new(struct machine *machine) { - struct map_groups *mg = zalloc(sizeof(*mg)); + struct maps *maps = zalloc(sizeof(*maps)); - if (mg != NULL) - map_groups__init(mg, machine); + if (maps != NULL) + maps__init(maps, machine); - return mg; + return maps; } -void map_groups__delete(struct map_groups *mg) +void maps__delete(struct maps *maps) { - map_groups__exit(mg); - unwind__finish_access(mg); - free(mg); + maps__exit(maps); + unwind__finish_access(maps); + free(maps); } -void map_groups__put(struct map_groups *mg) +void maps__put(struct maps *maps) { - if (mg && refcount_dec_and_test(&mg->refcnt)) - map_groups__delete(mg); + if (maps && refcount_dec_and_test(&maps->refcnt)) + maps__delete(maps); } -struct symbol *map_groups__find_symbol(struct map_groups *mg, - u64 addr, struct map **mapp) +struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) { - struct map *map = map_groups__find(mg, addr); + struct map *map = maps__find(maps, addr); /* Ensure map is loaded before using map->map_ip */ if (map != NULL && map__load(map) >= 0) { @@ -699,8 +646,7 @@ static bool map__contains_symbol(struct map *map, struct symbol *sym) return ip >= map->start && ip < map->end; } -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, - struct map **mapp) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { struct symbol *sym; struct map *pos; @@ -727,19 +673,12 @@ out: return sym; } -struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, - const char *name, - struct map **mapp) -{ - return maps__find_symbol_by_name(&mg->maps, name, mapp); -} - -int map_groups__find_ams(struct map_groups *mg, struct addr_map_symbol *ams) +int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) { if (ams->addr < ams->ms.map->start || ams->addr >= ams->ms.map->end) { - if (mg == NULL) + if (maps == NULL) return -1; - ams->ms.map = map_groups__find(mg, ams->addr); + ams->ms.map = maps__find(maps, ams->addr); if (ams->ms.map == NULL) return -1; } @@ -750,7 +689,7 @@ int map_groups__find_ams(struct map_groups *mg, struct addr_map_symbol *ams) return ams->ms.sym ? 0 : -1; } -static size_t maps__fprintf(struct maps *maps, FILE *fp) +size_t maps__fprintf(struct maps *maps, FILE *fp) { size_t printed = 0; struct map *pos; @@ -771,19 +710,8 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) return printed; } -size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) -{ - return maps__fprintf(&mg->maps, fp); -} - -static void __map_groups__insert(struct map_groups *mg, struct map *map) -{ - __maps__insert(&mg->maps, map); -} - -int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp) +int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) { - struct maps *maps = &mg->maps; struct rb_root *root; struct rb_node *next, *first; int err = 0; @@ -848,7 +776,7 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE } before->end = map->start; - __map_groups__insert(mg, before); + __maps__insert(maps, before); if (verbose >= 2 && !use_browser) map__fprintf(before, fp); map__put(before); @@ -865,7 +793,7 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE after->start = map->end; after->pgoff += map->end - pos->start; assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end)); - __map_groups__insert(mg, after); + __maps__insert(maps, after); if (verbose >= 2 && !use_browser) map__fprintf(after, fp); map__put(after); @@ -886,31 +814,30 @@ out: /* * XXX This should not really _copy_ te maps, but refcount them. */ -int map_groups__clone(struct thread *thread, struct map_groups *parent) +int maps__clone(struct thread *thread, struct maps *parent) { - struct map_groups *mg = thread->mg; + struct maps *maps = thread->maps; int err = -ENOMEM; struct map *map; - struct maps *maps = &parent->maps; - down_read(&maps->lock); + down_read(&parent->lock); - maps__for_each_entry(maps, map) { + maps__for_each_entry(parent, map) { struct map *new = map__clone(map); if (new == NULL) goto out_unlock; - err = unwind__prepare_access(mg, new, NULL); + err = unwind__prepare_access(maps, new, NULL); if (err) goto out_unlock; - map_groups__insert(mg, new); + maps__insert(maps, new); map__put(new); } err = 0; out_unlock: - up_read(&maps->lock); + up_read(&parent->lock); return err; } @@ -935,26 +862,6 @@ static void __maps__insert(struct maps *maps, struct map *map) map__get(map); } -void maps__insert(struct maps *maps, struct map *map) -{ - down_write(&maps->lock); - __maps__insert(maps, map); - up_write(&maps->lock); -} - -void __maps__remove(struct maps *maps, struct map *map) -{ - rb_erase_init(&map->rb_node, &maps->entries); - map__put(map); -} - -void maps__remove(struct maps *maps, struct map *map) -{ - down_write(&maps->lock); - __maps__remove(maps, map); - up_write(&maps->lock); -} - struct map *maps__find(struct maps *maps, u64 ip) { struct rb_node *p; @@ -1018,7 +925,7 @@ struct kmap *map__kmap(struct map *map) return kmap; } -struct map_groups *map__kmaps(struct map *map) +struct maps *map__kmaps(struct map *map) { struct kmap *kmap = map__kmap(map); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 5e8899883231..067036e8970c 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -12,11 +12,8 @@ #include <linux/types.h> struct dso; -struct ip_callchain; -struct ref_reloc_sym; -struct map_groups; +struct maps; struct machine; -struct evsel; struct map { union { @@ -45,7 +42,7 @@ struct kmap; struct kmap *__map__kmap(struct map *map); struct kmap *map__kmap(struct map *map); -struct map_groups *map__kmaps(struct map *map); +struct maps *map__kmaps(struct map *map); static inline u64 map__map_ip(struct map *map, u64 ip) { @@ -138,19 +135,12 @@ char *map__srcline(struct map *map, u64 addr, struct symbol *sym); int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp); -struct srccode_state; - -int map__fprintf_srccode(struct map *map, u64 addr, - FILE *fp, struct srccode_state *state); - int map__load(struct map *map); struct symbol *map__find_symbol(struct map *map, u64 addr); struct symbol *map__find_symbol_by_name(struct map *map, const char *name); void map__fixup_start(struct map *map); void map__fixup_end(struct map *map); -void map__reloc_vmlinux(struct map *map); - int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr); diff --git a/tools/perf/util/map_groups.h b/tools/perf/util/map_groups.h deleted file mode 100644 index 63ed211fe241..000000000000 --- a/tools/perf/util/map_groups.h +++ /dev/null @@ -1,106 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERF_MAP_GROUPS_H -#define __PERF_MAP_GROUPS_H - -#include <linux/refcount.h> -#include <linux/rbtree.h> -#include <stdio.h> -#include <stdbool.h> -#include <linux/types.h> -#include "rwsem.h" - -struct ref_reloc_sym; -struct machine; -struct map; -struct thread; - -struct maps { - struct rb_root entries; - struct rw_semaphore lock; -}; - -void maps__insert(struct maps *maps, struct map *map); -void maps__remove(struct maps *maps, struct map *map); -void __maps__remove(struct maps *maps, struct map *map); -struct map *maps__find(struct maps *maps, u64 addr); -struct map *maps__first(struct maps *maps); -struct map *map__next(struct map *map); - -#define maps__for_each_entry(maps, map) \ - for (map = maps__first(maps); map; map = map__next(map)) - -#define maps__for_each_entry_safe(maps, map, next) \ - for (map = maps__first(maps), next = map__next(map); map; map = next, next = map__next(map)) - -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp); - -struct map_groups { - struct maps maps; - struct machine *machine; - struct map *last_search_by_name; - struct map **maps_by_name; - refcount_t refcnt; - unsigned int nr_maps; - unsigned int nr_maps_allocated; -#ifdef HAVE_LIBUNWIND_SUPPORT - void *addr_space; - struct unwind_libunwind_ops *unwind_libunwind_ops; -#endif -}; - -#define KMAP_NAME_LEN 256 - -struct kmap { - struct ref_reloc_sym *ref_reloc_sym; - struct map_groups *kmaps; - char name[KMAP_NAME_LEN]; -}; - -struct map_groups *map_groups__new(struct machine *machine); -void map_groups__delete(struct map_groups *mg); -bool map_groups__empty(struct map_groups *mg); - -static inline struct map_groups *map_groups__get(struct map_groups *mg) -{ - if (mg) - refcount_inc(&mg->refcnt); - return mg; -} - -void map_groups__put(struct map_groups *mg); -void map_groups__init(struct map_groups *mg, struct machine *machine); -void map_groups__exit(struct map_groups *mg); -int map_groups__clone(struct thread *thread, struct map_groups *parent); -size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); - -void map_groups__insert(struct map_groups *mg, struct map *map); - -void map_groups__remove(struct map_groups *mg, struct map *map); - -static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) -{ - return maps__find(&mg->maps, addr); -} - -#define map_groups__for_each_entry(mg, map) \ - for (map = maps__first(&mg->maps); map; map = map__next(map)) - -#define map_groups__for_each_entry_safe(mg, map, next) \ - for (map = maps__first(&mg->maps), next = map__next(map); map; map = next, next = map__next(map)) - -struct symbol *map_groups__find_symbol(struct map_groups *mg, u64 addr, struct map **mapp); -struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, const char *name, struct map **mapp); - -struct addr_map_symbol; - -int map_groups__find_ams(struct map_groups *mg, struct addr_map_symbol *ams); - -int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp); - -struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); - -int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map); - -void __map_groups__sort_by_name(struct map_groups *mg); - -#endif // __PERF_MAP_GROUPS_H diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h index 2964d971aeab..5b8ca93798e9 100644 --- a/tools/perf/util/map_symbol.h +++ b/tools/perf/util/map_symbol.h @@ -4,12 +4,12 @@ #include <linux/types.h> -struct map_groups; +struct maps; struct map; struct symbol; struct map_symbol { - struct map_groups *mg; + struct maps *maps; struct map *map; struct symbol *sym; }; diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h new file mode 100644 index 000000000000..3dd000ddf925 --- /dev/null +++ b/tools/perf/util/maps.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_MAPS_H +#define __PERF_MAPS_H + +#include <linux/refcount.h> +#include <linux/rbtree.h> +#include <stdio.h> +#include <stdbool.h> +#include <linux/types.h> +#include "rwsem.h" + +struct ref_reloc_sym; +struct machine; +struct map; +struct maps; +struct thread; + +struct map *maps__find(struct maps *maps, u64 addr); +struct map *maps__first(struct maps *maps); +struct map *map__next(struct map *map); + +#define maps__for_each_entry(maps, map) \ + for (map = maps__first(maps); map; map = map__next(map)) + +#define maps__for_each_entry_safe(maps, map, next) \ + for (map = maps__first(maps), next = map__next(map); map; map = next, next = map__next(map)) + +struct maps { + struct rb_root entries; + struct rw_semaphore lock; + struct machine *machine; + struct map *last_search_by_name; + struct map **maps_by_name; + refcount_t refcnt; + unsigned int nr_maps; + unsigned int nr_maps_allocated; +#ifdef HAVE_LIBUNWIND_SUPPORT + void *addr_space; + struct unwind_libunwind_ops *unwind_libunwind_ops; +#endif +}; + +#define KMAP_NAME_LEN 256 + +struct kmap { + struct ref_reloc_sym *ref_reloc_sym; + struct maps *kmaps; + char name[KMAP_NAME_LEN]; +}; + +struct maps *maps__new(struct machine *machine); +void maps__delete(struct maps *maps); +bool maps__empty(struct maps *maps); + +static inline struct maps *maps__get(struct maps *maps) +{ + if (maps) + refcount_inc(&maps->refcnt); + return maps; +} + +void maps__put(struct maps *maps); +void maps__init(struct maps *maps, struct machine *machine); +void maps__exit(struct maps *maps); +int maps__clone(struct thread *thread, struct maps *parent); +size_t maps__fprintf(struct maps *maps, FILE *fp); + +void maps__insert(struct maps *maps, struct map *map); + +void maps__remove(struct maps *maps, struct map *map); + +struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp); + +struct addr_map_symbol; + +int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams); + +int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp); + +struct map *maps__find_by_name(struct maps *maps, const char *name); + +int maps__merge_in(struct maps *kmaps, struct map *new_map); + +void __maps__sort_by_name(struct maps *maps); + +#endif // __PERF_MAPS_H diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index e014c2c038f4..a45499126184 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -41,7 +41,7 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); static inline const char *perf_reg_name(int id __maybe_unused) { - return NULL; + return "unknown"; } static inline int perf_reg_value(u64 *valp __maybe_unused, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e8d348988026..8b99fd312aae 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -24,6 +24,7 @@ #include "pmu-events/pmu-events.h" #include "string2.h" #include "strbuf.h" +#include "fncache.h" struct perf_pmu_format { char *name; @@ -82,7 +83,6 @@ int perf_pmu__format_parse(char *dir, struct list_head *head) */ static int pmu_format(const char *name, struct list_head *format) { - struct stat st; char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); @@ -92,8 +92,8 @@ static int pmu_format(const char *name, struct list_head *format) snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name); - if (stat(path, &st) < 0) - return 0; /* no error if format does not exist */ + if (!file_available(path)) + return 0; if (perf_pmu__format_parse(path, format)) return -1; @@ -475,7 +475,6 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) */ static int pmu_aliases(const char *name, struct list_head *head) { - struct stat st; char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); @@ -485,8 +484,8 @@ static int pmu_aliases(const char *name, struct list_head *head) snprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events", sysfs, name); - if (stat(path, &st) < 0) - return 0; /* no error if 'events' does not exist */ + if (!file_available(path)) + return 0; if (pmu_aliases_parse(path, head)) return -1; @@ -525,7 +524,6 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, */ static int pmu_type(const char *name, __u32 *type) { - struct stat st; char path[PATH_MAX]; FILE *file; int ret = 0; @@ -537,7 +535,7 @@ static int pmu_type(const char *name, __u32 *type) snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name); - if (stat(path, &st) < 0) + if (access(path, R_OK) < 0) return -1; file = fopen(path, "r"); @@ -628,14 +626,11 @@ static struct perf_cpu_map *pmu_cpumask(const char *name) static bool pmu_is_uncore(const char *name) { char path[PATH_MAX]; - struct perf_cpu_map *cpus; - const char *sysfs = sysfs__mountpoint(); + const char *sysfs; + sysfs = sysfs__mountpoint(); snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); - cpus = __pmu_cpumask(path); - perf_cpu_map__put(cpus); - - return !!cpus; + return file_available(path); } /* @@ -645,7 +640,6 @@ static bool pmu_is_uncore(const char *name) */ static int is_arm_pmu_core(const char *name) { - struct stat st; char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); @@ -655,10 +649,7 @@ static int is_arm_pmu_core(const char *name) /* Look for cpu sysfs (specific to arm) */ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", sysfs, name); - if (stat(path, &st) == 0) - return 1; - - return 0; + return file_available(path); } static char *perf_pmu__getcpuid(struct perf_pmu *pmu) @@ -1544,7 +1535,6 @@ bool pmu_have_event(const char *pname, const char *name) static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) { - struct stat st; char path[PATH_MAX]; const char *sysfs; @@ -1554,10 +1544,8 @@ static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name); - - if (stat(path, &st) < 0) + if (!file_available(path)) return NULL; - return fopen(path, "r"); } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 52b2d165453a..eea132f512b0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -28,7 +28,7 @@ #include "dso.h" #include "color.h" #include "map.h" -#include "map_groups.h" +#include "maps.h" #include "symbol.h" #include <api/fs/fs.h> #include "trace-event.h" /* For __maybe_unused */ @@ -321,7 +321,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) char module_name[128]; snprintf(module_name, sizeof(module_name), "[%s]", module); - map = map_groups__find_by_name(&host_machine->kmaps, module_name); + map = maps__find_by_name(&host_machine->kmaps, module_name); if (map) { dso = map->dso; goto found; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 9af183860fbd..e7279ea6043a 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -33,3 +33,4 @@ util/trace-event.c util/string.c util/symbol_fprintf.c util/units.c +util/affinity.c diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 9581a904af29..80ca5d0ab7fe 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1127,7 +1127,7 @@ static void python_export_sample_table(struct db_export *dbe, tuple_set_u64(t, 0, es->db_id); tuple_set_u64(t, 1, es->evsel->db_id); - tuple_set_u64(t, 2, es->al->mg->machine->db_id); + tuple_set_u64(t, 2, es->al->maps->machine->db_id); tuple_set_u64(t, 3, es->al->thread->db_id); tuple_set_u64(t, 4, es->comm_db_id); tuple_set_u64(t, 5, es->dso_db_id); diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index d84ed8b6caaa..c29edaaca863 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -16,6 +16,7 @@ #include "srccode.h" #include "debug.h" #include <internal/lib.h> // page_size +#include "fncache.h" #define MAXSRCCACHE (32*1024*1024) #define MAXSRCFILES 64 @@ -36,14 +37,6 @@ static LIST_HEAD(srcfile_list); static long map_total_sz; static int num_srcfiles; -static unsigned shash(unsigned char *s) -{ - unsigned h = 0; - while (*s) - h = 65599 * h + *s++; - return h ^ (h >> 16); -} - static int countlines(char *map, int maplen) { int numl; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 16776d5fbaea..6658fbf196e6 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -9,7 +9,7 @@ #include "dso.h" #include "map.h" -#include "map_groups.h" +#include "maps.h" #include "symbol.h" #include "symsrc.h" #include "demangle-java.h" @@ -844,7 +844,7 @@ void __weak arch__sym_update(struct symbol *s __maybe_unused, static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, - struct map_groups *kmaps, struct kmap *kmap, + struct maps *kmaps, struct kmap *kmap, struct dso **curr_dsop, struct map **curr_mapp, const char *section_name, bool adjust_kernel_syms, bool kmodule, bool *remap_kernel) @@ -876,8 +876,8 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, /* Ensure maps are correctly ordered */ if (kmaps) { map__get(map); - map_groups__remove(kmaps, map); - map_groups__insert(kmaps, map); + maps__remove(kmaps, map); + maps__insert(kmaps, map); map__put(map); } } @@ -902,7 +902,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name); - curr_map = map_groups__find_by_name(kmaps, dso_name); + curr_map = maps__find_by_name(kmaps, dso_name); if (curr_map == NULL) { u64 start = sym->st_value; @@ -928,7 +928,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; } curr_dso->symtab_type = dso->symtab_type; - map_groups__insert(kmaps, curr_map); + maps__insert(kmaps, curr_map); /* * Add it before we drop the referece to curr_map, i.e. while * we still are sure to have a reference to this DSO via @@ -950,7 +950,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule) { struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; - struct map_groups *kmaps = kmap ? map__kmaps(map) : NULL; + struct maps *kmaps = kmap ? map__kmaps(map) : NULL; struct map *curr_map = map; struct dso *curr_dso = dso; Elf_Data *symstrs, *secstrs; @@ -1162,7 +1162,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, * We need to fixup this here too because we create new * maps here, for things like vsyscall sections. */ - map_groups__fixup_end(kmaps); + maps__fixup_end(kmaps); } } err = nr; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index db9667aacb88..3b379b1296f1 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -239,9 +239,8 @@ void symbols__fixup_end(struct rb_root_cached *symbols) curr->end = roundup(curr->start, 4096) + 4096; } -void map_groups__fixup_end(struct map_groups *mg) +void maps__fixup_end(struct maps *maps) { - struct maps *maps = &mg->maps; struct map *prev = NULL, *curr; down_write(&maps->lock); @@ -698,7 +697,7 @@ static int dso__load_all_kallsyms(struct dso *dso, const char *filename) return kallsyms__parse(filename, dso, map__process_kallsym_symbol); } -static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct dso *dso) +static int maps__split_kallsyms_for_kcore(struct maps *kmaps, struct dso *dso) { struct map *curr_map; struct symbol *pos; @@ -724,7 +723,7 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct if (module) *module = '\0'; - curr_map = map_groups__find(kmaps, pos->start); + curr_map = maps__find(kmaps, pos->start); if (!curr_map) { symbol__delete(pos); @@ -751,8 +750,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. */ -static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, u64 delta, - struct map *initial_map) +static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, + struct map *initial_map) { struct machine *machine; struct map *curr_map = initial_map; @@ -797,7 +796,7 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, dso__set_loaded(curr_map->dso); } - curr_map = map_groups__find_by_name(kmaps, module); + curr_map = maps__find_by_name(kmaps, module); if (curr_map == NULL) { pr_debug("%s/proc/{kallsyms,modules} " "inconsistency while looking " @@ -864,7 +863,7 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, } curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; - map_groups__insert(kmaps, curr_map); + maps__insert(kmaps, curr_map); ++kernel_range; } else if (delta) { /* Kernel was relocated at boot time */ @@ -1049,8 +1048,7 @@ out_delete_from: return ret; } -static int do_validate_kcore_modules(const char *filename, - struct map_groups *kmaps) +static int do_validate_kcore_modules(const char *filename, struct maps *kmaps) { struct rb_root modules = RB_ROOT; struct map *old_map; @@ -1060,7 +1058,7 @@ static int do_validate_kcore_modules(const char *filename, if (err) return err; - map_groups__for_each_entry(kmaps, old_map) { + maps__for_each_entry(kmaps, old_map) { struct module_info *mi; if (!__map__is_kmodule(old_map)) { @@ -1107,7 +1105,7 @@ static bool filename_from_kallsyms_filename(char *filename, static int validate_kcore_modules(const char *kallsyms_filename, struct map *map) { - struct map_groups *kmaps = map__kmaps(map); + struct maps *kmaps = map__kmaps(map); char modules_filename[PATH_MAX]; if (!kmaps) @@ -1167,15 +1165,15 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) } /* - * Merges map into map_groups by splitting the new map - * within the existing map regions. + * Merges map into maps by splitting the new map within the existing map + * regions. */ -int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map) +int maps__merge_in(struct maps *kmaps, struct map *new_map) { struct map *old_map; LIST_HEAD(merged); - map_groups__for_each_entry(kmaps, old_map) { + maps__for_each_entry(kmaps, old_map) { /* no overload with this one */ if (new_map->end < old_map->start || new_map->start >= old_map->end) @@ -1232,12 +1230,12 @@ int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map) while (!list_empty(&merged)) { old_map = list_entry(merged.next, struct map, node); list_del_init(&old_map->node); - map_groups__insert(kmaps, old_map); + maps__insert(kmaps, old_map); map__put(old_map); } if (new_map) { - map_groups__insert(kmaps, new_map); + maps__insert(kmaps, new_map); map__put(new_map); } return 0; @@ -1246,7 +1244,7 @@ int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map) static int dso__load_kcore(struct dso *dso, struct map *map, const char *kallsyms_filename) { - struct map_groups *kmaps = map__kmaps(map); + struct maps *kmaps = map__kmaps(map); struct kcore_mapfn_data md; struct map *old_map, *new_map, *replacement_map = NULL, *next; struct machine *machine; @@ -1295,14 +1293,14 @@ static int dso__load_kcore(struct dso *dso, struct map *map, } /* Remove old maps */ - map_groups__for_each_entry_safe(kmaps, old_map, next) { + maps__for_each_entry_safe(kmaps, old_map, next) { /* * We need to preserve eBPF maps even if they are * covered by kcore, because we need to access * eBPF dso for source data. */ if (old_map != map && !__map__is_bpf_prog(old_map)) - map_groups__remove(kmaps, old_map); + maps__remove(kmaps, old_map); } machine->trampolines_mapped = false; @@ -1331,8 +1329,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, map->unmap_ip = new_map->unmap_ip; /* Ensure maps are correctly ordered */ map__get(map); - map_groups__remove(kmaps, map); - map_groups__insert(kmaps, map); + maps__remove(kmaps, map); + maps__insert(kmaps, map); map__put(map); map__put(new_map); } else { @@ -1341,7 +1339,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, * and ensure that current maps (eBPF) * stay intact. */ - if (map_groups__merge_in(kmaps, new_map)) + if (maps__merge_in(kmaps, new_map)) goto out_err; } } @@ -1433,9 +1431,9 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; if (!no_kcore && !dso__load_kcore(dso, map, filename)) - return map_groups__split_kallsyms_for_kcore(kmap->kmaps, dso); + return maps__split_kallsyms_for_kcore(kmap->kmaps, dso); else - return map_groups__split_kallsyms(kmap->kmaps, dso, delta, map); + return maps__split_kallsyms(kmap->kmaps, dso, delta, map); } int dso__load_kallsyms(struct dso *dso, const char *filename, @@ -1772,68 +1770,67 @@ static int map__strcmp_name(const void *name, const void *b) return strcmp(name, map->dso->short_name); } -void __map_groups__sort_by_name(struct map_groups *mg) +void __maps__sort_by_name(struct maps *maps) { - qsort(mg->maps_by_name, mg->nr_maps, sizeof(struct map *), map__strcmp); + qsort(maps->maps_by_name, maps->nr_maps, sizeof(struct map *), map__strcmp); } -static int map__groups__sort_by_name_from_rbtree(struct map_groups *mg) +static int map__groups__sort_by_name_from_rbtree(struct maps *maps) { struct map *map; - struct map **maps_by_name = realloc(mg->maps_by_name, mg->nr_maps * sizeof(map)); + struct map **maps_by_name = realloc(maps->maps_by_name, maps->nr_maps * sizeof(map)); int i = 0; if (maps_by_name == NULL) return -1; - mg->maps_by_name = maps_by_name; - mg->nr_maps_allocated = mg->nr_maps; + maps->maps_by_name = maps_by_name; + maps->nr_maps_allocated = maps->nr_maps; - maps__for_each_entry(&mg->maps, map) + maps__for_each_entry(maps, map) maps_by_name[i++] = map; - __map_groups__sort_by_name(mg); + __maps__sort_by_name(maps); return 0; } -static struct map *__map_groups__find_by_name(struct map_groups *mg, const char *name) +static struct map *__maps__find_by_name(struct maps *maps, const char *name) { struct map **mapp; - if (mg->maps_by_name == NULL && - map__groups__sort_by_name_from_rbtree(mg)) + if (maps->maps_by_name == NULL && + map__groups__sort_by_name_from_rbtree(maps)) return NULL; - mapp = bsearch(name, mg->maps_by_name, mg->nr_maps, sizeof(*mapp), map__strcmp_name); + mapp = bsearch(name, maps->maps_by_name, maps->nr_maps, sizeof(*mapp), map__strcmp_name); if (mapp) return *mapp; return NULL; } -struct map *map_groups__find_by_name(struct map_groups *mg, const char *name) +struct map *maps__find_by_name(struct maps *maps, const char *name) { - struct maps *maps = &mg->maps; struct map *map; down_read(&maps->lock); - if (mg->last_search_by_name && strcmp(mg->last_search_by_name->dso->short_name, name) == 0) { - map = mg->last_search_by_name; + if (maps->last_search_by_name && strcmp(maps->last_search_by_name->dso->short_name, name) == 0) { + map = maps->last_search_by_name; goto out_unlock; } /* - * If we have mg->maps_by_name, then the name isn't in the rbtree, - * as mg->maps_by_name mirrors the rbtree when lookups by name are + * If we have maps->maps_by_name, then the name isn't in the rbtree, + * as maps->maps_by_name mirrors the rbtree when lookups by name are * made. */ - map = __map_groups__find_by_name(mg, name); - if (map || mg->maps_by_name != NULL) + map = __maps__find_by_name(maps, name); + if (map || maps->maps_by_name != NULL) goto out_unlock; /* Fallback to traversing the rbtree... */ maps__for_each_entry(maps, map) if (strcmp(map->dso->short_name, name) == 0) { - mg->last_search_by_name = map; + maps->last_search_by_name = map; goto out_unlock; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0b718cc9fb28..93fc43db1be3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -21,7 +21,7 @@ struct dso; struct map; -struct map_groups; +struct maps; struct option; /* @@ -108,7 +108,7 @@ struct ref_reloc_sym { struct addr_location { struct thread *thread; - struct map_groups *mg; + struct maps *maps; struct map *map; struct symbol *sym; const char *srcline; @@ -186,7 +186,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root_cached *symbols); void symbols__fixup_end(struct rb_root_cached *symbols); -void map_groups__fixup_end(struct map_groups *mg); +void maps__fixup_end(struct maps *maps); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 48c3f8b9c852..c423298fe62d 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -493,7 +493,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, /* * send mmap only for thread group leader - * see thread__init_map_groups + * see thread__init_maps() */ if (pid == tgid && perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index cd8a948d03ec..0885967d5bc3 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -134,8 +134,8 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread, if (err) return err; - if (thread->mg && thread->mg->machine) { - struct machine *machine = thread->mg->machine; + if (thread->maps && thread->maps->machine) { + struct machine *machine = thread->maps->machine; const char *arch = perf_env__arch(machine->env); ts->kernel_start = machine__kernel_start(machine); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0a277a920970..28b719388028 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -19,21 +19,21 @@ #include <api/fs/fs.h> -int thread__init_map_groups(struct thread *thread, struct machine *machine) +int thread__init_maps(struct thread *thread, struct machine *machine) { pid_t pid = thread->pid_; if (pid == thread->tid || pid == -1) { - thread->mg = map_groups__new(machine); + thread->maps = maps__new(machine); } else { struct thread *leader = __machine__findnew_thread(machine, pid, pid); if (leader) { - thread->mg = map_groups__get(leader->mg); + thread->maps = maps__get(leader->maps); thread__put(leader); } } - return thread->mg ? 0 : -1; + return thread->maps ? 0 : -1; } struct thread *thread__new(pid_t pid, pid_t tid) @@ -86,9 +86,9 @@ void thread__delete(struct thread *thread) thread_stack__free(thread); - if (thread->mg) { - map_groups__put(thread->mg); - thread->mg = NULL; + if (thread->maps) { + maps__put(thread->maps); + thread->maps = NULL; } down_write(&thread->namespaces_lock); list_for_each_entry_safe(namespaces, tmp_namespaces, @@ -251,7 +251,7 @@ static int ____thread__set_comm(struct thread *thread, const char *str, list_add(&new->list, &thread->comm_list); if (exec) - unwind__flush_access(thread->mg); + unwind__flush_access(thread->maps); } thread->comm_set = true; @@ -324,19 +324,19 @@ int thread__comm_len(struct thread *thread) size_t thread__fprintf(struct thread *thread, FILE *fp) { return fprintf(fp, "Thread %d %s\n", thread->tid, thread__comm_str(thread)) + - map_groups__fprintf(thread->mg, fp); + maps__fprintf(thread->maps, fp); } int thread__insert_map(struct thread *thread, struct map *map) { int ret; - ret = unwind__prepare_access(thread->mg, map, NULL); + ret = unwind__prepare_access(thread->maps, map, NULL); if (ret) return ret; - map_groups__fixup_overlappings(thread->mg, map, stderr); - map_groups__insert(thread->mg, map); + maps__fixup_overlappings(thread->maps, map, stderr); + maps__insert(thread->maps, map); return 0; } @@ -345,13 +345,13 @@ static int __thread__prepare_access(struct thread *thread) { bool initialized = false; int err = 0; - struct maps *maps = &thread->mg->maps; + struct maps *maps = thread->maps; struct map *map; down_read(&maps->lock); maps__for_each_entry(maps, map) { - err = unwind__prepare_access(thread->mg, map, &initialized); + err = unwind__prepare_access(thread->maps, map, &initialized); if (err || initialized) break; } @@ -371,21 +371,19 @@ static int thread__prepare_access(struct thread *thread) return err; } -static int thread__clone_map_groups(struct thread *thread, - struct thread *parent, - bool do_maps_clone) +static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone) { /* This is new thread, we share map groups for process. */ if (thread->pid_ == parent->pid_) return thread__prepare_access(thread); - if (thread->mg == parent->mg) { + if (thread->maps == parent->maps) { pr_debug("broken map groups on thread %d/%d parent %d/%d\n", thread->pid_, thread->tid, parent->pid_, parent->tid); return 0; } /* But this one is new process, copy maps. */ - return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0; + return do_maps_clone ? maps__clone(thread, parent->maps) : 0; } int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) @@ -401,7 +399,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo } thread->ppid = parent->tid; - return thread__clone_map_groups(thread, parent, do_maps_clone); + return thread__clone_maps(thread, parent, do_maps_clone); } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 51bdb9a7af7f..20b96b5d1f15 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -25,7 +25,7 @@ struct thread { struct rb_node rb_node; struct list_head node; }; - struct map_groups *mg; + struct maps *maps; pid_t pid_; /* Not all tools update this */ pid_t tid; pid_t ppid; @@ -53,7 +53,7 @@ struct namespaces; struct comm; struct thread *thread__new(pid_t pid, pid_t tid); -int thread__init_map_groups(struct thread *thread, struct machine *machine); +int thread__init_maps(struct thread *thread, struct machine *machine); void thread__delete(struct thread *thread); struct thread *thread__get(struct thread *thread); diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index d2a8df01c4a7..7a3dbc259cec 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -81,7 +81,7 @@ static int entry(u64 ip, struct unwind_info *ui) return -1; e->ip = ip; - e->ms.mg = al.mg; + e->ms.maps = al.maps; e->ms.map = al.map; e->ms.sym = al.sym; @@ -200,7 +200,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct unwind_info *ui, ui_buf = { .sample = data, .thread = thread, - .machine = thread->mg->machine, + .machine = thread->maps->machine, .cb = cb, .arg = arg, .max_stack = max_stack, diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 6d53347d6744..b4649f5a0c2f 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -578,7 +578,7 @@ static int entry(u64 ip, struct thread *thread, e.ms.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al); e.ip = ip; e.ms.map = al.map; - e.ms.mg = al.mg; + e.ms.maps = al.maps; pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", al.sym ? al.sym->name : "''", @@ -616,26 +616,26 @@ static unw_accessors_t accessors = { .get_proc_name = get_proc_name, }; -static int _unwind__prepare_access(struct map_groups *mg) +static int _unwind__prepare_access(struct maps *maps) { - mg->addr_space = unw_create_addr_space(&accessors, 0); - if (!mg->addr_space) { + maps->addr_space = unw_create_addr_space(&accessors, 0); + if (!maps->addr_space) { pr_err("unwind: Can't create unwind address space.\n"); return -ENOMEM; } - unw_set_caching_policy(mg->addr_space, UNW_CACHE_GLOBAL); + unw_set_caching_policy(maps->addr_space, UNW_CACHE_GLOBAL); return 0; } -static void _unwind__flush_access(struct map_groups *mg) +static void _unwind__flush_access(struct maps *maps) { - unw_flush_cache(mg->addr_space, 0, 0); + unw_flush_cache(maps->addr_space, 0, 0); } -static void _unwind__finish_access(struct map_groups *mg) +static void _unwind__finish_access(struct maps *maps) { - unw_destroy_addr_space(mg->addr_space); + unw_destroy_addr_space(maps->addr_space); } static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, @@ -660,7 +660,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, */ if (max_stack - 1 > 0) { WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL"); - addr_space = ui->thread->mg->addr_space; + addr_space = ui->thread->maps->addr_space; if (addr_space == NULL) return -1; @@ -709,7 +709,7 @@ static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct unwind_info ui = { .sample = data, .thread = thread, - .machine = thread->mg->machine, + .machine = thread->maps->machine, }; if (!data->user_regs.regs) diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index a24fb57c9b2c..e89a5479b361 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -12,14 +12,12 @@ struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops; -static void unwind__register_ops(struct map_groups *mg, - struct unwind_libunwind_ops *ops) +static void unwind__register_ops(struct maps *maps, struct unwind_libunwind_ops *ops) { - mg->unwind_libunwind_ops = ops; + maps->unwind_libunwind_ops = ops; } -int unwind__prepare_access(struct map_groups *mg, struct map *map, - bool *initialized) +int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized) { const char *arch; enum dso_type dso_type; @@ -29,7 +27,7 @@ int unwind__prepare_access(struct map_groups *mg, struct map *map, if (!dwarf_callchain_users) return 0; - if (mg->addr_space) { + if (maps->addr_space) { pr_debug("unwind: thread map already set, dso=%s\n", map->dso->name); if (initialized) @@ -38,14 +36,14 @@ int unwind__prepare_access(struct map_groups *mg, struct map *map, } /* env->arch is NULL for live-mode (i.e. perf top) */ - if (!mg->machine->env || !mg->machine->env->arch) + if (!maps->machine->env || !maps->machine->env->arch) goto out_register; - dso_type = dso__type(map->dso, mg->machine); + dso_type = dso__type(map->dso, maps->machine); if (dso_type == DSO__TYPE_UNKNOWN) return 0; - arch = perf_env__arch(mg->machine->env); + arch = perf_env__arch(maps->machine->env); if (!strcmp(arch, "x86")) { if (dso_type != DSO__TYPE_64BIT) @@ -60,31 +58,31 @@ int unwind__prepare_access(struct map_groups *mg, struct map *map, return 0; } out_register: - unwind__register_ops(mg, ops); + unwind__register_ops(maps, ops); - err = mg->unwind_libunwind_ops->prepare_access(mg); + err = maps->unwind_libunwind_ops->prepare_access(maps); if (initialized) *initialized = err ? false : true; return err; } -void unwind__flush_access(struct map_groups *mg) +void unwind__flush_access(struct maps *maps) { - if (mg->unwind_libunwind_ops) - mg->unwind_libunwind_ops->flush_access(mg); + if (maps->unwind_libunwind_ops) + maps->unwind_libunwind_ops->flush_access(maps); } -void unwind__finish_access(struct map_groups *mg) +void unwind__finish_access(struct maps *maps) { - if (mg->unwind_libunwind_ops) - mg->unwind_libunwind_ops->finish_access(mg); + if (maps->unwind_libunwind_ops) + maps->unwind_libunwind_ops->finish_access(maps); } int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, struct perf_sample *data, int max_stack) { - if (thread->mg->unwind_libunwind_ops) - return thread->mg->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack); + if (thread->maps->unwind_libunwind_ops) + return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack); return 0; } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index 50337c966979..ab8ad469c8de 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -6,7 +6,7 @@ #include <linux/types.h> #include "util/map_symbol.h" -struct map_groups; +struct maps; struct perf_sample; struct thread; @@ -18,9 +18,9 @@ struct unwind_entry { typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); struct unwind_libunwind_ops { - int (*prepare_access)(struct map_groups *mg); - void (*flush_access)(struct map_groups *mg); - void (*finish_access)(struct map_groups *mg); + int (*prepare_access)(struct maps *maps); + void (*flush_access)(struct maps *maps); + void (*finish_access)(struct maps *maps); int (*get_entries)(unwind_entry_cb_t cb, void *arg, struct thread *thread, struct perf_sample *data, int max_stack); @@ -45,20 +45,19 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, #endif int LIBUNWIND__ARCH_REG_ID(int regnum); -int unwind__prepare_access(struct map_groups *mg, struct map *map, - bool *initialized); -void unwind__flush_access(struct map_groups *mg); -void unwind__finish_access(struct map_groups *mg); +int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized); +void unwind__flush_access(struct maps *maps); +void unwind__finish_access(struct maps *maps); #else -static inline int unwind__prepare_access(struct map_groups *mg __maybe_unused, +static inline int unwind__prepare_access(struct maps *maps __maybe_unused, struct map *map __maybe_unused, bool *initialized __maybe_unused) { return 0; } -static inline void unwind__flush_access(struct map_groups *mg __maybe_unused) {} -static inline void unwind__finish_access(struct map_groups *mg __maybe_unused) {} +static inline void unwind__flush_access(struct maps *maps __maybe_unused) {} +static inline void unwind__finish_access(struct maps *maps __maybe_unused) {} #endif #else static inline int @@ -71,14 +70,14 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, return 0; } -static inline int unwind__prepare_access(struct map_groups *mg __maybe_unused, +static inline int unwind__prepare_access(struct maps *maps __maybe_unused, struct map *map __maybe_unused, bool *initialized __maybe_unused) { return 0; } -static inline void unwind__flush_access(struct map_groups *mg __maybe_unused) {} -static inline void unwind__finish_access(struct map_groups *mg __maybe_unused) {} +static inline void unwind__flush_access(struct maps *maps __maybe_unused) {} +static inline void unwind__finish_access(struct maps *maps __maybe_unused) {} #endif /* HAVE_DWARF_UNWIND_SUPPORT */ #endif /* __UNWIND_H */ diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 6e00793c10ee..3cc91ad048ea 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -144,7 +144,7 @@ static enum dso_type machine__thread_dso_type(struct machine *machine, enum dso_type dso_type = DSO__TYPE_UNKNOWN; struct map *map; - map_groups__for_each_entry(thread->mg, map) { + maps__for_each_entry(thread->maps, map) { struct dso *dso = map->dso; if (!dso || dso->long_name[0] != '/') continue; |