aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds2021-02-21 12:49:32 -0800
committerLinus Torvalds2021-02-21 12:49:32 -0800
commitd310ec03a34e92a77302edb804f7d68ee4f01ba0 (patch)
treed0c09463b518e386ad76141a286d0f314f1e0c03 /arch/x86/kernel
parent657bd90c93146a929c69cd43addf2804eb70c926 (diff)
parent8bcfdd7cad3dffdd340f9a79098cbf331eb2cd53 (diff)
Merge tag 'perf-core-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance event updates from Ingo Molnar: - Add CPU-PMU support for Intel Sapphire Rapids CPUs - Extend the perf ABI with PERF_SAMPLE_WEIGHT_STRUCT, to offer two-parameter sampling event feedback. Not used yet, but is intended for Golden Cove CPU-PMU, which can provide both the instruction latency and the cache latency information for memory profiling events. - Remove experimental, default-disabled perfmon-v4 counter_freezing support that could only be enabled via a boot option. The hardware is hopelessly broken, we'd like to make sure nobody starts relying on this, as it would only end in tears. - Fix energy/power events on Intel SPR platforms - Simplify the uprobes resume_execution() logic - Misc smaller fixes. * tag 'perf-core-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: perf/x86/rapl: Fix psys-energy event on Intel SPR platform perf/x86/rapl: Only check lower 32bits for RAPL energy counters perf/x86/rapl: Add msr mask support perf/x86/kvm: Add Cascade Lake Xeon steppings to isolation_ucodes[] perf/x86/intel: Support CPUID 10.ECX to disable fixed counters perf/x86/intel: Add perf core PMU support for Sapphire Rapids perf/x86/intel: Filter unsupported Topdown metrics event perf/x86/intel: Factor out intel_update_topdown_event() perf/core: Add PERF_SAMPLE_WEIGHT_STRUCT perf/intel: Remove Perfmon-v4 counter_freezing support x86/perf: Use static_call for x86_pmu.guest_get_msrs perf/x86/intel/uncore: With > 8 nodes, get pci bus die id from NUMA info perf/x86/intel/uncore: Store the logical die id instead of the physical die id. x86/kprobes: Do not decode opcode in resume_execution()
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/kprobes/core.c168
1 files changed, 74 insertions, 94 deletions
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index a65e9e97857f..df776cdca327 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -133,26 +133,6 @@ void synthesize_relcall(void *dest, void *from, void *to)
NOKPROBE_SYMBOL(synthesize_relcall);
/*
- * Skip the prefixes of the instruction.
- */
-static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
-{
- insn_attr_t attr;
-
- attr = inat_get_opcode_attribute((insn_byte_t)*insn);
- while (inat_is_legacy_prefix(attr)) {
- insn++;
- attr = inat_get_opcode_attribute((insn_byte_t)*insn);
- }
-#ifdef CONFIG_X86_64
- if (inat_is_rex_prefix(attr))
- insn++;
-#endif
- return insn;
-}
-NOKPROBE_SYMBOL(skip_prefixes);
-
-/*
* Returns non-zero if INSN is boostable.
* RIP relative instructions are adjusted at copying time in 64 bits mode
*/
@@ -312,25 +292,6 @@ static int can_probe(unsigned long paddr)
}
/*
- * Returns non-zero if opcode modifies the interrupt flag.
- */
-static int is_IF_modifier(kprobe_opcode_t *insn)
-{
- /* Skip prefixes */
- insn = skip_prefixes(insn);
-
- switch (*insn) {
- case 0xfa: /* cli */
- case 0xfb: /* sti */
- case 0xcf: /* iret/iretd */
- case 0x9d: /* popf/popfd */
- return 1;
- }
-
- return 0;
-}
-
-/*
* Copy an instruction with recovering modified instruction by kprobes
* and adjust the displacement if the instruction uses the %rip-relative
* addressing mode. Note that since @real will be the final place of copied
@@ -411,9 +372,9 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
synthesize_reljump(buf + len, p->ainsn.insn + len,
p->addr + insn->length);
len += JMP32_INSN_SIZE;
- p->ainsn.boostable = true;
+ p->ainsn.boostable = 1;
} else {
- p->ainsn.boostable = false;
+ p->ainsn.boostable = 0;
}
return len;
@@ -450,6 +411,67 @@ void free_insn_page(void *page)
module_memfree(page);
}
+static void set_resume_flags(struct kprobe *p, struct insn *insn)
+{
+ insn_byte_t opcode = insn->opcode.bytes[0];
+
+ switch (opcode) {
+ case 0xfa: /* cli */
+ case 0xfb: /* sti */
+ case 0x9d: /* popf/popfd */
+ /* Check whether the instruction modifies Interrupt Flag or not */
+ p->ainsn.if_modifier = 1;
+ break;
+ case 0x9c: /* pushfl */
+ p->ainsn.is_pushf = 1;
+ break;
+ case 0xcf: /* iret */
+ p->ainsn.if_modifier = 1;
+ fallthrough;
+ case 0xc2: /* ret/lret */
+ case 0xc3:
+ case 0xca:
+ case 0xcb:
+ case 0xea: /* jmp absolute -- ip is correct */
+ /* ip is already adjusted, no more changes required */
+ p->ainsn.is_abs_ip = 1;
+ /* Without resume jump, this is boostable */
+ p->ainsn.boostable = 1;
+ break;
+ case 0xe8: /* call relative - Fix return addr */
+ p->ainsn.is_call = 1;
+ break;
+#ifdef CONFIG_X86_32
+ case 0x9a: /* call absolute -- same as call absolute, indirect */
+ p->ainsn.is_call = 1;
+ p->ainsn.is_abs_ip = 1;
+ break;
+#endif
+ case 0xff:
+ opcode = insn->opcode.bytes[1];
+ if ((opcode & 0x30) == 0x10) {
+ /*
+ * call absolute, indirect
+ * Fix return addr; ip is correct.
+ * But this is not boostable
+ */
+ p->ainsn.is_call = 1;
+ p->ainsn.is_abs_ip = 1;
+ break;
+ } else if (((opcode & 0x31) == 0x20) ||
+ ((opcode & 0x31) == 0x21)) {
+ /*
+ * jmp near and far, absolute indirect
+ * ip is correct.
+ */
+ p->ainsn.is_abs_ip = 1;
+ /* Without resume jump, this is boostable */
+ p->ainsn.boostable = 1;
+ }
+ break;
+ }
+}
+
static int arch_copy_kprobe(struct kprobe *p)
{
struct insn insn;
@@ -467,8 +489,8 @@ static int arch_copy_kprobe(struct kprobe *p)
*/
len = prepare_boost(buf, p, &insn);
- /* Check whether the instruction modifies Interrupt Flag or not */
- p->ainsn.if_modifier = is_IF_modifier(buf);
+ /* Analyze the opcode and set resume flags */
+ set_resume_flags(p, &insn);
/* Also, displacement change doesn't affect the first byte */
p->opcode = buf[0];
@@ -491,6 +513,9 @@ int arch_prepare_kprobe(struct kprobe *p)
if (!can_probe((unsigned long)p->addr))
return -EILSEQ;
+
+ memset(&p->ainsn, 0, sizeof(p->ainsn));
+
/* insn: must be on special executable page on x86. */
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
@@ -806,11 +831,6 @@ NOKPROBE_SYMBOL(trampoline_handler);
* 2) If the single-stepped instruction was a call, the return address
* that is atop the stack is the address following the copied instruction.
* We need to make it the address following the original instruction.
- *
- * If this is the first time we've single-stepped the instruction at
- * this probepoint, and the instruction is boostable, boost it: add a
- * jump instruction after the copied instruction, that jumps to the next
- * instruction after the probepoint.
*/
static void resume_execution(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
@@ -818,60 +838,20 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
unsigned long *tos = stack_addr(regs);
unsigned long copy_ip = (unsigned long)p->ainsn.insn;
unsigned long orig_ip = (unsigned long)p->addr;
- kprobe_opcode_t *insn = p->ainsn.insn;
-
- /* Skip prefixes */
- insn = skip_prefixes(insn);
regs->flags &= ~X86_EFLAGS_TF;
- switch (*insn) {
- case 0x9c: /* pushfl */
+
+ /* Fixup the contents of top of stack */
+ if (p->ainsn.is_pushf) {
*tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
*tos |= kcb->kprobe_old_flags;
- break;
- case 0xc2: /* iret/ret/lret */
- case 0xc3:
- case 0xca:
- case 0xcb:
- case 0xcf:
- case 0xea: /* jmp absolute -- ip is correct */
- /* ip is already adjusted, no more changes required */
- p->ainsn.boostable = true;
- goto no_change;
- case 0xe8: /* call relative - Fix return addr */
+ } else if (p->ainsn.is_call) {
*tos = orig_ip + (*tos - copy_ip);
- break;
-#ifdef CONFIG_X86_32
- case 0x9a: /* call absolute -- same as call absolute, indirect */
- *tos = orig_ip + (*tos - copy_ip);
- goto no_change;
-#endif
- case 0xff:
- if ((insn[1] & 0x30) == 0x10) {
- /*
- * call absolute, indirect
- * Fix return addr; ip is correct.
- * But this is not boostable
- */
- *tos = orig_ip + (*tos - copy_ip);
- goto no_change;
- } else if (((insn[1] & 0x31) == 0x20) ||
- ((insn[1] & 0x31) == 0x21)) {
- /*
- * jmp near and far, absolute indirect
- * ip is correct. And this is boostable
- */
- p->ainsn.boostable = true;
- goto no_change;
- }
- break;
- default:
- break;
}
- regs->ip += orig_ip - copy_ip;
+ if (!p->ainsn.is_abs_ip)
+ regs->ip += orig_ip - copy_ip;
-no_change:
restore_btf();
}
NOKPROBE_SYMBOL(resume_execution);