aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds2009-04-05 11:04:19 -0700
committerLinus Torvalds2009-04-05 11:04:19 -0700
commit714f83d5d9f7c785f622259dad1f4fad12d64664 (patch)
tree20563541ae438e11d686b4d629074eb002a481b7 /arch/x86
parent8901e7ffc2fa78ede7ce9826dbad68a3a25dc2dc (diff)
parent645dae969c3b8651c5bc7c54a1835ec03820f85f (diff)
Merge branch 'tracing-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (413 commits) tracing, net: fix net tree and tracing tree merge interaction tracing, powerpc: fix powerpc tree and tracing tree interaction ring-buffer: do not remove reader page from list on ring buffer free function-graph: allow unregistering twice trace: make argument 'mem' of trace_seq_putmem() const tracing: add missing 'extern' keywords to trace_output.h tracing: provide trace_seq_reserve() blktrace: print out BLK_TN_MESSAGE properly blktrace: extract duplidate code blktrace: fix memory leak when freeing struct blk_io_trace blktrace: fix blk_probes_ref chaos blktrace: make classic output more classic blktrace: fix off-by-one bug blktrace: fix the original blktrace blktrace: fix a race when creating blk_tree_root in debugfs blktrace: fix timestamp in binary output tracing, Text Edit Lock: cleanup tracing: filter fix for TRACE_EVENT_FORMAT events ftrace: Using FTRACE_WARN_ON() to check "freed record" in ftrace_release() x86: kretprobe-booster interrupt emulation code fix ... Fix up trivial conflicts in arch/parisc/include/asm/ftrace.h include/linux/memory.h kernel/extable.c kernel/module.c
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/cacheflush.h5
-rw-r--r--arch/x86/include/asm/fixmap.h2
-rw-r--r--arch/x86/include/asm/ftrace.h7
-rw-r--r--arch/x86/include/asm/ptrace-abi.h3
-rw-r--r--arch/x86/include/asm/thread_info.h9
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/alternative.c29
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/dumpstack.c6
-rw-r--r--arch/x86/kernel/ftrace.c192
-rw-r--r--arch/x86/kernel/kprobes.c17
-rw-r--r--arch/x86/kernel/process.c5
-rw-r--r--arch/x86/kernel/ptrace.c7
-rw-r--r--arch/x86/kvm/Kconfig3
-rw-r--r--arch/x86/mm/init_32.c35
-rw-r--r--arch/x86/mm/init_64.c37
17 files changed, 268 insertions, 98 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5696cec7b4b0..5b2196ab8168 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,8 @@ config X86
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
+ select HAVE_FTRACE_SYSCALLS
select HAVE_KVM
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index b3894bf52fcd..e55dfc1ad453 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -126,6 +126,11 @@ void clflush_cache_range(void *addr, unsigned int size);
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
extern const int rodata_test_data;
+void set_kernel_text_rw(void);
+void set_kernel_text_ro(void);
+#else
+static inline void set_kernel_text_rw(void) { }
+static inline void set_kernel_text_ro(void) { }
#endif
#ifdef CONFIG_DEBUG_RODATA_TEST
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 63a79c77d220..81937a5dc77c 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -111,6 +111,8 @@ enum fixed_addresses {
#ifdef CONFIG_PARAVIRT
FIX_PARAVIRT_BOOTMAP,
#endif
+ FIX_TEXT_POKE0, /* reserve 2 pages for text_poke() */
+ FIX_TEXT_POKE1,
__end_of_permanent_fixed_addresses,
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
FIX_OHCI1394_BASE,
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index db24c2278be0..bd2c6511c887 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -28,6 +28,13 @@
#endif
+/* FIXME: I don't want to stay hardcoded */
+#ifdef CONFIG_X86_64
+# define FTRACE_SYSCALL_MAX 296
+#else
+# define FTRACE_SYSCALL_MAX 333
+#endif
+
#ifdef CONFIG_FUNCTION_TRACER
#define MCOUNT_ADDR ((long)(mcount))
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h
index 8e0f8d199e05..86723035a515 100644
--- a/arch/x86/include/asm/ptrace-abi.h
+++ b/arch/x86/include/asm/ptrace-abi.h
@@ -80,8 +80,6 @@
#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
-#ifdef CONFIG_X86_PTRACE_BTS
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
@@ -140,6 +138,5 @@ struct ptrace_bts_config {
BTS records are read from oldest to newest.
Returns number of BTS records drained.
*/
-#endif /* CONFIG_X86_PTRACE_BTS */
#endif /* _ASM_X86_PTRACE_ABI_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index df9d5f78385e..8820a73ae090 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -94,6 +94,7 @@ struct thread_info {
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
+#define TIF_SYSCALL_FTRACE 27 /* for ftrace syscall instrumentation */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -115,15 +116,17 @@ struct thread_info {
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
+#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE)
/* work to do in syscall_trace_enter() */
#define _TIF_WORK_SYSCALL_ENTRY \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
+ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_FTRACE | \
_TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP)
/* work to do in syscall_trace_leave() */
#define _TIF_WORK_SYSCALL_EXIT \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)
+ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
+ _TIF_SYSCALL_FTRACE)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
@@ -132,7 +135,7 @@ struct thread_info {
_TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
/* work to do on any return to user space */
-#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
+#define _TIF_ALLWORK_MASK ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_FTRACE)
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c611ad64137f..145cce75cda7 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -66,7 +66,8 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4c80f1557433..f57658702571 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -5,6 +5,7 @@
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
+#include <linux/memory.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -12,7 +13,9 @@
#include <asm/nmi.h>
#include <asm/vsyscall.h>
#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
#include <asm/io.h>
+#include <asm/fixmap.h>
#define MAX_PATCH_LEN (255-1)
@@ -226,6 +229,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
u8 **ptr;
+ mutex_lock(&text_mutex);
for (ptr = start; ptr < end; ptr++) {
if (*ptr < text)
continue;
@@ -234,6 +238,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
/* turn DS segment override prefix into lock prefix */
text_poke(*ptr, ((unsigned char []){0xf0}), 1);
};
+ mutex_unlock(&text_mutex);
}
static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
@@ -243,6 +248,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
if (noreplace_smp)
return;
+ mutex_lock(&text_mutex);
for (ptr = start; ptr < end; ptr++) {
if (*ptr < text)
continue;
@@ -251,6 +257,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
/* turn lock prefix into DS segment override prefix */
text_poke(*ptr, ((unsigned char []){0x3E}), 1);
};
+ mutex_unlock(&text_mutex);
}
struct smp_alt_module {
@@ -500,15 +507,16 @@ void *text_poke_early(void *addr, const void *opcode, size_t len)
* It means the size must be writable atomically and the address must be aligned
* in a way that permits an atomic write. It also makes sure we fit on a single
* page.
+ *
+ * Note: Must be called under text_mutex.
*/
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
+ unsigned long flags;
char *vaddr;
- int nr_pages = 2;
struct page *pages[2];
int i;
- might_sleep();
if (!core_kernel_text((unsigned long)addr)) {
pages[0] = vmalloc_to_page(addr);
pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -518,18 +526,21 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
pages[1] = virt_to_page(addr + PAGE_SIZE);
}
BUG_ON(!pages[0]);
- if (!pages[1])
- nr_pages = 1;
- vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
- BUG_ON(!vaddr);
- local_irq_disable();
+ local_irq_save(flags);
+ set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
+ if (pages[1])
+ set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
+ vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
- local_irq_enable();
- vunmap(vaddr);
+ clear_fixmap(FIX_TEXT_POKE0);
+ if (pages[1])
+ clear_fixmap(FIX_TEXT_POKE1);
+ local_flush_tlb();
sync_core();
/* Could also do a CLFLUSH here to speed up CPU recovery; but
that causes hangs on some VIA CPUs. */
for (i = 0; i < len; i++)
BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
+ local_irq_restore(flags);
return addr;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 23da96e57b17..05209b5cc6ca 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
-#include <linux/ftrace.h>
+#include <trace/power.h>
#include <linux/acpi.h>
#include <linux/io.h>
@@ -72,6 +72,8 @@ struct acpi_cpufreq_data {
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+DEFINE_TRACE(power_mark);
+
/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index dd2130b0fb3e..95ea5fa7d444 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
+#include <linux/ftrace.h>
#include <asm/stacktrace.h>
@@ -196,6 +197,11 @@ unsigned __kprobes long oops_begin(void)
int cpu;
unsigned long flags;
+ /* notify the hw-branch tracer so it may disable tracing and
+ add the last trace to the trace buffer -
+ the earlier this happens, the more useful the trace. */
+ trace_hw_branch_oops();
+
oops_enter();
/* racy, but better than risking deadlock. */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 76f7141e0f91..61df77532120 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/list.h>
+#include <asm/cacheflush.h>
#include <asm/ftrace.h>
#include <linux/ftrace.h>
#include <asm/nops.h>
@@ -26,6 +27,18 @@
#ifdef CONFIG_DYNAMIC_FTRACE
+int ftrace_arch_code_modify_prepare(void)
+{
+ set_kernel_text_rw();
+ return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+ set_kernel_text_ro();
+ return 0;
+}
+
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
struct {
@@ -66,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
*
* 1) Put the instruction pointer into the IP buffer
* and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ * we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
*
* If an NMI is executed, the first thing it does is to call
* "ftrace_nmi_enter". This will check if the flag is set to write
@@ -82,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
* are the same as what exists.
*/
-static atomic_t in_nmi = ATOMIC_INIT(0);
+#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
+static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
-static int mod_code_write; /* set when NMI should do the write */
static void *mod_code_ip; /* holds the IP to write to */
static void *mod_code_newcode; /* holds the text to write to the IP */
@@ -101,6 +114,20 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
return r;
}
+static void clear_mod_flag(void)
+{
+ int old = atomic_read(&nmi_running);
+
+ for (;;) {
+ int new = old & ~MOD_CODE_WRITE_FLAG;
+
+ if (old == new)
+ break;
+
+ old = atomic_cmpxchg(&nmi_running, old, new);
+ }
+}
+
static void ftrace_mod_code(void)
{
/*
@@ -111,37 +138,52 @@ static void ftrace_mod_code(void)
*/
mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
MCOUNT_INSN_SIZE);
+
+ /* if we fail, then kill any new writers */
+ if (mod_code_status)
+ clear_mod_flag();
}
void ftrace_nmi_enter(void)
{
- atomic_inc(&in_nmi);
- /* Must have in_nmi seen before reading write flag */
- smp_mb();
- if (mod_code_write) {
+ if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+ smp_rmb();
ftrace_mod_code();
atomic_inc(&nmi_update_count);
}
+ /* Must have previous changes seen before executions */
+ smp_mb();
}
void ftrace_nmi_exit(void)
{
- /* Finish all executions before clearing in_nmi */
- smp_wmb();
- atomic_dec(&in_nmi);
+ /* Finish all executions before clearing nmi_running */
+ smp_mb();
+ atomic_dec(&nmi_running);
+}
+
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+ if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+ return;
+
+ do {
+ cpu_relax();
+ } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+ nmi_wait_count++;
}
static void wait_for_nmi(void)
{
- int waited = 0;
+ if (!atomic_read(&nmi_running))
+ return;
- while (atomic_read(&in_nmi)) {
- waited = 1;
+ do {
cpu_relax();
- }
+ } while (atomic_read(&nmi_running));
- if (waited)
- nmi_wait_count++;
+ nmi_wait_count++;
}
static int
@@ -151,14 +193,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
mod_code_newcode = new_code;
/* The buffers need to be visible before we let NMIs write them */
- smp_wmb();
-
- mod_code_write = 1;
-
- /* Make sure write bit is visible before we wait on NMIs */
smp_mb();
- wait_for_nmi();
+ wait_for_nmi_and_set_mod_flag();
/* Make sure all running NMIs have finished before we write the code */
smp_mb();
@@ -166,13 +203,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
ftrace_mod_code();
/* Make sure the write happens before clearing the bit */
- smp_wmb();
-
- mod_code_write = 0;
-
- /* make sure NMIs see the cleared bit */
smp_mb();
+ clear_mod_flag();
wait_for_nmi();
return mod_code_status;
@@ -368,25 +401,6 @@ int ftrace_disable_ftrace_graph_caller(void)
return ftrace_mod_jmp(ip, old_offset, new_offset);
}
-#else /* CONFIG_DYNAMIC_FTRACE */
-
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
-
-void ftrace_nmi_enter(void)
-{
- atomic_inc(&in_nmi);
-}
-
-void ftrace_nmi_exit(void)
-{
- atomic_dec(&in_nmi);
-}
-
#endif /* !CONFIG_DYNAMIC_FTRACE */
/*
@@ -396,14 +410,13 @@ void ftrace_nmi_exit(void)
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
unsigned long old;
- unsigned long long calltime;
int faulted;
struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
/* Nmi's are currently unsupported */
- if (unlikely(atomic_read(&in_nmi)))
+ if (unlikely(in_nmi()))
return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -439,17 +452,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
- if (unlikely(!__kernel_text_address(old))) {
- ftrace_graph_stop();
- *parent = old;
- WARN_ON(1);
- return;
- }
-
- calltime = cpu_clock(raw_smp_processor_id());
-
- if (ftrace_push_return_trace(old, calltime,
- self_addr, &trace.depth) == -EBUSY) {
+ if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
*parent = old;
return;
}
@@ -463,3 +466,66 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_FTRACE_SYSCALLS
+
+extern unsigned long __start_syscalls_metadata[];
+extern unsigned long __stop_syscalls_metadata[];
+extern unsigned long *sys_call_table;
+
+static struct syscall_metadata **syscalls_metadata;
+
+static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
+{
+ struct syscall_metadata *start;
+ struct syscall_metadata *stop;
+ char str[KSYM_SYMBOL_LEN];
+
+
+ start = (struct syscall_metadata *)__start_syscalls_metadata;
+ stop = (struct syscall_metadata *)__stop_syscalls_metadata;
+ kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
+
+ for ( ; start < stop; start++) {
+ if (start->name && !strcmp(start->name, str))
+ return start;
+ }
+ return NULL;
+}
+
+struct syscall_metadata *syscall_nr_to_meta(int nr)
+{
+ if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0)
+ return NULL;
+
+ return syscalls_metadata[nr];
+}
+
+void arch_init_ftrace_syscalls(void)
+{
+ int i;
+ struct syscall_metadata *meta;
+ unsigned long **psys_syscall_table = &sys_call_table;
+ static atomic_t refs;
+
+ if (atomic_inc_return(&refs) != 1)
+ goto end;
+
+ syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
+ FTRACE_SYSCALL_MAX, GFP_KERNEL);
+ if (!syscalls_metadata) {
+ WARN_ON(1);
+ return;
+ }
+
+ for (i = 0; i < FTRACE_SYSCALL_MAX; i++) {
+ meta = find_syscall_meta(psys_syscall_table[i]);
+ syscalls_metadata[i] = meta;
+ }
+ return;
+
+ /* Paranoid: avoid overflow */
+end:
+ atomic_dec(&refs);
+}
+#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 55b94614e348..7b5169d2b000 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -638,13 +638,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
#else
" pushf\n"
/*
- * Skip cs, ip, orig_ax.
+ * Skip cs, ip, orig_ax and gs.
* trampoline_handler() will plug in these values
*/
- " subl $12, %esp\n"
+ " subl $16, %esp\n"
" pushl %fs\n"
- " pushl %ds\n"
" pushl %es\n"
+ " pushl %ds\n"
" pushl %eax\n"
" pushl %ebp\n"
" pushl %edi\n"
@@ -655,10 +655,10 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
" movl %esp, %eax\n"
" call trampoline_handler\n"
/* Move flags to cs */
- " movl 52(%esp), %edx\n"
- " movl %edx, 48(%esp)\n"
+ " movl 56(%esp), %edx\n"
+ " movl %edx, 52(%esp)\n"
/* Replace saved flags with true return address. */
- " movl %eax, 52(%esp)\n"
+ " movl %eax, 56(%esp)\n"
" popl %ebx\n"
" popl %ecx\n"
" popl %edx\n"
@@ -666,8 +666,8 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
" popl %edi\n"
" popl %ebp\n"
" popl %eax\n"
- /* Skip ip, orig_ax, es, ds, fs */
- " addl $20, %esp\n"
+ /* Skip ds, es, fs, gs, orig_ax and ip */
+ " addl $24, %esp\n"
" popf\n"
#endif
" ret\n");
@@ -691,6 +691,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
regs->cs = __KERNEL_CS;
#else
regs->cs = __KERNEL_CS | get_kernel_rpl();
+ regs->gs = 0;
#endif
regs->ip = trampoline_address;
regs->orig_ax = ~0UL;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 25e28087a3ee..ca989158e847 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,7 +8,7 @@
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
-#include <linux/ftrace.h>
+#include <trace/power.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/idle.h>
@@ -22,6 +22,9 @@ EXPORT_SYMBOL(idle_nomwait);
struct kmem_cache *task_xstate_cachep;
+DEFINE_TRACE(power_start);
+DEFINE_TRACE(power_end);
+
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
*dst = *src;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b7cc21bc6ae0..fe9345c967de 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/signal.h>
+#include <linux/ftrace.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -1415,6 +1416,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
tracehook_report_syscall_entry(regs))
ret = -1L;
+ if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
+ ftrace_syscall_enter(regs);
+
if (unlikely(current->audit_context)) {
if (IS_IA32)
audit_syscall_entry(AUDIT_ARCH_I386,
@@ -1438,6 +1442,9 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+ if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
+ ftrace_syscall_exit(regs);
+
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, 0);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 0a303c3ed11f..a58504ea78cc 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -59,7 +59,8 @@ config KVM_AMD
config KVM_TRACE
bool "KVM trace support"
- depends on KVM && MARKERS && SYSFS
+ depends on KVM && SYSFS
+ select MARKERS
select RELAY
select DEBUG_FS
default n
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index db81e9a8556b..749559ed80f5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1054,17 +1054,47 @@ static noinline int do_test_wp_bit(void)
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
+static int kernel_set_to_readonly;
+
+void set_kernel_text_rw(void)
+{
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long size = PFN_ALIGN(_etext) - start;
+
+ if (!kernel_set_to_readonly)
+ return;
+
+ pr_debug("Set kernel text: %lx - %lx for read write\n",
+ start, start+size);
+
+ set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
+}
+
+void set_kernel_text_ro(void)
+{
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long size = PFN_ALIGN(_etext) - start;
+
+ if (!kernel_set_to_readonly)
+ return;
+
+ pr_debug("Set kernel text: %lx - %lx for read only\n",
+ start, start+size);
+
+ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+}
+
void mark_rodata_ro(void)
{
unsigned long start = PFN_ALIGN(_text);
unsigned long size = PFN_ALIGN(_etext) - start;
-#ifndef CONFIG_DYNAMIC_FTRACE
- /* Dynamic tracing modifies the kernel text section */
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel text: %luk\n",
size >> 10);
+ kernel_set_to_readonly = 1;
+
#ifdef CONFIG_CPA_DEBUG
printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
start, start+size);
@@ -1073,7 +1103,6 @@ void mark_rodata_ro(void)
printk(KERN_INFO "Testing CPA: write protecting again\n");
set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
#endif
-#endif /* CONFIG_DYNAMIC_FTRACE */
start += size;
size = (unsigned long)__end_rodata - start;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 54efa57d1c03..1753e8020df6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -734,21 +734,48 @@ void __init mem_init(void)
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
+static int kernel_set_to_readonly;
+
+void set_kernel_text_rw(void)
+{
+ unsigned long start = PFN_ALIGN(_stext);
+ unsigned long end = PFN_ALIGN(__start_rodata);
+
+ if (!kernel_set_to_readonly)
+ return;
+
+ pr_debug("Set kernel text: %lx - %lx for read write\n",
+ start, end);
+
+ set_memory_rw(start, (end - start) >> PAGE_SHIFT);
+}
+
+void set_kernel_text_ro(void)
+{
+ unsigned long start = PFN_ALIGN(_stext);
+ unsigned long end = PFN_ALIGN(__start_rodata);
+
+ if (!kernel_set_to_readonly)
+ return;
+
+ pr_debug("Set kernel text: %lx - %lx for read only\n",
+ start, end);
+
+ set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+}
+
void mark_rodata_ro(void)
{
unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
unsigned long rodata_start =
((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
-#ifdef CONFIG_DYNAMIC_FTRACE
- /* Dynamic tracing modifies the kernel text section */
- start = rodata_start;
-#endif
-
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+ kernel_set_to_readonly = 1;
+
/*
* The rodata section (but not the kernel text!) should also be
* not-executable.