diff options
author | Srinivasa D S | 2008-07-25 01:46:04 -0700 |
---|---|---|
committer | Linus Torvalds | 2008-07-25 10:53:30 -0700 |
commit | ef53d9c5e4da147ecaa43c44c5e5945eb83970a2 (patch) | |
tree | 3b596445e5d0613fda4b33a4ae96e0e3fffdcf1e /arch | |
parent | 53a9600c634e3bfd6230e0597aca159bf4d4d010 (diff) |
kprobes: improve kretprobe scalability with hashed locking
Currently list of kretprobe instances are stored in kretprobe object (as
used_instances,free_instances) and in kretprobe hash table. We have one
global kretprobe lock to serialise the access to these lists. This causes
only one kretprobe handler to execute at a time. Hence affects system
performance, particularly on SMP systems and when return probe is set on
lot of functions (like on all systemcalls).
Solution proposed here gives fine-grain locks that performs better on SMP
system compared to present kretprobe implementation.
Solution:
1) Instead of having one global lock to protect kretprobe instances
present in kretprobe object and kretprobe hash table. We will have
two locks, one lock for protecting kretprobe hash table and another
lock for kretporbe object.
2) We hold lock present in kretprobe object while we modify kretprobe
instance in kretprobe object and we hold per-hash-list lock while
modifying kretprobe instances present in that hash list. To prevent
deadlock, we never grab a per-hash-list lock while holding a kretprobe
lock.
3) We can remove used_instances from struct kretprobe, as we can
track used instances of kretprobe instances using kretprobe hash
table.
Time duration for kernel compilation ("make -j 8") on a 8-way ppc64 system
with return probes set on all systemcalls looks like this.
cacheline non-cacheline Un-patched kernel
aligned patch aligned patch
===============================================================================
real 9m46.784s 9m54.412s 10m2.450s
user 40m5.715s 40m7.142s 40m4.273s
sys 2m57.754s 2m58.583s 3m17.430s
===========================================================
Time duration for kernel compilation ("make -j 8) on the same system, when
kernel is not probed.
=========================
real 9m26.389s
user 40m8.775s
sys 2m7.283s
=========================
Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com>
Signed-off-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/kernel/kprobes.c | 6 | ||||
-rw-r--r-- | arch/ia64/kernel/kprobes.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kernel/kprobes.c | 6 | ||||
-rw-r--r-- | arch/s390/kernel/kprobes.c | 6 | ||||
-rw-r--r-- | arch/sparc64/kernel/kprobes.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes.c | 6 |
6 files changed, 15 insertions, 26 deletions
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 5ee39e10c8d1..d28513f14d05 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) } kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); @@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) return (void *)orig_ret_address; } -/* Called with kretprobe_lock held. */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 233434f4f88f..f07688da947c 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) ((struct fnptr *)kretprobe_trampoline)->ip; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { @@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 4ba2af125450..de79915452c8 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_saved_msr = regs->msr; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, regs->nip = orig_ret_address; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 288ad490a6dd..4f82e5b5f879 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, __ctl_store(kcb->kprobe_saved_ctl, 9, 11); } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index f43b5d755354..201a6e547e4a 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -/* Called with kretprobe_lock held. The value stored in the return - * address register is actually 2 instructions before where the - * callee will return to. Sequences usually look something like this +/* The value stored in the return address register is actually 2 + * instructions before where the callee will return to. + * Sequences usually look something like this * * call some_function <--- return register points here * nop <--- call delay slot @@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) regs->tnpc = orig_ret_address + 4; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 43c019f85f0d..6c27679ec6aa 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->ip = (unsigned long)p->ainsn.insn; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; @@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); |