From da753beaeb1446aa87bcca7e8a0026633a8914f0 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 28 Apr 2006 15:21:23 -0700 Subject: [NET]: use hlist_unhashed() Use hlist_unhashed() rather than accessing inside data structure. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/list.h | 2 +- include/net/inet_timewait_sock.h | 2 +- include/net/sock.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 67258b47e9ca..76f05718342c 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -619,7 +619,7 @@ static inline void hlist_del_rcu(struct hlist_node *n) static inline void hlist_del_init(struct hlist_node *n) { - if (n->pprev) { + if (!hlist_unhashed(n)) { __hlist_del(n); INIT_HLIST_NODE(n); } diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 1da294c47522..e837f98fdb50 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -150,7 +150,7 @@ static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) { - return tw->tw_death_node.pprev != NULL; + return !hlist_unhashed(&tw->tw_death_node); } static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) diff --git a/include/net/sock.h b/include/net/sock.h index ff8b0dad7b0f..c9fad6fb629b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -279,7 +279,7 @@ static inline int sk_unhashed(const struct sock *sk) static inline int sk_hashed(const struct sock *sk) { - return sk->sk_node.pprev != NULL; + return !sk_unhashed(sk); } static __inline__ void sk_node_init(struct hlist_node *node) -- cgit v1.2.3 From cd95842ca0ffb0e3df3b459832a60f9f4544ed9e Mon Sep 17 00:00:00 2001 From: Markus Gutschke Date: Sun, 30 Apr 2006 15:34:29 +0100 Subject: [ARM] 3486/1: Mark memory as clobbered by the ARM _syscallX() macros Patch from Markus Gutschke In order to prevent gcc from making incorrect optimizations, all asm() statements that define system calls should report memory as clobbered. Recent versions of the headers for i386 have been changed accordingly, but the ARM headers are still defective. This patch fixes the bug tracked at http://bugzilla.kernel.org/show_bug.cgi?id=6205 Signed-off-by: Markus Gutschke Signed-off-by: Russell King --- drivers/input/touchscreen/corgi_ts.c | 2 +- include/asm-arm/unistd.h | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/corgi_ts.c b/drivers/input/touchscreen/corgi_ts.c index 1042987856f7..5013703db0e6 100644 --- a/drivers/input/touchscreen/corgi_ts.c +++ b/drivers/input/touchscreen/corgi_ts.c @@ -17,7 +17,7 @@ #include #include #include -#include +//#include #include #include diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index ee8dfea549bc..26f2f4828e03 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -410,7 +410,8 @@ type name(void) { \ __asm__ __volatile__ ( \ __syscall(name) \ : "=r" (__res_r0) \ - : __SYS_REG_LIST() ); \ + : __SYS_REG_LIST() \ + : "memory" ); \ __res = __res_r0; \ __syscall_return(type,__res); \ } @@ -424,7 +425,8 @@ type name(type1 arg1) { \ __asm__ __volatile__ ( \ __syscall(name) \ : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0) ) ); \ + : __SYS_REG_LIST( "0" (__r0) ) \ + : "memory" ); \ __res = __res_r0; \ __syscall_return(type,__res); \ } @@ -439,7 +441,8 @@ type name(type1 arg1,type2 arg2) { \ __asm__ __volatile__ ( \ __syscall(name) \ : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1) ) ); \ + : __SYS_REG_LIST( "0" (__r0), "r" (__r1) ) \ + : "memory" ); \ __res = __res_r0; \ __syscall_return(type,__res); \ } @@ -456,7 +459,8 @@ type name(type1 arg1,type2 arg2,type3 arg3) { \ __asm__ __volatile__ ( \ __syscall(name) \ : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2) ) ); \ + : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2) ) \ + : "memory" ); \ __res = __res_r0; \ __syscall_return(type,__res); \ } @@ -474,7 +478,8 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ __asm__ __volatile__ ( \ __syscall(name) \ : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), "r" (__r3) ) ); \ + : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), "r" (__r3) ) \ + : "memory" ); \ __res = __res_r0; \ __syscall_return(type,__res); \ } @@ -494,7 +499,8 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ __syscall(name) \ : "=r" (__res_r0) \ : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), \ - "r" (__r3), "r" (__r4) ) ); \ + "r" (__r3), "r" (__r4) ) \ + : "memory" ); \ __res = __res_r0; \ __syscall_return(type,__res); \ } @@ -514,7 +520,8 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6 __syscall(name) \ : "=r" (__res_r0) \ : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), \ - "r" (__r3), "r" (__r4), "r" (__r5) ) ); \ + "r" (__r3), "r" (__r4), "r" (__r5) ) \ + : "memory" ); \ __res = __res_r0; \ __syscall_return(type,__res); \ } -- cgit v1.2.3 From 76bbb00288e569e7bd9ec18f45e4f814352260dd Mon Sep 17 00:00:00 2001 From: Deepak Saxena Date: Sun, 30 Apr 2006 15:34:29 +0100 Subject: [ARM] 3487/1: IXP4xx: Support non-PCI systems Patch from Deepak Saxena This patch allows for the addition of IXP4xx systems that do not make use of the PCI interface by moving the CONFIG_PCI symbol selection to be platform-specific instead of for all of IXP4xx. If at least one machine with PCI support is built, the PCI code will be compiled in, but when building !PCI, this will drastically shrink the kernel size. Signed-off-by: Deepak Saxena Signed-off-by: Russell King --- arch/arm/Kconfig | 2 -- arch/arm/mach-ixp4xx/Kconfig | 15 ++++++++++++++- arch/arm/mach-ixp4xx/Makefile | 3 ++- include/asm-arm/arch-ixp4xx/io.h | 7 +++++++ include/asm-arm/arch-ixp4xx/memory.h | 2 +- 5 files changed, 24 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1dbf6ddb300d..08b7cc900cae 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -150,8 +150,6 @@ config ARCH_IOP3XX config ARCH_IXP4XX bool "IXP4xx-based" - select DMABOUNCE - select PCI help Support for Intel's IXP4XX (XScale) family of processors. diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index 5bf50a2a737d..2a39f9e481ad 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -11,6 +11,7 @@ comment "IXP4xx Platforms" config MACH_NSLU2 bool prompt "Linksys NSLU2" + select PCI help Say 'Y' here if you want your kernel to support Linksys's NSLU2 NAS device. For more information on this platform, @@ -18,6 +19,7 @@ config MACH_NSLU2 config ARCH_AVILA bool "Avila" + select PCI help Say 'Y' here if you want your kernel to support the Gateworks Avila Network Platform. For more information on this platform, @@ -25,6 +27,7 @@ config ARCH_AVILA config ARCH_ADI_COYOTE bool "Coyote" + select PCI help Say 'Y' here if you want your kernel to support the ADI Engineering Coyote Gateway Reference Platform. For more @@ -32,6 +35,7 @@ config ARCH_ADI_COYOTE config ARCH_IXDP425 bool "IXDP425" + select PCI help Say 'Y' here if you want your kernel to support Intel's IXDP425 Development Platform (Also known as Richfield). @@ -39,6 +43,7 @@ config ARCH_IXDP425 config MACH_IXDPG425 bool "IXDPG425" + select PCI help Say 'Y' here if you want your kernel to support Intel's IXDPG425 Development Platform (Also known as Montajade). @@ -46,6 +51,7 @@ config MACH_IXDPG425 config MACH_IXDP465 bool "IXDP465" + select PCI help Say 'Y' here if you want your kernel to support Intel's IXDP465 Development Platform (Also known as BMP). @@ -72,6 +78,7 @@ config ARCH_PRPMC1100 config MACH_NAS100D bool prompt "NAS100D" + select PCI help Say 'Y' here if you want your kernel to support Iomega's NAS 100d device. For more information on this platform, @@ -96,6 +103,7 @@ config CPU_IXP46X config MACH_GTWX5715 bool "Gemtek WX5715 (Linksys WRV54G)" depends on ARCH_IXP4XX + select PCI help This board is currently inside the Linksys WRV54G Gateways. @@ -110,11 +118,16 @@ config MACH_GTWX5715 "High Speed" UART is n/c (as far as I can tell) 20 Pin ARM/Xscale JTAG interface on J2 - comment "IXP4xx Options" +config DMABOUNCE + bool + default y + depends on PCI + config IXP4XX_INDIRECT_PCI bool "Use indirect PCI memory access" + depends on PCI help IXP4xx provides two methods of accessing PCI memory space: diff --git a/arch/arm/mach-ixp4xx/Makefile b/arch/arm/mach-ixp4xx/Makefile index 0471044fa179..5a4aaa0e0a09 100644 --- a/arch/arm/mach-ixp4xx/Makefile +++ b/arch/arm/mach-ixp4xx/Makefile @@ -2,8 +2,9 @@ # Makefile for the linux kernel. # -obj-y += common.o common-pci.o +obj-y += common.o +obj-$(CONFIG_PCI) += common-pci.o obj-$(CONFIG_ARCH_IXDP4XX) += ixdp425-pci.o ixdp425-setup.o obj-$(CONFIG_MACH_IXDPG425) += ixdpg425-pci.o coyote-setup.o obj-$(CONFIG_ARCH_ADI_COYOTE) += coyote-pci.o coyote-setup.o diff --git a/include/asm-arm/arch-ixp4xx/io.h b/include/asm-arm/arch-ixp4xx/io.h index 942b622455bc..b59520e56fc7 100644 --- a/include/asm-arm/arch-ixp4xx/io.h +++ b/include/asm-arm/arch-ixp4xx/io.h @@ -260,6 +260,12 @@ out: #endif +#ifndef CONFIG_PCI + +#define __io(v) v + +#else + /* * IXP4xx does not have a transparent cpu -> PCI I/O translation * window. Instead, it has a set of registers that must be tweaked @@ -578,6 +584,7 @@ __ixp4xx_iowrite32_rep(void __iomem *addr, const void *vaddr, u32 count) #define ioport_map(port, nr) ((void __iomem*)(port + PIO_OFFSET)) #define ioport_unmap(addr) +#endif // !CONFIG_PCI #endif // __ASM_ARM_ARCH_IO_H diff --git a/include/asm-arm/arch-ixp4xx/memory.h b/include/asm-arm/arch-ixp4xx/memory.h index ee211d28a3ef..af9667b57ab3 100644 --- a/include/asm-arm/arch-ixp4xx/memory.h +++ b/include/asm-arm/arch-ixp4xx/memory.h @@ -14,7 +14,7 @@ */ #define PHYS_OFFSET UL(0x00000000) -#ifndef __ASSEMBLY__ +#if !defined(__ASSEMBLY__) && defined(CONFIG_PCI) void ixp4xx_adjust_zones(int node, unsigned long *size, unsigned long *holes); -- cgit v1.2.3 From 1241140f5183db38393556832198a3b109bf9085 Mon Sep 17 00:00:00 2001 From: David S. Miller Date: Sun, 30 Apr 2006 21:40:13 -0700 Subject: [SPARC64]: Kill __flush_tlb_page() prototype. This function no longer exists. Signed-off-by: David S. Miller --- include/asm-sparc64/tlbflush.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h index 9ad5d9c51d42..e3a7c453b500 100644 --- a/include/asm-sparc64/tlbflush.h +++ b/include/asm-sparc64/tlbflush.h @@ -22,8 +22,6 @@ extern void flush_tlb_pending(void); /* Local cpu only. */ extern void __flush_tlb_all(void); -extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r); - extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifndef CONFIG_SMP -- cgit v1.2.3 From 5411be59db80333039386f3b1ccfe5eb9023a916 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 29 Mar 2006 20:23:36 -0500 Subject: [PATCH] drop task argument of audit_syscall_{entry,exit} ... it's always current, and that's a good thing - allows simpler locking. Signed-off-by: Al Viro --- arch/i386/kernel/ptrace.c | 7 +++---- arch/i386/kernel/vm86.c | 2 +- arch/ia64/kernel/ptrace.c | 4 ++-- arch/mips/kernel/ptrace.c | 4 ++-- arch/powerpc/kernel/ptrace.c | 5 ++--- arch/s390/kernel/ptrace.c | 5 ++--- arch/sparc64/kernel/ptrace.c | 5 ++--- arch/um/kernel/ptrace.c | 6 ++---- arch/x86_64/kernel/ptrace.c | 6 +++--- include/linux/audit.h | 8 ++++---- kernel/auditsc.c | 8 ++++---- 11 files changed, 27 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 506462ef36a0..fd7eaf7866e0 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -671,7 +671,7 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit) if (unlikely(current->audit_context)) { if (entryexit) - audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), + audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax); /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is @@ -720,14 +720,13 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit) ret = is_sysemu; out: if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax, + audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_eax, regs->ebx, regs->ecx, regs->edx, regs->esi); if (ret == 0) return 0; regs->orig_eax = -1; /* force skip of syscall restarting */ if (unlikely(current->audit_context)) - audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), - regs->eax); + audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax); return 1; } diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index aee14fafd13d..00e0118e717c 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -312,7 +312,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk /*call audit_syscall_exit since we do not exit via the normal paths */ if (unlikely(current->audit_context)) - audit_syscall_exit(current, AUDITSC_RESULT(eax), eax); + audit_syscall_exit(AUDITSC_RESULT(eax), eax); __asm__ __volatile__( "movl %0,%%esp\n\t" diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 9887c8787e7a..e61e15e28d8b 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1644,7 +1644,7 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, arch = AUDIT_ARCH_IA64; } - audit_syscall_entry(current, arch, syscall, arg0, arg1, arg2, arg3); + audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3); } } @@ -1662,7 +1662,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, if (success != AUDITSC_SUCCESS) result = -result; - audit_syscall_exit(current, success, result); + audit_syscall_exit(success, result); } if (test_thread_flag(TIF_SYSCALL_TRACE) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index f3106d0771b0..9b4733c12395 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -483,7 +483,7 @@ static inline int audit_arch(void) asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) { if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(current, AUDITSC_RESULT(regs->regs[2]), + audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]), regs->regs[2]); if (!(current->ptrace & PT_PTRACED)) @@ -507,7 +507,7 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) } out: if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(current, audit_arch(), regs->regs[2], + audit_syscall_entry(audit_arch(), regs->regs[2], regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index bcb83574335b..4a677d1bd4ef 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -538,7 +538,7 @@ void do_syscall_trace_enter(struct pt_regs *regs) do_syscall_trace(); if (unlikely(current->audit_context)) - audit_syscall_entry(current, + audit_syscall_entry( #ifdef CONFIG_PPC32 AUDIT_ARCH_PPC, #else @@ -556,8 +556,7 @@ void do_syscall_trace_leave(struct pt_regs *regs) #endif if (unlikely(current->audit_context)) - audit_syscall_exit(current, - (regs->ccr&0x1000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, + audit_syscall_exit((regs->ccr&0x1000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, regs->result); if ((test_thread_flag(TIF_SYSCALL_TRACE) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 37dfe33dab73..8f36504075ed 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -734,7 +734,7 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int entryexit) { if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(current, AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]); + audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]); if (!test_thread_flag(TIF_SYSCALL_TRACE)) goto out; @@ -761,8 +761,7 @@ syscall_trace(struct pt_regs *regs, int entryexit) } out: if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(current, - test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X, + audit_syscall_entry(test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X, regs->gprs[2], regs->orig_gpr2, regs->gprs[3], regs->gprs[4], regs->gprs[5]); } diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index 49e6dedd027d..d31975e6d6f6 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -653,7 +653,7 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p) if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) result = AUDITSC_FAILURE; - audit_syscall_exit(current, result, regs->u_regs[UREG_I0]); + audit_syscall_exit(result, regs->u_regs[UREG_I0]); } if (!(current->ptrace & PT_PTRACED)) @@ -677,8 +677,7 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p) out: if (unlikely(current->audit_context) && !syscall_exit_p) - audit_syscall_entry(current, - (test_thread_flag(TIF_32BIT) ? + audit_syscall_entry((test_thread_flag(TIF_32BIT) ? AUDIT_ARCH_SPARC : AUDIT_ARCH_SPARC64), regs->u_regs[UREG_G1], diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 60d2eda995c1..9a77fb3c269d 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -275,15 +275,13 @@ void syscall_trace(union uml_pt_regs *regs, int entryexit) if (unlikely(current->audit_context)) { if (!entryexit) - audit_syscall_entry(current, - HOST_AUDIT_ARCH, + audit_syscall_entry(HOST_AUDIT_ARCH, UPT_SYSCALL_NR(regs), UPT_SYSCALL_ARG1(regs), UPT_SYSCALL_ARG2(regs), UPT_SYSCALL_ARG3(regs), UPT_SYSCALL_ARG4(regs)); - else audit_syscall_exit(current, - AUDITSC_RESULT(UPT_SYSCALL_RET(regs)), + else audit_syscall_exit(AUDITSC_RESULT(UPT_SYSCALL_RET(regs)), UPT_SYSCALL_RET(regs)); } diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index da8e7903d817..2d50024c9f30 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -600,12 +600,12 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) if (unlikely(current->audit_context)) { if (test_thread_flag(TIF_IA32)) { - audit_syscall_entry(current, AUDIT_ARCH_I386, + audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_rax, regs->rbx, regs->rcx, regs->rdx, regs->rsi); } else { - audit_syscall_entry(current, AUDIT_ARCH_X86_64, + audit_syscall_entry(AUDIT_ARCH_X86_64, regs->orig_rax, regs->rdi, regs->rsi, regs->rdx, regs->r10); @@ -616,7 +616,7 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) asmlinkage void syscall_trace_leave(struct pt_regs *regs) { if (unlikely(current->audit_context)) - audit_syscall_exit(current, AUDITSC_RESULT(regs->rax), regs->rax); + audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax); if ((test_thread_flag(TIF_SYSCALL_TRACE) || test_thread_flag(TIF_SINGLESTEP)) diff --git a/include/linux/audit.h b/include/linux/audit.h index 1c47c59058c1..39fef6ebb854 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -287,10 +287,10 @@ struct netlink_skb_parms; /* Public API */ extern int audit_alloc(struct task_struct *task); extern void audit_free(struct task_struct *task); -extern void audit_syscall_entry(struct task_struct *task, int arch, +extern void audit_syscall_entry(int arch, int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3); -extern void audit_syscall_exit(struct task_struct *task, int failed, long return_code); +extern void audit_syscall_exit(int failed, long return_code); extern void audit_getname(const char *name); extern void audit_putname(const char *name); extern void __audit_inode(const char *name, const struct inode *inode, unsigned flags); @@ -323,8 +323,8 @@ extern int audit_set_macxattr(const char *name); #else #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) -#define audit_syscall_entry(t,ta,a,b,c,d,e) do { ; } while (0) -#define audit_syscall_exit(t,f,r) do { ; } while (0) +#define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0) +#define audit_syscall_exit(f,r) do { ; } while (0) #define audit_getname(n) do { ; } while (0) #define audit_putname(n) do { ; } while (0) #define __audit_inode(n,i,f) do { ; } while (0) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ba0ec1ba6698..7ed82b088e4b 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -736,10 +736,11 @@ void audit_free(struct task_struct *tsk) * will only be written if another part of the kernel requests that it * be written). */ -void audit_syscall_entry(struct task_struct *tsk, int arch, int major, +void audit_syscall_entry(int arch, int major, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4) { + struct task_struct *tsk = current; struct audit_context *context = tsk->audit_context; enum audit_state state; @@ -817,12 +818,11 @@ void audit_syscall_entry(struct task_struct *tsk, int arch, int major, * message), then write out the syscall information. In call cases, * free the names stored from getname(). */ -void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) +void audit_syscall_exit(int valid, long return_code) { + struct task_struct *tsk = current; struct audit_context *context; - /* tsk == current */ - get_task_struct(tsk); task_lock(tsk); context = audit_get_context(tsk, valid, return_code); -- cgit v1.2.3 From 376bd9cb357ec945ac893feaeb63af7370a6e70b Mon Sep 17 00:00:00 2001 From: Darrel Goeddel Date: Fri, 24 Feb 2006 15:44:05 -0600 Subject: [PATCH] support for context based audit filtering The following patch provides selinux interfaces that will allow the audit system to perform filtering based on the process context (user, role, type, sensitivity, and clearance). These interfaces will allow the selinux module to perform efficient matches based on lower level selinux constructs, rather than relying on context retrievals and string comparisons within the audit module. It also allows for dominance checks on the mls portion of the contexts that are impossible with only string comparisons. Signed-off-by: Darrel Goeddel Signed-off-by: Al Viro --- include/linux/audit.h | 5 + include/linux/selinux.h | 112 ++++++++++++++++++++ security/selinux/Makefile | 2 +- security/selinux/avc.c | 13 +-- security/selinux/exports.c | 28 +++++ security/selinux/ss/mls.c | 30 +++++- security/selinux/ss/mls.h | 4 +- security/selinux/ss/services.c | 235 ++++++++++++++++++++++++++++++++++++++++- 8 files changed, 419 insertions(+), 10 deletions(-) create mode 100644 include/linux/selinux.h create mode 100644 security/selinux/exports.c (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 39fef6ebb854..740f950397b7 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -145,6 +145,11 @@ #define AUDIT_PERS 10 #define AUDIT_ARCH 11 #define AUDIT_MSGTYPE 12 +#define AUDIT_SE_USER 13 /* security label user */ +#define AUDIT_SE_ROLE 14 /* security label role */ +#define AUDIT_SE_TYPE 15 /* security label type */ +#define AUDIT_SE_SEN 16 /* security label sensitivity label */ +#define AUDIT_SE_CLR 17 /* security label clearance label */ /* These are ONLY useful when checking * at syscall exit time (AUDIT_AT_EXIT). */ diff --git a/include/linux/selinux.h b/include/linux/selinux.h new file mode 100644 index 000000000000..9d684b1728b0 --- /dev/null +++ b/include/linux/selinux.h @@ -0,0 +1,112 @@ +/* + * SELinux services exported to the rest of the kernel. + * + * Author: James Morris + * + * Copyright (C) 2005 Red Hat, Inc., James Morris + * Copyright (C) 2006 Trusted Computer Solutions, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + */ +#ifndef _LINUX_SELINUX_H +#define _LINUX_SELINUX_H + +struct selinux_audit_rule; +struct audit_context; + +#ifdef CONFIG_SECURITY_SELINUX + +/** + * selinux_audit_rule_init - alloc/init an selinux audit rule structure. + * @field: the field this rule refers to + * @op: the operater the rule uses + * @rulestr: the text "target" of the rule + * @rule: pointer to the new rule structure returned via this + * + * Returns 0 if successful, -errno if not. On success, the rule structure + * will be allocated internally. The caller must free this structure with + * selinux_audit_rule_free() after use. + */ +int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, + struct selinux_audit_rule **rule); + +/** + * selinux_audit_rule_free - free an selinux audit rule structure. + * @rule: pointer to the audit rule to be freed + * + * This will free all memory associated with the given rule. + * If @rule is NULL, no operation is performed. + */ +void selinux_audit_rule_free(struct selinux_audit_rule *rule); + +/** + * selinux_audit_rule_match - determine if a context ID matches a rule. + * @ctxid: the context ID to check + * @field: the field this rule refers to + * @op: the operater the rule uses + * @rule: pointer to the audit rule to check against + * @actx: the audit context (can be NULL) associated with the check + * + * Returns 1 if the context id matches the rule, 0 if it does not, and + * -errno on failure. + */ +int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, + struct selinux_audit_rule *rule, + struct audit_context *actx); + +/** + * selinux_audit_set_callback - set the callback for policy reloads. + * @callback: the function to call when the policy is reloaded + * + * This sets the function callback function that will update the rules + * upon policy reloads. This callback should rebuild all existing rules + * using selinux_audit_rule_init(). + */ +void selinux_audit_set_callback(int (*callback)(void)); + +/** + * selinux_task_ctxid - determine a context ID for a process. + * @tsk: the task object + * @ctxid: ID value returned via this + * + * On return, ctxid will contain an ID for the context. This value + * should only be used opaquely. + */ +void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid); + +#else + +static inline int selinux_audit_rule_init(u32 field, u32 op, + char *rulestr, + struct selinux_audit_rule **rule) +{ + return -ENOTSUPP; +} + +static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule) +{ + return; +} + +static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, + struct selinux_audit_rule *rule, + struct audit_context *actx) +{ + return 0; +} + +static inline void selinux_audit_set_callback(int (*callback)(void)) +{ + return; +} + +static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) +{ + *ctxid = 0; +} + +#endif /* CONFIG_SECURITY_SELINUX */ + +#endif /* _LINUX_SELINUX_H */ diff --git a/security/selinux/Makefile b/security/selinux/Makefile index 688c0a267b62..faf2e02e4410 100644 --- a/security/selinux/Makefile +++ b/security/selinux/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_SECURITY_SELINUX) := selinux.o ss/ -selinux-y := avc.o hooks.o selinuxfs.o netlink.o nlmsgtab.o netif.o +selinux-y := avc.o hooks.o selinuxfs.o netlink.o nlmsgtab.o netif.o exports.o selinux-$(CONFIG_SECURITY_NETWORK_XFRM) += xfrm.o diff --git a/security/selinux/avc.c b/security/selinux/avc.c index ac5d69bb3377..a300702da527 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -800,7 +800,7 @@ out: int avc_ss_reset(u32 seqno) { struct avc_callback_node *c; - int i, rc = 0; + int i, rc = 0, tmprc; unsigned long flag; struct avc_node *node; @@ -813,15 +813,16 @@ int avc_ss_reset(u32 seqno) for (c = avc_callbacks; c; c = c->next) { if (c->events & AVC_CALLBACK_RESET) { - rc = c->callback(AVC_CALLBACK_RESET, - 0, 0, 0, 0, NULL); - if (rc) - goto out; + tmprc = c->callback(AVC_CALLBACK_RESET, + 0, 0, 0, 0, NULL); + /* save the first error encountered for the return + value and continue processing the callbacks */ + if (!rc) + rc = tmprc; } } avc_latest_notif_update(seqno, 0); -out: return rc; } diff --git a/security/selinux/exports.c b/security/selinux/exports.c new file mode 100644 index 000000000000..333c4c7824d8 --- /dev/null +++ b/security/selinux/exports.c @@ -0,0 +1,28 @@ +/* + * SELinux services exported to the rest of the kernel. + * + * Author: James Morris + * + * Copyright (C) 2005 Red Hat, Inc., James Morris + * Copyright (C) 2006 Trusted Computer Solutions, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + */ +#include +#include +#include +#include + +#include "security.h" +#include "objsec.h" + +void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) +{ + struct task_security_struct *tsec = tsk->security; + if (selinux_enabled) + *ctxid = tsec->sid; + else + *ctxid = 0; +} diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 84047f69f9c1..7bc5b6440f70 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -8,7 +8,7 @@ * * Support for enhanced MLS infrastructure. * - * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. + * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ #include @@ -384,6 +384,34 @@ out: return rc; } +/* + * Set the MLS fields in the security context structure + * `context' based on the string representation in + * the string `str'. This function will allocate temporary memory with the + * given constraints of gfp_mask. + */ +int mls_from_string(char *str, struct context *context, gfp_t gfp_mask) +{ + char *tmpstr, *freestr; + int rc; + + if (!selinux_mls_enabled) + return -EINVAL; + + /* we need freestr because mls_context_to_sid will change + the value of tmpstr */ + tmpstr = freestr = kstrdup(str, gfp_mask); + if (!tmpstr) { + rc = -ENOMEM; + } else { + rc = mls_context_to_sid(':', &tmpstr, context, + NULL, SECSID_NULL); + kfree(freestr); + } + + return rc; +} + /* * Copies the effective MLS range from `src' into `dst'. */ diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h index 03de697c8058..fbb42f07dd7c 100644 --- a/security/selinux/ss/mls.h +++ b/security/selinux/ss/mls.h @@ -8,7 +8,7 @@ * * Support for enhanced MLS infrastructure. * - * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. + * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ #ifndef _SS_MLS_H_ @@ -27,6 +27,8 @@ int mls_context_to_sid(char oldc, struct sidtab *s, u32 def_sid); +int mls_from_string(char *str, struct context *context, gfp_t gfp_mask); + int mls_convert_context(struct policydb *oldp, struct policydb *newp, struct context *context); diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 61492485de84..7177e98df7f3 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -7,12 +7,13 @@ * Updated: Trusted Computer Solutions, Inc. * * Support for enhanced MLS infrastructure. + * Support for context based audit filters. * * Updated: Frank Mayer and Karl MacMillan * * Added conditional policy language extensions * - * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. + * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. * Copyright (C) 2003 - 2004 Tresys Technology, LLC * Copyright (C) 2003 Red Hat, Inc., James Morris * This program is free software; you can redistribute it and/or modify @@ -1811,3 +1812,235 @@ out: POLICY_RDUNLOCK; return rc; } + +struct selinux_audit_rule { + u32 au_seqno; + struct context au_ctxt; +}; + +void selinux_audit_rule_free(struct selinux_audit_rule *rule) +{ + if (rule) { + context_destroy(&rule->au_ctxt); + kfree(rule); + } +} + +int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, + struct selinux_audit_rule **rule) +{ + struct selinux_audit_rule *tmprule; + struct role_datum *roledatum; + struct type_datum *typedatum; + struct user_datum *userdatum; + int rc = 0; + + *rule = NULL; + + if (!ss_initialized) + return -ENOTSUPP; + + switch (field) { + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + /* only 'equals' and 'not equals' fit user, role, and type */ + if (op != AUDIT_EQUAL && op != AUDIT_NOT_EQUAL) + return -EINVAL; + break; + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + /* we do not allow a range, indicated by the presense of '-' */ + if (strchr(rulestr, '-')) + return -EINVAL; + break; + default: + /* only the above fields are valid */ + return -EINVAL; + } + + tmprule = kzalloc(sizeof(struct selinux_audit_rule), GFP_KERNEL); + if (!tmprule) + return -ENOMEM; + + context_init(&tmprule->au_ctxt); + + POLICY_RDLOCK; + + tmprule->au_seqno = latest_granting; + + switch (field) { + case AUDIT_SE_USER: + userdatum = hashtab_search(policydb.p_users.table, rulestr); + if (!userdatum) + rc = -EINVAL; + else + tmprule->au_ctxt.user = userdatum->value; + break; + case AUDIT_SE_ROLE: + roledatum = hashtab_search(policydb.p_roles.table, rulestr); + if (!roledatum) + rc = -EINVAL; + else + tmprule->au_ctxt.role = roledatum->value; + break; + case AUDIT_SE_TYPE: + typedatum = hashtab_search(policydb.p_types.table, rulestr); + if (!typedatum) + rc = -EINVAL; + else + tmprule->au_ctxt.type = typedatum->value; + break; + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + rc = mls_from_string(rulestr, &tmprule->au_ctxt, GFP_ATOMIC); + break; + } + + POLICY_RDUNLOCK; + + if (rc) { + selinux_audit_rule_free(tmprule); + tmprule = NULL; + } + + *rule = tmprule; + + return rc; +} + +int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, + struct selinux_audit_rule *rule, + struct audit_context *actx) +{ + struct context *ctxt; + struct mls_level *level; + int match = 0; + + if (!rule) { + audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR, + "selinux_audit_rule_match: missing rule\n"); + return -ENOENT; + } + + POLICY_RDLOCK; + + if (rule->au_seqno < latest_granting) { + audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR, + "selinux_audit_rule_match: stale rule\n"); + match = -ESTALE; + goto out; + } + + ctxt = sidtab_search(&sidtab, ctxid); + if (!ctxt) { + audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR, + "selinux_audit_rule_match: unrecognized SID %d\n", + ctxid); + match = -ENOENT; + goto out; + } + + /* a field/op pair that is not caught here will simply fall through + without a match */ + switch (field) { + case AUDIT_SE_USER: + switch (op) { + case AUDIT_EQUAL: + match = (ctxt->user == rule->au_ctxt.user); + break; + case AUDIT_NOT_EQUAL: + match = (ctxt->user != rule->au_ctxt.user); + break; + } + break; + case AUDIT_SE_ROLE: + switch (op) { + case AUDIT_EQUAL: + match = (ctxt->role == rule->au_ctxt.role); + break; + case AUDIT_NOT_EQUAL: + match = (ctxt->role != rule->au_ctxt.role); + break; + } + break; + case AUDIT_SE_TYPE: + switch (op) { + case AUDIT_EQUAL: + match = (ctxt->type == rule->au_ctxt.type); + break; + case AUDIT_NOT_EQUAL: + match = (ctxt->type != rule->au_ctxt.type); + break; + } + break; + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + level = (op == AUDIT_SE_SEN ? + &ctxt->range.level[0] : &ctxt->range.level[1]); + switch (op) { + case AUDIT_EQUAL: + match = mls_level_eq(&rule->au_ctxt.range.level[0], + level); + break; + case AUDIT_NOT_EQUAL: + match = !mls_level_eq(&rule->au_ctxt.range.level[0], + level); + break; + case AUDIT_LESS_THAN: + match = (mls_level_dom(&rule->au_ctxt.range.level[0], + level) && + !mls_level_eq(&rule->au_ctxt.range.level[0], + level)); + break; + case AUDIT_LESS_THAN_OR_EQUAL: + match = mls_level_dom(&rule->au_ctxt.range.level[0], + level); + break; + case AUDIT_GREATER_THAN: + match = (mls_level_dom(level, + &rule->au_ctxt.range.level[0]) && + !mls_level_eq(level, + &rule->au_ctxt.range.level[0])); + break; + case AUDIT_GREATER_THAN_OR_EQUAL: + match = mls_level_dom(level, + &rule->au_ctxt.range.level[0]); + break; + } + } + +out: + POLICY_RDUNLOCK; + return match; +} + +static int (*aurule_callback)(void) = NULL; + +static int aurule_avc_callback(u32 event, u32 ssid, u32 tsid, + u16 class, u32 perms, u32 *retained) +{ + int err = 0; + + if (event == AVC_CALLBACK_RESET && aurule_callback) + err = aurule_callback(); + return err; +} + +static int __init aurule_init(void) +{ + int err; + + err = avc_add_callback(aurule_avc_callback, AVC_CALLBACK_RESET, + SECSID_NULL, SECSID_NULL, SECCLASS_NULL, 0); + if (err) + panic("avc_add_callback() failed, error %d\n", err); + + return err; +} +__initcall(aurule_init); + +void selinux_audit_set_callback(int (*callback)(void)) +{ + aurule_callback = callback; +} -- cgit v1.2.3 From 1b50eed9cac0e8e5e4d3a522d8aa267f7f8f8acb Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Mon, 3 Apr 2006 14:06:13 -0400 Subject: [PATCH] audit inode patch Previously, we were gathering the context instead of the sid. Now in this patch, we gather just the sid and convert to context only if an audit event is being output. This patch brings the performance hit from 146% down to 23% Signed-off-by: Al Viro --- include/linux/selinux.h | 34 +++++++++++++++++++++++++++++ kernel/auditsc.c | 53 ++++++++++++++-------------------------------- security/selinux/exports.c | 24 +++++++++++++++++++++ 3 files changed, 74 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 9d684b1728b0..84a6c7404687 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -15,6 +15,7 @@ struct selinux_audit_rule; struct audit_context; +struct inode; #ifdef CONFIG_SECURITY_SELINUX @@ -76,6 +77,27 @@ void selinux_audit_set_callback(int (*callback)(void)); */ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid); +/** + * selinux_ctxid_to_string - map a security context ID to a string + * @ctxid: security context ID to be converted. + * @ctx: address of context string to be returned + * @ctxlen: length of returned context string. + * + * Returns 0 if successful, -errno if not. On success, the context + * string will be allocated internally, and the caller must call + * kfree() on it after use. + */ +int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen); + +/** + * selinux_get_inode_sid - get the inode's security context ID + * @inode: inode structure to get the sid from. + * @sid: pointer to security context ID to be filled in. + * + * Returns nothing + */ +void selinux_get_inode_sid(const struct inode *inode, u32 *sid); + #else static inline int selinux_audit_rule_init(u32 field, u32 op, @@ -107,6 +129,18 @@ static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) *ctxid = 0; } +static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) +{ + *ctx = NULL; + *ctxlen = 0; + return 0; +} + +static inline void selinux_get_inode_sid(const struct inode *inode, u32 *sid) +{ + *sid = 0; +} + #endif /* CONFIG_SECURITY_SELINUX */ #endif /* _LINUX_SELINUX_H */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index d3d97d28b69a..2e123a8a0d60 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -90,7 +90,7 @@ struct audit_names { uid_t uid; gid_t gid; dev_t rdev; - char *ctx; + u32 osid; }; struct audit_aux_data { @@ -410,9 +410,6 @@ static inline void audit_free_names(struct audit_context *context) #endif for (i = 0; i < context->name_count; i++) { - char *p = context->names[i].ctx; - context->names[i].ctx = NULL; - kfree(p); if (context->names[i].name) __putname(context->names[i].name); } @@ -674,6 +671,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts } } for (i = 0; i < context->name_count; i++) { + int call_panic = 0; unsigned long ino = context->names[i].ino; unsigned long pino = context->names[i].pino; @@ -703,12 +701,22 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->names[i].gid, MAJOR(context->names[i].rdev), MINOR(context->names[i].rdev)); - if (context->names[i].ctx) { - audit_log_format(ab, " obj=%s", - context->names[i].ctx); + if (context->names[i].osid != 0) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + context->names[i].osid, &ctx, &len)) { + audit_log_format(ab, " obj=%u", + context->names[i].osid); + call_panic = 1; + } else + audit_log_format(ab, " obj=%s", ctx); + kfree(ctx); } audit_log_end(ab); + if (call_panic) + audit_panic("error converting sid to string"); } } @@ -946,37 +954,8 @@ void audit_putname(const char *name) void audit_inode_context(int idx, const struct inode *inode) { struct audit_context *context = current->audit_context; - const char *suffix = security_inode_xattr_getsuffix(); - char *ctx = NULL; - int len = 0; - - if (!suffix) - goto ret; - - len = security_inode_getsecurity(inode, suffix, NULL, 0, 0); - if (len == -EOPNOTSUPP) - goto ret; - if (len < 0) - goto error_path; - - ctx = kmalloc(len, GFP_KERNEL); - if (!ctx) - goto error_path; - - len = security_inode_getsecurity(inode, suffix, ctx, len, 0); - if (len < 0) - goto error_path; - - kfree(context->names[idx].ctx); - context->names[idx].ctx = ctx; - goto ret; -error_path: - if (ctx) - kfree(ctx); - audit_panic("error in audit_inode_context"); -ret: - return; + selinux_get_inode_sid(inode, &context->names[idx].osid); } diff --git a/security/selinux/exports.c b/security/selinux/exports.c index 333c4c7824d8..07ddce7bf374 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "security.h" #include "objsec.h" @@ -26,3 +27,26 @@ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) else *ctxid = 0; } + +int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) +{ + if (selinux_enabled) + return security_sid_to_context(ctxid, ctx, ctxlen); + else { + *ctx = NULL; + *ctxlen = 0; + } + + return 0; +} + +void selinux_get_inode_sid(const struct inode *inode, u32 *sid) +{ + if (selinux_enabled) { + struct inode_security_struct *isec = inode->i_security; + *sid = isec->sid; + return; + } + *sid = 0; +} + -- cgit v1.2.3 From 9c7aa6aa74fa8a5cda36e54cbbe4fffe0214497d Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Fri, 31 Mar 2006 15:22:49 -0500 Subject: [PATCH] change lspp ipc auditing Hi, The patch below converts IPC auditing to collect sid's and convert to context string only if it needs to output an audit record. This patch depends on the inode audit change patch already being applied. Signed-off-by: Steve Grubb Signed-off-by: Al Viro --- include/linux/security.h | 16 ----------- include/linux/selinux.h | 15 ++++++++++ kernel/auditsc.c | 68 ++++++++++++++-------------------------------- security/dummy.c | 6 ---- security/selinux/exports.c | 11 ++++++++ security/selinux/hooks.c | 8 ------ 6 files changed, 47 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index aaa0a5cdbf75..1bab48f6aeac 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -869,11 +869,6 @@ struct swap_info_struct; * @ipcp contains the kernel IPC permission structure * @flag contains the desired (requested) permission set * Return 0 if permission is granted. - * @ipc_getsecurity: - * Copy the security label associated with the ipc object into - * @buffer. @buffer may be NULL to request the size of the buffer - * required. @size indicates the size of @buffer in bytes. Return - * number of bytes used/required on success. * * Security hooks for individual messages held in System V IPC message queues * @msg_msg_alloc_security: @@ -1223,7 +1218,6 @@ struct security_operations { void (*task_to_inode)(struct task_struct *p, struct inode *inode); int (*ipc_permission) (struct kern_ipc_perm * ipcp, short flag); - int (*ipc_getsecurity)(struct kern_ipc_perm *ipcp, void *buffer, size_t size); int (*msg_msg_alloc_security) (struct msg_msg * msg); void (*msg_msg_free_security) (struct msg_msg * msg); @@ -1887,11 +1881,6 @@ static inline int security_ipc_permission (struct kern_ipc_perm *ipcp, return security_ops->ipc_permission (ipcp, flag); } -static inline int security_ipc_getsecurity(struct kern_ipc_perm *ipcp, void *buffer, size_t size) -{ - return security_ops->ipc_getsecurity(ipcp, buffer, size); -} - static inline int security_msg_msg_alloc (struct msg_msg * msg) { return security_ops->msg_msg_alloc_security (msg); @@ -2532,11 +2521,6 @@ static inline int security_ipc_permission (struct kern_ipc_perm *ipcp, return 0; } -static inline int security_ipc_getsecurity(struct kern_ipc_perm *ipcp, void *buffer, size_t size) -{ - return -EOPNOTSUPP; -} - static inline int security_msg_msg_alloc (struct msg_msg * msg) { return 0; diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 84a6c7404687..413d66773b91 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -16,6 +16,7 @@ struct selinux_audit_rule; struct audit_context; struct inode; +struct kern_ipc_perm; #ifdef CONFIG_SECURITY_SELINUX @@ -98,6 +99,15 @@ int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen); */ void selinux_get_inode_sid(const struct inode *inode, u32 *sid); +/** + * selinux_get_ipc_sid - get the ipc security context ID + * @ipcp: ipc structure to get the sid from. + * @sid: pointer to security context ID to be filled in. + * + * Returns nothing + */ +void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid); + #else static inline int selinux_audit_rule_init(u32 field, u32 op, @@ -141,6 +151,11 @@ static inline void selinux_get_inode_sid(const struct inode *inode, u32 *sid) *sid = 0; } +static inline void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid) +{ + *sid = 0; +} + #endif /* CONFIG_SECURITY_SELINUX */ #endif /* _LINUX_SELINUX_H */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 2e123a8a0d60..b4f7223811fe 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -107,7 +107,7 @@ struct audit_aux_data_ipcctl { uid_t uid; gid_t gid; mode_t mode; - char *ctx; + u32 osid; }; struct audit_aux_data_socketcall { @@ -432,11 +432,6 @@ static inline void audit_free_aux(struct audit_context *context) dput(axi->dentry); mntput(axi->mnt); } - if ( aux->type == AUDIT_IPC ) { - struct audit_aux_data_ipcctl *axi = (void *)aux; - if (axi->ctx) - kfree(axi->ctx); - } context->aux = aux->next; kfree(aux); @@ -584,7 +579,7 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { - int i; + int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; const char *tty; @@ -635,8 +630,20 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts case AUDIT_IPC: { struct audit_aux_data_ipcctl *axi = (void *)aux; audit_log_format(ab, - " qbytes=%lx iuid=%u igid=%u mode=%x obj=%s", - axi->qbytes, axi->uid, axi->gid, axi->mode, axi->ctx); + " qbytes=%lx iuid=%u igid=%u mode=%x", + axi->qbytes, axi->uid, axi->gid, axi->mode); + if (axi->osid != 0) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + axi->osid, &ctx, &len)) { + audit_log_format(ab, " obj=%u", + axi->osid); + call_panic = 1; + } else + audit_log_format(ab, " obj=%s", ctx); + kfree(ctx); + } break; } case AUDIT_SOCKETCALL: { @@ -671,7 +678,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts } } for (i = 0; i < context->name_count; i++) { - int call_panic = 0; unsigned long ino = context->names[i].ino; unsigned long pino = context->names[i].pino; @@ -708,16 +714,16 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->names[i].osid, &ctx, &len)) { audit_log_format(ab, " obj=%u", context->names[i].osid); - call_panic = 1; + call_panic = 2; } else audit_log_format(ab, " obj=%s", ctx); kfree(ctx); } audit_log_end(ab); - if (call_panic) - audit_panic("error converting sid to string"); } + if (call_panic) + audit_panic("error converting sid to string"); } /** @@ -951,7 +957,7 @@ void audit_putname(const char *name) #endif } -void audit_inode_context(int idx, const struct inode *inode) +static void audit_inode_context(int idx, const struct inode *inode) { struct audit_context *context = current->audit_context; @@ -1141,38 +1147,6 @@ uid_t audit_get_loginuid(struct audit_context *ctx) return ctx ? ctx->loginuid : -1; } -static char *audit_ipc_context(struct kern_ipc_perm *ipcp) -{ - struct audit_context *context = current->audit_context; - char *ctx = NULL; - int len = 0; - - if (likely(!context)) - return NULL; - - len = security_ipc_getsecurity(ipcp, NULL, 0); - if (len == -EOPNOTSUPP) - goto ret; - if (len < 0) - goto error_path; - - ctx = kmalloc(len, GFP_ATOMIC); - if (!ctx) - goto error_path; - - len = security_ipc_getsecurity(ipcp, ctx, len); - if (len < 0) - goto error_path; - - return ctx; - -error_path: - kfree(ctx); - audit_panic("error in audit_ipc_context"); -ret: - return NULL; -} - /** * audit_ipc_perms - record audit data for ipc * @qbytes: msgq bytes @@ -1198,7 +1172,7 @@ int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, str ax->uid = uid; ax->gid = gid; ax->mode = mode; - ax->ctx = audit_ipc_context(ipcp); + selinux_get_ipc_sid(ipcp, &ax->osid); ax->d.type = AUDIT_IPC; ax->d.next = context->aux; diff --git a/security/dummy.c b/security/dummy.c index fd99429278e9..8ccccccc12ac 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -563,11 +563,6 @@ static int dummy_ipc_permission (struct kern_ipc_perm *ipcp, short flag) return 0; } -static int dummy_ipc_getsecurity(struct kern_ipc_perm *ipcp, void *buffer, size_t size) -{ - return -EOPNOTSUPP; -} - static int dummy_msg_msg_alloc_security (struct msg_msg *msg) { return 0; @@ -976,7 +971,6 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, task_reparent_to_init); set_to_dummy_if_null(ops, task_to_inode); set_to_dummy_if_null(ops, ipc_permission); - set_to_dummy_if_null(ops, ipc_getsecurity); set_to_dummy_if_null(ops, msg_msg_alloc_security); set_to_dummy_if_null(ops, msg_msg_free_security); set_to_dummy_if_null(ops, msg_queue_alloc_security); diff --git a/security/selinux/exports.c b/security/selinux/exports.c index 07ddce7bf374..7357cf247f60 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "security.h" #include "objsec.h" @@ -50,3 +51,13 @@ void selinux_get_inode_sid(const struct inode *inode, u32 *sid) *sid = 0; } +void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid) +{ + if (selinux_enabled) { + struct ipc_security_struct *isec = ipcp->security; + *sid = isec->sid; + return; + } + *sid = 0; +} + diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b61b9554bc27..3cf368a16448 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4052,13 +4052,6 @@ static int selinux_ipc_permission(struct kern_ipc_perm *ipcp, short flag) return ipc_has_perm(ipcp, av); } -static int selinux_ipc_getsecurity(struct kern_ipc_perm *ipcp, void *buffer, size_t size) -{ - struct ipc_security_struct *isec = ipcp->security; - - return selinux_getsecurity(isec->sid, buffer, size); -} - /* module stacking operations */ static int selinux_register_security (const char *name, struct security_operations *ops) { @@ -4321,7 +4314,6 @@ static struct security_operations selinux_ops = { .task_to_inode = selinux_task_to_inode, .ipc_permission = selinux_ipc_permission, - .ipc_getsecurity = selinux_ipc_getsecurity, .msg_msg_alloc_security = selinux_msg_msg_alloc_security, .msg_msg_free_security = selinux_msg_msg_free_security, -- cgit v1.2.3 From e7c3497013a7e5496ce3d5fd3c73b5cf5af7a56e Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Mon, 3 Apr 2006 09:08:13 -0400 Subject: [PATCH] Reworked patch for labels on user space messages The below patch should be applied after the inode and ipc sid patches. This patch is a reworking of Tim's patch that has been updated to match the inode and ipc patches since its similar. [updated: > Stephen Smalley also wanted to change a variable from isec to tsec in the > user sid patch. ] Signed-off-by: Steve Grubb Signed-off-by: Al Viro --- include/linux/netlink.h | 1 + include/linux/selinux.h | 16 ++++++++++++++++ kernel/audit.c | 22 +++++++++++++++++++--- net/netlink/af_netlink.c | 2 ++ security/selinux/exports.c | 11 +++++++++++ 5 files changed, 49 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index f8f3d1c927f8..87b8a5703ebc 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -143,6 +143,7 @@ struct netlink_skb_parms __u32 dst_group; kernel_cap_t eff_cap; __u32 loginuid; /* Login (audit) uid */ + __u32 sid; /* SELinux security id */ }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 413d66773b91..4047bcde4484 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -5,6 +5,7 @@ * * Copyright (C) 2005 Red Hat, Inc., James Morris * Copyright (C) 2006 Trusted Computer Solutions, Inc. + * Copyright (C) 2006 IBM Corporation, Timothy R. Chavez * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -108,6 +109,16 @@ void selinux_get_inode_sid(const struct inode *inode, u32 *sid); */ void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid); +/** + * selinux_get_task_sid - return the SID of task + * @tsk: the task whose SID will be returned + * @sid: pointer to security context ID to be filled in. + * + * Returns nothing + */ +void selinux_get_task_sid(struct task_struct *tsk, u32 *sid); + + #else static inline int selinux_audit_rule_init(u32 field, u32 op, @@ -156,6 +167,11 @@ static inline void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *si *sid = 0; } +static inline void selinux_get_task_sid(struct task_struct *tsk, u32 *sid) +{ + *sid = 0; +} + #endif /* CONFIG_SECURITY_SELINUX */ #endif /* _LINUX_SELINUX_H */ diff --git a/kernel/audit.c b/kernel/audit.c index 9060be750c48..7ec9ccae1299 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -390,7 +390,7 @@ static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type) static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - u32 uid, pid, seq; + u32 uid, pid, seq, sid; void *data; struct audit_status *status_get, status_set; int err; @@ -416,6 +416,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) pid = NETLINK_CREDS(skb)->pid; uid = NETLINK_CREDS(skb)->uid; loginuid = NETLINK_CB(skb).loginuid; + sid = NETLINK_CB(skb).sid; seq = nlh->nlmsg_seq; data = NLMSG_DATA(nlh); @@ -468,8 +469,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) ab = audit_log_start(NULL, GFP_KERNEL, msg_type); if (ab) { audit_log_format(ab, - "user pid=%d uid=%u auid=%u msg='%.1024s'", - pid, uid, loginuid, (char *)data); + "user pid=%d uid=%u auid=%u", + pid, uid, loginuid); + if (sid) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + sid, &ctx, &len)) { + audit_log_format(ab, + " subj=%u", sid); + /* Maybe call audit_panic? */ + } else + audit_log_format(ab, + " subj=%s", ctx); + kfree(ctx); + } + audit_log_format(ab, " msg='%.1024s'", + (char *)data); audit_set_pid(ab, pid); audit_log_end(ab); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2a233ffcf618..09fbc4bc7088 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -1157,6 +1158,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).dst_pid = dst_pid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); + selinux_get_task_sid(current, &(NETLINK_CB(skb).sid)); memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); /* What can I do? Netlink is asynchronous, so that diff --git a/security/selinux/exports.c b/security/selinux/exports.c index 7357cf247f60..ae4c73eb3085 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -5,6 +5,7 @@ * * Copyright (C) 2005 Red Hat, Inc., James Morris * Copyright (C) 2006 Trusted Computer Solutions, Inc. + * Copyright (C) 2006 IBM Corporation, Timothy R. Chavez * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -61,3 +62,13 @@ void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid) *sid = 0; } +void selinux_get_task_sid(struct task_struct *tsk, u32 *sid) +{ + if (selinux_enabled) { + struct task_security_struct *tsec = tsk->security; + *sid = tsec->sid; + return; + } + *sid = 0; +} + -- cgit v1.2.3 From ce29b682e228c70cdc91a1b2935c5adb2087bab8 Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Sat, 1 Apr 2006 18:29:34 -0500 Subject: [PATCH] More user space subject labels Hi, The patch below builds upon the patch sent earlier and adds subject label to all audit events generated via the netlink interface. It also cleans up a few other minor things. Signed-off-by: Steve Grubb Signed-off-by: Al Viro --- include/linux/audit.h | 2 +- kernel/audit.c | 132 ++++++++++++++++++++++++++++++++++++++------------ kernel/auditfilter.c | 44 ++++++++++++++--- kernel/auditsc.c | 4 +- 4 files changed, 142 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 740f950397b7..d5c40823e166 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -371,7 +371,7 @@ extern void audit_log_d_path(struct audit_buffer *ab, extern int audit_filter_user(struct netlink_skb_parms *cb, int type); extern int audit_filter_type(int type); extern int audit_receive_filter(int type, int pid, int uid, int seq, - void *data, size_t datasz, uid_t loginuid); + void *data, size_t datasz, uid_t loginuid, u32 sid); #else #define audit_log(c,g,t,f,...) do { ; } while (0) #define audit_log_start(c,g,t) ({ NULL; }) diff --git a/kernel/audit.c b/kernel/audit.c index 7ec9ccae1299..df57b493e1cb 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -230,49 +230,103 @@ void audit_log_lost(const char *message) } } -static int audit_set_rate_limit(int limit, uid_t loginuid) +static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid) { - int old = audit_rate_limit; - audit_rate_limit = limit; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + int old = audit_rate_limit; + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_rate_limit=%d old=%d by auid=%u subj=%s", + limit, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_rate_limit=%d old=%d by auid=%u", - audit_rate_limit, old, loginuid); + limit, old, loginuid); + audit_rate_limit = limit; return old; } -static int audit_set_backlog_limit(int limit, uid_t loginuid) +static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) { - int old = audit_backlog_limit; - audit_backlog_limit = limit; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + int old = audit_backlog_limit; + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_backlog_limit=%d old=%d by auid=%u subj=%s", + limit, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_backlog_limit=%d old=%d by auid=%u", - audit_backlog_limit, old, loginuid); + limit, old, loginuid); + audit_backlog_limit = limit; return old; } -static int audit_set_enabled(int state, uid_t loginuid) +static int audit_set_enabled(int state, uid_t loginuid, u32 sid) { - int old = audit_enabled; + int old = audit_enabled; + if (state != 0 && state != 1) return -EINVAL; - audit_enabled = state; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_enabled=%d old=%d by auid=%u subj=%s", + state, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_enabled=%d old=%d by auid=%u", - audit_enabled, old, loginuid); + state, old, loginuid); + audit_enabled = state; return old; } -static int audit_set_failure(int state, uid_t loginuid) +static int audit_set_failure(int state, uid_t loginuid, u32 sid) { - int old = audit_failure; + int old = audit_failure; + if (state != AUDIT_FAIL_SILENT && state != AUDIT_FAIL_PRINTK && state != AUDIT_FAIL_PANIC) return -EINVAL; - audit_failure = state; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_failure=%d old=%d by auid=%u subj=%s", + state, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_failure=%d old=%d by auid=%u", - audit_failure, old, loginuid); + state, old, loginuid); + audit_failure = state; return old; } @@ -437,25 +491,43 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; status_get = (struct audit_status *)data; if (status_get->mask & AUDIT_STATUS_ENABLED) { - err = audit_set_enabled(status_get->enabled, loginuid); + err = audit_set_enabled(status_get->enabled, + loginuid, sid); if (err < 0) return err; } if (status_get->mask & AUDIT_STATUS_FAILURE) { - err = audit_set_failure(status_get->failure, loginuid); + err = audit_set_failure(status_get->failure, + loginuid, sid); if (err < 0) return err; } if (status_get->mask & AUDIT_STATUS_PID) { int old = audit_pid; + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string( + sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, + AUDIT_CONFIG_CHANGE, + "audit_pid=%d old=%d by auid=%u subj=%s", + status_get->pid, old, + loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_pid=%d old=%d by auid=%u", + status_get->pid, old, loginuid); audit_pid = status_get->pid; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, - "audit_pid=%d old=%d by auid=%u", - audit_pid, old, loginuid); } if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) - audit_set_rate_limit(status_get->rate_limit, loginuid); + audit_set_rate_limit(status_get->rate_limit, + loginuid, sid); if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) audit_set_backlog_limit(status_get->backlog_limit, - loginuid); + loginuid, sid); break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG: @@ -477,7 +549,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (selinux_ctxid_to_string( sid, &ctx, &len)) { audit_log_format(ab, - " subj=%u", sid); + " ssid=%u", sid); /* Maybe call audit_panic? */ } else audit_log_format(ab, @@ -499,7 +571,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_LIST: err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), - loginuid); + loginuid, sid); break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: @@ -509,7 +581,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_LIST_RULES: err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), - loginuid); + loginuid, sid); break; case AUDIT_SIGNAL_INFO: sig_data.uid = audit_sig_uid; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 85a7862143a1..7c134906d689 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -586,9 +586,10 @@ static int audit_list_rules(void *_dest) * @data: payload data * @datasz: size of payload data * @loginuid: loginuid of sender + * @sid: SE Linux Security ID of sender */ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, - size_t datasz, uid_t loginuid) + size_t datasz, uid_t loginuid, u32 sid) { struct task_struct *tsk; int *dest; @@ -631,9 +632,23 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, err = audit_add_rule(entry, &audit_filter_list[entry->rule.listnr]); - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, - "auid=%u add rule to list=%d res=%d\n", - loginuid, entry->rule.listnr, !err); + if (sid) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string(sid, &ctx, &len)) { + /* Maybe call audit_panic? */ + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u ssid=%u add rule to list=%d res=%d", + loginuid, sid, entry->rule.listnr, !err); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u subj=%s add rule to list=%d res=%d", + loginuid, ctx, entry->rule.listnr, !err); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u add rule to list=%d res=%d", + loginuid, entry->rule.listnr, !err); if (err) audit_free_rule(entry); @@ -649,9 +664,24 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, err = audit_del_rule(entry, &audit_filter_list[entry->rule.listnr]); - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, - "auid=%u remove rule from list=%d res=%d\n", - loginuid, entry->rule.listnr, !err); + + if (sid) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string(sid, &ctx, &len)) { + /* Maybe call audit_panic? */ + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u ssid=%u remove rule from list=%d res=%d", + loginuid, sid, entry->rule.listnr, !err); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u subj=%s remove rule from list=%d res=%d", + loginuid, ctx, entry->rule.listnr, !err); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u remove rule from list=%d res=%d", + loginuid, entry->rule.listnr, !err); audit_free_rule(entry); break; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b4f7223811fe..d94e0404113c 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -637,7 +637,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts u32 len; if (selinux_ctxid_to_string( axi->osid, &ctx, &len)) { - audit_log_format(ab, " obj=%u", + audit_log_format(ab, " osid=%u", axi->osid); call_panic = 1; } else @@ -712,7 +712,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts u32 len; if (selinux_ctxid_to_string( context->names[i].osid, &ctx, &len)) { - audit_log_format(ab, " obj=%u", + audit_log_format(ab, " osid=%u", context->names[i].osid); call_panic = 2; } else -- cgit v1.2.3 From 073115d6b29c7910feaa08241c6484637f5ca958 Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Sun, 2 Apr 2006 17:07:33 -0400 Subject: [PATCH] Rework of IPC auditing 1) The audit_ipc_perms() function has been split into two different functions: - audit_ipc_obj() - audit_ipc_set_perm() There's a key shift here... The audit_ipc_obj() collects the uid, gid, mode, and SElinux context label of the current ipc object. This audit_ipc_obj() hook is now found in several places. Most notably, it is hooked in ipcperms(), which is called in various places around the ipc code permforming a MAC check. Additionally there are several places where *checkid() is used to validate that an operation is being performed on a valid object while not necessarily having a nearby ipcperms() call. In these locations, audit_ipc_obj() is called to ensure that the information is captured by the audit system. The audit_set_new_perm() function is called any time the permissions on the ipc object changes. In this case, the NEW permissions are recorded (and note that an audit_ipc_obj() call exists just a few lines before each instance). 2) Support for an AUDIT_IPC_SET_PERM audit message type. This allows for separate auxiliary audit records for normal operations on an IPC object and permissions changes. Note that the same struct audit_aux_data_ipcctl is used and populated, however there are separate audit_log_format statements based on the type of the message. Finally, the AUDIT_IPC block of code in audit_free_aux() was extended to handle aux messages of this new type. No more mem leaks I hope ;-) Signed-off-by: Al Viro --- include/linux/audit.h | 7 +++++-- ipc/msg.c | 11 ++++++++++- ipc/sem.c | 11 ++++++++++- ipc/shm.c | 19 +++++++++++++++--- ipc/util.c | 7 ++++++- kernel/auditsc.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 98 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index d5c40823e166..b74c148f14e3 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -83,6 +83,7 @@ #define AUDIT_CONFIG_CHANGE 1305 /* Audit system configuration change */ #define AUDIT_SOCKADDR 1306 /* sockaddr copied as syscall arg */ #define AUDIT_CWD 1307 /* Current working directory */ +#define AUDIT_IPC_SET_PERM 1311 /* IPC new permissions record type */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -319,7 +320,8 @@ extern void auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); extern uid_t audit_get_loginuid(struct audit_context *ctx); -extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp); +extern int audit_ipc_obj(struct kern_ipc_perm *ipcp); +extern int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp); extern int audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt); @@ -338,7 +340,8 @@ extern int audit_set_macxattr(const char *name); #define audit_inode_child(d,i,p) do { ; } while (0) #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) -#define audit_ipc_perms(q,u,g,m,i) ({ 0; }) +#define audit_ipc_obj(i) ({ 0; }) +#define audit_ipc_set_perm(q,u,g,m,i) ({ 0; }) #define audit_socketcall(n,a) ({ 0; }) #define audit_sockaddr(len, addr) ({ 0; }) #define audit_avc_path(dentry, mnt) ({ 0; }) diff --git a/ipc/msg.c b/ipc/msg.c index 48a7f17a7236..7d1340ccb16b 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -13,6 +13,9 @@ * mostly rewritten, threaded and wake-one semantics added * MSGMAX limit removed, sysctl's added * (c) 1999 Manfred Spraul + * + * support for audit of ipc object properties and permission changes + * Dustin Kirkland */ #include @@ -447,6 +450,11 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf) if (msg_checkid(msq,msqid)) goto out_unlock_up; ipcp = &msq->q_perm; + + err = audit_ipc_obj(ipcp); + if (err) + goto out_unlock_up; + err = -EPERM; if (current->euid != ipcp->cuid && current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) @@ -460,7 +468,8 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf) switch (cmd) { case IPC_SET: { - if ((err = audit_ipc_perms(setbuf.qbytes, setbuf.uid, setbuf.gid, setbuf.mode, ipcp))) + err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid, setbuf.mode, ipcp); + if (err) goto out_unlock_up; err = -EPERM; diff --git a/ipc/sem.c b/ipc/sem.c index 642659cd596b..7919f8ece6ba 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -61,6 +61,9 @@ * (c) 2001 Red Hat Inc * Lockless wakeup * (c) 2003 Manfred Spraul + * + * support for audit of ipc object properties and permission changes + * Dustin Kirkland */ #include @@ -820,6 +823,11 @@ static int semctl_down(int semid, int semnum, int cmd, int version, union semun goto out_unlock; } ipcp = &sma->sem_perm; + + err = audit_ipc_obj(ipcp); + if (err) + goto out_unlock; + if (current->euid != ipcp->cuid && current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) { err=-EPERM; @@ -836,7 +844,8 @@ static int semctl_down(int semid, int semnum, int cmd, int version, union semun err = 0; break; case IPC_SET: - if ((err = audit_ipc_perms(0, setbuf.uid, setbuf.gid, setbuf.mode, ipcp))) + err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode, ipcp); + if (err) goto out_unlock; ipcp->uid = setbuf.uid; ipcp->gid = setbuf.gid; diff --git a/ipc/shm.c b/ipc/shm.c index 1c2faf62bc73..809896851902 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -13,6 +13,8 @@ * Shared /dev/zero support, Kanoj Sarcar * Move the mm functionality over to mm/shmem.c, Christoph Rohland * + * support for audit of ipc object properties and permission changes + * Dustin Kirkland */ #include @@ -542,6 +544,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) if(err) goto out_unlock; + err = audit_ipc_obj(&(shp->shm_perm)); + if (err) + goto out_unlock; + if (!capable(CAP_IPC_LOCK)) { err = -EPERM; if (current->euid != shp->shm_perm.uid && @@ -594,6 +600,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) if(err) goto out_unlock_up; + err = audit_ipc_obj(&(shp->shm_perm)); + if (err) + goto out_unlock_up; + if (current->euid != shp->shm_perm.uid && current->euid != shp->shm_perm.cuid && !capable(CAP_SYS_ADMIN)) { @@ -627,12 +637,15 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) err=-EINVAL; if(shp==NULL) goto out_up; - if ((err = audit_ipc_perms(0, setbuf.uid, setbuf.gid, - setbuf.mode, &(shp->shm_perm)))) - goto out_unlock_up; err = shm_checkid(shp,shmid); if(err) goto out_unlock_up; + err = audit_ipc_obj(&(shp->shm_perm)); + if (err) + goto out_unlock_up; + err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode, &(shp->shm_perm)); + if (err) + goto out_unlock_up; err=-EPERM; if (current->euid != shp->shm_perm.uid && current->euid != shp->shm_perm.cuid && diff --git a/ipc/util.c b/ipc/util.c index b3dcfad3b4f7..8193299f45f6 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -10,6 +10,8 @@ * Manfred Spraul * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary(). * Mingming Cao + * Mar 2006 - support for audit of ipc object properties + * Dustin Kirkland */ #include @@ -27,6 +29,7 @@ #include #include #include +#include #include @@ -464,8 +467,10 @@ void ipc_rcu_putref(void *ptr) int ipcperms (struct kern_ipc_perm *ipcp, short flag) { /* flag will most probably be 0 or S_...UGO from */ - int requested_mode, granted_mode; + int requested_mode, granted_mode, err; + if (unlikely((err = audit_ipc_obj(ipcp)))) + return err; requested_mode = (flag >> 6) | (flag >> 3) | flag; granted_mode = ipcp->mode; if (current->euid == ipcp->cuid || current->euid == ipcp->uid) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index d94e0404113c..a300736ee037 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -646,6 +646,25 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts } break; } + case AUDIT_IPC_SET_PERM: { + struct audit_aux_data_ipcctl *axi = (void *)aux; + audit_log_format(ab, + " new qbytes=%lx new iuid=%u new igid=%u new mode=%x", + axi->qbytes, axi->uid, axi->gid, axi->mode); + if (axi->osid != 0) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + axi->osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", + axi->osid); + call_panic = 1; + } else + audit_log_format(ab, " obj=%s", ctx); + kfree(ctx); + } + break; } + case AUDIT_SOCKETCALL: { int i; struct audit_aux_data_socketcall *axs = (void *)aux; @@ -1148,7 +1167,36 @@ uid_t audit_get_loginuid(struct audit_context *ctx) } /** - * audit_ipc_perms - record audit data for ipc + * audit_ipc_obj - record audit data for ipc object + * @ipcp: ipc permissions + * + * Returns 0 for success or NULL context or < 0 on error. + */ +int audit_ipc_obj(struct kern_ipc_perm *ipcp) +{ + struct audit_aux_data_ipcctl *ax; + struct audit_context *context = current->audit_context; + + if (likely(!context)) + return 0; + + ax = kmalloc(sizeof(*ax), GFP_ATOMIC); + if (!ax) + return -ENOMEM; + + ax->uid = ipcp->uid; + ax->gid = ipcp->gid; + ax->mode = ipcp->mode; + selinux_get_ipc_sid(ipcp, &ax->osid); + + ax->d.type = AUDIT_IPC; + ax->d.next = context->aux; + context->aux = (void *)ax; + return 0; +} + +/** + * audit_ipc_set_perm - record audit data for new ipc permissions * @qbytes: msgq bytes * @uid: msgq user id * @gid: msgq group id @@ -1156,7 +1204,7 @@ uid_t audit_get_loginuid(struct audit_context *ctx) * * Returns 0 for success or NULL context or < 0 on error. */ -int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) +int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) { struct audit_aux_data_ipcctl *ax; struct audit_context *context = current->audit_context; @@ -1174,7 +1222,7 @@ int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, str ax->mode = mode; selinux_get_ipc_sid(ipcp, &ax->osid); - ax->d.type = AUDIT_IPC; + ax->d.type = AUDIT_IPC_SET_PERM; ax->d.next = context->aux; context->aux = (void *)ax; return 0; -- cgit v1.2.3 From 0568b409c74f7a125d92a09a3f386785700ef688 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 1 May 2006 19:50:48 +0200 Subject: [PATCH] splice: fix bugs in pipe_to_file() Found by Oleg Nesterov , fixed by me. - Only allow full pages to go to the page cache. - Check page != buf->page instead of using PIPE_BUF_FLAG_STOLEN. - Remember to clear 'stolen' if add_to_page_cache() fails. And as a cleanup on that: - Make the bottom fall-through logic a little less convoluted. Also make the steal path hold an extra reference to the page, so we don't have to differentiate between stolen and non-stolen at the end. Signed-off-by: Jens Axboe --- fs/pipe.c | 3 --- fs/splice.c | 37 +++++++++++++++++++------------------ include/linux/pipe_fs_i.h | 3 +-- 3 files changed, 20 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/pipe.c b/fs/pipe.c index 5a369273c51b..888f265011bf 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -99,8 +99,6 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, { struct page *page = buf->page; - buf->flags &= ~PIPE_BUF_FLAG_STOLEN; - /* * If nobody else uses this page, and we don't already have a * temporary page, let's keep track of it as a one-deep @@ -130,7 +128,6 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, struct page *page = buf->page; if (page_count(page) == 1) { - buf->flags |= PIPE_BUF_FLAG_STOLEN; lock_page(page); return 0; } diff --git a/fs/splice.c b/fs/splice.c index 9df28d30efa0..1633778f3652 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -78,7 +78,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, return 1; } - buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; + buf->flags |= PIPE_BUF_FLAG_LRU; return 0; } @@ -87,7 +87,7 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info, { page_cache_release(buf->page); buf->page = NULL; - buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); + buf->flags &= ~PIPE_BUF_FLAG_LRU; } static void *page_cache_pipe_buf_map(struct file *file, @@ -587,9 +587,10 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, this_len = PAGE_CACHE_SIZE - offset; /* - * Reuse buf page, if SPLICE_F_MOVE is set. + * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full + * page. */ - if (sd->flags & SPLICE_F_MOVE) { + if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) { /* * If steal succeeds, buf->page is now pruned from the vm * side (LRU and page cache) and we can reuse it. The page @@ -604,6 +605,8 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, goto find_page; } + page_cache_get(page); + if (!(buf->flags & PIPE_BUF_FLAG_LRU)) lru_cache_add(page); } else { @@ -662,7 +665,7 @@ find_page: } else if (ret) goto out; - if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { + if (buf->page != page) { char *dst = kmap_atomic(page, KM_USER0); memcpy(dst + offset, src + buf->offset, this_len); @@ -671,22 +674,20 @@ find_page: } ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); - if (ret == AOP_TRUNCATED_PAGE) { + if (!ret) { + /* + * Return the number of bytes written and mark page as + * accessed, we are now done! + */ + ret = this_len; + mark_page_accessed(page); + balance_dirty_pages_ratelimited(mapping); + } else if (ret == AOP_TRUNCATED_PAGE) { page_cache_release(page); goto find_page; - } else if (ret) - goto out; - - /* - * Return the number of bytes written. - */ - ret = this_len; - mark_page_accessed(page); - balance_dirty_pages_ratelimited(mapping); + } out: - if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) - page_cache_release(page); - + page_cache_release(page); unlock_page(page); out_nomem: buf->ops->unmap(info, buf); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 0008d4bd4059..3130977fc6ab 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -5,8 +5,7 @@ #define PIPE_BUFFERS (16) -#define PIPE_BUF_FLAG_STOLEN 0x01 -#define PIPE_BUF_FLAG_LRU 0x02 +#define PIPE_BUF_FLAG_LRU 0x01 struct pipe_buffer { struct page *page; -- cgit v1.2.3 From f84d751994441292593523c7069ed147176f6cab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 1 May 2006 19:59:03 +0200 Subject: [PATCH] pipe: introduce ->pin() buffer operation The ->map() function is really expensive on highmem machines right now, since it has to use the slower kmap() instead of kmap_atomic(). Splice rarely needs to access the virtual address of a page, so it's a waste of time doing it. Introduce ->pin() to take over the responsibility of making sure the page data is valid. ->map() is then reduced to just kmap(). That way we can also share a most of the pipe buffer ops between pipe.c and splice.c Signed-off-by: Jens Axboe --- fs/pipe.c | 39 +++++++++++--------- fs/splice.c | 91 ++++++++++++++++------------------------------- include/linux/pipe_fs_i.h | 21 ++++++++++- 3 files changed, 73 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/fs/pipe.c b/fs/pipe.c index 888f265011bf..d9644fd9cc0d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -110,14 +110,14 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, page_cache_release(page); } -static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +void *generic_pipe_buf_map(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { return kmap(buf->page); } -static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { kunmap(buf->page); } @@ -135,19 +135,24 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, return 1; } -static void anon_pipe_buf_get(struct pipe_inode_info *info, - struct pipe_buffer *buf) +void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf) { page_cache_get(buf->page); } +int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf) +{ + return 0; +} + static struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, - .map = anon_pipe_buf_map, - .unmap = anon_pipe_buf_unmap, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .pin = generic_pipe_buf_pin, .release = anon_pipe_buf_release, .steal = anon_pipe_buf_steal, - .get = anon_pipe_buf_get, + .get = generic_pipe_buf_get, }; static ssize_t @@ -183,12 +188,14 @@ pipe_readv(struct file *filp, const struct iovec *_iov, if (chars > total_len) chars = total_len; - addr = ops->map(filp, pipe, buf); - if (IS_ERR(addr)) { + error = ops->pin(pipe, buf); + if (error) { if (!ret) - ret = PTR_ERR(addr); + error = ret; break; } + + addr = ops->map(pipe, buf); error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); ops->unmap(pipe, buf); if (unlikely(error)) { @@ -300,11 +307,11 @@ pipe_writev(struct file *filp, const struct iovec *_iov, void *addr; int error; - addr = ops->map(filp, pipe, buf); - if (IS_ERR(addr)) { - error = PTR_ERR(addr); + error = ops->pin(pipe, buf); + if (error) goto out; - } + + addr = ops->map(pipe, buf); error = pipe_iov_copy_from_user(offset + addr, iov, chars); ops->unmap(pipe, buf); diff --git a/fs/splice.c b/fs/splice.c index 1633778f3652..d7538d83c367 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -90,9 +90,8 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info, buf->flags &= ~PIPE_BUF_FLAG_LRU; } -static void *page_cache_pipe_buf_map(struct file *file, - struct pipe_inode_info *info, - struct pipe_buffer *buf) +static int page_cache_pipe_buf_pin(struct pipe_inode_info *info, + struct pipe_buffer *buf) { struct page *page = buf->page; int err; @@ -118,49 +117,25 @@ static void *page_cache_pipe_buf_map(struct file *file, } /* - * Page is ok afterall, fall through to mapping. + * Page is ok afterall, we are done. */ unlock_page(page); } - return kmap(page); + return 0; error: unlock_page(page); - return ERR_PTR(err); -} - -static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, - struct pipe_buffer *buf) -{ - kunmap(buf->page); -} - -static void *user_page_pipe_buf_map(struct file *file, - struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - return kmap(buf->page); -} - -static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - kunmap(buf->page); -} - -static void page_cache_pipe_buf_get(struct pipe_inode_info *info, - struct pipe_buffer *buf) -{ - page_cache_get(buf->page); + return err; } static struct pipe_buf_operations page_cache_pipe_buf_ops = { .can_merge = 0, - .map = page_cache_pipe_buf_map, - .unmap = page_cache_pipe_buf_unmap, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .pin = page_cache_pipe_buf_pin, .release = page_cache_pipe_buf_release, .steal = page_cache_pipe_buf_steal, - .get = page_cache_pipe_buf_get, + .get = generic_pipe_buf_get, }; static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, @@ -171,11 +146,12 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, static struct pipe_buf_operations user_page_pipe_buf_ops = { .can_merge = 0, - .map = user_page_pipe_buf_map, - .unmap = user_page_pipe_buf_unmap, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .pin = generic_pipe_buf_pin, .release = page_cache_pipe_buf_release, .steal = user_page_pipe_buf_steal, - .get = page_cache_pipe_buf_get, + .get = generic_pipe_buf_get, }; /* @@ -517,26 +493,16 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, { struct file *file = sd->file; loff_t pos = sd->pos; - ssize_t ret; - void *ptr; - int more; + int ret, more; - /* - * Sub-optimal, but we are limited by the pipe ->map. We don't - * need a kmap'ed buffer here, we just want to make sure we - * have the page pinned if the pipe page originates from the - * page cache. - */ - ptr = buf->ops->map(file, info, buf); - if (IS_ERR(ptr)) - return PTR_ERR(ptr); - - more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; + ret = buf->ops->pin(info, buf); + if (!ret) { + more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; - ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len, - &pos, more); + ret = file->f_op->sendpage(file, buf->page, buf->offset, + sd->len, &pos, more); + } - buf->ops->unmap(info, buf); return ret; } @@ -569,15 +535,14 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, unsigned int offset, this_len; struct page *page; pgoff_t index; - char *src; int ret; /* * make sure the data in this buffer is uptodate */ - src = buf->ops->map(file, info, buf); - if (IS_ERR(src)) - return PTR_ERR(src); + ret = buf->ops->pin(info, buf); + if (unlikely(ret)) + return ret; index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; @@ -666,11 +631,16 @@ find_page: goto out; if (buf->page != page) { - char *dst = kmap_atomic(page, KM_USER0); + /* + * Careful, ->map() uses KM_USER0! + */ + char *src = buf->ops->map(info, buf); + char *dst = kmap_atomic(page, KM_USER1); memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); - kunmap_atomic(dst, KM_USER0); + kunmap_atomic(dst, KM_USER1); + buf->ops->unmap(info, buf); } ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); @@ -690,7 +660,6 @@ out: page_cache_release(page); unlock_page(page); out_nomem: - buf->ops->unmap(info, buf); return ret; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 3130977fc6ab..b8aae1fc5185 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -14,10 +14,23 @@ struct pipe_buffer { unsigned int flags; }; +/* + * Note on the nesting of these functions: + * + * ->pin() + * ->steal() + * ... + * ->map() + * ... + * ->unmap() + * + * That is, ->map() must be called on a pinned buffer, same goes for ->steal(). + */ struct pipe_buf_operations { int can_merge; - void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *); + void * (*map)(struct pipe_inode_info *, struct pipe_buffer *); void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); + int (*pin)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); void (*get)(struct pipe_inode_info *, struct pipe_buffer *); @@ -50,6 +63,12 @@ struct pipe_inode_info * alloc_pipe_info(struct inode * inode); void free_pipe_info(struct inode * inode); void __free_pipe_info(struct pipe_inode_info *); +/* Generic pipe buffer ops functions */ +void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *); +void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *); +void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); +int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *); + /* * splice is tied to pipes as a transport (at least for now), so we'll just * add the splice flags here. -- cgit v1.2.3 From f6762b7ad8edd6abc802542ce845d3bc8adcb92f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 1 May 2006 20:02:05 +0200 Subject: [PATCH] pipe: enable atomic copying of pipe data to/from user space The pipe ->map() method uses kmap() to virtually map the pages, which is both slow and has known scalability issues on SMP. This patch enables atomic copying of pipe pages, by pre-faulting data and using kmap_atomic() instead. lmbench bw_pipe and lat_pipe measurements agree this is a Good Thing. Here are results from that on a UP machine with highmem (1.5GiB of RAM), running first a UP kernel, SMP kernel, and SMP kernel patched. Vanilla-UP: Pipe bandwidth: 1622.28 MB/sec Pipe bandwidth: 1610.59 MB/sec Pipe bandwidth: 1608.30 MB/sec Pipe latency: 7.3275 microseconds Pipe latency: 7.2995 microseconds Pipe latency: 7.3097 microseconds Vanilla-SMP: Pipe bandwidth: 1382.19 MB/sec Pipe bandwidth: 1317.27 MB/sec Pipe bandwidth: 1355.61 MB/sec Pipe latency: 9.6402 microseconds Pipe latency: 9.6696 microseconds Pipe latency: 9.6153 microseconds Patched-SMP: Pipe bandwidth: 1578.70 MB/sec Pipe bandwidth: 1579.95 MB/sec Pipe bandwidth: 1578.63 MB/sec Pipe latency: 9.1654 microseconds Pipe latency: 9.2266 microseconds Pipe latency: 9.1527 microseconds Signed-off-by: Jens Axboe --- fs/pipe.c | 141 ++++++++++++++++++++++++++++++++++++++-------- fs/splice.c | 4 +- include/linux/pipe_fs_i.h | 11 ++-- 3 files changed, 126 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/fs/pipe.c b/fs/pipe.c index d9644fd9cc0d..3941a7f78b5d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -55,7 +55,8 @@ void pipe_wait(struct pipe_inode_info *pipe) } static int -pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) +pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, + int atomic) { unsigned long copy; @@ -64,8 +65,13 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) iov++; copy = min_t(unsigned long, len, iov->iov_len); - if (copy_from_user(to, iov->iov_base, copy)) - return -EFAULT; + if (atomic) { + if (__copy_from_user_inatomic(to, iov->iov_base, copy)) + return -EFAULT; + } else { + if (copy_from_user(to, iov->iov_base, copy)) + return -EFAULT; + } to += copy; len -= copy; iov->iov_base += copy; @@ -75,7 +81,8 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) } static int -pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) +pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len, + int atomic) { unsigned long copy; @@ -84,8 +91,13 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) iov++; copy = min_t(unsigned long, len, iov->iov_len); - if (copy_to_user(iov->iov_base, from, copy)) - return -EFAULT; + if (atomic) { + if (__copy_to_user_inatomic(iov->iov_base, from, copy)) + return -EFAULT; + } else { + if (copy_to_user(iov->iov_base, from, copy)) + return -EFAULT; + } from += copy; len -= copy; iov->iov_base += copy; @@ -94,6 +106,47 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) return 0; } +/* + * Attempt to pre-fault in the user memory, so we can use atomic copies. + * Returns the number of bytes not faulted in. + */ +static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len) +{ + while (!iov->iov_len) + iov++; + + while (len > 0) { + unsigned long this_len; + + this_len = min_t(unsigned long, len, iov->iov_len); + if (fault_in_pages_writeable(iov->iov_base, this_len)) + break; + + len -= this_len; + iov++; + } + + return len; +} + +/* + * Pre-fault in the user memory, so we can use atomic copies. + */ +static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) +{ + while (!iov->iov_len) + iov++; + + while (len > 0) { + unsigned long this_len; + + this_len = min_t(unsigned long, len, iov->iov_len); + fault_in_pages_readable(iov->iov_base, this_len); + len -= this_len; + iov++; + } +} + static void anon_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { @@ -111,15 +164,24 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, } void *generic_pipe_buf_map(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) + struct pipe_buffer *buf, int atomic) { + if (atomic) { + buf->flags |= PIPE_BUF_FLAG_ATOMIC; + return kmap_atomic(buf->page, KM_USER0); + } + return kmap(buf->page); } void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) + struct pipe_buffer *buf, void *map_data) { - kunmap(buf->page); + if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { + buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; + kunmap_atomic(map_data, KM_USER0); + } else + kunmap(buf->page); } static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, @@ -183,7 +245,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov, struct pipe_buf_operations *ops = buf->ops; void *addr; size_t chars = buf->len; - int error; + int error, atomic; if (chars > total_len) chars = total_len; @@ -195,12 +257,21 @@ pipe_readv(struct file *filp, const struct iovec *_iov, break; } - addr = ops->map(pipe, buf); - error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); - ops->unmap(pipe, buf); + atomic = !iov_fault_in_pages_write(iov, chars); +redo: + addr = ops->map(pipe, buf, atomic); + error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); + ops->unmap(pipe, buf, addr); if (unlikely(error)) { + /* + * Just retry with the slow path if we failed. + */ + if (atomic) { + atomic = 0; + goto redo; + } if (!ret) - ret = -EFAULT; + ret = error; break; } ret += chars; @@ -304,21 +375,28 @@ pipe_writev(struct file *filp, const struct iovec *_iov, int offset = buf->offset + buf->len; if (ops->can_merge && offset + chars <= PAGE_SIZE) { + int error, atomic = 1; void *addr; - int error; error = ops->pin(pipe, buf); if (error) goto out; - addr = ops->map(pipe, buf); + iov_fault_in_pages_read(iov, chars); +redo1: + addr = ops->map(pipe, buf, atomic); error = pipe_iov_copy_from_user(offset + addr, iov, - chars); - ops->unmap(pipe, buf); + chars, atomic); + ops->unmap(pipe, buf, addr); ret = error; do_wakeup = 1; - if (error) + if (error) { + if (atomic) { + atomic = 0; + goto redo1; + } goto out; + } buf->len += chars; total_len -= chars; ret = chars; @@ -341,7 +419,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov, int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); struct pipe_buffer *buf = pipe->bufs + newbuf; struct page *page = pipe->tmp_page; - int error; + char *src; + int error, atomic = 1; if (!page) { page = alloc_page(GFP_HIGHUSER); @@ -361,11 +440,27 @@ pipe_writev(struct file *filp, const struct iovec *_iov, if (chars > total_len) chars = total_len; - error = pipe_iov_copy_from_user(kmap(page), iov, chars); - kunmap(page); + iov_fault_in_pages_read(iov, chars); +redo2: + if (atomic) + src = kmap_atomic(page, KM_USER0); + else + src = kmap(page); + + error = pipe_iov_copy_from_user(src, iov, chars, + atomic); + if (atomic) + kunmap_atomic(src, KM_USER0); + else + kunmap(page); + if (unlikely(error)) { + if (atomic) { + atomic = 0; + goto redo2; + } if (!ret) - ret = -EFAULT; + ret = error; break; } ret += chars; diff --git a/fs/splice.c b/fs/splice.c index 0a6916423e7d..d4664a297bab 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -640,13 +640,13 @@ find_page: /* * Careful, ->map() uses KM_USER0! */ - char *src = buf->ops->map(info, buf); + char *src = buf->ops->map(info, buf, 1); char *dst = kmap_atomic(page, KM_USER1); memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); kunmap_atomic(dst, KM_USER1); - buf->ops->unmap(info, buf); + buf->ops->unmap(info, buf, src); } ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index b8aae1fc5185..4c054491e38e 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -5,7 +5,8 @@ #define PIPE_BUFFERS (16) -#define PIPE_BUF_FLAG_LRU 0x01 +#define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ +#define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ struct pipe_buffer { struct page *page; @@ -28,8 +29,8 @@ struct pipe_buffer { */ struct pipe_buf_operations { int can_merge; - void * (*map)(struct pipe_inode_info *, struct pipe_buffer *); - void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); + void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int); + void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *); int (*pin)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); @@ -64,8 +65,8 @@ void free_pipe_info(struct inode * inode); void __free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ -void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *); -void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *); +void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int); +void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *); -- cgit v1.2.3 From 7afa6fd037e51e95d322990cb127bb2b1217251a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 1 May 2006 20:02:33 +0200 Subject: [PATCH] vmsplice: allow user to pass in gift pages If SPLICE_F_GIFT is set, the user is basically giving this pages away to the kernel. That means we can steal them for eg page cache uses instead of copying it. The data must be properly page aligned and also a multiple of the page size in length. Signed-off-by: Jens Axboe --- fs/splice.c | 28 +++++++++++++++++++++++++--- include/linux/pipe_fs_i.h | 2 ++ 2 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/splice.c b/fs/splice.c index d4664a297bab..b150493b6fc3 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -141,7 +141,10 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - return 1; + if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) + return 1; + + return 0; } static struct pipe_buf_operations user_page_pipe_buf_ops = { @@ -186,6 +189,9 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, buf->offset = spd->partial[page_nr].offset; buf->len = spd->partial[page_nr].len; buf->ops = spd->ops; + if (spd->flags & SPLICE_F_GIFT) + buf->flags |= PIPE_BUF_FLAG_GIFT; + pipe->nrbufs++; page_nr++; ret += buf->len; @@ -1073,7 +1079,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, */ static int get_iovec_page_array(const struct iovec __user *iov, unsigned int nr_vecs, struct page **pages, - struct partial_page *partial) + struct partial_page *partial, int aligned) { int buffers = 0, error = 0; @@ -1113,6 +1119,15 @@ static int get_iovec_page_array(const struct iovec __user *iov, * in the user pages. */ off = (unsigned long) base & ~PAGE_MASK; + + /* + * If asked for alignment, the offset must be zero and the + * length a multiple of the PAGE_SIZE. + */ + error = -EINVAL; + if (aligned && (off || len & ~PAGE_MASK)) + break; + npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; if (npages > PIPE_BUFFERS - buffers) npages = PIPE_BUFFERS - buffers; @@ -1206,7 +1221,8 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, else if (unlikely(!nr_segs)) return 0; - spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial); + spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, + flags & SPLICE_F_GIFT); if (spd.nr_pages <= 0) return spd.nr_pages; @@ -1314,6 +1330,12 @@ static int link_pipe(struct pipe_inode_info *ipipe, obuf = opipe->bufs + nbuf; *obuf = *ibuf; + /* + * Don't inherit the gift flag, we need to + * prevent multiple steals of this page. + */ + obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + if (obuf->len > len) obuf->len = len; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 4c054491e38e..df4d3fa7d3dc 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -7,6 +7,7 @@ #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ #define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ +#define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ struct pipe_buffer { struct page *page; @@ -79,6 +80,7 @@ int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *); /* we may still block on the fd we splice */ /* from/to, of course */ #define SPLICE_F_MORE (0x04) /* expect more data */ +#define SPLICE_F_GIFT (0x08) /* pages passed in are a gift */ /* * Passed to the actors -- cgit v1.2.3 From 953039c8df7beb2694814e20e2707a77d335a2e3 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 1 May 2006 12:16:12 -0700 Subject: [PATCH] powerpc: Allow devices to register with numa topology Change of_node_to_nid() to traverse the device tree, looking for a numa id. Cell uses this to assign ids to SPUs, which are children of the CPU node. Existing users of of_node_to_nid() are altered to use of_node_to_nid_single(), which doesn't do the traversal. Export an attach_sysdev_to_node() function, allowing system devices (eg. SPUs) to link themselves into the numa topology in sysfs. Signed-off-by: Arnd Bergmann Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/sysfs.c | 18 ++++++++++++++++++ arch/powerpc/mm/numa.c | 32 +++++++++++++++++++++++++++----- include/asm-powerpc/topology.h | 24 ++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ed737cacf92d..5bc2585c8036 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -322,13 +322,31 @@ static void register_nodes(void) } } } + +int sysfs_add_device_to_node(struct sys_device *dev, int nid) +{ + struct node *node = &node_devices[nid]; + return sysfs_create_link(&node->sysdev.kobj, &dev->kobj, + kobject_name(&dev->kobj)); +} + +void sysfs_remove_device_from_node(struct sys_device *dev, int nid) +{ + struct node *node = &node_devices[nid]; + sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj)); +} + #else static void register_nodes(void) { return; } + #endif +EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); +EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); + /* Only valid if CPU is present. */ static ssize_t show_physical_id(struct sys_device *dev, char *buf) { diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 0a335f34974c..092355f37399 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -194,7 +194,7 @@ static int *of_get_associativity(struct device_node *dev) /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa * info is found. */ -static int of_node_to_nid(struct device_node *device) +static int of_node_to_nid_single(struct device_node *device) { int nid = -1; unsigned int *tmp; @@ -216,6 +216,28 @@ out: return nid; } +/* Walk the device tree upwards, looking for an associativity id */ +int of_node_to_nid(struct device_node *device) +{ + struct device_node *tmp; + int nid = -1; + + of_node_get(device); + while (device) { + nid = of_node_to_nid_single(device); + if (nid != -1) + break; + + tmp = device; + device = of_get_parent(tmp); + of_node_put(tmp); + } + of_node_put(device); + + return nid; +} +EXPORT_SYMBOL_GPL(of_node_to_nid); + /* * In theory, the "ibm,associativity" property may contain multiple * associativity lists because a resource may be multiply connected @@ -300,7 +322,7 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu) goto out; } - nid = of_node_to_nid(cpu); + nid = of_node_to_nid_single(cpu); if (nid < 0 || !node_online(nid)) nid = any_online_node(NODE_MASK_ALL); @@ -393,7 +415,7 @@ static int __init parse_numa_properties(void) cpu = find_cpu_node(i); BUG_ON(!cpu); - nid = of_node_to_nid(cpu); + nid = of_node_to_nid_single(cpu); of_node_put(cpu); /* @@ -437,7 +459,7 @@ new_range: * have associativity properties. If none, then * everything goes to default_nid. */ - nid = of_node_to_nid(memory); + nid = of_node_to_nid_single(memory); if (nid < 0) nid = default_nid; node_set_online(nid); @@ -776,7 +798,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) ha_new_range: start = read_n_cells(n_mem_addr_cells, &memcell_buf); size = read_n_cells(n_mem_size_cells, &memcell_buf); - nid = of_node_to_nid(memory); + nid = of_node_to_nid_single(memory); /* Domains not present at boot default to 0 */ if (nid < 0 || !node_online(nid)) diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h index 1e19cd00af25..87362a05542b 100644 --- a/include/asm-powerpc/topology.h +++ b/include/asm-powerpc/topology.h @@ -4,6 +4,9 @@ #include +struct sys_device; +struct device_node; + #ifdef CONFIG_NUMA #include @@ -27,6 +30,8 @@ static inline int node_to_first_cpu(int node) return first_cpu(tmp); } +int of_node_to_nid(struct device_node *device); + #define pcibus_to_node(node) (-1) #define pcibus_to_cpumask(bus) (cpu_online_map) @@ -57,10 +62,29 @@ static inline int node_to_first_cpu(int node) extern void __init dump_numa_cpu_topology(void); +extern int sysfs_add_device_to_node(struct sys_device *dev, int nid); +extern void sysfs_remove_device_from_node(struct sys_device *dev, int nid); + #else +static inline int of_node_to_nid(struct device_node *device) +{ + return 0; +} + static inline void dump_numa_cpu_topology(void) {} +static inline int sysfs_add_device_to_node(struct sys_device *dev, int nid) +{ + return 0; +} + +static inline void sysfs_remove_device_from_node(struct sys_device *dev, + int nid) +{ +} + + #include #endif /* CONFIG_NUMA */ -- cgit v1.2.3 From 8261aa600943046a5305564a1cd7432009971799 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 1 May 2006 12:16:13 -0700 Subject: [PATCH] powerpc: cell: Add numa id to struct spu Add an nid member to the spu structure, and store the numa id of the spu there on creation. Signed-off-by: Arnd Bergmann Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/spu_base.c | 43 ++++++++++++++++------------------ include/asm-powerpc/spu.h | 1 + 2 files changed, 21 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index b788e165c557..ad141fe8d52d 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -529,8 +529,8 @@ static int __init find_spu_node_id(struct device_node *spe) return id ? *id : 0; } -static int __init cell_spuprop_present(struct device_node *spe, - const char *prop) +static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe, + const char *prop) { static DEFINE_MUTEX(add_spumem_mutex); @@ -541,7 +541,6 @@ static int __init cell_spuprop_present(struct device_node *spe, int proplen; unsigned long start_pfn, nr_pages; - int node_id; struct pglist_data *pgdata; struct zone *zone; int ret; @@ -552,14 +551,7 @@ static int __init cell_spuprop_present(struct device_node *spe, start_pfn = p->address >> PAGE_SHIFT; nr_pages = ((unsigned long)p->len + PAGE_SIZE - 1) >> PAGE_SHIFT; - /* - * XXX need to get the correct NUMA node in here. This may - * be different from the spe::node_id property, e.g. when - * the host firmware is not NUMA aware. - */ - node_id = 0; - - pgdata = NODE_DATA(node_id); + pgdata = NODE_DATA(spu->nid); zone = pgdata->node_zones; /* XXX rethink locking here */ @@ -570,8 +562,8 @@ static int __init cell_spuprop_present(struct device_node *spe, return ret; } -static void __iomem * __init map_spe_prop(struct device_node *n, - const char *name) +static void __iomem * __init map_spe_prop(struct spu *spu, + struct device_node *n, const char *name) { struct address_prop { unsigned long address; @@ -589,7 +581,7 @@ static void __iomem * __init map_spe_prop(struct device_node *n, prop = p; - err = cell_spuprop_present(n, name); + err = cell_spuprop_present(spu, n, name); if (err && (err != -EEXIST)) goto out; @@ -607,44 +599,45 @@ static void spu_unmap(struct spu *spu) iounmap((u8 __iomem *)spu->local_store); } -static int __init spu_map_device(struct spu *spu, struct device_node *spe) +static int __init spu_map_device(struct spu *spu, struct device_node *node) { char *prop; int ret; ret = -ENODEV; - prop = get_property(spe, "isrc", NULL); + prop = get_property(node, "isrc", NULL); if (!prop) goto out; spu->isrc = *(unsigned int *)prop; - spu->name = get_property(spe, "name", NULL); + spu->name = get_property(node, "name", NULL); if (!spu->name) goto out; - prop = get_property(spe, "local-store", NULL); + prop = get_property(node, "local-store", NULL); if (!prop) goto out; spu->local_store_phys = *(unsigned long *)prop; /* we use local store as ram, not io memory */ - spu->local_store = (void __force *)map_spe_prop(spe, "local-store"); + spu->local_store = (void __force *) + map_spe_prop(spu, node, "local-store"); if (!spu->local_store) goto out; - prop = get_property(spe, "problem", NULL); + prop = get_property(node, "problem", NULL); if (!prop) goto out_unmap; spu->problem_phys = *(unsigned long *)prop; - spu->problem= map_spe_prop(spe, "problem"); + spu->problem= map_spe_prop(spu, node, "problem"); if (!spu->problem) goto out_unmap; - spu->priv1= map_spe_prop(spe, "priv1"); + spu->priv1= map_spe_prop(spu, node, "priv1"); /* priv1 is not available on a hypervisor */ - spu->priv2= map_spe_prop(spe, "priv2"); + spu->priv2= map_spe_prop(spu, node, "priv2"); if (!spu->priv2) goto out_unmap; ret = 0; @@ -672,6 +665,10 @@ static int __init create_spu(struct device_node *spe) goto out_free; spu->node = find_spu_node_id(spe); + spu->nid = of_node_to_nid(spe); + if (spu->nid == -1) + spu->nid = 0; + spu->stop_code = 0; spu->slb_replace = 0; spu->mm = NULL; diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h index f431d8b0b651..7cfcff3ef027 100644 --- a/include/asm-powerpc/spu.h +++ b/include/asm-powerpc/spu.h @@ -117,6 +117,7 @@ struct spu { struct list_head list; struct list_head sched_list; int number; + int nid; u32 isrc; u32 node; u64 flags; -- cgit v1.2.3 From 46c5ea3c9ae7fbc6e52a13c92e59d4fc7f4ca80a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 2 May 2006 05:12:22 +0200 Subject: [NETFILTER] x_tables: fix compat related crash on non-x86 When iptables userspace adds an ipt_standard_target, it calculates the size of the entire entry as: sizeof(struct ipt_entry) + XT_ALIGN(sizeof(struct ipt_standard_target)) ipt_standard_target looks like this: struct xt_standard_target { struct xt_entry_target target; int verdict; }; xt_entry_target contains a pointer, so when compiled for 64 bit the structure gets an extra 4 byte of padding at the end. On 32 bit architectures where iptables aligns to 8 byte it will also have 4 byte padding at the end because it is only 36 bytes large. The compat_ipt_standard_fn in the kernel adjusts the offsets by sizeof(struct ipt_standard_target) - sizeof(struct compat_ipt_standard_target), which will always result in 4, even if the structure from userspace was already padded to a multiple of 8. On x86 this works out by accident because userspace only aligns to 4, on all other architectures this is broken and causes incorrect adjustments to the size and following offsets. Thanks to Linus for lots of debugging help and testing. Signed-off-by: Patrick McHardy Signed-off-by: Linus Torvalds --- include/linux/netfilter/x_tables.h | 8 ++++++++ net/ipv4/netfilter/ip_tables.c | 33 ++++++++++++++------------------- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 38701454e197..48cc32d83f77 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -337,6 +337,10 @@ struct compat_xt_entry_match char name[XT_FUNCTION_MAXNAMELEN - 1]; u_int8_t revision; } user; + struct { + u_int16_t match_size; + compat_uptr_t match; + } kernel; u_int16_t match_size; } u; unsigned char data[0]; @@ -350,6 +354,10 @@ struct compat_xt_entry_target char name[XT_FUNCTION_MAXNAMELEN - 1]; u_int8_t revision; } user; + struct { + u_int16_t target_size; + compat_uptr_t target; + } kernel; u_int16_t target_size; } u; unsigned char data[0]; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index d25ac8ba6eba..6d1c11563943 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -956,15 +956,16 @@ struct compat_ipt_standard_target compat_int_t verdict; }; -#define IPT_ST_OFFSET (sizeof(struct ipt_standard_target) - \ - sizeof(struct compat_ipt_standard_target)) - struct compat_ipt_standard { struct compat_ipt_entry entry; struct compat_ipt_standard_target target; }; +#define IPT_ST_LEN XT_ALIGN(sizeof(struct ipt_standard_target)) +#define IPT_ST_COMPAT_LEN COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target)) +#define IPT_ST_OFFSET (IPT_ST_LEN - IPT_ST_COMPAT_LEN) + static int compat_ipt_standard_fn(void *target, void **dstptr, int *size, int convert) { @@ -975,35 +976,29 @@ static int compat_ipt_standard_fn(void *target, ret = 0; switch (convert) { case COMPAT_TO_USER: - pst = (struct ipt_standard_target *)target; + pst = target; memcpy(&compat_st.target, &pst->target, - sizeof(struct ipt_entry_target)); + sizeof(compat_st.target)); compat_st.verdict = pst->verdict; if (compat_st.verdict > 0) compat_st.verdict -= compat_calc_jump(compat_st.verdict); - compat_st.target.u.user.target_size = - sizeof(struct compat_ipt_standard_target); - if (__copy_to_user(*dstptr, &compat_st, - sizeof(struct compat_ipt_standard_target))) + compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN; + if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN)) ret = -EFAULT; *size -= IPT_ST_OFFSET; - *dstptr += sizeof(struct compat_ipt_standard_target); + *dstptr += IPT_ST_COMPAT_LEN; break; case COMPAT_FROM_USER: - pcompat_st = - (struct compat_ipt_standard_target *)target; - memcpy(&st.target, &pcompat_st->target, - sizeof(struct ipt_entry_target)); + pcompat_st = target; + memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN); st.verdict = pcompat_st->verdict; if (st.verdict > 0) st.verdict += compat_calc_jump(st.verdict); - st.target.u.user.target_size = - sizeof(struct ipt_standard_target); - memcpy(*dstptr, &st, - sizeof(struct ipt_standard_target)); + st.target.u.user.target_size = IPT_ST_LEN; + memcpy(*dstptr, &st, IPT_ST_LEN); *size += IPT_ST_OFFSET; - *dstptr += sizeof(struct ipt_standard_target); + *dstptr += IPT_ST_LEN; break; case COMPAT_CALC_SIZE: *size += IPT_ST_OFFSET; -- cgit v1.2.3 From a893b99be71f1d669b74f840e3a683dd077d007b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 2 May 2006 15:03:27 +0200 Subject: [PATCH] splice: fix page LRU accounting Currently we rely on the PIPE_BUF_FLAG_LRU flag being set correctly to know whether we need to fiddle with page LRU state after stealing it, however for some origins we just don't know if the page is on the LRU list or not. So remove PIPE_BUF_FLAG_LRU and do this check/add manually in pipe_to_file() instead. Signed-off-by: Jens Axboe --- fs/splice.c | 31 +++++++++++++++++++++---------- include/linux/pipe_fs_i.h | 5 ++--- 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/splice.c b/fs/splice.c index b0c157d76948..3318b965c10b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -78,7 +78,6 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, return 1; } - buf->flags |= PIPE_BUF_FLAG_LRU; return 0; } @@ -86,8 +85,6 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) { page_cache_release(buf->page); - buf->page = NULL; - buf->flags &= ~PIPE_BUF_FLAG_LRU; } static int page_cache_pipe_buf_pin(struct pipe_inode_info *info, @@ -570,22 +567,36 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) { /* * If steal succeeds, buf->page is now pruned from the vm - * side (LRU and page cache) and we can reuse it. The page - * will also be looked on successful return. + * side (page cache) and we can reuse it. The page will also + * be locked on successful return. */ if (buf->ops->steal(info, buf)) goto find_page; page = buf->page; + page_cache_get(page); + + /* + * page must be on the LRU for adding to the pagecache. + * Check this without grabbing the zone lock, if it isn't + * the do grab the zone lock, recheck, and add if necessary. + */ + if (!PageLRU(page)) { + struct zone *zone = page_zone(page); + + spin_lock_irq(&zone->lru_lock); + if (!PageLRU(page)) { + SetPageLRU(page); + add_page_to_inactive_list(zone, page); + } + spin_unlock_irq(&zone->lru_lock); + } + if (add_to_page_cache(page, mapping, index, gfp_mask)) { + page_cache_release(page); unlock_page(page); goto find_page; } - - page_cache_get(page); - - if (!(buf->flags & PIPE_BUF_FLAG_LRU)) - lru_cache_add(page); } else { find_page: page = find_lock_page(mapping, index); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index df4d3fa7d3dc..070954f05947 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -5,9 +5,8 @@ #define PIPE_BUFFERS (16) -#define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ -#define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ -#define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ +#define PIPE_BUF_FLAG_ATOMIC 0x01 /* was atomically mapped */ +#define PIPE_BUF_FLAG_GIFT 0x02 /* page is a gift */ struct pipe_buffer { struct page *page; -- cgit v1.2.3 From 330ab71619bacc4d4494227a6cfc9b7f5500403d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 2 May 2006 15:29:57 +0200 Subject: [PATCH] vmsplice: restrict stealing a little more Apply the same rules as the anon pipe pages, only allow stealing if no one else is using the page. Signed-off-by: Jens Axboe --- fs/pipe.c | 6 +++--- fs/splice.c | 2 +- include/linux/pipe_fs_i.h | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/pipe.c b/fs/pipe.c index 3941a7f78b5d..5acd8954aaa0 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -184,8 +184,8 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, kunmap(buf->page); } -static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +int generic_pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { struct page *page = buf->page; @@ -213,7 +213,7 @@ static struct pipe_buf_operations anon_pipe_buf_ops = { .unmap = generic_pipe_buf_unmap, .pin = generic_pipe_buf_pin, .release = anon_pipe_buf_release, - .steal = anon_pipe_buf_steal, + .steal = generic_pipe_buf_steal, .get = generic_pipe_buf_get, }; diff --git a/fs/splice.c b/fs/splice.c index 3318b965c10b..7fb04970c72d 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -141,7 +141,7 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) return 1; - return 0; + return generic_pipe_buf_steal(pipe, buf); } static struct pipe_buf_operations user_page_pipe_buf_ops = { diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 070954f05947..ba73108cbf8b 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -69,6 +69,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int); void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *); +int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); /* * splice is tied to pipes as a transport (at least for now), so we'll just -- cgit v1.2.3 From 37be4e7809e0581db85387e126ae4da68c3d6286 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 2 May 2006 17:24:59 +0100 Subject: [MMC] extend data timeout for writes The CSD contains a "read2write factor" which determines the multiplier to be applied to the read timeout to obtain the write timeout. We were ignoring this parameter, resulting in the possibility for writes being timed out too early. Signed-off-by: Russell King --- drivers/mmc/mmc.c | 2 ++ drivers/mmc/mmc_block.c | 6 ++++++ include/linux/mmc/card.h | 1 + 3 files changed, 9 insertions(+) (limited to 'include') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index da6ddd910fc5..05aa4d6b4f24 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -549,6 +549,7 @@ static void mmc_decode_csd(struct mmc_card *card) csd->read_partial = UNSTUFF_BITS(resp, 79, 1); csd->write_misalign = UNSTUFF_BITS(resp, 78, 1); csd->read_misalign = UNSTUFF_BITS(resp, 77, 1); + csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3); csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4); csd->write_partial = UNSTUFF_BITS(resp, 21, 1); } else { @@ -583,6 +584,7 @@ static void mmc_decode_csd(struct mmc_card *card) csd->read_partial = UNSTUFF_BITS(resp, 79, 1); csd->write_misalign = UNSTUFF_BITS(resp, 78, 1); csd->read_misalign = UNSTUFF_BITS(resp, 77, 1); + csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3); csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4); csd->write_partial = UNSTUFF_BITS(resp, 21, 1); } diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index 8eb2a2ede64b..06bd1f4cb9b1 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -187,6 +187,12 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) brq.cmd.opcode = MMC_WRITE_BLOCK; brq.data.flags |= MMC_DATA_WRITE; brq.data.blocks = 1; + + /* + * Scale up the timeout by the r2w factor + */ + brq.data.timeout_ns <<= card->csd.r2w_factor; + brq.data.timeout_clks <<= card->csd.r2w_factor; } if (brq.data.blocks > 1) { diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 30dd978c1ec8..991a37382a22 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -28,6 +28,7 @@ struct mmc_csd { unsigned short cmdclass; unsigned short tacc_clks; unsigned int tacc_ns; + unsigned int r2w_factor; unsigned int max_dtr; unsigned int read_blkbits; unsigned int write_blkbits; -- cgit v1.2.3 From 61f5657c50341198ff05e375e6f1fc0476556562 Mon Sep 17 00:00:00 2001 From: Vitaly Bordug Date: Sat, 29 Apr 2006 22:32:44 +0400 Subject: [PATCH] ppc32 CPM_UART: Fixed break send on SCC SCC uart sends a break sequence each time it is stopped with the CPM_CR_STOP_TX command. That means that each time an application closes the serial device, a break is transmitted. To fix this, graceful tx stop is issued for SCC. Signed-off-by: David Jander Signed-off-by: Vitaly Bordug Signed-off-by: Paul Mackerras --- drivers/serial/cpm_uart/cpm_uart_core.c | 6 +++++- include/asm-ppc/commproc.h | 1 + include/asm-ppc/cpm2.h | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c index ced193bf9e1e..b9d1185df20c 100644 --- a/drivers/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/serial/cpm_uart/cpm_uart_core.c @@ -457,7 +457,11 @@ static void cpm_uart_shutdown(struct uart_port *port) } /* Shut them really down and reinit buffer descriptors */ - cpm_line_cr_cmd(line, CPM_CR_STOP_TX); + if (IS_SMC(pinfo)) + cpm_line_cr_cmd(line, CPM_CR_STOP_TX); + else + cpm_line_cr_cmd(line, CPM_CR_GRA_STOP_TX); + cpm_uart_initbd(pinfo); } } diff --git a/include/asm-ppc/commproc.h b/include/asm-ppc/commproc.h index 973e60908234..31f362966a58 100644 --- a/include/asm-ppc/commproc.h +++ b/include/asm-ppc/commproc.h @@ -35,6 +35,7 @@ #define CPM_CR_INIT_TX ((ushort)0x0002) #define CPM_CR_HUNT_MODE ((ushort)0x0003) #define CPM_CR_STOP_TX ((ushort)0x0004) +#define CPM_CR_GRA_STOP_TX ((ushort)0x0005) #define CPM_CR_RESTART_TX ((ushort)0x0006) #define CPM_CR_CLOSE_RX_BD ((ushort)0x0007) #define CPM_CR_SET_GADDR ((ushort)0x0008) diff --git a/include/asm-ppc/cpm2.h b/include/asm-ppc/cpm2.h index b638b87cebe3..c70344b91049 100644 --- a/include/asm-ppc/cpm2.h +++ b/include/asm-ppc/cpm2.h @@ -69,7 +69,7 @@ #define CPM_CR_INIT_TX ((ushort)0x0002) #define CPM_CR_HUNT_MODE ((ushort)0x0003) #define CPM_CR_STOP_TX ((ushort)0x0004) -#define CPM_CR_GRA_STOP_TX ((ushort)0x0005) +#define CPM_CR_GRA_STOP_TX ((ushort)0x0005) #define CPM_CR_RESTART_TX ((ushort)0x0006) #define CPM_CR_SET_GADDR ((ushort)0x0008) #define CPM_CR_START_IDMA ((ushort)0x0009) -- cgit v1.2.3 From 6e1976961c9bd9a3dc368139fab1883961efc879 Mon Sep 17 00:00:00 2001 From: Vitaly Bordug Date: Sat, 29 Apr 2006 23:06:00 +0400 Subject: [PATCH] ppc32 CPM_UART: fixes and improvements A number of small issues are fixed, and added the header file, missed from the original series. With this, driver should be pretty stable as tested among both platform-device-driven and "old way" boards. Also added missing GPL statement , and updated year field on existing ones to reflect code update. Signed-off-by: Vitaly Bordug Signed-off-by: Paul Mackerras --- arch/ppc/platforms/mpc866ads_setup.c | 2 +- drivers/serial/cpm_uart/cpm_uart.h | 19 ++++++++--- drivers/serial/cpm_uart/cpm_uart_core.c | 31 +++++++++++++---- drivers/serial/cpm_uart/cpm_uart_cpm1.c | 2 ++ drivers/serial/cpm_uart/cpm_uart_cpm2.c | 2 ++ include/linux/fs_uart_pd.h | 60 +++++++++++++++++++++++++++++++++ 6 files changed, 104 insertions(+), 12 deletions(-) create mode 100644 include/linux/fs_uart_pd.h (limited to 'include') diff --git a/arch/ppc/platforms/mpc866ads_setup.c b/arch/ppc/platforms/mpc866ads_setup.c index 6ce3b842defe..d919dab61347 100644 --- a/arch/ppc/platforms/mpc866ads_setup.c +++ b/arch/ppc/platforms/mpc866ads_setup.c @@ -378,7 +378,7 @@ int __init mpc866ads_init(void) ppc_sys_device_setfunc(MPC8xx_CPM_SMC1, PPC_SYS_FUNC_UART); #endif -#ifdef CONFIG_SERIAL_CPM_SMCer +#ifdef CONFIG_SERIAL_CPM_SMC ppc_sys_device_enable(MPC8xx_CPM_SMC2); ppc_sys_device_setfunc(MPC8xx_CPM_SMC2, PPC_SYS_FUNC_UART); #endif diff --git a/drivers/serial/cpm_uart/cpm_uart.h b/drivers/serial/cpm_uart/cpm_uart.h index aa5eb7ddeda9..3b35cb779539 100644 --- a/drivers/serial/cpm_uart/cpm_uart.h +++ b/drivers/serial/cpm_uart/cpm_uart.h @@ -5,6 +5,13 @@ * * Copyright (C) 2004 Freescale Semiconductor, Inc. * + * 2006 (c) MontaVista Software, Inc. + * Vitaly Bordug + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * */ #ifndef CPM_UART_H #define CPM_UART_H @@ -101,12 +108,13 @@ static inline unsigned long cpu2cpm_addr(void* addr, struct uart_cpm_port *pinfo int offset; u32 val = (u32)addr; /* sane check */ - if ((val >= (u32)pinfo->mem_addr) && + if (likely((val >= (u32)pinfo->mem_addr)) && (val<((u32)pinfo->mem_addr + pinfo->mem_size))) { offset = val - (u32)pinfo->mem_addr; return pinfo->dma_addr+offset; } - printk("%s(): address %x to translate out of range!\n", __FUNCTION__, val); + /* something nasty happened */ + BUG(); return 0; } @@ -115,12 +123,13 @@ static inline void *cpm2cpu_addr(unsigned long addr, struct uart_cpm_port *pinfo int offset; u32 val = addr; /* sane check */ - if ((val >= pinfo->dma_addr) && - (val<(pinfo->dma_addr + pinfo->mem_size))) { + if (likely((val >= pinfo->dma_addr) && + (val<(pinfo->dma_addr + pinfo->mem_size)))) { offset = val - (u32)pinfo->dma_addr; return (void*)(pinfo->mem_addr+offset); } - printk("%s(): address %x to translate out of range!\n", __FUNCTION__, val); + /* something nasty happened */ + BUG(); return 0; } diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c index b9d1185df20c..969f94900431 100644 --- a/drivers/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/serial/cpm_uart/cpm_uart_core.c @@ -12,7 +12,8 @@ * * Copyright (C) 2004 Freescale Semiconductor, Inc. * (C) 2004 Intracom, S.A. - * (C) 2005 MontaVista Software, Inc. by Vitaly Bordug + * (C) 2005-2006 MontaVista Software, Inc. + * Vitaly Bordug * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -81,7 +82,7 @@ early_uart_get_pdev(int index) } -void cpm_uart_count(void) +static void cpm_uart_count(void) { cpm_uart_nr = 0; #ifdef CONFIG_SERIAL_CPM_SMC1 @@ -104,6 +105,21 @@ void cpm_uart_count(void) #endif } +/* Get UART number by its id */ +static int cpm_uart_id2nr(int id) +{ + int i; + if (id < UART_NR) { + for (i=0; ifs_no; line++); + line = cpm_uart_id2nr(idx); + if(line < 0) { + printk(KERN_ERR"%s(): port %d is not registered", __FUNCTION__, idx); + return -1; + } pinfo = (struct uart_cpm_port *) &cpm_uart_ports[idx]; @@ -1245,8 +1265,7 @@ static int cpm_uart_drv_probe(struct device *dev) } pdata = pdev->dev.platform_data; - pr_debug("cpm_uart_drv_probe: Adding CPM UART %d\n", - cpm_uart_port_map[pdata->fs_no]); + pr_debug("cpm_uart_drv_probe: Adding CPM UART %d\n", cpm_uart_id2nr(pdata->fs_no)); if ((ret = cpm_uart_drv_get_platform_data(pdev, 0))) return ret; @@ -1265,7 +1284,7 @@ static int cpm_uart_drv_remove(struct device *dev) struct fs_uart_platform_info *pdata = pdev->dev.platform_data; pr_debug("cpm_uart_drv_remove: Removing CPM UART %d\n", - cpm_uart_port_map[pdata->fs_no]); + cpm_uart_id2nr(pdata->fs_no)); uart_remove_one_port(&cpm_reg, &cpm_uart_ports[pdata->fs_no].port); return 0; diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm1.c b/drivers/serial/cpm_uart/cpm_uart_cpm1.c index a5a30622637a..17406a05ce1f 100644 --- a/drivers/serial/cpm_uart/cpm_uart_cpm1.c +++ b/drivers/serial/cpm_uart/cpm_uart_cpm1.c @@ -8,6 +8,8 @@ * * Copyright (C) 2004 Freescale Semiconductor, Inc. * (C) 2004 Intracom, S.A. + * (C) 2006 MontaVista Software, Inc. + * Vitaly Bordug * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm2.c b/drivers/serial/cpm_uart/cpm_uart_cpm2.c index 7c6b07aeea92..4b2de08f46d0 100644 --- a/drivers/serial/cpm_uart/cpm_uart_cpm2.c +++ b/drivers/serial/cpm_uart/cpm_uart_cpm2.c @@ -8,6 +8,8 @@ * * Copyright (C) 2004 Freescale Semiconductor, Inc. * (C) 2004 Intracom, S.A. + * (C) 2006 MontaVista Software, Inc. + * Vitaly Bordug * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/include/linux/fs_uart_pd.h b/include/linux/fs_uart_pd.h new file mode 100644 index 000000000000..f5975126b712 --- /dev/null +++ b/include/linux/fs_uart_pd.h @@ -0,0 +1,60 @@ +/* + * Platform information definitions for the CPM Uart driver. + * + * 2006 (c) MontaVista Software, Inc. + * Vitaly Bordug + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef FS_UART_PD_H +#define FS_UART_PD_H + +#include +#include + +enum fs_uart_id { + fsid_smc1_uart, + fsid_smc2_uart, + fsid_scc1_uart, + fsid_scc2_uart, + fsid_scc3_uart, + fsid_scc4_uart, + fs_uart_nr, +}; + +static inline int fs_uart_id_scc2fsid(int id) +{ + return fsid_scc1_uart + id - 1; +} + +static inline int fs_uart_id_fsid2scc(int id) +{ + return id - fsid_scc1_uart + 1; +} + +static inline int fs_uart_id_smc2fsid(int id) +{ + return fsid_smc1_uart + id - 1; +} + +static inline int fs_uart_id_fsid2smc(int id) +{ + return id - fsid_smc1_uart + 1; +} + +struct fs_uart_platform_info { + void(*init_ioports)(void); + /* device specific information */ + int fs_no; /* controller index */ + u32 uart_clk; + u8 tx_num_fifo; + u8 tx_buf_size; + u8 rx_num_fifo; + u8 rx_buf_size; + u8 brg; +}; + +#endif -- cgit v1.2.3 From 6bfd93c32a5065d0e858780b3beb0b667081601c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 3 May 2006 23:02:04 +1000 Subject: powerpc: Fix incorrect might_sleep in __get_user/__put_user on kernel addresses We have a case where __get_user and __put_user can validly be used on kernel addresses in interrupt context - namely, the alignment exception handler, as our get/put_unaligned just do a single access and rely on the alignment exception handler to fix things up in the rare cases where the cpu can't handle it in hardware. Thus we can get alignment exceptions in the network stack at interrupt level. The alignment exception handler does a __get_user to read the instruction and blows up in might_sleep(). Since a __get_user on a kernel address won't actually ever sleep, this makes the might_sleep conditional on the address being less than PAGE_OFFSET. Signed-off-by: Paul Mackerras --- include/asm-powerpc/uaccess.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-powerpc/uaccess.h b/include/asm-powerpc/uaccess.h index 3872e924cdd6..d83fc29c2bbf 100644 --- a/include/asm-powerpc/uaccess.h +++ b/include/asm-powerpc/uaccess.h @@ -7,6 +7,7 @@ #include #include #include +#include #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -179,9 +180,11 @@ do { \ #define __put_user_nocheck(x, ptr, size) \ ({ \ long __pu_err; \ - might_sleep(); \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + if (!is_kernel_addr((unsigned long)__pu_addr)) \ + might_sleep(); \ __chk_user_ptr(ptr); \ - __put_user_size((x), (ptr), (size), __pu_err); \ + __put_user_size((x), __pu_addr, (size), __pu_err); \ __pu_err; \ }) @@ -258,9 +261,11 @@ do { \ ({ \ long __gu_err; \ unsigned long __gu_val; \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ - might_sleep(); \ - __get_user_size(__gu_val, (ptr), (size), __gu_err); \ + if (!is_kernel_addr((unsigned long)__gu_addr)) \ + might_sleep(); \ + __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) @@ -270,9 +275,11 @@ do { \ ({ \ long __gu_err; \ long long __gu_val; \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ - might_sleep(); \ - __get_user_size(__gu_val, (ptr), (size), __gu_err); \ + if (!is_kernel_addr((unsigned long)__gu_addr)) \ + might_sleep(); \ + __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) -- cgit v1.2.3 From 8c45112b823972e9ff7bbca77dc592ca2224951b Mon Sep 17 00:00:00 2001 From: David S. Miller Date: Wed, 3 May 2006 13:55:46 -0700 Subject: [SPARC]: Hook up vmsplice into syscall tables. Signed-off-by: David S. Miller --- arch/sparc/kernel/systbls.S | 2 +- arch/sparc64/kernel/sys32.S | 1 + arch/sparc64/kernel/systbls.S | 4 ++-- include/asm-sparc/unistd.h | 2 +- include/asm-sparc64/unistd.h | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S index db8faa75f94d..6e1135cc03b0 100644 --- a/arch/sparc/kernel/systbls.S +++ b/arch/sparc/kernel/systbls.S @@ -23,7 +23,7 @@ sys_call_table: /*10*/ .long sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys_mknod /*15*/ .long sys_chmod, sys_lchown16, sparc_brk, sys_nis_syscall, sys_lseek /*20*/ .long sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16 -/*25*/ .long sys_time, sys_ptrace, sys_alarm, sys_sigaltstack, sys_pause +/*25*/ .long sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_pause /*30*/ .long sys_utime, sys_lchown, sys_fchown, sys_access, sys_nice /*35*/ .long sys_chown, sys_sync, sys_kill, sys_newstat, sys_sendfile /*40*/ .long sys_newlstat, sys_dup, sys_pipe, sys_times, sys_getuid diff --git a/arch/sparc64/kernel/sys32.S b/arch/sparc64/kernel/sys32.S index f9b75760163c..bdf1f4d02e3f 100644 --- a/arch/sparc64/kernel/sys32.S +++ b/arch/sparc64/kernel/sys32.S @@ -139,6 +139,7 @@ SIGN3(sys32_ioprio_set, sys_ioprio_set, %o0, %o1, %o2) SIGN2(sys32_splice, sys_splice, %o0, %o1) SIGN2(sys32_sync_file_range, compat_sync_file_range, %o0, %o5) SIGN2(sys32_tee, sys_tee, %o0, %o1) +SIGN1(sys32_vmsplice, compat_sys_vmsplice, %o0) .globl sys32_mmap2 sys32_mmap2: diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index 62672cd92eca..d4b39cd30310 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -25,7 +25,7 @@ sys_call_table32: /*10*/ .word sys_unlink, sunos_execv, sys_chdir, sys32_chown16, sys32_mknod /*15*/ .word sys_chmod, sys32_lchown16, sparc_brk, sys32_perfctr, sys32_lseek /*20*/ .word sys_getpid, sys_capget, sys_capset, sys32_setuid16, sys32_getuid16 -/*25*/ .word compat_sys_time, sys_ptrace, sys_alarm, sys32_sigaltstack, sys32_pause +/*25*/ .word sys32_vmsplice, sys_ptrace, sys_alarm, sys32_sigaltstack, sys32_pause /*30*/ .word compat_sys_utime, sys_lchown, sys_fchown, sys32_access, sys32_nice .word sys_chown, sys_sync, sys32_kill, compat_sys_newstat, sys32_sendfile /*40*/ .word compat_sys_newlstat, sys_dup, sys_pipe, compat_sys_times, sys_getuid @@ -94,7 +94,7 @@ sys_call_table: /*10*/ .word sys_unlink, sys_nis_syscall, sys_chdir, sys_chown, sys_mknod /*15*/ .word sys_chmod, sys_lchown, sparc_brk, sys_perfctr, sys_lseek /*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid, sys_getuid -/*25*/ .word sys_nis_syscall, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall +/*25*/ .word sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall /*30*/ .word sys_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice .word sys_nis_syscall, sys_sync, sys_kill, sys_newstat, sys_sendfile64 /*40*/ .word sys_newlstat, sys_dup, sys_pipe, sys_times, sys_nis_syscall diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h index 32a48f623e2b..f5611a721fbd 100644 --- a/include/asm-sparc/unistd.h +++ b/include/asm-sparc/unistd.h @@ -41,7 +41,7 @@ #define __NR_capset 22 /* Linux Specific */ #define __NR_setuid 23 /* Implemented via setreuid in SunOS */ #define __NR_getuid 24 /* Common */ -/* #define __NR_time alias 25 ENOSYS under SunOS */ +#define __NR_vmsplice 25 /* ENOSYS under SunOS */ #define __NR_ptrace 26 /* Common */ #define __NR_alarm 27 /* Implemented via setitimer in SunOS */ #define __NR_sigaltstack 28 /* Common */ diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h index ca80e8aca128..68705748bec0 100644 --- a/include/asm-sparc64/unistd.h +++ b/include/asm-sparc64/unistd.h @@ -41,7 +41,7 @@ #define __NR_capset 22 /* Linux Specific */ #define __NR_setuid 23 /* Implemented via setreuid in SunOS */ #define __NR_getuid 24 /* Common */ -/* #define __NR_time alias 25 ENOSYS under SunOS */ +#define __NR_vmsplice 25 /* ENOSYS under SunOS */ #define __NR_ptrace 26 /* Common */ #define __NR_alarm 27 /* Implemented via setitimer in SunOS */ #define __NR_sigaltstack 28 /* Common */ -- cgit v1.2.3 From 1432873af7ae29d4bb3c56114c05b539d078ca62 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 3 May 2006 10:35:26 +0200 Subject: [PATCH] splice: LRU fixups Nick says that the current construct isn't safe. This goes back to the original, but sets PIPE_BUF_FLAG_LRU on user pages as well as they all seem to be on the LRU in the first place. Signed-off-by: Jens Axboe --- fs/splice.c | 33 +++++++++++---------------------- include/linux/pipe_fs_i.h | 5 +++-- 2 files changed, 14 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/splice.c b/fs/splice.c index 27f5e3738a7b..0b202425b0b5 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -78,6 +78,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, return 1; } + buf->flags |= PIPE_BUF_FLAG_LRU; return 0; } @@ -85,6 +86,7 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) { page_cache_release(buf->page); + buf->flags &= ~PIPE_BUF_FLAG_LRU; } static int page_cache_pipe_buf_pin(struct pipe_inode_info *info, @@ -141,6 +143,7 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) return 1; + buf->flags |= PIPE_BUF_FLAG_LRU; return generic_pipe_buf_steal(pipe, buf); } @@ -566,37 +569,23 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, */ if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) { /* - * If steal succeeds, buf->page is now pruned from the vm - * side (page cache) and we can reuse it. The page will also - * be locked on successful return. + * If steal succeeds, buf->page is now pruned from the + * pagecache and we can reuse it. The page will also be + * locked on successful return. */ if (buf->ops->steal(info, buf)) goto find_page; page = buf->page; - page_cache_get(page); - - /* - * page must be on the LRU for adding to the pagecache. - * Check this without grabbing the zone lock, if it isn't - * the do grab the zone lock, recheck, and add if necessary. - */ - if (!PageLRU(page)) { - struct zone *zone = page_zone(page); - - spin_lock_irq(&zone->lru_lock); - if (!PageLRU(page)) { - SetPageLRU(page); - add_page_to_inactive_list(zone, page); - } - spin_unlock_irq(&zone->lru_lock); - } - if (add_to_page_cache(page, mapping, index, gfp_mask)) { - page_cache_release(page); unlock_page(page); goto find_page; } + + page_cache_get(page); + + if (!(buf->flags & PIPE_BUF_FLAG_LRU)) + lru_cache_add(page); } else { find_page: page = find_lock_page(mapping, index); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index ba73108cbf8b..ea4f7cd7bfd8 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -5,8 +5,9 @@ #define PIPE_BUFFERS (16) -#define PIPE_BUF_FLAG_ATOMIC 0x01 /* was atomically mapped */ -#define PIPE_BUF_FLAG_GIFT 0x02 /* page is a gift */ +#define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ +#define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ +#define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ struct pipe_buffer { struct page *page; -- cgit v1.2.3 From 7582e9d17edbabab6cbe59467c5d1b5e8c04fca8 Mon Sep 17 00:00:00 2001 From: Jing Min Zhao Date: Wed, 3 May 2006 23:19:59 -0700 Subject: [NETFILTER]: H.323 helper: Change author's email address Signed-off-by: Jing Min Zhao Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h | 2 +- net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h b/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h index 0bd828081c0c..c6e9a0b6d30b 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h @@ -2,7 +2,7 @@ * ip_conntrack_helper_h323_asn1.h - BER and PER decoding library for H.323 * conntrack/NAT module. * - * Copyright (c) 2006 by Jing Min Zhao + * Copyright (c) 2006 by Jing Min Zhao * * This source code is licensed under General Public License version 2. * diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c b/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c index f52d2c45a6ae..355a53a5b6cd 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c @@ -2,7 +2,7 @@ * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323 * conntrack/NAT module. * - * Copyright (c) 2006 by Jing Min Zhao + * Copyright (c) 2006 by Jing Min Zhao * * This source code is licensed under General Public License version 2. * -- cgit v1.2.3 From e1fdb5b39656ea2be8cadde565e543649a988af9 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 3 May 2006 23:27:16 -0700 Subject: [AX.25]: Eleminate HZ from AX.25 kernel interfaces Convert all AX.25 sysctl time values from jiffies to ms as units. Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- include/net/ax25.h | 10 +++---- net/ax25/af_ax25.c | 73 ++++++++++++++++++++++++++-------------------- net/ax25/ax25_ds_timer.c | 3 +- net/ax25/sysctl_net_ax25.c | 10 +++---- 4 files changed, 53 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/net/ax25.h b/include/net/ax25.h index d052b221dbcd..5bd997487054 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -145,14 +145,14 @@ enum { #define AX25_DEF_CONMODE 2 /* Connected mode allowed */ #define AX25_DEF_WINDOW 2 /* Window=2 */ #define AX25_DEF_EWINDOW 32 /* Module-128 Window=32 */ -#define AX25_DEF_T1 (10 * HZ) /* T1=10s */ -#define AX25_DEF_T2 (3 * HZ) /* T2=3s */ -#define AX25_DEF_T3 (300 * HZ) /* T3=300s */ +#define AX25_DEF_T1 10000 /* T1=10s */ +#define AX25_DEF_T2 3000 /* T2=3s */ +#define AX25_DEF_T3 300000 /* T3=300s */ #define AX25_DEF_N2 10 /* N2=10 */ -#define AX25_DEF_IDLE (0 * 60 * HZ) /* Idle=None */ +#define AX25_DEF_IDLE 0 /* Idle=None */ #define AX25_DEF_PACLEN 256 /* Paclen=256 */ #define AX25_DEF_PROTOCOL AX25_PROTO_STD_SIMPLEX /* Standard AX.25 */ -#define AX25_DEF_DS_TIMEOUT (3 * 60 * HZ) /* DAMA timeout 3 minutes */ +#define AX25_DEF_DS_TIMEOUT 180000 /* DAMA timeout 3 minutes */ typedef struct ax25_uid_assoc { struct hlist_node uid_node; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a9f13dfde07e..a2e0dd047e9f 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -426,6 +426,26 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) return 0; } +static void ax25_fillin_cb_from_dev(ax25_cb *ax25, ax25_dev *ax25_dev) +{ + ax25->rtt = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]) / 2; + ax25->t1 = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]); + ax25->t2 = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T2]); + ax25->t3 = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T3]); + ax25->n2 = ax25_dev->values[AX25_VALUES_N2]; + ax25->paclen = ax25_dev->values[AX25_VALUES_PACLEN]; + ax25->idle = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_IDLE]); + ax25->backoff = ax25_dev->values[AX25_VALUES_BACKOFF]; + + if (ax25_dev->values[AX25_VALUES_AXDEFMODE]) { + ax25->modulus = AX25_EMODULUS; + ax25->window = ax25_dev->values[AX25_VALUES_EWINDOW]; + } else { + ax25->modulus = AX25_MODULUS; + ax25->window = ax25_dev->values[AX25_VALUES_WINDOW]; + } +} + /* * Fill in a created AX.25 created control block with the default * values for a particular device. @@ -435,39 +455,28 @@ void ax25_fillin_cb(ax25_cb *ax25, ax25_dev *ax25_dev) ax25->ax25_dev = ax25_dev; if (ax25->ax25_dev != NULL) { - ax25->rtt = ax25_dev->values[AX25_VALUES_T1] / 2; - ax25->t1 = ax25_dev->values[AX25_VALUES_T1]; - ax25->t2 = ax25_dev->values[AX25_VALUES_T2]; - ax25->t3 = ax25_dev->values[AX25_VALUES_T3]; - ax25->n2 = ax25_dev->values[AX25_VALUES_N2]; - ax25->paclen = ax25_dev->values[AX25_VALUES_PACLEN]; - ax25->idle = ax25_dev->values[AX25_VALUES_IDLE]; - ax25->backoff = ax25_dev->values[AX25_VALUES_BACKOFF]; - - if (ax25_dev->values[AX25_VALUES_AXDEFMODE]) { - ax25->modulus = AX25_EMODULUS; - ax25->window = ax25_dev->values[AX25_VALUES_EWINDOW]; - } else { - ax25->modulus = AX25_MODULUS; - ax25->window = ax25_dev->values[AX25_VALUES_WINDOW]; - } + ax25_fillin_cb_from_dev(ax25, ax25_dev); + return; + } + + /* + * No device, use kernel / AX.25 spec default values + */ + ax25->rtt = msecs_to_jiffies(AX25_DEF_T1) / 2; + ax25->t1 = msecs_to_jiffies(AX25_DEF_T1); + ax25->t2 = msecs_to_jiffies(AX25_DEF_T2); + ax25->t3 = msecs_to_jiffies(AX25_DEF_T3); + ax25->n2 = AX25_DEF_N2; + ax25->paclen = AX25_DEF_PACLEN; + ax25->idle = msecs_to_jiffies(AX25_DEF_IDLE); + ax25->backoff = AX25_DEF_BACKOFF; + + if (AX25_DEF_AXDEFMODE) { + ax25->modulus = AX25_EMODULUS; + ax25->window = AX25_DEF_EWINDOW; } else { - ax25->rtt = AX25_DEF_T1 / 2; - ax25->t1 = AX25_DEF_T1; - ax25->t2 = AX25_DEF_T2; - ax25->t3 = AX25_DEF_T3; - ax25->n2 = AX25_DEF_N2; - ax25->paclen = AX25_DEF_PACLEN; - ax25->idle = AX25_DEF_IDLE; - ax25->backoff = AX25_DEF_BACKOFF; - - if (AX25_DEF_AXDEFMODE) { - ax25->modulus = AX25_EMODULUS; - ax25->window = AX25_DEF_EWINDOW; - } else { - ax25->modulus = AX25_MODULUS; - ax25->window = AX25_DEF_WINDOW; - } + ax25->modulus = AX25_MODULUS; + ax25->window = AX25_DEF_WINDOW; } } diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 061083efc1dc..5961459935eb 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -61,7 +61,8 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev) return; del_timer(&ax25_dev->dama.slave_timer); - ax25_dev->dama.slave_timeout = ax25_dev->values[AX25_VALUES_DS_TIMEOUT] / 10; + ax25_dev->dama.slave_timeout = + msecs_to_jiffies(ax25_dev->values[AX25_VALUES_DS_TIMEOUT]) / 10; ax25_ds_add_timer(ax25_dev); } diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index 894a22558d9d..bdb64c36df12 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -18,14 +18,14 @@ static int min_backoff[1], max_backoff[] = {2}; static int min_conmode[1], max_conmode[] = {2}; static int min_window[] = {1}, max_window[] = {7}; static int min_ewindow[] = {1}, max_ewindow[] = {63}; -static int min_t1[] = {1}, max_t1[] = {30 * HZ}; -static int min_t2[] = {1}, max_t2[] = {20 * HZ}; -static int min_t3[1], max_t3[] = {3600 * HZ}; -static int min_idle[1], max_idle[] = {65535 * HZ}; +static int min_t1[] = {1}, max_t1[] = {30000}; +static int min_t2[] = {1}, max_t2[] = {20000}; +static int min_t3[1], max_t3[] = {3600000}; +static int min_idle[1], max_idle[] = {65535000}; static int min_n2[] = {1}, max_n2[] = {31}; static int min_paclen[] = {1}, max_paclen[] = {512}; static int min_proto[1], max_proto[] = { AX25_PROTO_MAX }; -static int min_ds_timeout[1], max_ds_timeout[] = {65535 * HZ}; +static int min_ds_timeout[1], max_ds_timeout[] = {65535000}; static struct ctl_table_header *ax25_table_header; -- cgit v1.2.3 From 4d8937d0b113e8ec39f7d18cf13804f3b5fb8fd4 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 3 May 2006 23:27:47 -0700 Subject: [NETROM]: Eleminate HZ from NET/ROM kernel interfaces Convert all NET/ROM sysctl time values from jiffies to ms as units. Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- include/net/netrom.h | 8 ++++---- net/netrom/af_netrom.c | 15 ++++++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/netrom.h b/include/net/netrom.h index a5ee53bce62f..e0ca112024a3 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -42,11 +42,11 @@ enum { #define NR_COND_PEER_RX_BUSY 0x04 #define NR_COND_OWN_RX_BUSY 0x08 -#define NR_DEFAULT_T1 (120 * HZ) /* Outstanding frames - 120 seconds */ -#define NR_DEFAULT_T2 (5 * HZ) /* Response delay - 5 seconds */ +#define NR_DEFAULT_T1 120000 /* Outstanding frames - 120 seconds */ +#define NR_DEFAULT_T2 5000 /* Response delay - 5 seconds */ #define NR_DEFAULT_N2 3 /* Number of Retries - 3 */ -#define NR_DEFAULT_T4 (180 * HZ) /* Busy Delay - 180 seconds */ -#define NR_DEFAULT_IDLE (0 * 60 * HZ) /* No Activity Timeout - none */ +#define NR_DEFAULT_T4 180000 /* Busy Delay - 180 seconds */ +#define NR_DEFAULT_IDLE 0 /* No Activity Timeout - none */ #define NR_DEFAULT_WINDOW 4 /* Default Window Size - 4 */ #define NR_DEFAULT_OBS 6 /* Default Obsolescence Count - 6 */ #define NR_DEFAULT_QUAL 10 /* Default Neighbour Quality - 10 */ diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index d44981f5a619..ecd288beca7c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -425,11 +425,16 @@ static int nr_create(struct socket *sock, int protocol) nr_init_timers(sk); - nr->t1 = sysctl_netrom_transport_timeout; - nr->t2 = sysctl_netrom_transport_acknowledge_delay; - nr->n2 = sysctl_netrom_transport_maximum_tries; - nr->t4 = sysctl_netrom_transport_busy_delay; - nr->idle = sysctl_netrom_transport_no_activity_timeout; + nr->t1 = + msecs_to_jiffies(sysctl_netrom_transport_timeout); + nr->t2 = + msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); + nr->n2 = + msecs_to_jiffies(sysctl_netrom_transport_maximum_tries); + nr->t4 = + msecs_to_jiffies(sysctl_netrom_transport_busy_delay); + nr->idle = + msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); nr->window = sysctl_netrom_transport_requested_window_size; nr->bpqext = 1; -- cgit v1.2.3 From 82e84249f0ee098e004c8bd6d90a1640bd56cfbb Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 3 May 2006 23:28:20 -0700 Subject: [ROSE]: Eleminate HZ from ROSE kernel interfaces Convert all ROSE sysctl time values from jiffies to ms as units. Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- include/net/rose.h | 14 +++++++------- net/rose/af_rose.c | 10 +++++----- net/rose/rose_link.c | 6 ++++-- 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/rose.h b/include/net/rose.h index 3249b979605a..012b09ed2401 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -49,14 +49,14 @@ enum { ROSE_STATE_5 /* Deferred Call Acceptance */ }; -#define ROSE_DEFAULT_T0 (180 * HZ) /* Default T10 T20 value */ -#define ROSE_DEFAULT_T1 (200 * HZ) /* Default T11 T21 value */ -#define ROSE_DEFAULT_T2 (180 * HZ) /* Default T12 T22 value */ -#define ROSE_DEFAULT_T3 (180 * HZ) /* Default T13 T23 value */ -#define ROSE_DEFAULT_HB (5 * HZ) /* Default Holdback value */ -#define ROSE_DEFAULT_IDLE (0 * 60 * HZ) /* No Activity Timeout - none */ +#define ROSE_DEFAULT_T0 180000 /* Default T10 T20 value */ +#define ROSE_DEFAULT_T1 200000 /* Default T11 T21 value */ +#define ROSE_DEFAULT_T2 180000 /* Default T12 T22 value */ +#define ROSE_DEFAULT_T3 180000 /* Default T13 T23 value */ +#define ROSE_DEFAULT_HB 5000 /* Default Holdback value */ +#define ROSE_DEFAULT_IDLE 0 /* No Activity Timeout - none */ #define ROSE_DEFAULT_ROUTING 1 /* Default routing flag */ -#define ROSE_DEFAULT_FAIL_TIMEOUT (120 * HZ) /* Time until link considered usable */ +#define ROSE_DEFAULT_FAIL_TIMEOUT 120000 /* Time until link considered usable */ #define ROSE_DEFAULT_MAXVC 50 /* Maximum number of VCs per neighbour */ #define ROSE_DEFAULT_WINDOW_SIZE 7 /* Default window size */ diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ea65396d1619..ef4538ac84f0 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -518,11 +518,11 @@ static int rose_create(struct socket *sock, int protocol) init_timer(&rose->timer); init_timer(&rose->idletimer); - rose->t1 = sysctl_rose_call_request_timeout; - rose->t2 = sysctl_rose_reset_request_timeout; - rose->t3 = sysctl_rose_clear_request_timeout; - rose->hb = sysctl_rose_ack_hold_back_timeout; - rose->idle = sysctl_rose_no_activity_timeout; + rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout); + rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout); + rose->t3 = msecs_to_jiffies(sysctl_rose_clear_request_timeout); + rose->hb = msecs_to_jiffies(sysctl_rose_ack_hold_back_timeout); + rose->idle = msecs_to_jiffies(sysctl_rose_no_activity_timeout); rose->state = ROSE_STATE_0; diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index 09e9e9d04d92..bd86a63960ce 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -40,7 +40,8 @@ void rose_start_ftimer(struct rose_neigh *neigh) neigh->ftimer.data = (unsigned long)neigh; neigh->ftimer.function = &rose_ftimer_expiry; - neigh->ftimer.expires = jiffies + sysctl_rose_link_fail_timeout; + neigh->ftimer.expires = + jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout); add_timer(&neigh->ftimer); } @@ -51,7 +52,8 @@ static void rose_start_t0timer(struct rose_neigh *neigh) neigh->t0timer.data = (unsigned long)neigh; neigh->t0timer.function = &rose_t0timer_expiry; - neigh->t0timer.expires = jiffies + sysctl_rose_restart_request_timeout; + neigh->t0timer.expires = + jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout); add_timer(&neigh->t0timer); } -- cgit v1.2.3 From 5b802344357338a4d645beac8ca97470bcbe3542 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 May 2006 14:07:42 +0100 Subject: [ARM] 3490/1: i.MX: move uart resources to board files Patch from Sascha Hauer This patch moves the i.MX uart resources and the gpio pin setup to the board files. This allows the boards to decide how many internal uarts are connected to the outside world and whether they use rts/cts or not. Signed-off-by: Sascha Hauer Signed-off-by: Russell King --- arch/arm/mach-imx/generic.c | 52 -------------------------- arch/arm/mach-imx/mx1ads.c | 74 +++++++++++++++++++++++++++++++++++++ drivers/serial/imx.c | 40 ++++++++------------ include/asm-arm/arch-imx/imx-uart.h | 10 +++++ 4 files changed, 100 insertions(+), 76 deletions(-) create mode 100644 include/asm-arm/arch-imx/imx-uart.h (limited to 'include') diff --git a/arch/arm/mach-imx/generic.c b/arch/arm/mach-imx/generic.c index 9d8331be2b58..12ea58a3b84f 100644 --- a/arch/arm/mach-imx/generic.c +++ b/arch/arm/mach-imx/generic.c @@ -195,56 +195,6 @@ void __init imx_set_mmc_info(struct imxmmc_platform_data *info) } EXPORT_SYMBOL(imx_set_mmc_info); -static struct resource imx_uart1_resources[] = { - [0] = { - .start = 0x00206000, - .end = 0x002060FF, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = (UART1_MINT_RX), - .end = (UART1_MINT_RX), - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = (UART1_MINT_TX), - .end = (UART1_MINT_TX), - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device imx_uart1_device = { - .name = "imx-uart", - .id = 0, - .num_resources = ARRAY_SIZE(imx_uart1_resources), - .resource = imx_uart1_resources, -}; - -static struct resource imx_uart2_resources[] = { - [0] = { - .start = 0x00207000, - .end = 0x002070FF, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = (UART2_MINT_RX), - .end = (UART2_MINT_RX), - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = (UART2_MINT_TX), - .end = (UART2_MINT_TX), - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device imx_uart2_device = { - .name = "imx-uart", - .id = 1, - .num_resources = ARRAY_SIZE(imx_uart2_resources), - .resource = imx_uart2_resources, -}; - static struct imxfb_mach_info imx_fb_info; void __init set_imx_fb_info(struct imxfb_mach_info *hard_imx_fb_info) @@ -283,8 +233,6 @@ static struct platform_device imxfb_device = { static struct platform_device *devices[] __initdata = { &imx_mmc_device, &imxfb_device, - &imx_uart1_device, - &imx_uart2_device, }; static struct map_desc imx_io_desc[] __initdata = { diff --git a/arch/arm/mach-imx/mx1ads.c b/arch/arm/mach-imx/mx1ads.c index e34d0df90aed..e1f6c0bbe1e7 100644 --- a/arch/arm/mach-imx/mx1ads.c +++ b/arch/arm/mach-imx/mx1ads.c @@ -26,6 +26,7 @@ #include #include +#include #include #include "generic.h" @@ -48,8 +49,70 @@ static struct platform_device cs89x0_device = { .resource = cs89x0_resources, }; +static struct imxuart_platform_data uart_pdata = { + .flags = IMXUART_HAVE_RTSCTS, +}; + +static struct resource imx_uart1_resources[] = { + [0] = { + .start = 0x00206000, + .end = 0x002060FF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = (UART1_MINT_RX), + .end = (UART1_MINT_RX), + .flags = IORESOURCE_IRQ, + }, + [2] = { + .start = (UART1_MINT_TX), + .end = (UART1_MINT_TX), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device imx_uart1_device = { + .name = "imx-uart", + .id = 0, + .num_resources = ARRAY_SIZE(imx_uart1_resources), + .resource = imx_uart1_resources, + .dev = { + .platform_data = &uart_pdata, + } +}; + +static struct resource imx_uart2_resources[] = { + [0] = { + .start = 0x00207000, + .end = 0x002070FF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = (UART2_MINT_RX), + .end = (UART2_MINT_RX), + .flags = IORESOURCE_IRQ, + }, + [2] = { + .start = (UART2_MINT_TX), + .end = (UART2_MINT_TX), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device imx_uart2_device = { + .name = "imx-uart", + .id = 1, + .num_resources = ARRAY_SIZE(imx_uart2_resources), + .resource = imx_uart2_resources, + .dev = { + .platform_data = &uart_pdata, + } +}; + static struct platform_device *devices[] __initdata = { &cs89x0_device, + &imx_uart1_device, + &imx_uart2_device, }; #ifdef CONFIG_MMC_IMX @@ -75,6 +138,17 @@ mx1ads_init(void) imx_gpio_mode(GPIO_PORTB | GPIO_GIUS | GPIO_IN | 20); imx_set_mmc_info(&mx1ads_mmc_info); #endif + + imx_gpio_mode(PC9_PF_UART1_CTS); + imx_gpio_mode(PC10_PF_UART1_RTS); + imx_gpio_mode(PC11_PF_UART1_TXD); + imx_gpio_mode(PC12_PF_UART1_RXD); + + imx_gpio_mode(PB28_PF_UART2_CTS); + imx_gpio_mode(PB29_PF_UART2_RTS); + imx_gpio_mode(PB30_PF_UART2_TXD); + imx_gpio_mode(PB31_PF_UART2_RXD); + platform_add_devices(devices, ARRAY_SIZE(devices)); } diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c index c3b7a6673e9c..d202eb4f3848 100644 --- a/drivers/serial/imx.c +++ b/drivers/serial/imx.c @@ -45,6 +45,7 @@ #include #include #include +#include /* We've been assigned a range on the "Low-density serial ports" major */ #define SERIAL_IMX_MAJOR 204 @@ -73,7 +74,8 @@ struct imx_port { struct uart_port port; struct timer_list timer; unsigned int old_status; - int txirq,rxirq,rtsirq; + int txirq,rxirq,rtsirq; + int have_rtscts:1; }; /* @@ -491,8 +493,12 @@ imx_set_termios(struct uart_port *port, struct termios *termios, ucr2 = UCR2_SRST | UCR2_IRTS; if (termios->c_cflag & CRTSCTS) { - ucr2 &= ~UCR2_IRTS; - ucr2 |= UCR2_CTSC; + if( sport->have_rtscts ) { + ucr2 &= ~UCR2_IRTS; + ucr2 |= UCR2_CTSC; + } else { + termios->c_cflag &= ~CRTSCTS; + } } if (termios->c_cflag & CSTOPB) @@ -719,27 +725,6 @@ static void __init imx_init_ports(void) imx_ports[i].timer.function = imx_timeout; imx_ports[i].timer.data = (unsigned long)&imx_ports[i]; } - - imx_gpio_mode(PC9_PF_UART1_CTS); - imx_gpio_mode(PC10_PF_UART1_RTS); - imx_gpio_mode(PC11_PF_UART1_TXD); - imx_gpio_mode(PC12_PF_UART1_RXD); - imx_gpio_mode(PB28_PF_UART2_CTS); - imx_gpio_mode(PB29_PF_UART2_RTS); - - imx_gpio_mode(PB30_PF_UART2_TXD); - imx_gpio_mode(PB31_PF_UART2_RXD); - -#if 0 /* We don't need these, on the mx1 the _modem_ side of the uart - * is implemented. - */ - imx_gpio_mode(PD7_AF_UART2_DTR); - imx_gpio_mode(PD8_AF_UART2_DCD); - imx_gpio_mode(PD9_AF_UART2_RI); - imx_gpio_mode(PD10_AF_UART2_DSR); -#endif - - } #ifdef CONFIG_SERIAL_IMX_CONSOLE @@ -932,7 +917,14 @@ static int serial_imx_resume(struct platform_device *dev) static int serial_imx_probe(struct platform_device *dev) { + struct imxuart_platform_data *pdata; + imx_ports[dev->id].port.dev = &dev->dev; + + pdata = (struct imxuart_platform_data *)dev->dev.platform_data; + if(pdata && (pdata->flags & IMXUART_HAVE_RTSCTS)) + imx_ports[dev->id].have_rtscts = 1; + uart_add_one_port(&imx_reg, &imx_ports[dev->id].port); platform_set_drvdata(dev, &imx_ports[dev->id]); return 0; diff --git a/include/asm-arm/arch-imx/imx-uart.h b/include/asm-arm/arch-imx/imx-uart.h new file mode 100644 index 000000000000..3a685e1780ea --- /dev/null +++ b/include/asm-arm/arch-imx/imx-uart.h @@ -0,0 +1,10 @@ +#ifndef ASMARM_ARCH_UART_H +#define ASMARM_ARCH_UART_H + +#define IMXUART_HAVE_RTSCTS (1<<0) + +struct imxuart_platform_data { + unsigned int flags; +}; + +#endif -- cgit v1.2.3 From ff10952a547dad934d9ed9afc5cf579ed1ccb53a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 5 May 2006 15:11:14 +0100 Subject: [ARM] 3494/1: asm-arm/bug.h needs linux/stddef.h Patch from Nicolas Pitre ... for the definition of NULL. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- include/asm-arm/bug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-arm/bug.h b/include/asm-arm/bug.h index 7fb02138f585..5ab8216f5204 100644 --- a/include/asm-arm/bug.h +++ b/include/asm-arm/bug.h @@ -2,6 +2,7 @@ #define _ASMARM_BUG_H #include +#include #ifdef CONFIG_BUG #ifdef CONFIG_DEBUG_BUGVERBOSE -- cgit v1.2.3 From 99532559dc7a8e686b2cef14c780a7ad5dbd4a31 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 5 May 2006 22:32:24 +0100 Subject: [ARM] 3500/1: fix PXA27x DMA allocation priority Patch from Nicolas Pitre Intel PXA27x developers manual section 5.4.1.1 lists a priority distribution for the DMA channels differently than what the code currently assumes. This patch fixes that. Noticed by Simon Vogl Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mach-pxa/dma.c | 17 +++++------------ include/asm-arm/arch-pxa/dma.h | 26 ++++++++++++++------------ 2 files changed, 19 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-pxa/dma.c b/arch/arm/mach-pxa/dma.c index 458112b21e25..7d8c85486c66 100644 --- a/arch/arm/mach-pxa/dma.c +++ b/arch/arm/mach-pxa/dma.c @@ -45,23 +45,16 @@ int pxa_request_dma (char *name, pxa_dma_prio prio, local_irq_save(flags); - /* try grabbing a DMA channel with the requested priority */ - for (i = prio; i < prio + PXA_DMA_NBCH(prio); i++) { - if (!dma_channels[i].name) { - found = 1; - break; - } - } - - if (!found) { - /* requested prio group is full, try hier priorities */ - for (i = prio-1; i >= 0; i--) { + do { + /* try grabbing a DMA channel with the requested priority */ + pxa_for_each_dma_prio (i, prio) { if (!dma_channels[i].name) { found = 1; break; } } - } + /* if requested prio group is full, try a hier priority */ + } while (!found && prio--); if (found) { DCSR(i) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR; diff --git a/include/asm-arm/arch-pxa/dma.h b/include/asm-arm/arch-pxa/dma.h index 3e88a2a02a0f..a008150abc59 100644 --- a/include/asm-arm/arch-pxa/dma.h +++ b/include/asm-arm/arch-pxa/dma.h @@ -24,27 +24,29 @@ typedef struct pxa_dma_desc { volatile u32 dcmd; /* DCMD value for the current transfer */ } pxa_dma_desc; +typedef enum { + DMA_PRIO_HIGH = 0, + DMA_PRIO_MEDIUM = 1, + DMA_PRIO_LOW = 2 +} pxa_dma_prio; + #if defined(CONFIG_PXA27x) #define PXA_DMA_CHANNELS 32 -#define PXA_DMA_NBCH(prio) ((prio == DMA_PRIO_LOW) ? 16 : 8) -typedef enum { - DMA_PRIO_HIGH = 0, - DMA_PRIO_MEDIUM = 8, - DMA_PRIO_LOW = 16 -} pxa_dma_prio; +#define pxa_for_each_dma_prio(ch, prio) \ +for ( \ + ch = prio * 4; \ + ch != (4 << prio) + 16; \ + ch = (ch + 1 == (4 << prio)) ? (prio * 4 + 16) : (ch + 1) \ +) #elif defined(CONFIG_PXA25x) #define PXA_DMA_CHANNELS 16 -#define PXA_DMA_NBCH(prio) ((prio == DMA_PRIO_LOW) ? 8 : 4) -typedef enum { - DMA_PRIO_HIGH = 0, - DMA_PRIO_MEDIUM = 4, - DMA_PRIO_LOW = 8 -} pxa_dma_prio; +#define pxa_for_each_dma_prio(ch, prio) \ + for (ch = prio * 4; ch != (4 << prio); ch++) #endif -- cgit v1.2.3 From 568cb09b9d889b6f2852ede19772b8e9eed36c1e Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 5 May 2006 22:35:05 +0100 Subject: [ARM] 3495/1: EABI: undefine removed syscalls, but... Patch from Nicolas Pitre ... but only for user space. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- include/asm-arm/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index 26f2f4828e03..cbf39a56dbe7 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -363,7 +363,7 @@ /* * The following syscalls are obsolete and no longer available for EABI. */ -#if defined(__ARM_EABI__) +#if defined(__ARM_EABI__) && !defined(__KERNEL__) #undef __NR_time #undef __NR_umount #undef __NR_stime -- cgit v1.2.3 From 7c3ceb4fb9667f34f1599a062efecf4cdc4a4ce5 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 5 May 2006 17:02:09 -0700 Subject: [SCTP]: Allow spillover of receive buffer to avoid deadlock. This patch fixes a deadlock situation in the receive path by allowing temporary spillover of the receive buffer. - If the chunk we receive has a tsn that immediately follows the ctsn, accept it even if we run out of receive buffer space and renege data with higher TSNs. - Once we accept one chunk in a packet, accept all the remaining chunks even if we run out of receive buffer space. Signed-off-by: Neil Horman Acked-by: Mark Butler Acked-by: Vlad Yasevich Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/inqueue.c | 1 + net/sctp/sm_statefuns.c | 46 ++++++++++++++++++++++++++++++++++++---------- 3 files changed, 38 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index eba99f375517..7f4fea173fb1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -712,6 +712,7 @@ struct sctp_chunk { __u8 tsn_gap_acked; /* Is this chunk acked by a GAP ACK? */ __s8 fast_retransmit; /* Is this chunk fast retransmitted? */ __u8 tsn_missing_report; /* Data chunk missing counter. */ + __u8 data_accepted; /* At least 1 chunk in this packet accepted */ }; void sctp_chunk_hold(struct sctp_chunk *); diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 297b8951463e..cf0c767d43ae 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -149,6 +149,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) /* This is the first chunk in the packet. */ chunk->singleton = 1; ch = (sctp_chunkhdr_t *) chunk->skb->data; + chunk->data_accepted = 0; } chunk->chunk_hdr = ch; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 2b9a832b29a7..f5d131f52a70 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5151,7 +5151,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, int tmp; __u32 tsn; int account_value; + struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; + int rcvbuf_over = 0; data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data; skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); @@ -5162,10 +5164,16 @@ static int sctp_eat_data(const struct sctp_association *asoc, /* ASSERT: Now skb->data is really the user data. */ /* - * if we are established, and we have used up our receive - * buffer memory, drop the frame - */ - if (asoc->state == SCTP_STATE_ESTABLISHED) { + * If we are established, and we have used up our receive buffer + * memory, think about droping the frame. + * Note that we have an opportunity to improve performance here. + * If we accept one chunk from an skbuff, we have to keep all the + * memory of that skbuff around until the chunk is read into user + * space. Therefore, once we accept 1 chunk we may as well accept all + * remaining chunks in the skbuff. The data_accepted flag helps us do + * that. + */ + if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) { /* * If the receive buffer policy is 1, then each * association can allocate up to sk_rcvbuf bytes @@ -5176,9 +5184,25 @@ static int sctp_eat_data(const struct sctp_association *asoc, account_value = atomic_read(&asoc->rmem_alloc); else account_value = atomic_read(&sk->sk_rmem_alloc); - - if (account_value > sk->sk_rcvbuf) - return SCTP_IERROR_IGNORE_TSN; + if (account_value > sk->sk_rcvbuf) { + /* + * We need to make forward progress, even when we are + * under memory pressure, so we always allow the + * next tsn after the ctsn ack point to be accepted. + * This lets us avoid deadlocks in which we have to + * drop frames that would otherwise let us drain the + * receive queue. + */ + if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn) + return SCTP_IERROR_IGNORE_TSN; + + /* + * We're going to accept the frame but we should renege + * to make space for it. This will send us down that + * path later in this function. + */ + rcvbuf_over = 1; + } } /* Process ECN based congestion. @@ -5226,6 +5250,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, datalen -= sizeof(sctp_data_chunk_t); deliver = SCTP_CMD_CHUNK_ULP; + chunk->data_accepted = 1; /* Think about partial delivery. */ if ((datalen >= asoc->rwnd) && (!asoc->ulpq.pd_mode)) { @@ -5242,7 +5267,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, * large spill over. */ if (!asoc->rwnd || asoc->rwnd_over || - (datalen > asoc->rwnd + asoc->frag_point)) { + (datalen > asoc->rwnd + asoc->frag_point) || + rcvbuf_over) { /* If this is the next TSN, consider reneging to make * room. Note: Playing nice with a confused sender. A @@ -5250,8 +5276,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, * space and in the future we may want to detect and * do more drastic reneging. */ - if (sctp_tsnmap_has_gap(&asoc->peer.tsn_map) && - (sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1) == tsn) { + if (sctp_tsnmap_has_gap(map) && + (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { SCTP_DEBUG_PRINTK("Reneging for tsn:%u\n", tsn); deliver = SCTP_CMD_RENEGE; } else { -- cgit v1.2.3