diff options
Diffstat (limited to 'include')
117 files changed, 4054 insertions, 2118 deletions
diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h index eda04546cdf6..473d584b1d31 100644 --- a/include/acpi/acexcep.h +++ b/include/acpi/acexcep.h @@ -103,8 +103,9 @@ #define AE_BAD_OCTAL_CONSTANT (acpi_status) (0x0006 | AE_CODE_PROGRAMMER) #define AE_BAD_DECIMAL_CONSTANT (acpi_status) (0x0007 | AE_CODE_PROGRAMMER) #define AE_MISSING_ARGUMENTS (acpi_status) (0x0008 | AE_CODE_PROGRAMMER) +#define AE_BAD_ADDRESS (acpi_status) (0x0009 | AE_CODE_PROGRAMMER) -#define AE_CODE_PGM_MAX 0x0008 +#define AE_CODE_PGM_MAX 0x0009 /* * Acpi table exceptions @@ -224,7 +225,8 @@ char const *acpi_gbl_exception_names_pgm[] = { "AE_BAD_HEX_CONSTANT", "AE_BAD_OCTAL_CONSTANT", "AE_BAD_DECIMAL_CONSTANT", - "AE_MISSING_ARGUMENTS" + "AE_MISSING_ARGUMENTS", + "AE_BAD_ADDRESS" }; char const *acpi_gbl_exception_names_tbl[] = { diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e9f6574930ef..a2228511d4be 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -88,44 +88,30 @@ struct acpi_device; typedef int (*acpi_op_add) (struct acpi_device * device); typedef int (*acpi_op_remove) (struct acpi_device * device, int type); -typedef int (*acpi_op_lock) (struct acpi_device * device, int type); typedef int (*acpi_op_start) (struct acpi_device * device); typedef int (*acpi_op_stop) (struct acpi_device * device, int type); typedef int (*acpi_op_suspend) (struct acpi_device * device, pm_message_t state); typedef int (*acpi_op_resume) (struct acpi_device * device); -typedef int (*acpi_op_scan) (struct acpi_device * device); typedef int (*acpi_op_bind) (struct acpi_device * device); typedef int (*acpi_op_unbind) (struct acpi_device * device); -typedef int (*acpi_op_shutdown) (struct acpi_device * device); +typedef void (*acpi_op_notify) (struct acpi_device * device, u32 event); struct acpi_bus_ops { u32 acpi_op_add:1; - u32 acpi_op_remove:1; - u32 acpi_op_lock:1; u32 acpi_op_start:1; - u32 acpi_op_stop:1; - u32 acpi_op_suspend:1; - u32 acpi_op_resume:1; - u32 
acpi_op_scan:1; - u32 acpi_op_bind:1; - u32 acpi_op_unbind:1; - u32 acpi_op_shutdown:1; - u32 reserved:21; }; struct acpi_device_ops { acpi_op_add add; acpi_op_remove remove; - acpi_op_lock lock; acpi_op_start start; acpi_op_stop stop; acpi_op_suspend suspend; acpi_op_resume resume; - acpi_op_scan scan; acpi_op_bind bind; acpi_op_unbind unbind; - acpi_op_shutdown shutdown; + acpi_op_notify notify; }; struct acpi_driver { diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 5fc1bb0f4a90..0352c8f0b05b 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -67,6 +67,16 @@ #define ACPI_BAY_HID "LNXIOBAY" #define ACPI_DOCK_HID "LNXDOCK" +/* + * For fixed hardware buttons, we fabricate acpi_devices with HID + * ACPI_BUTTON_HID_POWERF or ACPI_BUTTON_HID_SLEEPF. Fixed hardware + * signals only an event; it doesn't supply a notification value. + * To allow drivers to treat notifications from fixed hardware the + * same as those from real devices, we turn the events into this + * notification value. 
+ */ +#define ACPI_FIXED_HARDWARE_EVENT 0x100 + /* -------------------------------------------------------------------------- PCI -------------------------------------------------------------------------- */ @@ -99,24 +109,6 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_device *device, int domain, int bus); /* -------------------------------------------------------------------------- - Power Resource - -------------------------------------------------------------------------- */ - -int acpi_device_sleep_wake(struct acpi_device *dev, - int enable, int sleep_state, int dev_state); -int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state); -int acpi_disable_wakeup_device_power(struct acpi_device *dev); -int acpi_power_get_inferred_state(struct acpi_device *device); -int acpi_power_transition(struct acpi_device *device, int state); -extern int acpi_power_nocheck; - -/* -------------------------------------------------------------------------- - Embedded Controller - -------------------------------------------------------------------------- */ -int acpi_ec_ecdt_probe(void); -int acpi_boot_ec_enable(void); - -/* -------------------------------------------------------------------------- Processor -------------------------------------------------------------------------- */ @@ -165,9 +157,4 @@ static inline void unregister_hotplug_dock_device(acpi_handle handle) } #endif -/*-------------------------------------------------------------------------- - Suspend/Resume - -------------------------------------------------------------------------- */ -extern int acpi_sleep_init(void); - #endif /*__ACPI_DRIVERS_H__*/ diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index ab0b85cf21f3..3e798593b17b 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -242,10 +242,6 @@ acpi_os_derive_pci_id(acpi_handle rhandle, acpi_status acpi_os_validate_interface(char *interface); acpi_status acpi_osi_invalidate(char* interface); -acpi_status 
-acpi_os_validate_address(u8 space_id, acpi_physical_address address, - acpi_size length, char *name); - u64 acpi_os_get_timer(void); acpi_status acpi_os_signal(u32 function, void *info); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index cc40102fe2f3..aeaf7cd41dc7 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20081204 +#define ACPI_CA_VERSION 0x20090320 #include "actypes.h" #include "actbl.h" @@ -349,17 +349,15 @@ acpi_resource_to_address64(struct acpi_resource *resource, */ acpi_status acpi_reset(void); -acpi_status acpi_get_register(u32 register_id, u32 * return_value); +acpi_status acpi_read_bit_register(u32 register_id, u32 *return_value); -acpi_status acpi_get_register_unlocked(u32 register_id, u32 *return_value); +acpi_status acpi_write_bit_register(u32 register_id, u32 value); -acpi_status acpi_set_register(u32 register_id, u32 value); +acpi_status acpi_set_firmware_waking_vector(u32 physical_address); -acpi_status -acpi_set_firmware_waking_vector(u32 physical_address); - -acpi_status -acpi_set_firmware_waking_vector64(u64 physical_address); +#if ACPI_MACHINE_WIDTH == 64 +acpi_status acpi_set_firmware_waking_vector64(u64 physical_address); +#endif acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg); diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index bf8d4cfd8cf5..222733d01f36 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -214,11 +214,11 @@ struct acpi_table_fadt { u16 flush_size; /* Processor's memory cache line width, in bytes */ u16 flush_stride; /* Number of flush strides that need to be read */ u8 duty_offset; /* Processor duty cycle index in processor's P_CNT reg */ - u8 duty_width; /* Processor duty cycle value bit width in P_CNT register. 
*/ + u8 duty_width; /* Processor duty cycle value bit width in P_CNT register */ u8 day_alarm; /* Index to day-of-month alarm in RTC CMOS RAM */ u8 month_alarm; /* Index to month-of-year alarm in RTC CMOS RAM */ u8 century; /* Index to century in RTC CMOS RAM */ - u16 boot_flags; /* IA-PC Boot Architecture Flags. See Table 5-10 for description */ + u16 boot_flags; /* IA-PC Boot Architecture Flags (see below for individual flags) */ u8 reserved; /* Reserved, must be zero */ u32 flags; /* Miscellaneous flag bits (see below for individual flags) */ struct acpi_generic_address reset_register; /* 64-bit address of the Reset register */ @@ -236,32 +236,41 @@ struct acpi_table_fadt { struct acpi_generic_address xgpe1_block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */ }; +/* FADT Boot Architecture Flags (boot_flags) */ + +#define ACPI_FADT_LEGACY_DEVICES (1) /* 00: [V2] System has LPC or ISA bus devices */ +#define ACPI_FADT_8042 (1<<1) /* 01: [V3] System has an 8042 controller on port 60/64 */ +#define ACPI_FADT_NO_VGA (1<<2) /* 02: [V4] It is not safe to probe for VGA hardware */ +#define ACPI_FADT_NO_MSI (1<<3) /* 03: [V4] Message Signaled Interrupts (MSI) must not be enabled */ +#define ACPI_FADT_NO_ASPM (1<<4) /* 04: [V4] PCIe ASPM control must not be enabled */ + +#define FADT2_REVISION_ID 3 + /* FADT flags */ -#define ACPI_FADT_WBINVD (1) /* 00: The wbinvd instruction works properly */ -#define ACPI_FADT_WBINVD_FLUSH (1<<1) /* 01: The wbinvd flushes but does not invalidate */ -#define ACPI_FADT_C1_SUPPORTED (1<<2) /* 02: All processors support C1 state */ -#define ACPI_FADT_C2_MP_SUPPORTED (1<<3) /* 03: C2 state works on MP system */ -#define ACPI_FADT_POWER_BUTTON (1<<4) /* 04: Power button is handled as a generic feature */ -#define ACPI_FADT_SLEEP_BUTTON (1<<5) /* 05: Sleep button is handled as a generic feature, or not present */ -#define ACPI_FADT_FIXED_RTC (1<<6) /* 06: RTC wakeup stat not in fixed register space */ -#define 
ACPI_FADT_S4_RTC_WAKE (1<<7) /* 07: RTC wakeup possible from S4 */ -#define ACPI_FADT_32BIT_TIMER (1<<8) /* 08: tmr_val is 32 bits 0=24-bits */ -#define ACPI_FADT_DOCKING_SUPPORTED (1<<9) /* 09: Docking supported */ -#define ACPI_FADT_RESET_REGISTER (1<<10) /* 10: System reset via the FADT RESET_REG supported */ -#define ACPI_FADT_SEALED_CASE (1<<11) /* 11: No internal expansion capabilities and case is sealed */ -#define ACPI_FADT_HEADLESS (1<<12) /* 12: No local video capabilities or local input devices */ -#define ACPI_FADT_SLEEP_TYPE (1<<13) /* 13: Must execute native instruction after writing SLP_TYPx register */ -#define ACPI_FADT_PCI_EXPRESS_WAKE (1<<14) /* 14: System supports PCIEXP_WAKE (STS/EN) bits (ACPI 3.0) */ -#define ACPI_FADT_PLATFORM_CLOCK (1<<15) /* 15: OSPM should use platform-provided timer (ACPI 3.0) */ -#define ACPI_FADT_S4_RTC_VALID (1<<16) /* 16: Contents of RTC_STS valid after S4 wake (ACPI 3.0) */ -#define ACPI_FADT_REMOTE_POWER_ON (1<<17) /* 17: System is compatible with remote power on (ACPI 3.0) */ -#define ACPI_FADT_APIC_CLUSTER (1<<18) /* 18: All local APICs must use cluster model (ACPI 3.0) */ -#define ACPI_FADT_APIC_PHYSICAL (1<<19) /* 19: All local x_aPICs must use physical dest mode (ACPI 3.0) */ +#define ACPI_FADT_WBINVD (1) /* 00: [V1] The wbinvd instruction works properly */ +#define ACPI_FADT_WBINVD_FLUSH (1<<1) /* 01: [V1] wbinvd flushes but does not invalidate caches */ +#define ACPI_FADT_C1_SUPPORTED (1<<2) /* 02: [V1] All processors support C1 state */ +#define ACPI_FADT_C2_MP_SUPPORTED (1<<3) /* 03: [V1] C2 state works on MP system */ +#define ACPI_FADT_POWER_BUTTON (1<<4) /* 04: [V1] Power button is handled as a control method device */ +#define ACPI_FADT_SLEEP_BUTTON (1<<5) /* 05: [V1] Sleep button is handled as a control method device */ +#define ACPI_FADT_FIXED_RTC (1<<6) /* 06: [V1] RTC wakeup status not in fixed register space */ +#define ACPI_FADT_S4_RTC_WAKE (1<<7) /* 07: [V1] RTC alarm can wake system from S4 */ 
+#define ACPI_FADT_32BIT_TIMER (1<<8) /* 08: [V1] ACPI timer width is 32-bit (0=24-bit) */ +#define ACPI_FADT_DOCKING_SUPPORTED (1<<9) /* 09: [V1] Docking supported */ +#define ACPI_FADT_RESET_REGISTER (1<<10) /* 10: [V2] System reset via the FADT RESET_REG supported */ +#define ACPI_FADT_SEALED_CASE (1<<11) /* 11: [V3] No internal expansion capabilities and case is sealed */ +#define ACPI_FADT_HEADLESS (1<<12) /* 12: [V3] No local video capabilities or local input devices */ +#define ACPI_FADT_SLEEP_TYPE (1<<13) /* 13: [V3] Must execute native instruction after writing SLP_TYPx register */ +#define ACPI_FADT_PCI_EXPRESS_WAKE (1<<14) /* 14: [V4] System supports PCIEXP_WAKE (STS/EN) bits (ACPI 3.0) */ +#define ACPI_FADT_PLATFORM_CLOCK (1<<15) /* 15: [V4] OSPM should use platform-provided timer (ACPI 3.0) */ +#define ACPI_FADT_S4_RTC_VALID (1<<16) /* 16: [V4] Contents of RTC_STS valid after S4 wake (ACPI 3.0) */ +#define ACPI_FADT_REMOTE_POWER_ON (1<<17) /* 17: [V4] System is compatible with remote power on (ACPI 3.0) */ +#define ACPI_FADT_APIC_CLUSTER (1<<18) /* 18: [V4] All local APICs must use cluster model (ACPI 3.0) */ +#define ACPI_FADT_APIC_PHYSICAL (1<<19) /* 19: [V4] All local x_aPICs must use physical dest mode (ACPI 3.0) */ + +/* FADT Prefered Power Management Profiles */ -/* - * FADT Prefered Power Management Profiles - */ enum acpi_prefered_pm_profiles { PM_UNSPECIFIED = 0, PM_DESKTOP = 1, @@ -272,16 +281,6 @@ enum acpi_prefered_pm_profiles { PM_APPLIANCE_PC = 6 }; -/* FADT Boot Arch Flags */ - -#define BAF_LEGACY_DEVICES 0x0001 -#define BAF_8042_KEYBOARD_CONTROLLER 0x0002 -#define BAF_MSI_NOT_SUPPORTED 0x0008 -#define BAF_PCIE_ASPM_CONTROL 0x0010 - -#define FADT2_REVISION_ID 3 -#define FADT2_MINUS_REVISION_ID 2 - /* Reset to default packing */ #pragma pack() @@ -310,8 +309,9 @@ struct acpi_table_desc { #define ACPI_TABLE_ORIGIN_UNKNOWN (0) #define ACPI_TABLE_ORIGIN_MAPPED (1) #define ACPI_TABLE_ORIGIN_ALLOCATED (2) -#define ACPI_TABLE_ORIGIN_MASK (3) 
-#define ACPI_TABLE_IS_LOADED (4) +#define ACPI_TABLE_ORIGIN_OVERRIDE (4) +#define ACPI_TABLE_ORIGIN_MASK (7) +#define ACPI_TABLE_IS_LOADED (8) /* * Get the remaining ACPI tables diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 18963b968114..59ade0752473 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1016,9 +1016,9 @@ struct acpi_madt_interrupt_source { struct acpi_madt_local_x2apic { struct acpi_subtable_header header; u16 reserved; /* Reserved - must be zero */ - u32 local_apic_id; /* Processor X2_APIC ID */ + u32 local_apic_id; /* Processor x2APIC ID */ u32 lapic_flags; - u32 uid; /* Extended X2_APIC processor ID */ + u32 uid; /* ACPI processor UID */ }; /* 10: Local X2APIC NMI (07/2008) */ @@ -1026,7 +1026,7 @@ struct acpi_madt_local_x2apic { struct acpi_madt_local_x2apic_nmi { struct acpi_subtable_header header; u16 inti_flags; - u32 uid; /* Processor X2_APIC ID */ + u32 uid; /* ACPI processor UID */ u8 lint; /* LINTn to which NMI is connected */ u8 reserved[3]; }; diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index a20aab510173..f555d927f7c0 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -777,17 +777,25 @@ typedef u8 acpi_adr_space_type; #define ACPI_BITREG_SCI_ENABLE 0x0E #define ACPI_BITREG_BUS_MASTER_RLD 0x0F #define ACPI_BITREG_GLOBAL_LOCK_RELEASE 0x10 -#define ACPI_BITREG_SLEEP_TYPE_A 0x11 -#define ACPI_BITREG_SLEEP_TYPE_B 0x12 -#define ACPI_BITREG_SLEEP_ENABLE 0x13 +#define ACPI_BITREG_SLEEP_TYPE 0x11 +#define ACPI_BITREG_SLEEP_ENABLE 0x12 /* PM2 Control register */ -#define ACPI_BITREG_ARB_DISABLE 0x14 +#define ACPI_BITREG_ARB_DISABLE 0x13 -#define ACPI_BITREG_MAX 0x14 +#define ACPI_BITREG_MAX 0x13 #define ACPI_NUM_BITREG ACPI_BITREG_MAX + 1 +/* Status register values. A 1 clears a status bit. 
0 = no effect */ + +#define ACPI_CLEAR_STATUS 1 + +/* Enable and Control register values */ + +#define ACPI_ENABLE_EVENT 1 +#define ACPI_DISABLE_EVENT 0 + /* * External ACPI object definition */ diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 0574add2a1e3..b09c4fde9725 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -322,7 +322,7 @@ static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr) int acpi_processor_tstate_has_changed(struct acpi_processor *pr); int acpi_processor_get_throttling_info(struct acpi_processor *pr); extern int acpi_processor_set_throttling(struct acpi_processor *pr, int state); -extern struct file_operations acpi_processor_throttling_fops; +extern const struct file_operations acpi_processor_throttling_fops; extern void acpi_processor_throttling_init(void); /* in processor_idle.c */ int acpi_processor_power_init(struct acpi_processor *pr, @@ -336,7 +336,7 @@ extern struct cpuidle_driver acpi_idle_driver; /* in processor_thermal.c */ int acpi_processor_get_limit_info(struct acpi_processor *pr); -extern struct file_operations acpi_processor_limit_fops; +extern const struct file_operations acpi_processor_limit_fops; extern struct thermal_cooling_device_ops processor_cooling_ops; #ifdef CONFIG_CPU_FREQ void acpi_thermal_cpufreq_init(void); diff --git a/include/acpi/video.h b/include/acpi/video.h new file mode 100644 index 000000000000..f0275bb79ce4 --- /dev/null +++ b/include/acpi/video.h @@ -0,0 +1,11 @@ +#ifndef __ACPI_VIDEO_H +#define __ACPI_VIDEO_H + +#if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE) +extern int acpi_video_register(void); +#else +static inline int acpi_video_register(void) { return 0; } +#endif + +#endif + diff --git a/include/asm-frv/ftrace.h b/include/asm-frv/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/include/asm-frv/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/include/asm-generic/vmlinux.lds.h 
b/include/asm-generic/vmlinux.lds.h index a654d724d3b0..7fa660fd449c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -61,6 +61,30 @@ #define BRANCH_PROFILE() #endif +#ifdef CONFIG_EVENT_TRACER +#define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ + *(_ftrace_events) \ + VMLINUX_SYMBOL(__stop_ftrace_events) = .; +#else +#define FTRACE_EVENTS() +#endif + +#ifdef CONFIG_TRACING +#define TRACE_PRINTKS() VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .; \ + *(__trace_printk_fmt) /* Trace_printk fmt' pointer */ \ + VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .; +#else +#define TRACE_PRINTKS() +#endif + +#ifdef CONFIG_FTRACE_SYSCALLS +#define TRACE_SYSCALLS() VMLINUX_SYMBOL(__start_syscalls_metadata) = .; \ + *(__syscalls_metadata) \ + VMLINUX_SYMBOL(__stop_syscalls_metadata) = .; +#else +#define TRACE_SYSCALLS() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -86,7 +110,10 @@ *(__verbose) \ VMLINUX_SYMBOL(__stop___verbose) = .; \ LIKELY_PROFILE() \ - BRANCH_PROFILE() + BRANCH_PROFILE() \ + TRACE_PRINTKS() \ + FTRACE_EVENTS() \ + TRACE_SYSCALLS() #define RO_DATA(align) \ . 
= ALIGN((align)); \ diff --git a/include/asm-m32r/ftrace.h b/include/asm-m32r/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/include/asm-m32r/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/include/asm-mn10300/ftrace.h b/include/asm-mn10300/ftrace.h new file mode 100644 index 000000000000..40a8c178f10d --- /dev/null +++ b/include/asm-mn10300/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d047f846c3ed..6586cbd0d4af 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -97,6 +97,7 @@ void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); /* the following four functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); +void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); void acpi_numa_arch_fixup(void); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 45f6297821bd..5fc2ef8d97fa 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -21,6 +21,15 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> +/* on architectures without dma-mapping capabilities we need to ensure + * that the asynchronous path compiles away + */ +#ifdef CONFIG_HAS_DMA +#define __async_inline +#else +#define __async_inline __always_inline +#endif + /** * dma_chan_ref - object used to manage dma channels received from the * dmaengine core. 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index bee52abb8a4d..0ec2c594868e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -24,8 +24,8 @@ struct dentry; */ enum bdi_state { BDI_pdflush, /* A pdflush thread is working this device */ - BDI_write_congested, /* The write queue is getting full */ - BDI_read_congested, /* The read queue is getting full */ + BDI_async_congested, /* The async (write) queue is getting full */ + BDI_sync_congested, /* The sync queue is getting full */ BDI_unused, /* Available bits start here */ }; @@ -215,18 +215,18 @@ static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits) static inline int bdi_read_congested(struct backing_dev_info *bdi) { - return bdi_congested(bdi, 1 << BDI_read_congested); + return bdi_congested(bdi, 1 << BDI_sync_congested); } static inline int bdi_write_congested(struct backing_dev_info *bdi) { - return bdi_congested(bdi, 1 << BDI_write_congested); + return bdi_congested(bdi, 1 << BDI_async_congested); } static inline int bdi_rw_congested(struct backing_dev_info *bdi) { - return bdi_congested(bdi, (1 << BDI_read_congested)| - (1 << BDI_write_congested)); + return bdi_congested(bdi, (1 << BDI_sync_congested) | + (1 << BDI_async_congested)); } void clear_bdi_congested(struct backing_dev_info *bdi, int rw); diff --git a/include/linux/bio.h b/include/linux/bio.h index b05b1d4d17d2..b900d2c67d29 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -145,20 +145,21 @@ struct bio { * bit 2 -- barrier * Insert a serialization point in the IO queue, forcing previously * submitted IO to be completed before this one is issued. - * bit 3 -- synchronous I/O hint: the block layer will unplug immediately - * Note that this does NOT indicate that the IO itself is sync, just - * that the block layer will not postpone issue of this IO by plugging. - * bit 4 -- metadata request + * bit 3 -- synchronous I/O hint. 
+ * bit 4 -- Unplug the device immediately after submitting this bio. + * bit 5 -- metadata request * Used for tracing to differentiate metadata and data IO. May also * get some preferential treatment in the IO scheduler - * bit 5 -- discard sectors + * bit 6 -- discard sectors * Informs the lower level device that this range of sectors is no longer * used by the file system and may thus be freed by the device. Used * for flash based storage. - * bit 6 -- fail fast device errors - * bit 7 -- fail fast transport errors - * bit 8 -- fail fast driver errors + * bit 7 -- fail fast device errors + * bit 8 -- fail fast transport errors + * bit 9 -- fail fast driver errors * Don't want driver retries for any fast fail whatever the reason. + * bit 10 -- Tell the IO scheduler not to wait for more requests after this + one has been submitted, even if it is a SYNC request. */ #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ @@ -170,6 +171,7 @@ struct bio { #define BIO_RW_FAILFAST_DEV 7 #define BIO_RW_FAILFAST_TRANSPORT 8 #define BIO_RW_FAILFAST_DRIVER 9 +#define BIO_RW_NOIDLE 10 #define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) @@ -188,6 +190,7 @@ struct bio { #define bio_rw_ahead(bio) bio_rw_flagged(bio, BIO_RW_AHEAD) #define bio_rw_meta(bio) bio_rw_flagged(bio, BIO_RW_META) #define bio_discard(bio) bio_rw_flagged(bio, BIO_RW_DISCARD) +#define bio_noidle(bio) bio_rw_flagged(bio, BIO_RW_NOIDLE) /* * upper 16 bits of bi_rw define the io priority of this bio diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 465d6babc847..e03660964e02 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -38,6 +38,10 @@ struct request; typedef void (rq_end_io_fn)(struct request *, int); struct request_list { + /* + * count[], starved[], and wait[] are indexed by + * BLK_RW_SYNC/BLK_RW_ASYNC + */ int count[2]; int starved[2]; int elvpriv; @@ -66,6 +70,11 @@ enum rq_cmd_type_bits 
{ REQ_TYPE_ATA_PC, }; +enum { + BLK_RW_ASYNC = 0, + BLK_RW_SYNC = 1, +}; + /* * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a @@ -103,12 +112,13 @@ enum rq_flag_bits { __REQ_QUIET, /* don't worry about errors */ __REQ_PREEMPT, /* set for "ide_preempt" requests */ __REQ_ORDERED_COLOR, /* is before or after barrier */ - __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ + __REQ_RW_SYNC, /* request is sync (sync write or read) */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_RW_META, /* metadata io request */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_UNPLUG, /* unplug queue on submission */ + __REQ_NOIDLE, /* Don't anticipate more IO after this one */ __REQ_NR_BITS, /* stops here */ }; @@ -136,6 +146,7 @@ enum rq_flag_bits { #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_UNPLUG (1 << __REQ_UNPLUG) +#define REQ_NOIDLE (1 << __REQ_NOIDLE) #define BLK_MAX_CDB 16 @@ -438,8 +449,8 @@ struct request_queue #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ -#define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ -#define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ +#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ +#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ @@ -611,32 +622,42 @@ enum { #define rq_data_dir(rq) ((rq)->cmd_flags & 1) /* - * We regard a request as sync, if it's a READ or a SYNC write. 
+ * We regard a request as sync, if either a read or a sync write */ -#define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) +static inline bool rw_is_sync(unsigned int rw_flags) +{ + return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC); +} + +static inline bool rq_is_sync(struct request *rq) +{ + return rw_is_sync(rq->cmd_flags); +} + #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) +#define rq_noidle(rq) ((rq)->cmd_flags & REQ_NOIDLE) -static inline int blk_queue_full(struct request_queue *q, int rw) +static inline int blk_queue_full(struct request_queue *q, int sync) { - if (rw == READ) - return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); - return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); + if (sync) + return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); + return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); } -static inline void blk_set_queue_full(struct request_queue *q, int rw) +static inline void blk_set_queue_full(struct request_queue *q, int sync) { - if (rw == READ) - queue_flag_set(QUEUE_FLAG_READFULL, q); + if (sync) + queue_flag_set(QUEUE_FLAG_SYNCFULL, q); else - queue_flag_set(QUEUE_FLAG_WRITEFULL, q); + queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); } -static inline void blk_clear_queue_full(struct request_queue *q, int rw) +static inline void blk_clear_queue_full(struct request_queue *q, int sync) { - if (rw == READ) - queue_flag_clear(QUEUE_FLAG_READFULL, q); + if (sync) + queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); else - queue_flag_clear(QUEUE_FLAG_WRITEFULL, q); + queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); } diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 6e915878e88c..d960889e92ef 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -144,6 +144,9 @@ struct blk_user_trace_setup { #ifdef __KERNEL__ #if defined(CONFIG_BLK_DEV_IO_TRACE) + +#include <linux/sysfs.h> + struct blk_trace { int trace_state; struct rchan *rchan; @@ -194,6 +197,8 @@ 
extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern struct attribute_group blk_trace_attr_group; + #else /* !CONFIG_BLK_DEV_IO_TRACE */ #define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) #define blk_trace_shutdown(q) do { } while (0) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4316a546beb5..665fa70e4094 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -365,7 +365,10 @@ int cgroup_task_count(const struct cgroup *cgrp); /* Return true if cgrp is a descendant of the task's cgroup */ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); -/* Control Group subsystem type. See Documentation/cgroups.txt for details */ +/* + * Control Group subsystem type. + * See Documentation/cgroups/cgroups.txt for details + */ struct cgroup_subsys { struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss, diff --git a/include/linux/compat.h b/include/linux/compat.h index 9723edd6455c..f2ded21f9a3c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -193,10 +193,10 @@ asmlinkage ssize_t compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen); asmlinkage ssize_t compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, u32 pos_high, u32 pos_low); + unsigned long vlen, u32 pos_low, u32 pos_high); asmlinkage ssize_t compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, u32 pos_high, u32 pos_low); + unsigned long vlen, u32 pos_low, u32 pos_high); int compat_do_execve(char * filename, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs * regs); diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d95da1020f1c..6faa7e549de4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -68,6 
+68,7 @@ struct ftrace_branch_data { unsigned long miss; unsigned long hit; }; + unsigned long miss_hit[2]; }; }; @@ -125,10 +126,7 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); .line = __LINE__, \ }; \ ______r = !!(cond); \ - if (______r) \ - ______f.hit++; \ - else \ - ______f.miss++; \ + ______f.miss_hit[______r]++; \ ______r; \ })) #endif /* CONFIG_PROFILE_ALL_BRANCHES */ diff --git a/include/linux/connector.h b/include/linux/connector.h index fc65d219d88c..b9966e64604e 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -39,8 +39,10 @@ #define CN_IDX_V86D 0x4 #define CN_VAL_V86D_UVESAFB 0x1 #define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ +#define CN_DST_IDX 0x6 +#define CN_DST_VAL 0x1 -#define CN_NETLINK_USERS 6 +#define CN_NETLINK_USERS 7 /* * Maximum connector's message size. diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index af0e01d4c663..eb5c2ba2f81a 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -71,6 +71,9 @@ struct dentry *debugfs_create_bool(const char *name, mode_t mode, struct dentry *debugfs_create_blob(const char *name, mode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob); + +bool debugfs_initialized(void); + #else #include <linux/err.h> @@ -183,6 +186,11 @@ static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode, return ERR_PTR(-ENODEV); } +static inline bool debugfs_initialized(void) +{ + return false; +} + #endif #endif diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 8209e08969f9..66ec05a57955 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -139,6 +139,9 @@ struct target_type { dm_ioctl_fn ioctl; dm_merge_fn merge; dm_busy_fn busy; + + /* For internal device-mapper use. 
*/ + struct list_head list; }; struct io_restrictions { diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h index 600c5fb2daad..5e8b11d88f6f 100644 --- a/include/linux/dm-dirty-log.h +++ b/include/linux/dm-dirty-log.h @@ -28,6 +28,9 @@ struct dm_dirty_log_type { const char *name; struct module *module; + /* For internal device-mapper use */ + struct list_head list; + int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, unsigned argc, char **argv); void (*dtr)(struct dm_dirty_log *log); @@ -113,6 +116,16 @@ struct dm_dirty_log_type { */ int (*status)(struct dm_dirty_log *log, status_type_t status_type, char *result, unsigned maxlen); + + /* + * is_remote_recovering is necessary for cluster mirroring. It provides + * a way to detect recovery on another node, so we aren't writing + * concurrently. This function is likely to block (when a cluster log + * is used). + * + * Returns: 0, 1 + */ + int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region); }; int dm_dirty_log_type_register(struct dm_dirty_log_type *type); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index af1dab41674b..1a455f1f86d7 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -11,6 +11,7 @@ #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) +#define DMA_PTE_SNP (1 << 11) struct intel_iommu; struct dmar_domain; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 1956c8d46d32..2e2aa3df170c 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -23,9 +23,6 @@ #include <linux/device.h> #include <linux/uio.h> -#include <linux/kref.h> -#include <linux/completion.h> -#include <linux/rcupdate.h> #include <linux/dma-mapping.h> /** @@ -205,6 +202,7 @@ struct dma_async_tx_descriptor { /** * struct dma_device - info on the entity supplying DMA services * @chancnt: how many DMA channels are supported + * @privatecnt: how many DMA channels are requested by dma_request_channel * 
@channels: the list of struct dma_chan * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags @@ -227,6 +225,7 @@ struct dma_async_tx_descriptor { struct dma_device { unsigned int chancnt; + unsigned int privatecnt; struct list_head channels; struct list_head global_node; dma_cap_mask_t cap_mask; @@ -291,6 +290,24 @@ static inline void net_dmaengine_put(void) } #endif +#ifdef CONFIG_ASYNC_TX_DMA +#define async_dmaengine_get() dmaengine_get() +#define async_dmaengine_put() dmaengine_put() +#define async_dma_find_channel(type) dma_find_channel(type) +#else +static inline void async_dmaengine_get(void) +{ +} +static inline void async_dmaengine_put(void) +{ +} +static inline struct dma_chan * +async_dma_find_channel(enum dma_transaction_type type) +{ + return NULL; +} +#endif + dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, @@ -337,6 +354,13 @@ __dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp) set_bit(tx_type, dstp->bits); } +#define dma_cap_clear(tx, mask) __dma_cap_clear((tx), &(mask)) +static inline void +__dma_cap_clear(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp) +{ + clear_bit(tx_type, dstp->bits); +} + #define dma_cap_zero(mask) __dma_cap_zero(&(mask)) static inline void __dma_cap_zero(dma_cap_mask_t *dstp) { diff --git a/include/linux/ds1wm.h b/include/linux/ds1wm.h deleted file mode 100644 index d3c65e48a2e7..000000000000 --- a/include/linux/ds1wm.h +++ /dev/null @@ -1,12 +0,0 @@ -/* platform data for the DS1WM driver */ - -struct ds1wm_platform_data { - int bus_shift; /* number of shifts needed to calculate the - * offset between DS1WM registers; - * e.g. 
on h5xxx and h2200 this is 2 - * (registers aligned to 4-byte boundaries), - * while on hx4700 this is 1 */ - int active_high; - void (*enable)(struct platform_device *pdev); - void (*disable)(struct platform_device *pdev); -}; diff --git a/include/linux/dst.h b/include/linux/dst.h new file mode 100644 index 000000000000..e26fed84b1aa --- /dev/null +++ b/include/linux/dst.h @@ -0,0 +1,587 @@ +/* + * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __DST_H +#define __DST_H + +#include <linux/types.h> +#include <linux/connector.h> + +#define DST_NAMELEN 32 +#define DST_NAME "dst" + +enum { + /* Remove node with given id from storage */ + DST_DEL_NODE = 0, + /* Add remote node with given id to the storage */ + DST_ADD_REMOTE, + /* Add local node with given id to the storage to be exported and used by remote peers */ + DST_ADD_EXPORT, + /* Crypto initialization command (hash/cipher used to protect the connection) */ + DST_CRYPTO, + /* Security attributes for given connection (permissions for example) */ + DST_SECURITY, + /* Register given node in the block layer subsystem */ + DST_START, + DST_CMD_MAX +}; + +struct dst_ctl +{ + /* Storage name */ + char name[DST_NAMELEN]; + /* Command flags */ + __u32 flags; + /* Command itself (see above) */ + __u32 cmd; + /* Maximum number of pages per single request in this device */ + __u32 max_pages; + /* Stale/error transaction scanning timeout in milliseconds */ + __u32 
trans_scan_timeout; + /* Maximum number of retry sends before completing transaction as broken */ + __u32 trans_max_retries; + /* Storage size */ + __u64 size; +}; + +/* Reply command carries completion status */ +struct dst_ctl_ack +{ + struct cn_msg msg; + int error; + int unused[3]; +}; + +/* + * Unfortunately socket address structure is not exported to userspace + * and is redefined there. + */ +#define SADDR_MAX_DATA 128 + +struct saddr { + /* address family, AF_xxx */ + unsigned short sa_family; + /* up to SADDR_MAX_DATA bytes of protocol address */ + char sa_data[SADDR_MAX_DATA]; + /* Number of bytes used in sa_data */ + unsigned short sa_data_len; +}; + +/* Address structure */ +struct dst_network_ctl +{ + /* Socket type: datagram, stream...*/ + unsigned int type; + /* Let me guess, is it a Jupiter diameter? */ + unsigned int proto; + /* Peer's address */ + struct saddr addr; +}; + +struct dst_crypto_ctl +{ + /* Cipher and hash names */ + char cipher_algo[DST_NAMELEN]; + char hash_algo[DST_NAMELEN]; + + /* Key sizes. Can be zero for digest for example */ + unsigned int cipher_keysize, hash_keysize; + /* Alignment. Calculated by the DST itself. */ + unsigned int crypto_attached_size; + /* Number of threads to perform crypto operations */ + int thread_num; +}; + +/* Export security attributes have these bits checked when a client connects */ +#define DST_PERM_READ (1<<0) +#define DST_PERM_WRITE (1<<1) + +/* + * Right now it is simple model, where each remote address + * is assigned to set of permissions it is allowed to perform. + * In real world block device does not know anything but + * reading and writing, so it should be more than enough.
+ */ +struct dst_secure_user +{ + unsigned int permissions; + struct saddr addr; +}; + +/* + * Export control command: device to export and network address to accept + * clients to work with given device + */ +struct dst_export_ctl +{ + char device[DST_NAMELEN]; + struct dst_network_ctl ctl; +}; + +enum { + DST_CFG = 1, /* Request remote configuration */ + DST_IO, /* IO command */ + DST_IO_RESPONSE, /* IO response */ + DST_PING, /* Keepalive message */ + DST_NCMD_MAX, +}; + +struct dst_cmd +{ + /* Network command itself, see above */ + __u32 cmd; + /* + * Size of the attached data + * (in most cases, for READ command it means how many bytes were requested) + */ + __u32 size; + /* Crypto size: number of attached bytes with digest/hmac */ + __u32 csize; + /* Here we can carry secret data */ + __u32 reserved; + /* Read/write bits, see how they are encoded in bio structure */ + __u64 rw; + /* BIO flags */ + __u64 flags; + /* Unique command id (like transaction ID) */ + __u64 id; + /* Sector to start IO from */ + __u64 sector; + /* Hash data is placed after this header */ + __u8 hash[0]; +}; + +/* + * Convert command to/from network byte order. + * We do not use hton*() functions, since there is + * no 64-bit implementation. + */ +static inline void dst_convert_cmd(struct dst_cmd *c) +{ + c->cmd = __cpu_to_be32(c->cmd); + c->csize = __cpu_to_be32(c->csize); + c->size = __cpu_to_be32(c->size); + c->sector = __cpu_to_be64(c->sector); + c->id = __cpu_to_be64(c->id); + c->flags = __cpu_to_be64(c->flags); + c->rw = __cpu_to_be64(c->rw); +} + +/* Transaction id */ +typedef __u64 dst_gen_t; + +#ifdef __KERNEL__ + +#include <linux/blkdev.h> +#include <linux/bio.h> +#include <linux/device.h> +#include <linux/mempool.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/rbtree.h> + +#ifdef CONFIG_DST_DEBUG +#define dprintk(f, a...) printk(KERN_NOTICE f, ##a) +#else +static inline void __attribute__ ((format (printf, 1, 2))) + dprintk(const char *fmt, ...) 
{} +#endif + +struct dst_node; + +struct dst_trans +{ + /* DST node we are working with */ + struct dst_node *n; + + /* Entry inside transaction tree */ + struct rb_node trans_entry; + + /* Merlin kills this transaction when this memory cell equals zero */ + atomic_t refcnt; + + /* How this transaction should be processed by crypto engine */ + short enc; + /* How many times this transaction was resent */ + short retries; + /* Completion status */ + int error; + + /* When did we send it to the remote peer */ + long send_time; + + /* My name is... + * Well, computers does not speak, they have unique id instead */ + dst_gen_t gen; + + /* Block IO we are working with */ + struct bio *bio; + + /* Network command for above block IO request */ + struct dst_cmd cmd; +}; + +struct dst_crypto_engine +{ + /* What should we do with all block requests */ + struct crypto_hash *hash; + struct crypto_ablkcipher *cipher; + + /* Pool of pages used to encrypt data into before sending */ + int page_num; + struct page **pages; + + /* What to do with current request */ + int enc; + /* Who we are and where do we go */ + struct scatterlist *src, *dst; + + /* Maximum timeout waiting for encryption to be completed */ + long timeout; + /* IV is a 64-bit sequential counter */ + u64 iv; + + /* Secret data */ + void *private; + + /* Cached temporary data lives here */ + int size; + void *data; +}; + +struct dst_state +{ + /* The main state protection */ + struct mutex state_lock; + + /* Polling machinery for sockets */ + wait_queue_t wait; + wait_queue_head_t *whead; + /* Most of events are being waited here */ + wait_queue_head_t thread_wait; + + /* Who owns this? 
*/ + struct dst_node *node; + + /* Network address for this state */ + struct dst_network_ctl ctl; + + /* Permissions to work with: read-only or rw connection */ + u32 permissions; + + /* Called when we need to clean private data */ + void (* cleanup)(struct dst_state *st); + + /* Used by the server: BIO completion queues BIOs here */ + struct list_head request_list; + spinlock_t request_lock; + + /* Guess what? No, it is not number of planets */ + atomic_t refcnt; + + /* This flags is set when connection should be dropped */ + int need_exit; + + /* + * Socket to work with. Second pointer is used for + * lockless check if socket was changed before performing + * next action (like working with cached polling result) + */ + struct socket *socket, *read_socket; + + /* Cached preallocated data */ + void *data; + unsigned int size; + + /* Currently processed command */ + struct dst_cmd cmd; +}; + +struct dst_info +{ + /* Device size */ + u64 size; + + /* Local device name for export devices */ + char local[DST_NAMELEN]; + + /* Network setup */ + struct dst_network_ctl net; + + /* Sysfs bits use this */ + struct device device; +}; + +struct dst_node +{ + struct list_head node_entry; + + /* Hi, my name is stored here */ + char name[DST_NAMELEN]; + /* My cache name is stored here */ + char cache_name[DST_NAMELEN]; + + /* Block device attached to given node. + * Only valid for exporting nodes */ + struct block_device *bdev; + /* Network state machine for given peer */ + struct dst_state *state; + + /* Block IO machinery */ + struct request_queue *queue; + struct gendisk *disk; + + /* Number of threads in processing pool */ + int thread_num; + /* Maximum number of pages in single IO */ + int max_pages; + + /* I'm that big in bytes */ + loff_t size; + + /* Exported to userspace node information */ + struct dst_info *info; + + /* + * Security attribute list. + * Used only by exporting node currently. 
+ */ + struct list_head security_list; + struct mutex security_lock; + + /* + * When this underflows below zero, the universe collapses. + * But this will not happen, since node will be freed, + * when reference counter reaches zero. + */ + atomic_t refcnt; + + /* How precisely should I be started? */ + int (*start)(struct dst_node *); + + /* Crypto capabilities */ + struct dst_crypto_ctl crypto; + u8 *hash_key; + u8 *cipher_key; + + /* Pool of processing threads */ + struct thread_pool *pool; + + /* Transaction IDs live here */ + atomic_long_t gen; + + /* + * How frequently and how many times transaction + * tree should be scanned to drop stale objects. + */ + long trans_scan_timeout; + int trans_max_retries; + + /* Small gnomes live here */ + struct rb_root trans_root; + struct mutex trans_lock; + + /* + * Transaction cache/memory pool. + * It is big enough to contain not only transaction + * itself, but additional crypto data (digest/hmac). + */ + struct kmem_cache *trans_cache; + mempool_t *trans_pool; + + /* This entity scans transaction tree */ + struct delayed_work trans_work; + + wait_queue_head_t wait; +}; + +/* Kernel representation of the security attribute */ +struct dst_secure +{ + struct list_head sec_entry; + struct dst_secure_user sec; +}; + +int dst_process_bio(struct dst_node *n, struct bio *bio); + +int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r); +int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le); + +static inline struct dst_state *dst_state_get(struct dst_state *st) +{ + BUG_ON(atomic_read(&st->refcnt) == 0); + atomic_inc(&st->refcnt); + return st; +} + +void dst_state_put(struct dst_state *st); + +struct dst_state *dst_state_alloc(struct dst_node *n); +int dst_state_socket_create(struct dst_state *st); +void dst_state_socket_release(struct dst_state *st); + +void dst_state_exit_connected(struct dst_state *st); + +int dst_state_schedule_receiver(struct dst_state *st); + +void dst_dump_addr(struct
socket *sk, struct sockaddr *sa, char *str); + +static inline void dst_state_lock(struct dst_state *st) +{ + mutex_lock(&st->state_lock); +} + +static inline void dst_state_unlock(struct dst_state *st) +{ + mutex_unlock(&st->state_lock); +} + +void dst_poll_exit(struct dst_state *st); +int dst_poll_init(struct dst_state *st); + +static inline unsigned int dst_state_poll(struct dst_state *st) +{ + unsigned int revents = POLLHUP | POLLERR; + + dst_state_lock(st); + if (st->socket) + revents = st->socket->ops->poll(NULL, st->socket, NULL); + dst_state_unlock(st); + + return revents; +} + +static inline int dst_thread_setup(void *private, void *data) +{ + return 0; +} + +void dst_node_put(struct dst_node *n); + +static inline struct dst_node *dst_node_get(struct dst_node *n) +{ + atomic_inc(&n->refcnt); + return n; +} + +int dst_data_recv(struct dst_state *st, void *data, unsigned int size); +int dst_recv_cdata(struct dst_state *st, void *cdata); +int dst_data_send_header(struct socket *sock, + void *data, unsigned int size, int more); + +int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); + +int dst_process_io(struct dst_state *st); +int dst_export_crypto(struct dst_node *n, struct bio *bio); +int dst_export_send_bio(struct bio *bio); +int dst_start_export(struct dst_node *n); + +int __init dst_export_init(void); +void dst_export_exit(void); + +/* Private structure for export block IO requests */ +struct dst_export_priv +{ + struct list_head request_entry; + struct dst_state *state; + struct bio *bio; + struct dst_cmd cmd; +}; + +static inline void dst_trans_get(struct dst_trans *t) +{ + atomic_inc(&t->refcnt); +} + +struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); +int dst_trans_remove(struct dst_trans *t); +int dst_trans_remove_nolock(struct dst_trans *t); +void dst_trans_put(struct dst_trans *t); + +/* + * Convert bio into network command. 
+ */ +static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, + u32 command, u64 id) +{ + cmd->cmd = command; + cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; + cmd->rw = bio->bi_rw; + cmd->size = bio->bi_size; + cmd->csize = 0; + cmd->id = id; + cmd->sector = bio->bi_sector; +}; + +int dst_trans_send(struct dst_trans *t); +int dst_trans_crypto(struct dst_trans *t); + +int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl); +void dst_node_crypto_exit(struct dst_node *n); + +static inline int dst_need_crypto(struct dst_node *n) +{ + struct dst_crypto_ctl *c = &n->crypto; + /* + * Logical OR is appropriate here, but bitwise one produces + * more optimal code, so it is used instead. + */ + return (c->hash_algo[0] | c->cipher_algo[0]); +} + +int dst_node_trans_init(struct dst_node *n, unsigned int size); +void dst_node_trans_exit(struct dst_node *n); + +/* + * Pool of threads. + * Ready list contains threads currently free to be used, + * active one contains threads with some work scheduled for them. + * Caller can wait in given queue when thread is ready.
+ */ +struct thread_pool +{ + int thread_num; + struct mutex thread_lock; + struct list_head ready_list, active_list; + + wait_queue_head_t wait; +}; + +void thread_pool_del_worker(struct thread_pool *p); +void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id); +int thread_pool_add_worker(struct thread_pool *p, + char *name, + unsigned int id, + void *(* init)(void *data), + void (* cleanup)(void *data), + void *data); + +void thread_pool_destroy(struct thread_pool *p); +struct thread_pool *thread_pool_create(int num, char *name, + void *(* init)(void *data), + void (* cleanup)(void *data), + void *data); + +int thread_pool_schedule(struct thread_pool *p, + int (* setup)(void *stored_private, void *setup_data), + int (* action)(void *stored_private, void *setup_data), + void *setup_data, long timeout); +int thread_pool_schedule_private(struct thread_pool *p, + int (* setup)(void *private, void *data), + int (* action)(void *private, void *data), + void *data, long timeout, void *id); + +#endif /* __KERNEL__ */ +#endif /* __DST_H */ diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index d797dde247f7..c8aad713a046 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -74,4 +74,23 @@ struct dw_dma_slave { #define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ #define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ +/* DMA API extensions */ +struct dw_cyclic_desc { + struct dw_desc **desc; + unsigned long periods; + void (*period_callback)(void *param); + void *period_callback_param; +}; + +struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, + dma_addr_t buf_addr, size_t buf_len, size_t period_len, + enum dma_data_direction direction); +void dw_dma_cyclic_free(struct dma_chan *chan); +int dw_dma_cyclic_start(struct dma_chan *chan); +void dw_dma_cyclic_stop(struct dma_chan *chan); + +dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); + +dma_addr_t dw_dma_get_dst_addr(struct 
dma_chan *chan); + #endif /* DW_DMAC_H */ diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index e263acaa405b..634a5e5aba3e 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -208,6 +208,7 @@ static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags) #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ #define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT3_STATE_FLUSH_ON_CLOSE 0x00000008 /* Used to pass group descriptor data when online resize is done */ struct ext3_new_group_input { diff --git a/include/linux/fs.h b/include/linux/fs.h index a09e17c8f5fd..cae5720f431c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -95,8 +95,12 @@ struct inodes_stat_t { #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) #define READ_META (READ | (1 << BIO_RW_META)) -#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) -#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) +#define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) +#define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) +#define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) +#define SWRITE_SYNC_PLUG \ + (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) +#define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h new file mode 100644 index 000000000000..84d3532dd3ea --- /dev/null +++ b/include/linux/fscache-cache.h @@ -0,0 +1,505 @@ +/* General filesystem caching backing cache interface + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE!!! See: + * + * Documentation/filesystems/caching/backend-api.txt + * + * for a description of the cache backend interface declared here. + */ + +#ifndef _LINUX_FSCACHE_CACHE_H +#define _LINUX_FSCACHE_CACHE_H + +#include <linux/fscache.h> +#include <linux/sched.h> +#include <linux/slow-work.h> + +#define NR_MAXCACHES BITS_PER_LONG + +struct fscache_cache; +struct fscache_cache_ops; +struct fscache_object; +struct fscache_operation; + +/* + * cache tag definition + */ +struct fscache_cache_tag { + struct list_head link; + struct fscache_cache *cache; /* cache referred to by this tag */ + unsigned long flags; +#define FSCACHE_TAG_RESERVED 0 /* T if tag is reserved for a cache */ + atomic_t usage; + char name[0]; /* tag name */ +}; + +/* + * cache definition + */ +struct fscache_cache { + const struct fscache_cache_ops *ops; + struct fscache_cache_tag *tag; /* tag representing this cache */ + struct kobject *kobj; /* system representation of this cache */ + struct list_head link; /* link in list of caches */ + size_t max_index_size; /* maximum size of index data */ + char identifier[36]; /* cache label */ + + /* node management */ + struct work_struct op_gc; /* operation garbage collector */ + struct list_head object_list; /* list of data/index objects */ + struct list_head op_gc_list; /* list of ops to be deleted */ + spinlock_t object_list_lock; + spinlock_t op_gc_list_lock; + atomic_t object_count; /* no. 
of live objects in this cache */ + struct fscache_object *fsdef; /* object for the fsdef index */ + unsigned long flags; +#define FSCACHE_IOERROR 0 /* cache stopped on I/O error */ +#define FSCACHE_CACHE_WITHDRAWN 1 /* cache has been withdrawn */ +}; + +extern wait_queue_head_t fscache_cache_cleared_wq; + +/* + * operation to be applied to a cache object + * - retrieval initiation operations are done in the context of the process + * that issued them, and not in an async thread pool + */ +typedef void (*fscache_operation_release_t)(struct fscache_operation *op); +typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); + +struct fscache_operation { + union { + struct work_struct fast_work; /* record for fast ops */ + struct slow_work slow_work; /* record for (very) slow ops */ + }; + struct list_head pend_link; /* link in object->pending_ops */ + struct fscache_object *object; /* object to be operated upon */ + + unsigned long flags; +#define FSCACHE_OP_TYPE 0x000f /* operation type */ +#define FSCACHE_OP_FAST 0x0001 /* - fast op, processor may not sleep for disk */ +#define FSCACHE_OP_SLOW 0x0002 /* - (very) slow op, processor may sleep for disk */ +#define FSCACHE_OP_MYTHREAD 0x0003 /* - processing is done by the issuing thread, not pool */ +#define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ +#define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ +#define FSCACHE_OP_DEAD 6 /* op is now dead */ + + atomic_t usage; + unsigned debug_id; /* debugging ID */ + + /* operation processor callback + * - can be NULL if FSCACHE_OP_WAITING is going to be used to perform + * the op in a non-pool thread */ + fscache_operation_processor_t processor; + + /* operation releaser */ + fscache_operation_release_t release; +}; + +extern atomic_t fscache_op_debug_id; +extern const struct slow_work_ops fscache_op_slow_work_ops; + +extern void fscache_enqueue_operation(struct fscache_operation *); +extern void fscache_put_operation(struct
fscache_operation *); + +/** + * fscache_operation_init - Do basic initialisation of an operation + * @op: The operation to initialise + * @release: The release function to assign + * + * Do basic initialisation of an operation. The caller must still set flags, + * object, either fast_work or slow_work if necessary, and processor if needed. + */ +static inline void fscache_operation_init(struct fscache_operation *op, + fscache_operation_release_t release) +{ + atomic_set(&op->usage, 1); + op->debug_id = atomic_inc_return(&fscache_op_debug_id); + op->release = release; + INIT_LIST_HEAD(&op->pend_link); +} + +/** + * fscache_operation_init_slow - Do additional initialisation of a slow op + * @op: The operation to initialise + * @processor: The processor function to assign + * + * Do additional initialisation of an operation as required for slow work. + */ +static inline +void fscache_operation_init_slow(struct fscache_operation *op, + fscache_operation_processor_t processor) +{ + op->processor = processor; + slow_work_init(&op->slow_work, &fscache_op_slow_work_ops); +} + +/* + * data read operation + */ +struct fscache_retrieval { + struct fscache_operation op; + struct address_space *mapping; /* netfs pages */ + fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ + void *context; /* netfs read context (pinned) */ + struct list_head to_do; /* list of things to be done by the backend */ + unsigned long start_time; /* time at which retrieval started */ +}; + +typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, + struct page *page, + gfp_t gfp); + +typedef int (*fscache_pages_retrieval_func_t)(struct fscache_retrieval *op, + struct list_head *pages, + unsigned *nr_pages, + gfp_t gfp); + +/** + * fscache_get_retrieval - Get an extra reference on a retrieval operation + * @op: The retrieval operation to get a reference on + * + * Get an extra reference on a retrieval operation. 
+ */ +static inline +struct fscache_retrieval *fscache_get_retrieval(struct fscache_retrieval *op) +{ + atomic_inc(&op->op.usage); + return op; +} + +/** + * fscache_enqueue_retrieval - Enqueue a retrieval operation for processing + * @op: The retrieval operation affected + * + * Enqueue a retrieval operation for processing by the FS-Cache thread pool. + */ +static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) +{ + fscache_enqueue_operation(&op->op); +} + +/** + * fscache_put_retrieval - Drop a reference to a retrieval operation + * @op: The retrieval operation affected + * + * Drop a reference to a retrieval operation. + */ +static inline void fscache_put_retrieval(struct fscache_retrieval *op) +{ + fscache_put_operation(&op->op); +} + +/* + * cached page storage work item + * - used to do three things: + * - batch writes to the cache + * - do cache writes asynchronously + * - defer writes until cache object lookup completion + */ +struct fscache_storage { + struct fscache_operation op; + pgoff_t store_limit; /* don't write more than this */ +}; + +/* + * cache operations + */ +struct fscache_cache_ops { + /* name of cache provider */ + const char *name; + + /* allocate an object record for a cookie */ + struct fscache_object *(*alloc_object)(struct fscache_cache *cache, + struct fscache_cookie *cookie); + + /* look up the object for a cookie */ + void (*lookup_object)(struct fscache_object *object); + + /* finished looking up */ + void (*lookup_complete)(struct fscache_object *object); + + /* increment the usage count on this object (may fail if unmounting) */ + struct fscache_object *(*grab_object)(struct fscache_object *object); + + /* pin an object in the cache */ + int (*pin_object)(struct fscache_object *object); + + /* unpin an object in the cache */ + void (*unpin_object)(struct fscache_object *object); + + /* store the updated auxiliary data on an object */ + void (*update_object)(struct fscache_object *object); + + /* discard the
resources pinned by an object and effect retirement if + * necessary */ + void (*drop_object)(struct fscache_object *object); + + /* dispose of a reference to an object */ + void (*put_object)(struct fscache_object *object); + + /* sync a cache */ + void (*sync_cache)(struct fscache_cache *cache); + + /* notification that the attributes of a non-index object (such as + * i_size) have changed */ + int (*attr_changed)(struct fscache_object *object); + + /* reserve space for an object's data and associated metadata */ + int (*reserve_space)(struct fscache_object *object, loff_t i_size); + + /* request a backing block for a page be read or allocated in the + * cache */ + fscache_page_retrieval_func_t read_or_alloc_page; + + /* request backing blocks for a list of pages be read or allocated in + * the cache */ + fscache_pages_retrieval_func_t read_or_alloc_pages; + + /* request a backing block for a page be allocated in the cache so that + * it can be written directly */ + fscache_page_retrieval_func_t allocate_page; + + /* request backing blocks for pages be allocated in the cache so that + * they can be written directly */ + fscache_pages_retrieval_func_t allocate_pages; + + /* write a page to its backing block in the cache */ + int (*write_page)(struct fscache_storage *op, struct page *page); + + /* detach backing block from a page (optional) + * - must release the cookie lock before returning + * - may sleep + */ + void (*uncache_page)(struct fscache_object *object, + struct page *page); + + /* dissociate a cache from all the pages it was backing */ + void (*dissociate_pages)(struct fscache_cache *cache); +}; + +/* + * data file or index object cookie + * - a file will only appear in one cache + * - a request to cache a file may or may not be honoured, subject to + * constraints such as disk space + * - indices are created on disk just-in-time + */ +struct fscache_cookie { + atomic_t usage; /* number of users of this cookie */ + atomic_t n_children; /* number of 
children of this cookie */ + spinlock_t lock; + struct hlist_head backing_objects; /* object(s) backing this file/index */ + const struct fscache_cookie_def *def; /* definition */ + struct fscache_cookie *parent; /* parent of this entry */ + void *netfs_data; /* back pointer to netfs */ + struct radix_tree_root stores; /* pages to be stored on this cookie */ +#define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ + + unsigned long flags; +#define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */ +#define FSCACHE_COOKIE_CREATING 1 /* T if non-index object being created still */ +#define FSCACHE_COOKIE_NO_DATA_YET 2 /* T if new object with no cached data yet */ +#define FSCACHE_COOKIE_PENDING_FILL 3 /* T if pending initial fill on object */ +#define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */ +#define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */ +}; + +extern struct fscache_cookie fscache_fsdef_index; + +/* + * on-disk cache file or index handle + */ +struct fscache_object { + enum fscache_object_state { + FSCACHE_OBJECT_INIT, /* object in initial unbound state */ + FSCACHE_OBJECT_LOOKING_UP, /* looking up object */ + FSCACHE_OBJECT_CREATING, /* creating object */ + + /* active states */ + FSCACHE_OBJECT_AVAILABLE, /* cleaning up object after creation */ + FSCACHE_OBJECT_ACTIVE, /* object is usable */ + FSCACHE_OBJECT_UPDATING, /* object is updating */ + + /* terminal states */ + FSCACHE_OBJECT_DYING, /* object waiting for accessors to finish */ + FSCACHE_OBJECT_LC_DYING, /* object cleaning up after lookup/create */ + FSCACHE_OBJECT_ABORT_INIT, /* abort the init state */ + FSCACHE_OBJECT_RELEASING, /* releasing object */ + FSCACHE_OBJECT_RECYCLING, /* retiring object */ + FSCACHE_OBJECT_WITHDRAWING, /* withdrawing object */ + FSCACHE_OBJECT_DEAD, /* object is now dead */ + } state; + + int debug_id; /* debugging ID */ + int n_children; /* number of child objects */ + 
int n_ops; /* number of ops outstanding on object */ + int n_obj_ops; /* number of object ops outstanding on object */ + int n_in_progress; /* number of ops in progress */ + int n_exclusive; /* number of exclusive ops queued */ + spinlock_t lock; /* state and operations lock */ + + unsigned long lookup_jif; /* time at which lookup started */ + unsigned long event_mask; /* events this object is interested in */ + unsigned long events; /* events to be processed by this object + * (order is important - using fls) */ +#define FSCACHE_OBJECT_EV_REQUEUE 0 /* T if object should be requeued */ +#define FSCACHE_OBJECT_EV_UPDATE 1 /* T if object should be updated */ +#define FSCACHE_OBJECT_EV_CLEARED 2 /* T if accessors all gone */ +#define FSCACHE_OBJECT_EV_ERROR 3 /* T if fatal error occurred during processing */ +#define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ +#define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ +#define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ + + unsigned long flags; +#define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ +#define FSCACHE_OBJECT_PENDING_WRITE 1 /* T if object has pending write */ +#define FSCACHE_OBJECT_WAITING 2 /* T if object is waiting on its parent */ + + struct list_head cache_link; /* link in cache->object_list */ + struct hlist_node cookie_link; /* link in cookie->backing_objects */ + struct fscache_cache *cache; /* cache that supplied this object */ + struct fscache_cookie *cookie; /* netfs's file/index object */ + struct fscache_object *parent; /* parent object */ + struct slow_work work; /* attention scheduling record */ + struct list_head dependents; /* FIFO of dependent objects */ + struct list_head dep_link; /* link in parent's dependents list */ + struct list_head pending_ops; /* unstarted operations on this object */ + pgoff_t store_limit; /* current storage limit */ +}; + +extern const char 
*fscache_object_states[]; + +#define fscache_object_is_active(obj) \ + (!test_bit(FSCACHE_IOERROR, &(obj)->cache->flags) && \ + (obj)->state >= FSCACHE_OBJECT_AVAILABLE && \ + (obj)->state < FSCACHE_OBJECT_DYING) + +extern const struct slow_work_ops fscache_object_slow_work_ops; + +/** + * fscache_object_init - Initialise a cache object description + * @object: Object description + * + * Initialise a cache object description to its basic values. + * + * See Documentation/filesystems/caching/backend-api.txt for a complete + * description. + */ +static inline +void fscache_object_init(struct fscache_object *object, + struct fscache_cookie *cookie, + struct fscache_cache *cache) +{ + atomic_inc(&cache->object_count); + + object->state = FSCACHE_OBJECT_INIT; + spin_lock_init(&object->lock); + INIT_LIST_HEAD(&object->cache_link); + INIT_HLIST_NODE(&object->cookie_link); + vslow_work_init(&object->work, &fscache_object_slow_work_ops); + INIT_LIST_HEAD(&object->dependents); + INIT_LIST_HEAD(&object->dep_link); + INIT_LIST_HEAD(&object->pending_ops); + object->n_children = 0; + object->n_ops = object->n_in_progress = object->n_exclusive = 0; + object->events = object->event_mask = 0; + object->flags = 0; + object->store_limit = 0; + object->cache = cache; + object->cookie = cookie; + object->parent = NULL; +} + +extern void fscache_object_lookup_negative(struct fscache_object *object); +extern void fscache_obtained_object(struct fscache_object *object); + +/** + * fscache_object_destroyed - Note destruction of an object in a cache + * @cache: The cache from which the object came + * + * Note the destruction and deallocation of an object record in a cache. 
+ */ +static inline void fscache_object_destroyed(struct fscache_cache *cache) +{ + if (atomic_dec_and_test(&cache->object_count)) + wake_up_all(&fscache_cache_cleared_wq); +} + +/** + * fscache_object_lookup_error - Note an object encountered an error + * @object: The object on which the error was encountered + * + * Note that an object encountered a fatal error (usually an I/O error) and + * that it should be withdrawn as soon as possible. + */ +static inline void fscache_object_lookup_error(struct fscache_object *object) +{ + set_bit(FSCACHE_OBJECT_EV_ERROR, &object->events); +} + +/** + * fscache_set_store_limit - Set the maximum size to be stored in an object + * @object: The object to set the maximum on + * @i_size: The limit to set in bytes + * + * Set the maximum size an object is permitted to reach, implying the highest + * byte that may be written. Intended to be called by the attr_changed() op. + * + * See Documentation/filesystems/caching/backend-api.txt for a complete + * description. + */ +static inline +void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) +{ + object->store_limit = i_size >> PAGE_SHIFT; + if (i_size & ~PAGE_MASK) + object->store_limit++; +} + +/** + * fscache_end_io - End a retrieval operation on a page + * @op: The FS-Cache operation covering the retrieval + * @page: The page that was to be fetched + * @error: The error code (0 if successful) + * + * Note the end of an operation to retrieve a page, as covered by a particular + * operation record. + */ +static inline void fscache_end_io(struct fscache_retrieval *op, + struct page *page, int error) +{ + op->end_io_func(page, op->context, error); +} + +/* + * out-of-line cache backend functions + */ +extern void fscache_init_cache(struct fscache_cache *cache, + const struct fscache_cache_ops *ops, + const char *idfmt, + ...) 
__attribute__ ((format (printf, 3, 4))); + +extern int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *fsdef, + const char *tagname); +extern void fscache_withdraw_cache(struct fscache_cache *cache); + +extern void fscache_io_error(struct fscache_cache *cache); + +extern void fscache_mark_pages_cached(struct fscache_retrieval *op, + struct pagevec *pagevec); + +extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, + const void *data, + uint16_t datalen); + +#endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h new file mode 100644 index 000000000000..6d8ee466e0a0 --- /dev/null +++ b/include/linux/fscache.h @@ -0,0 +1,618 @@ +/* General filesystem caching interface + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE!!! See: + * + * Documentation/filesystems/caching/netfs-api.txt + * + * for a description of the network filesystem interface declared here. 
+ */ + +#ifndef _LINUX_FSCACHE_H +#define _LINUX_FSCACHE_H + +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/pagemap.h> +#include <linux/pagevec.h> + +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +#define fscache_available() (1) +#define fscache_cookie_valid(cookie) (cookie) +#else +#define fscache_available() (0) +#define fscache_cookie_valid(cookie) (0) +#endif + + +/* + * overload PG_private_2 to give us PG_fscache - this is used to indicate that + * a page is currently backed by a local disk cache + */ +#define PageFsCache(page) PagePrivate2((page)) +#define SetPageFsCache(page) SetPagePrivate2((page)) +#define ClearPageFsCache(page) ClearPagePrivate2((page)) +#define TestSetPageFsCache(page) TestSetPagePrivate2((page)) +#define TestClearPageFsCache(page) TestClearPagePrivate2((page)) + +/* pattern used to fill dead space in an index entry */ +#define FSCACHE_INDEX_DEADFILL_PATTERN 0x79 + +struct pagevec; +struct fscache_cache_tag; +struct fscache_cookie; +struct fscache_netfs; + +typedef void (*fscache_rw_complete_t)(struct page *page, + void *context, + int error); + +/* result of index entry consultation */ +enum fscache_checkaux { + FSCACHE_CHECKAUX_OKAY, /* entry okay as is */ + FSCACHE_CHECKAUX_NEEDS_UPDATE, /* entry requires update */ + FSCACHE_CHECKAUX_OBSOLETE, /* entry requires deletion */ +}; + +/* + * fscache cookie definition + */ +struct fscache_cookie_def { + /* name of cookie type */ + char name[16]; + + /* cookie type */ + uint8_t type; +#define FSCACHE_COOKIE_TYPE_INDEX 0 +#define FSCACHE_COOKIE_TYPE_DATAFILE 1 + + /* select the cache into which to insert an entry in this index + * - optional + * - should return a cache identifier or NULL to cause the cache to be + * inherited from the parent if possible or the first cache picked + * for a non-index file if not + */ + struct fscache_cache_tag *(*select_cache)( + const void *parent_netfs_data, + const void *cookie_netfs_data); + + /* get an index key + * - 
should store the key data in the buffer + * - should return the amount of data stored + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + uint16_t (*get_key)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + /* get certain file attributes from the netfs data + * - this function can be absent for an index + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + void (*get_attr)(const void *cookie_netfs_data, uint64_t *size); + + /* get the auxiliary data from netfs data + * - this function can be absent if the index carries no state data + * - should store the auxiliary data in the buffer + * - should return the amount of data stored + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + uint16_t (*get_aux)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + /* consult the netfs about the state of an object + * - this function can be absent if the index carries no state data + * - the netfs data from the cookie being used as the target is + * presented, as is the auxiliary data + */ + enum fscache_checkaux (*check_aux)(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + + /* get an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ + void (*get_context)(void *cookie_netfs_data, void *context); + + /* release an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ + void (*put_context)(void *cookie_netfs_data, void *context); + + /* indicate pages that now have cache metadata retained + * - this function should mark the specified pages as now being cached + * - the pages will have been marked with PG_fscache before this is + * called, so this is 
optional + */ + void (*mark_pages_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); + + /* indicate the cookie is no longer cached + * - this function is called when the backing store currently caching + * a cookie is removed + * - the netfs should use this to clean up any markers indicating + * cached pages + * - this is mandatory for any object that may have data + */ + void (*now_uncached)(void *cookie_netfs_data); +}; + +/* + * fscache cached network filesystem type + * - name, version and ops must be filled in before registration + * - all other fields will be set during registration + */ +struct fscache_netfs { + uint32_t version; /* indexing version */ + const char *name; /* filesystem name */ + struct fscache_cookie *primary_index; + struct list_head link; /* internal link */ +}; + +/* + * slow-path functions for when there is actually caching available, and the + * netfs does actually have a valid token + * - these are not to be called directly + * - these are undefined symbols when FS-Cache is not configured and the + * optimiser takes care of not using them + */ +extern int __fscache_register_netfs(struct fscache_netfs *); +extern void __fscache_unregister_netfs(struct fscache_netfs *); +extern struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *); +extern void __fscache_release_cache_tag(struct fscache_cache_tag *); + +extern struct fscache_cookie *__fscache_acquire_cookie( + struct fscache_cookie *, + const struct fscache_cookie_def *, + void *); +extern void __fscache_relinquish_cookie(struct fscache_cookie *, int); +extern void __fscache_update_cookie(struct fscache_cookie *); +extern int __fscache_attr_changed(struct fscache_cookie *); +extern int __fscache_read_or_alloc_page(struct fscache_cookie *, + struct page *, + fscache_rw_complete_t, + void *, + gfp_t); +extern int __fscache_read_or_alloc_pages(struct fscache_cookie *, + struct address_space *, + struct list_head *, + unsigned *, + 
fscache_rw_complete_t, + void *, + gfp_t); +extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t); +extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t); +extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); +extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); +extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); + +/** + * fscache_register_netfs - Register a filesystem as desiring caching services + * @netfs: The description of the filesystem + * + * Register a filesystem as desiring caching services if they're available. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_register_netfs(struct fscache_netfs *netfs) +{ + if (fscache_available()) + return __fscache_register_netfs(netfs); + else + return 0; +} + +/** + * fscache_unregister_netfs - Indicate that a filesystem no longer desires + * caching services + * @netfs: The description of the filesystem + * + * Indicate that a filesystem no longer desires caching services for the + * moment. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_unregister_netfs(struct fscache_netfs *netfs) +{ + if (fscache_available()) + __fscache_unregister_netfs(netfs); +} + +/** + * fscache_lookup_cache_tag - Look up a cache tag + * @name: The name of the tag to search for + * + * Acquire a specific cache referral tag that can be used to select a specific + * cache in which to cache an index. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. 
+ */ +static inline +struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) +{ + if (fscache_available()) + return __fscache_lookup_cache_tag(name); + else + return NULL; +} + +/** + * fscache_release_cache_tag - Release a cache tag + * @tag: The tag to release + * + * Release a reference to a cache referral tag previously looked up. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_release_cache_tag(struct fscache_cache_tag *tag) +{ + if (fscache_available()) + __fscache_release_cache_tag(tag); +} + +/** + * fscache_acquire_cookie - Acquire a cookie to represent a cache object + * @parent: The cookie that's to be the parent of this one + * @def: A description of the cache object, including callback operations + * @netfs_data: An arbitrary piece of data to be kept in the cookie to + * represent the cache object to the netfs + * + * This function is used to inform FS-Cache about part of an index hierarchy + * that can be used to locate files. This is done by requesting a cookie for + * each index in the path to the file. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +struct fscache_cookie *fscache_acquire_cookie( + struct fscache_cookie *parent, + const struct fscache_cookie_def *def, + void *netfs_data) +{ + if (fscache_cookie_valid(parent)) + return __fscache_acquire_cookie(parent, def, netfs_data); + else + return NULL; +} + +/** + * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding + * it + * @cookie: The cookie being returned + * @retire: True if the cache object the cookie represents is to be discarded + * + * This function returns a cookie to the cache, forcibly discarding the + * associated cache object if retire is set to true. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. 
+ */ +static inline +void fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) +{ + if (fscache_cookie_valid(cookie)) + __fscache_relinquish_cookie(cookie, retire); +} + +/** + * fscache_update_cookie - Request that a cache object be updated + * @cookie: The cookie representing the cache object + * + * Request an update of the index data for the cache object associated with the + * cookie. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_update_cookie(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + __fscache_update_cookie(cookie); +} + +/** + * fscache_pin_cookie - Pin a data-storage cache object in its cache + * @cookie: The cookie representing the cache object + * + * Permit data-storage cache objects to be pinned in the cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_pin_cookie(struct fscache_cookie *cookie) +{ + return -ENOBUFS; +} + +/** + * fscache_unpin_cookie - Unpin a data-storage cache object in its cache + * @cookie: The cookie representing the cache object + * + * Permit data-storage cache objects to be unpinned from the cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_unpin_cookie(struct fscache_cookie *cookie) +{ +} + +/** + * fscache_attr_changed - Notify cache that an object's attributes changed + * @cookie: The cookie representing the cache object + * + * Send a notification to the cache indicating that an object's attributes have + * changed. This includes the data size. These attributes will be obtained + * through the get_attr() cookie definition op. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. 
+ */ +static inline +int fscache_attr_changed(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_attr_changed(cookie); + else + return -ENOBUFS; +} + +/** + * fscache_reserve_space - Reserve data space for a cached object + * @cookie: The cookie representing the cache object + * @size: The amount of space to be reserved + * + * Reserve an amount of space in the cache for the cache object attached to a + * cookie so that a write to that object within the space can always be + * honoured. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) +{ + return -ENOBUFS; +} + +/** + * fscache_read_or_alloc_page - Read a page from the cache or allocate a block + * in which to store it + * @cookie: The cookie representing the cache object + * @page: The netfs page to fill if possible + * @end_io_func: The callback to invoke when and if the page is filled + * @context: An arbitrary piece of data to pass on to end_io_func() + * @gfp: The conditions under which memory allocation should be made + * + * Read a page from the cache, or if that's not possible make a potential + * one-block reservation in the cache into which the page may be stored once + * fetched from the server. + * + * If the page is not backed by the cache object, or if there's some reason + * it can't be, -ENOBUFS will be returned and nothing more will be done for + * that page. + * + * Else, if that page is backed by the cache, a read will be initiated directly + * to the netfs's page and 0 will be returned by this function. The + * end_io_func() callback will be invoked when the operation terminates on a + * completion or failure. Note that the callback may be invoked before the + * return. + * + * Else, if the page is unbacked, -ENODATA is returned and a block may have + * been allocated in the cache. 
+ * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_read_or_alloc_page(cookie, page, end_io_func, + context, gfp); + else + return -ENOBUFS; +} + +/** + * fscache_read_or_alloc_pages - Read pages from the cache and/or allocate + * blocks in which to store them + * @cookie: The cookie representing the cache object + * @mapping: The netfs inode mapping to which the pages will be attached + * @pages: A list of potential netfs pages to be filled + * @end_io_func: The callback to invoke when and if each page is filled + * @context: An arbitrary piece of data to pass on to end_io_func() + * @gfp: The conditions under which memory allocation should be made + * + * Read a set of pages from the cache, or if that's not possible, attempt to + * make a potential one-block reservation for each page in the cache into which + * that page may be stored once fetched from the server. + * + * If some pages are not backed by the cache object, or if there's some + * reason they can't be, -ENOBUFS will be returned and nothing more will be + * done for those pages. + * + * Else, if some of the pages are backed by the cache, a read will be initiated + * directly to the netfs's page and 0 will be returned by this function. The + * end_io_func() callback will be invoked when the operation terminates on a + * completion or failure. Note that the callback may be invoked before the + * return. + * + * Else, if a page is unbacked, -ENODATA is returned and a block may have + * been allocated in the cache. + * + * Because the function may want to return all of -ENOBUFS, -ENODATA and 0 in + * regard to different pages, the return values are prioritised in that order. 
+ * Any pages submitted for reading are removed from the pages list. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_read_or_alloc_pages(cookie, mapping, pages, + nr_pages, end_io_func, + context, gfp); + else + return -ENOBUFS; +} + +/** + * fscache_alloc_page - Allocate a block in which to store a page + * @cookie: The cookie representing the cache object + * @page: The netfs page to allocate a block for + * @gfp: The conditions under which memory allocation should be made + * + * Request allocation of a block in the cache in which to store a netfs page + * without retrieving any contents from the cache. + * + * If the page is not backed by a file then -ENOBUFS will be returned and + * nothing more will be done, and no reservation will be made. + * + * Else, a block will be allocated if one wasn't already, and 0 will be + * returned. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_alloc_page(cookie, page, gfp); + else + return -ENOBUFS; +} + +/** + * fscache_write_page - Request storage of a page in the cache + * @cookie: The cookie representing the cache object + * @page: The netfs page to store + * @gfp: The conditions under which memory allocation should be made + * + * Request the contents of the netfs page be written into the cache. This + * request may be ignored if no cache block is currently allocated, in which + * case it will return -ENOBUFS. 
+ * + * If a cache block was already allocated, a write will be initiated and 0 will + * be returned. The PG_fscache_write page bit is set immediately and will then + * be cleared at the completion of the write to indicate the success or failure + * of the operation. Note that the completion may happen before the return. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +int fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_write_page(cookie, page, gfp); + else + return -ENOBUFS; +} + +/** + * fscache_uncache_page - Indicate that caching is no longer required on a page + * @cookie: The cookie representing the cache object + * @page: The netfs page that was being cached. + * + * Tell the cache that we no longer want a page to be cached and that it should + * remove any knowledge of the netfs page it may have. + * + * Note that this cannot cancel any outstanding I/O operations between this + * page and the cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_uncache_page(struct fscache_cookie *cookie, + struct page *page) +{ + if (fscache_cookie_valid(cookie)) + __fscache_uncache_page(cookie, page); +} + +/** + * fscache_check_page_write - Ask if a page is being written to the cache + * @cookie: The cookie representing the cache object + * @page: The netfs page that is being cached. + * + * Ask the cache if a page is being written to the cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. 
+ */ +static inline +bool fscache_check_page_write(struct fscache_cookie *cookie, + struct page *page) +{ + if (fscache_cookie_valid(cookie)) + return __fscache_check_page_write(cookie, page); + return false; +} + +/** + * fscache_wait_on_page_write - Wait for a page to complete writing to the cache + * @cookie: The cookie representing the cache object + * @page: The netfs page that is being cached. + * + * Ask the cache to wake us up when a page is no longer being written to the + * cache. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_wait_on_page_write(struct fscache_cookie *cookie, + struct page *page) +{ + if (fscache_cookie_valid(cookie)) + __fscache_wait_on_page_write(cookie, page); +} + +#endif /* _LINUX_FSCACHE_H */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a7f8134c594e..015a3d22cf74 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1,15 +1,18 @@ #ifndef _LINUX_FTRACE_H #define _LINUX_FTRACE_H -#include <linux/linkage.h> -#include <linux/fs.h> -#include <linux/ktime.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/module.h> +#include <linux/trace_clock.h> #include <linux/kallsyms.h> +#include <linux/linkage.h> #include <linux/bitops.h> +#include <linux/module.h> +#include <linux/ktime.h> #include <linux/sched.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/fs.h> + +#include <asm/ftrace.h> #ifdef CONFIG_FUNCTION_TRACER @@ -95,9 +98,41 @@ stack_trace_sysctl(struct ctl_table *table, int write, loff_t *ppos); #endif +struct ftrace_func_command { + struct list_head list; + char *name; + int (*func)(char *func, char *cmd, + char *params, int enable); +}; + #ifdef CONFIG_DYNAMIC_FTRACE -/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ -#include <asm/ftrace.h> + +int ftrace_arch_code_modify_prepare(void); +int ftrace_arch_code_modify_post_process(void); + +struct seq_file; + 
+struct ftrace_probe_ops { + void (*func)(unsigned long ip, + unsigned long parent_ip, + void **data); + int (*callback)(unsigned long ip, void **data); + void (*free)(void **data); + int (*print)(struct seq_file *m, + unsigned long ip, + struct ftrace_probe_ops *ops, + void *data); +}; + +extern int +register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, + void *data); +extern void +unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, + void *data); +extern void +unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); +extern void unregister_ftrace_function_probe_all(char *glob); enum { FTRACE_FL_FREE = (1 << 0), @@ -110,15 +145,23 @@ enum { }; struct dyn_ftrace { - struct list_head list; - unsigned long ip; /* address of mcount call-site */ - unsigned long flags; - struct dyn_arch_ftrace arch; + union { + unsigned long ip; /* address of mcount call-site */ + struct dyn_ftrace *freelist; + }; + union { + unsigned long flags; + struct dyn_ftrace *newlist; + }; + struct dyn_arch_ftrace arch; }; int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); +int register_ftrace_command(struct ftrace_func_command *cmd); +int unregister_ftrace_command(struct ftrace_func_command *cmd); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern int ftrace_dyn_arch_init(void *data); @@ -126,6 +169,10 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); + +#ifndef FTRACE_ADDR +#define FTRACE_ADDR ((unsigned long)ftrace_caller) +#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); extern int ftrace_enable_ftrace_graph_caller(void); @@ -136,7 +183,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } #endif /** - * ftrace_make_nop - convert code into top + * ftrace_make_nop - convert code into nop * @mod: 
module structure if called by module load initialization * @rec: the mcount call site record * @addr: the address that the call site should be calling @@ -181,7 +228,6 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); - /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); @@ -198,6 +244,14 @@ extern void ftrace_enable_daemon(void); # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) static inline void ftrace_release(void *start, unsigned long size) { } +static inline int register_ftrace_command(struct ftrace_func_command *cmd) +{ + return -EINVAL; +} +static inline int unregister_ftrace_command(char *cmd_name) +{ + return -EINVAL; +} #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ @@ -233,24 +287,25 @@ static inline void __ftrace_enabled_restore(int enabled) #endif } -#ifdef CONFIG_FRAME_POINTER -/* TODO: need to fix this for ARM */ -# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) -# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) -# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) -# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) -# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) -# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) -#else -# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -# define CALLER_ADDR1 0UL -# define CALLER_ADDR2 0UL -# define CALLER_ADDR3 0UL -# define CALLER_ADDR4 0UL -# define CALLER_ADDR5 0UL -# define CALLER_ADDR6 0UL -#endif +#ifndef HAVE_ARCH_CALLER_ADDR +# ifdef CONFIG_FRAME_POINTER +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +# define CALLER_ADDR2 
((unsigned long)__builtin_return_address(2)) +# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) +# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) +# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) +# else +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 0UL +# define CALLER_ADDR2 0UL +# define CALLER_ADDR3 0UL +# define CALLER_ADDR4 0UL +# define CALLER_ADDR5 0UL +# define CALLER_ADDR6 0UL +# endif +#endif /* ifndef HAVE_ARCH_CALLER_ADDR */ #ifdef CONFIG_IRQSOFF_TRACER extern void time_hardirqs_on(unsigned long a0, unsigned long a1); @@ -268,54 +323,6 @@ static inline void __ftrace_enabled_restore(int enabled) # define trace_preempt_off(a0, a1) do { } while (0) #endif -#ifdef CONFIG_TRACING -extern int ftrace_dump_on_oops; - -extern void tracing_start(void); -extern void tracing_stop(void); -extern void ftrace_off_permanent(void); - -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); - -/** - * ftrace_printk - printf formatting in the ftrace buffer - * @fmt: the printf format for printing - * - * Note: __ftrace_printk is an internal function for ftrace_printk and - * the @ip is passed in via the ftrace_printk macro. - * - * This function allows a kernel developer to debug fast path sections - * that printk is not appropriate for. By scattering in various - * printk like tracing in the code, a developer can quickly see - * where problems are occurring. - * - * This is intended as a debugging tool for the developer only. - * Please refrain from leaving ftrace_printks scattered around in - * your code. - */ -# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt) -extern int -__ftrace_printk(unsigned long ip, const char *fmt, ...) 
- __attribute__ ((format (printf, 2, 3))); -extern void ftrace_dump(void); -#else -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } -static inline int -ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); - -static inline void tracing_start(void) { } -static inline void tracing_stop(void) { } -static inline void ftrace_off_permanent(void) { } -static inline int -ftrace_printk(const char *fmt, ...) -{ - return 0; -} -static inline void ftrace_dump(void) { } -#endif - #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); extern void ftrace_init_module(struct module *mod, @@ -327,36 +334,6 @@ ftrace_init_module(struct module *mod, unsigned long *start, unsigned long *end) { } #endif -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -struct power_trace { -#ifdef CONFIG_POWER_TRACER - ktime_t stamp; - ktime_t end; - int type; - int state; -#endif -}; - -#ifdef CONFIG_POWER_TRACER -extern void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int state); -extern void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int state); -extern void trace_power_end(struct power_trace *it); -#else -static inline void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int state) { } -static inline void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int state) { } -static inline void trace_power_end(struct power_trace *it) { } -#endif - - /* * Structure that defines an entry function trace. 
*/ @@ -398,8 +375,7 @@ struct ftrace_ret_stack { extern void return_to_handler(void); extern int -ftrace_push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func, int *depth); +ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); extern void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); @@ -514,6 +490,50 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) return tsk->trace & TSK_TRACE_FL_GRAPH; } +extern int ftrace_dump_on_oops; + #endif /* CONFIG_TRACING */ + +#ifdef CONFIG_HW_BRANCH_TRACER + +void trace_hw_branch(u64 from, u64 to); +void trace_hw_branch_oops(void); + +#else /* CONFIG_HW_BRANCH_TRACER */ + +static inline void trace_hw_branch(u64 from, u64 to) {} +static inline void trace_hw_branch_oops(void) {} + +#endif /* CONFIG_HW_BRANCH_TRACER */ + +/* + * A syscall entry in the ftrace syscalls array. + * + * @name: name of the syscall + * @nb_args: number of parameters it takes + * @types: list of types as strings + * @args: list of args as strings (args[i] matches types[i]) + */ +struct syscall_metadata { + const char *name; + int nb_args; + const char **types; + const char **args; +}; + +#ifdef CONFIG_FTRACE_SYSCALLS +extern void arch_init_ftrace_syscalls(void); +extern struct syscall_metadata *syscall_nr_to_meta(int nr); +extern void start_ftrace_syscalls(void); +extern void stop_ftrace_syscalls(void); +extern void ftrace_syscall_enter(struct pt_regs *regs); +extern void ftrace_syscall_exit(struct pt_regs *regs); +#else +static inline void start_ftrace_syscalls(void) { } +static inline void stop_ftrace_syscalls(void) { } +static inline void ftrace_syscall_enter(struct pt_regs *regs) { } +static inline void ftrace_syscall_exit(struct pt_regs *regs) { } +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 366a054d0b05..dca7bf8cffe2 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h 
@@ -2,7 +2,7 @@ #define _LINUX_FTRACE_IRQ_H -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) +#ifdef CONFIG_FTRACE_NMI_ENTER extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/include/linux/gfp.h b/include/linux/gfp.h index dd20cd78faa8..0bbc15f54536 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -4,6 +4,7 @@ #include <linux/mmzone.h> #include <linux/stddef.h> #include <linux/linkage.h> +#include <linux/topology.h> struct vm_area_struct; diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index f83288347dda..faa1cf848bcd 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -15,55 +15,61 @@ * - bits 0-7 are the preemption count (max preemption depth: 256) * - bits 8-15 are the softirq count (max # of softirqs: 256) * - * The hardirq count can be overridden per architecture, the default is: + * The hardirq count can in theory reach the same as NR_IRQS. + * In reality, the number of nested IRQS is limited to the stack + * size as well. For archs with over 1000 IRQS it is not practical + * to expect that they will all nest. We give a max of 10 bits for + * hardirq nesting. An arch may choose to give less than 10 bits. + * m68k expects it to be 8. * - * - bits 16-27 are the hardirq count (max # of hardirqs: 4096) - * - ( bit 28 is the PREEMPT_ACTIVE flag. 
) + * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024) + * - bit 26 is the NMI_MASK + * - bit 28 is the PREEMPT_ACTIVE flag * * PREEMPT_MASK: 0x000000ff * SOFTIRQ_MASK: 0x0000ff00 - * HARDIRQ_MASK: 0x0fff0000 + * HARDIRQ_MASK: 0x03ff0000 + * NMI_MASK: 0x04000000 */ #define PREEMPT_BITS 8 #define SOFTIRQ_BITS 8 +#define NMI_BITS 1 -#ifndef HARDIRQ_BITS -#define HARDIRQ_BITS 12 +#define MAX_HARDIRQ_BITS 10 -#ifndef MAX_HARDIRQS_PER_CPU -#define MAX_HARDIRQS_PER_CPU NR_IRQS +#ifndef HARDIRQ_BITS +# define HARDIRQ_BITS MAX_HARDIRQ_BITS #endif -/* - * The hardirq mask has to be large enough to have space for potentially - * all IRQ sources in the system nesting on a single CPU. - */ -#if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU -# error HARDIRQ_BITS is too low! -#endif +#if HARDIRQ_BITS > MAX_HARDIRQ_BITS +#error HARDIRQ_BITS too high! #endif #define PREEMPT_SHIFT 0 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) +#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) #define __IRQ_MASK(x) ((1UL << (x))-1) #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) +#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) +#define NMI_OFFSET (1UL << NMI_SHIFT) -#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS)) +#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) #error PREEMPT_ACTIVE is too low! 
#endif #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #define softirq_count() (preempt_count() & SOFTIRQ_MASK) -#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) +#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ + | NMI_MASK)) /* * Are we doing bottom half or hardware interrupt processing? @@ -73,6 +79,11 @@ #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) +/* + * Are we in NMI context? + */ +#define in_nmi() (preempt_count() & NMI_MASK) + #if defined(CONFIG_PREEMPT) # define PREEMPT_INATOMIC_BASE kernel_locked() # define PREEMPT_CHECK_OFFSET 1 @@ -164,20 +175,24 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() \ - do { \ - ftrace_nmi_enter(); \ - lockdep_off(); \ - rcu_nmi_enter(); \ - __irq_enter(); \ +#define nmi_enter() \ + do { \ + ftrace_nmi_enter(); \ + BUG_ON(in_nmi()); \ + add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + lockdep_off(); \ + rcu_nmi_enter(); \ + trace_hardirq_enter(); \ } while (0) -#define nmi_exit() \ - do { \ - __irq_exit(); \ - rcu_nmi_exit(); \ - lockdep_on(); \ - ftrace_nmi_exit(); \ +#define nmi_exit() \ + do { \ + trace_hardirq_exit(); \ + rcu_nmi_exit(); \ + lockdep_on(); \ + BUG_ON(!in_nmi()); \ + sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + ftrace_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/hdreg.h b/include/linux/hdreg.h index ed21bd3dbd25..29ee2873f4a8 100644 --- a/include/linux/hdreg.h +++ b/include/linux/hdreg.h @@ -1,68 +1,6 @@ #ifndef _LINUX_HDREG_H #define _LINUX_HDREG_H -#ifdef __KERNEL__ -#include <linux/ata.h> - -/* - * This file contains some defines for the AT-hd-controller. - * Various sources. - */ - -/* ide.c has its own port definitions in "ide.h" */ - -#define HD_IRQ 14 - -/* Hd controller regs. 
Ref: IBM AT Bios-listing */ -#define HD_DATA 0x1f0 /* _CTL when writing */ -#define HD_ERROR 0x1f1 /* see err-bits */ -#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ -#define HD_SECTOR 0x1f3 /* starting sector */ -#define HD_LCYL 0x1f4 /* starting cylinder */ -#define HD_HCYL 0x1f5 /* high byte of starting cyl */ -#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ -#define HD_STATUS 0x1f7 /* see status-bits */ -#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ -#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ -#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ - -#define HD_CMD 0x3f6 /* used for resets */ -#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ - -/* remainder is shared between hd.c, ide.c, ide-cd.c, and the hdparm utility */ - -/* Bits of HD_STATUS */ -#define ERR_STAT 0x01 -#define INDEX_STAT 0x02 -#define ECC_STAT 0x04 /* Corrected error */ -#define DRQ_STAT 0x08 -#define SEEK_STAT 0x10 -#define SRV_STAT 0x10 -#define WRERR_STAT 0x20 -#define READY_STAT 0x40 -#define BUSY_STAT 0x80 - -/* Bits for HD_ERROR */ -#define MARK_ERR 0x01 /* Bad address mark */ -#define ILI_ERR 0x01 /* Illegal Length Indication (ATAPI) */ -#define TRK0_ERR 0x02 /* couldn't find track 0 */ -#define EOM_ERR 0x02 /* End Of Media (ATAPI) */ -#define ABRT_ERR 0x04 /* Command aborted */ -#define MCR_ERR 0x08 /* media change request */ -#define ID_ERR 0x10 /* ID field not found */ -#define MC_ERR 0x20 /* media changed */ -#define ECC_ERR 0x40 /* Uncorrectable ECC error */ -#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ -#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ -#define LFS_ERR 0xf0 /* Last Failed Sense (ATAPI) */ - -/* Bits of HD_NSECTOR */ -#define CD 0x01 -#define IO 0x02 -#define REL 0x04 -#define TAG_MASK 0xf8 -#endif /* __KERNEL__ */ - #include <linux/types.h> /* @@ -191,6 +129,7 @@ typedef struct hd_drive_hob_hdr { 
#define TASKFILE_INVALID 0x7fff #endif +#ifndef __KERNEL__ /* ATA/ATAPI Commands pre T13 Spec */ #define WIN_NOP 0x00 /* @@ -379,6 +318,7 @@ typedef struct hd_drive_hob_hdr { #define SECURITY_ERASE_UNIT 0xBD #define SECURITY_FREEZE_LOCK 0xBE #define SECURITY_DISABLE_PASSWORD 0xBF +#endif /* __KERNEL__ */ struct hd_geometry { unsigned char heads; @@ -448,6 +388,7 @@ enum { #define __NEW_HD_DRIVE_ID +#ifndef __KERNEL__ /* * Structure returned by HDIO_GET_IDENTITY, as per ANSI NCITS ATA6 rev.1b spec. * @@ -699,6 +640,7 @@ struct hd_driveid { * 7:0 Signature */ }; +#endif /* __KERNEL__ */ /* * IDE "nice" flags. These are used on a per drive basis to determine diff --git a/include/linux/hid.h b/include/linux/hid.h index fa8ee9cef7be..a72876e43589 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -270,6 +270,7 @@ struct hid_item { #define HID_QUIRK_INVERT 0x00000001 #define HID_QUIRK_NOTOUCH 0x00000002 +#define HID_QUIRK_IGNORE 0x00000004 #define HID_QUIRK_NOGET 0x00000008 #define HID_QUIRK_BADPAD 0x00000020 #define HID_QUIRK_MULTI_INPUT 0x00000040 @@ -603,12 +604,17 @@ struct hid_ll_driver { int (*open)(struct hid_device *hdev); void (*close)(struct hid_device *hdev); + int (*power)(struct hid_device *hdev, int level); + int (*hidinput_input_event) (struct input_dev *idev, unsigned int type, unsigned int code, int value); int (*parse)(struct hid_device *hdev); }; +#define PM_HINT_FULLON 1<<5 +#define PM_HINT_NORMAL 1<<1 + /* Applications from HID Usage Tables 4/8/99 Version 1.1 */ /* We ignore a few input applications that are not widely used */ #define IS_INPUT_APPLICATION(a) (((a >= 0x00010000) && (a <= 0x00010008)) || (a == 0x00010080) || (a == 0x000c0001) || (a == 0x000d0002)) @@ -641,6 +647,7 @@ int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int void hid_output_report(struct hid_report *report, __u8 *data); struct hid_device *hid_allocate_device(void); int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); 
+int hid_check_keys_pressed(struct hid_device *hid); int hid_connect(struct hid_device *hid, unsigned int connect_mask); /** @@ -791,21 +798,5 @@ dbg_hid(const char *fmt, ...) __FILE__ , ## arg) #endif /* HID_FF */ -#ifdef __KERNEL__ -#ifdef CONFIG_HID_COMPAT -#define HID_COMPAT_LOAD_DRIVER(name) \ -/* prototype to avoid sparse warning */ \ -extern void hid_compat_##name(void); \ -void hid_compat_##name(void) { } \ -EXPORT_SYMBOL(hid_compat_##name) -#else -#define HID_COMPAT_LOAD_DRIVER(name) -#endif /* HID_COMPAT */ -#define HID_COMPAT_CALL_DRIVER(name) do { \ - extern void hid_compat_##name(void); \ - hid_compat_##name(); \ -} while (0) -#endif /* __KERNEL__ */ - #endif diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 7ff5c55f9b55..1fcb7126a01f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -19,8 +19,21 @@ static inline void flush_kernel_dcache_page(struct page *page) } #endif -#ifdef CONFIG_HIGHMEM +#include <asm/kmap_types.h> + +#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT) + +void debug_kmap_atomic(enum km_type type); + +#else +static inline void debug_kmap_atomic(enum km_type type) +{ +} + +#endif + +#ifdef CONFIG_HIGHMEM #include <asm/highmem.h> /* declarations for linux/mm/highmem.c */ @@ -44,8 +57,6 @@ static inline void *kmap(struct page *page) #define kunmap(page) do { (void) (page); } while (0) -#include <asm/kmap_types.h> - static inline void *kmap_atomic(struct page *page, enum km_type idx) { pagefault_disable(); @@ -187,16 +198,4 @@ static inline void copy_highpage(struct page *to, struct page *from) kunmap_atomic(vto, KM_USER1); } -#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT) - -void debug_kmap_atomic(enum km_type type); - -#else - -static inline void debug_kmap_atomic(enum km_type type) -{ -} - -#endif - #endif /* _LINUX_HIGHMEM_H */ diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index 8137f660a5cc..0dc80ef24975 100644 --- 
a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -218,6 +218,53 @@ int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); /*----------------------------------------------------------------------*/ +/* Power bus message definitions */ + +#define DEV_GRP_NULL 0x0 +#define DEV_GRP_P1 0x1 +#define DEV_GRP_P2 0x2 +#define DEV_GRP_P3 0x4 + +#define RES_GRP_RES 0x0 +#define RES_GRP_PP 0x1 +#define RES_GRP_RC 0x2 +#define RES_GRP_PP_RC 0x3 +#define RES_GRP_PR 0x4 +#define RES_GRP_PP_PR 0x5 +#define RES_GRP_RC_PR 0x6 +#define RES_GRP_ALL 0x7 + +#define RES_TYPE2_R0 0x0 + +#define RES_TYPE_ALL 0x7 + +#define RES_STATE_WRST 0xF +#define RES_STATE_ACTIVE 0xE +#define RES_STATE_SLEEP 0x8 +#define RES_STATE_OFF 0x0 + +/* + * Power Bus Message Format ... these can be sent individually by Linux, + * but are usually part of downloaded scripts that are run when various + * power events are triggered. + * + * Broadcast Message (16 Bits): + * DEV_GRP[15:13] MT[12] RES_GRP[11:9] RES_TYPE2[8:7] RES_TYPE[6:4] + * RES_STATE[3:0] + * + * Singular Message (16 Bits): + * DEV_GRP[15:13] MT[12] RES_ID[11:4] RES_STATE[3:0] + */ + +#define MSG_BROADCAST(devgrp, grp, type, type2, state) \ + ( (devgrp) << 13 | 1 << 12 | (grp) << 9 | (type2) << 7 \ + | (type) << 4 | (state)) + +#define MSG_SINGULAR(devgrp, id, state) \ + ((devgrp) << 13 | 0 << 12 | (id) << 4 | (state)) + +/*----------------------------------------------------------------------*/ + struct twl4030_bci_platform_data { int *battery_tmp_tbl; unsigned int tblsize; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1d6c71d96ede..77214ead1a36 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -123,7 +123,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) #define ecap_max_handle_mask(e) ((e >> 20) & 0xf) - +#define ecap_sc_support(e) ((e >> 7) & 0x1) /* 
Snooping Control */ /* IOTLB_REG */ #define DMA_TLB_FLUSH_GRANU_OFFSET 60 diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c68bffd182bb..ce2c07d99fc3 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -278,6 +278,11 @@ enum NR_SOFTIRQS }; +/* map softirq index to softirq name. update 'softirq_to_name' in + * kernel/softirq.c when adding a new softirq. + */ +extern char *softirq_to_name[NR_SOFTIRQS]; + /* softirq mask and active fields moved to irq_cpustat_t in * asm/hardirq.h to get better cache usage. KAO */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8a7bfb1b6ca0..3af4ffd591b9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -21,6 +21,7 @@ #define IOMMU_READ (1) #define IOMMU_WRITE (2) +#define IOMMU_CACHE (4) /* DMA cache coherency */ struct device; @@ -28,6 +29,8 @@ struct iommu_domain { void *priv; }; +#define IOMMU_CAP_CACHE_COHERENCY 0x1 + struct iommu_ops { int (*domain_init)(struct iommu_domain *domain); void (*domain_destroy)(struct iommu_domain *domain); @@ -39,6 +42,8 @@ struct iommu_ops { size_t size); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, unsigned long iova); + int (*domain_has_cap)(struct iommu_domain *domain, + unsigned long cap); }; #ifdef CONFIG_IOMMU_API @@ -57,6 +62,8 @@ extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, size_t size); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, unsigned long iova); +extern int iommu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap); #else /* CONFIG_IOMMU_API */ @@ -107,6 +114,12 @@ static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, return 0; } +static inline int domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + return 0; +} + #endif /* CONFIG_IOMMU_API */ #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 74bde13224c9..b02a3f1d46a0 100644 --- 
a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -24,8 +24,8 @@ # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) -# define trace_softirq_enter() do { current->softirq_context++; } while (0) -# define trace_softirq_exit() do { current->softirq_context--; } while (0) +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else # define trace_hardirqs_on() do { } while (0) @@ -38,8 +38,8 @@ # define trace_softirqs_enabled(p) 0 # define trace_hardirq_enter() do { } while (0) # define trace_hardirq_exit() do { } while (0) -# define trace_softirq_enter() do { } while (0) -# define trace_softirq_exit() do { } while (0) +# define lockdep_softirq_enter() do { } while (0) +# define lockdep_softirq_exit() do { } while (0) # define INIT_TRACE_IRQFLAGS #endif diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 64246dce5663..53ae4399da2d 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -35,7 +35,7 @@ #define journal_oom_retry 1 /* - * Define JBD_PARANIOD_IOFAIL to cause a kernel BUG() if ext3 finds + * Define JBD_PARANOID_IOFAIL to cause a kernel BUG() if ext3 finds * certain classes of error which can occur due to failed IOs. Under * normal use we want ext3 to continue after such errors, because * hardware _can_ fail, but for debugging purposes when running tests on @@ -552,6 +552,11 @@ struct transaction_s */ int t_handle_count; + /* + * This transaction is being forced and some process is + * waiting for it to finish. 
+ */ + int t_synchronous_commit:1; }; /** diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index f3fe34391d8e..792274269f2b 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -13,10 +13,17 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) +struct module; + #ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); +/* Call a function on each kallsyms symbol in the core kernel */ +int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, + unsigned long), + void *data); + extern int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset); @@ -43,6 +50,14 @@ static inline unsigned long kallsyms_lookup_name(const char *name) return 0; } +static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, + unsigned long), + void *data) +{ + return 0; +} + static inline int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 556d781e69fe..d9e75ec7def5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -242,6 +242,19 @@ extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); + +/* + * Print a one-time message (analogous to WARN_ONCE() et al): + */ +#define printk_once(x...) 
({ \ + static int __print_once = 1; \ + \ + if (__print_once) { \ + __print_once = 0; \ + printk(x); \ + } \ +}) + void log_buf_kexec_setup(void); #else static inline int vprintk(const char *s, va_list args) @@ -254,6 +267,10 @@ static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ unsigned int interval_msec) \ { return false; } + +/* No effect, but we still get type checking even in the !PRINTK case: */ +#define printk_once(x...) printk(x) + static inline void log_buf_kexec_setup(void) { } @@ -375,6 +392,139 @@ static inline char *pack_hex_byte(char *buf, u8 byte) #endif /* + * General tracing related utility functions - trace_printk(), + * tracing_on/tracing_off and tracing_start()/tracing_stop + * + * Use tracing_on/tracing_off when you want to quickly turn on or off + * tracing. It simply enables or disables the recording of the trace events. + * This also corresponds to the user space debugfs/tracing/tracing_on + * file, which gives a means for the kernel and userspace to interact. + * Place a tracing_off() in the kernel where you want tracing to end. + * From user space, examine the trace, and then echo 1 > tracing_on + * to continue tracing. + * + * tracing_stop/tracing_start has slightly more overhead. It is used + * by things like suspend to ram where disabling the recording of the + * trace is not enough, but tracing must actually stop because things + * like calling smp_processor_id() may crash the system. + * + * Most likely, you want to use tracing_on/tracing_off. 
+ */ +#ifdef CONFIG_RING_BUFFER +void tracing_on(void); +void tracing_off(void); +/* tracing_off_permanent stops recording with no way to bring it back */ +void tracing_off_permanent(void); +int tracing_is_on(void); +#else +static inline void tracing_on(void) { } +static inline void tracing_off(void) { } +static inline void tracing_off_permanent(void) { } +static inline int tracing_is_on(void) { return 0; } +#endif +#ifdef CONFIG_TRACING +extern void tracing_start(void); +extern void tracing_stop(void); +extern void ftrace_off_permanent(void); + +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); + +static inline void __attribute__ ((format (printf, 1, 2))) +____trace_printk_check_format(const char *fmt, ...) +{ +} +#define __trace_printk_check_format(fmt, args...) \ +do { \ + if (0) \ + ____trace_printk_check_format(fmt, ##args); \ +} while (0) + +/** + * trace_printk - printf formatting in the ftrace buffer + * @fmt: the printf format for printing + * + * Note: __trace_printk is an internal function for trace_printk and + * the @ip is passed in via the trace_printk macro. + * + * This function allows a kernel developer to debug fast path sections + * that printk is not appropriate for. By scattering in various + * printk like tracing in the code, a developer can quickly see + * where problems are occurring. + * + * This is intended as a debugging tool for the developer only. + * Please refrain from leaving trace_printks scattered around in + * your code. + */ + +#define trace_printk(fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(_THIS_IP_, fmt, ##args); \ +} while (0) + +extern int +__trace_bprintk(unsigned long ip, const char *fmt, ...) 
+ __attribute__ ((format (printf, 2, 3))); + +extern int +__trace_printk(unsigned long ip, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); + +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement. + */ +#define ftrace_vprintk(fmt, vargs) \ +do { \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ + } else \ + __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ +} while (0) + +extern int +__ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); + +extern int +__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); + +extern void ftrace_dump(void); +#else +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +static inline int +trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); + +static inline void tracing_start(void) { } +static inline void tracing_stop(void) { } +static inline void ftrace_off_permanent(void) { } +static inline int +trace_printk(const char *fmt, ...) +{ + return 0; +} +static inline int +ftrace_vprintk(const char *fmt, va_list ap) +{ + return 0; +} +static inline void ftrace_dump(void) { } +#endif /* CONFIG_TRACING */ + +/* * Display an IP address in readable format. */ diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 92213a9194e1..d5fa565086d1 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -29,10 +29,15 @@ #ifdef CONFIG_MODULES /* modprobe exit status on success, -ve on error. Return value * usually useless though. */ -extern int request_module(const char * name, ...) __attribute__ ((format (printf, 1, 2))); -#define try_then_request_module(x, mod...) 
((x) ?: (request_module(mod), (x))) +extern int __request_module(bool wait, const char *name, ...) \ + __attribute__((format(printf, 2, 3))); +#define request_module(mod...) __request_module(true, mod) +#define request_module_nowait(mod...) __request_module(false, mod) +#define try_then_request_module(x, mod...) \ + ((x) ?: (__request_module(false, mod), (x))) #else -static inline int request_module(const char * name, ...) { return -ENOSYS; } +static inline int request_module(const char *name, ...) { return -ENOSYS; } +static inline int request_module_nowait(const char *name, ...) { return -ENOSYS; } #define try_then_request_module(x, mod...) (x) #endif diff --git a/include/linux/leds-bd2802.h b/include/linux/leds-bd2802.h new file mode 100644 index 000000000000..42f854a1a199 --- /dev/null +++ b/include/linux/leds-bd2802.h @@ -0,0 +1,26 @@ +/* + * leds-bd2802.h - RGB LED Driver + * + * Copyright (C) 2009 Samsung Electronics + * Kim Kyuwon <q1.kim@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Datasheet: http://www.rohm.com/products/databook/driver/pdf/bd2802gu-e.pdf + * + */ +#ifndef _LEDS_BD2802_H_ +#define _LEDS_BD2802_H_ + +struct bd2802_led_platform_data{ + int reset_gpio; + u8 rgb_time; +}; + +#define RGB_TIME(slopedown, slopeup, waveform) \ + ((slopedown) << 6 | (slopeup) << 4 | (waveform)) + +#endif /* _LEDS_BD2802_H_ */ + diff --git a/include/linux/leds.h b/include/linux/leds.h index 24489da701e3..376fe07732ea 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -30,6 +30,7 @@ enum led_brightness { struct led_classdev { const char *name; int brightness; + int max_brightness; int flags; /* Lower 16 bits reflect status */ @@ -140,7 +141,8 @@ struct gpio_led { const char *name; const char *default_trigger; unsigned gpio; - u8 active_low; + u8 active_low : 1; + u8 retain_state_suspended : 1; }; struct gpio_led_platform_data { diff --git a/include/linux/leds_pwm.h b/include/linux/leds_pwm.h new file mode 100644 index 000000000000..33a071167489 --- /dev/null +++ b/include/linux/leds_pwm.h @@ -0,0 +1,21 @@ +/* + * PWM LED driver data - see drivers/leds/leds-pwm.c + */ +#ifndef __LINUX_LEDS_PWM_H +#define __LINUX_LEDS_PWM_H + +struct led_pwm { + const char *name; + const char *default_trigger; + unsigned pwm_id; + u8 active_low; + unsigned max_brightness; + unsigned pwm_period_ns; +}; + +struct led_pwm_platform_data { + int num_leds; + struct led_pwm *leds; +}; + +#endif diff --git a/include/linux/libata.h b/include/linux/libata.h index 76262d83656b..b450a2628855 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -379,7 +379,7 @@ enum { ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands not multiple of 16 bytes */ - ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firwmare update warning */ + ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firmware update warning */ ATA_HORKAGE_1_5_GBPS = (1 << 13), /* force 1.5 Gbps */ /* DMA mask for user DMA control: 
User visible values; DO NOT diff --git a/include/linux/memory.h b/include/linux/memory.h index 42767d1a62e7..37fa19b34ef5 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -110,4 +110,10 @@ struct memory_accessor { off_t offset, size_t count); }; +/* + * Kernel text modification mutex, used for code patching. Users of this lock + * can sleep. + */ +extern struct mutex text_mutex; + #endif /* _LINUX_MEMORY_H_ */ diff --git a/include/linux/mfd/ds1wm.h b/include/linux/mfd/ds1wm.h new file mode 100644 index 000000000000..be469a357cbb --- /dev/null +++ b/include/linux/mfd/ds1wm.h @@ -0,0 +1,6 @@ +/* MFD cell driver data for the DS1WM driver */ + +struct ds1wm_driver_data { + int active_high; + int clock_rate; +}; diff --git a/include/linux/mfd/htc-pasic3.h b/include/linux/mfd/htc-pasic3.h index b4294f12c4f8..3d3ed67bd969 100644 --- a/include/linux/mfd/htc-pasic3.h +++ b/include/linux/mfd/htc-pasic3.h @@ -48,7 +48,6 @@ struct pasic3_leds_machinfo { struct pasic3_platform_data { struct pasic3_leds_machinfo *led_pdata; - unsigned int bus_shift; unsigned int clock_rate; }; diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 980669d50dca..42cca672f340 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -640,9 +640,11 @@ struct wm8350 { * * @init: Function called during driver initialisation. Should be * used by the platform to configure GPIO functions and similar. + * @irq_high: Set if WM8350 IRQ is active high. 
*/ struct wm8350_platform_data { int (*init)(struct wm8350 *wm8350); + int irq_high; }; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 4e457256bd33..3e7615e9087e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -192,5 +192,10 @@ static inline void mmc_signal_sdio_irq(struct mmc_host *host) wake_up_process(host->sdio_irq_thread); } +struct regulator; + +int mmc_regulator_get_ocrmask(struct regulator *supply); +int mmc_regulator_set_ocr(struct regulator *supply, unsigned short vdd_bit); + #endif diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 26ef24076b76..186ec6ab334d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -764,12 +764,6 @@ extern int numa_zonelist_order_handler(struct ctl_table *, int, extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ -#include <linux/topology.h> -/* Returns the number of the current Node. */ -#ifndef numa_node_id -#define numa_node_id() (cpu_to_node(raw_smp_processor_id())) -#endif - #ifndef CONFIG_NEED_MULTIPLE_NODES extern struct pglist_data contig_page_data; diff --git a/include/linux/module.h b/include/linux/module.h index 145a75528cc1..627ac082e2a6 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -248,6 +248,10 @@ struct module const unsigned long *crcs; unsigned int num_syms; + /* Kernel parameters. */ + struct kernel_param *kp; + unsigned int num_kp; + /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; @@ -329,6 +333,11 @@ struct module unsigned int num_tracepoints; #endif +#ifdef CONFIG_TRACING + const char **trace_bprintk_fmt_start; + unsigned int num_trace_bprintk_fmt; +#endif + #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? 
*/ struct list_head modules_which_use_me; @@ -350,6 +359,8 @@ struct module #define MODULE_ARCH_INIT {} #endif +extern struct mutex module_mutex; + /* FIXME: It'd be nice to isolate modules during init, too, so they aren't used before they (may) fail. But presently too much code (IDE & SCSI) require entry into the module during init.*/ @@ -358,10 +369,10 @@ static inline int module_is_live(struct module *mod) return mod->state != MODULE_STATE_GOING; } -/* Is this address in a module? (second is with no locks, for oops) */ -struct module *module_text_address(unsigned long addr); struct module *__module_text_address(unsigned long addr); -int is_module_address(unsigned long addr); +struct module *__module_address(unsigned long addr); +bool is_module_address(unsigned long addr); +bool is_module_text_address(unsigned long addr); static inline int within_module_core(unsigned long addr, struct module *mod) { @@ -375,6 +386,31 @@ static inline int within_module_init(unsigned long addr, struct module *mod) addr < (unsigned long)mod->module_init + mod->init_size; } +/* Search for module by name: must hold module_mutex. */ +struct module *find_module(const char *name); + +struct symsearch { + const struct kernel_symbol *start, *stop; + const unsigned long *crcs; + enum { + NOT_GPL_ONLY, + GPL_ONLY, + WILL_BE_GPL_ONLY, + } licence; + bool unused; +}; + +/* Search for an exported symbol by name. */ +const struct kernel_symbol *find_symbol(const char *name, + struct module **owner, + const unsigned long **crc, + bool gplok, + bool warn); + +/* Walk the exported symbol table */ +bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, + unsigned int symnum, void *data), void *data); + /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. 
*/ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, @@ -383,6 +419,10 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); +int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, unsigned long), + void *data); + extern void __module_put_and_exit(struct module *mod, long code) __attribute__((noreturn)); #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code); @@ -444,6 +484,7 @@ static inline void __module_get(struct module *module) #define symbol_put_addr(p) do { } while(0) #endif /* CONFIG_MODULE_UNLOAD */ +int use_module(struct module *a, struct module *b); /* This is a #define so the string doesn't get put in every .o file */ #define module_name(mod) \ @@ -490,21 +531,24 @@ search_module_extables(unsigned long addr) return NULL; } -/* Is this address in a module? */ -static inline struct module *module_text_address(unsigned long addr) +static inline struct module *__module_address(unsigned long addr) { return NULL; } -/* Is this address in a module? 
(don't take a lock, we're oopsing) */ static inline struct module *__module_text_address(unsigned long addr) { return NULL; } -static inline int is_module_address(unsigned long addr) +static inline bool is_module_address(unsigned long addr) { - return 0; + return false; +} + +static inline bool is_module_text_address(unsigned long addr) +{ + return false; } /* Get/put a kernel symbol (calls should be symmetric) */ @@ -559,6 +603,14 @@ static inline unsigned long module_kallsyms_lookup_name(const char *name) return 0; } +static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, + unsigned long), + void *data) +{ + return 0; +} + static inline int register_module_notifier(struct notifier_block * nb) { /* no events will happen anyway, so this can always succeed */ diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index e4af3399ef48..a4f0b931846c 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -138,6 +138,16 @@ extern int parse_args(const char *name, unsigned num, int (*unknown)(char *param, char *val)); +/* Called by module remove. */ +#ifdef CONFIG_SYSFS +extern void destroy_params(const struct kernel_param *params, unsigned num); +#else +static inline void destroy_params(const struct kernel_param *params, + unsigned num) +{ +} +#endif /* !CONFIG_SYSFS */ + /* All the helper functions */ /* The macros to do compile-time type checking stolen from Jakub Jelinek, who IIRC came up with this idea for the 2.4 module init code. 
*/ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index bde2557c2a9c..fdffb413b192 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -185,6 +185,9 @@ struct nfs_inode { fmode_t delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ +#ifdef CONFIG_NFS_FSCACHE + struct fscache_cookie *fscache; +#endif struct inode vfs_inode; }; @@ -207,6 +210,8 @@ struct nfs_inode { #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ #define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ #define NFS_INO_FLUSHING (4) /* inode is flushing out data */ +#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ +#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { @@ -260,6 +265,11 @@ static inline int NFS_STALE(const struct inode *inode) return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags); } +static inline int NFS_FSCACHE(const struct inode *inode) +{ + return test_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); +} + static inline __u64 NFS_FILEID(const struct inode *inode) { return NFS_I(inode)->fileid; @@ -506,6 +516,8 @@ extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); extern void nfs_readdata_release(void *data); +extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, + struct page *); /* * Allocate nfs_read_data structures @@ -583,6 +595,7 @@ extern void * nfs_root_data(void); #define NFSDBG_CALLBACK 0x0100 #define NFSDBG_CLIENT 0x0200 #define NFSDBG_MOUNT 0x0400 +#define NFSDBG_FSCACHE 0x0800 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 29b1e40dce99..6ad75948cbf7 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -64,6 +64,10 @@ struct nfs_client { char cl_ipaddr[48]; unsigned char 
cl_id_uniquifier; #endif + +#ifdef CONFIG_NFS_FSCACHE + struct fscache_cookie *fscache; /* client index cache cookie */ +#endif }; /* @@ -96,12 +100,19 @@ struct nfs_server { unsigned int acdirmin; unsigned int acdirmax; unsigned int namelen; + unsigned int options; /* extra options enabled by mount */ +#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ unsigned long mount_time; /* when this fs was mounted */ dev_t s_dev; /* superblock dev numbers */ +#ifdef CONFIG_NFS_FSCACHE + struct nfs_fscache_key *fscache_key; /* unique key for superblock */ + struct fscache_cookie *fscache; /* superblock cookie */ +#endif + #ifdef CONFIG_NFS_V4 u32 attr_bitmask[2];/* V4 bitmask representing the set of attributes supported on this diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h index 1cb9a3fed2b3..68b10f5f8907 100644 --- a/include/linux/nfs_iostat.h +++ b/include/linux/nfs_iostat.h @@ -116,4 +116,16 @@ enum nfs_stat_eventcounters { __NFSIOS_COUNTSMAX, }; +/* + * NFS local caching servicing counters + */ +enum nfs_stat_fscachecounters { + NFSIOS_FSCACHE_PAGES_READ_OK, + NFSIOS_FSCACHE_PAGES_READ_FAIL, + NFSIOS_FSCACHE_PAGES_WRITTEN_OK, + NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, + NFSIOS_FSCACHE_PAGES_UNCACHED, + __NFSIOS_FSCACHEMAX, +}; + #endif /* _LINUX_NFS_IOSTAT */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 61df1779b2a5..62214c7d2d93 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -82,6 +82,7 @@ enum pageflags { PG_arch_1, PG_reserved, PG_private, /* If pagecache, has fs-private data */ + PG_private_2, /* If pagecache, has fs aux data */ PG_writeback, /* Page is under writeback */ #ifdef CONFIG_PAGEFLAGS_EXTENDED PG_head, /* A head page */ @@ -108,6 +109,12 @@ enum pageflags { /* Filesystems */ PG_checked = PG_owner_priv_1, + /* Two page bits are conscripted by FS-Cache to maintain local caching + * state. 
These bits are set on pages belonging to the netfs's inodes + * when those inodes are being locally cached. + */ + PG_fscache = PG_private_2, /* page backed by cache */ + /* XEN */ PG_pinned = PG_owner_priv_1, PG_savepinned = PG_dirty, @@ -182,7 +189,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } struct page; /* forward declaration */ -TESTPAGEFLAG(Locked, locked) +TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked) PAGEFLAG(Error, error) PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) @@ -194,8 +201,6 @@ PAGEFLAG(Checked, checked) /* Used by some filesystems */ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ PAGEFLAG(SavePinned, savepinned); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) -PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) - __SETPAGEFLAG(Private, private) PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) __PAGEFLAG(SlobPage, slob_page) @@ -205,6 +210,16 @@ __PAGEFLAG(SlubFrozen, slub_frozen) __PAGEFLAG(SlubDebug, slub_debug) /* + * Private page markings that may be used by the filesystem that owns the page + * for its own purposes. + * - PG_private and PG_private_2 cause releasepage() and co to be invoked + */ +PAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) + __CLEARPAGEFLAG(Private, private) +PAGEFLAG(Private2, private_2) TESTSCFLAG(Private2, private_2) +PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) + +/* * Only test-and-set exist for PG_writeback. The unconditional operators are * risky: they bypass page accounting. */ @@ -384,9 +399,10 @@ static inline void __ClearPageTail(struct page *page) * these flags set. It they are, there is a problem. 
*/ #define PAGE_FLAGS_CHECK_AT_FREE \ - (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ - 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ - 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ + (1 << PG_lru | 1 << PG_locked | \ + 1 << PG_private | 1 << PG_private_2 | \ + 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ + 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ __PG_UNEVICTABLE | __PG_MLOCKED) /* @@ -397,4 +413,16 @@ static inline void __ClearPageTail(struct page *page) #define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) #endif /* !__GENERATING_BOUNDS_H */ + +/** + * page_has_private - Determine if page has private stuff + * @page: The page to be checked + * + * Determine if a page has private stuff, indicating that release routines + * should be invoked upon it. + */ +#define page_has_private(page) \ + ((page)->flags & ((1 << PG_private) | \ + (1 << PG_private_2))) + #endif /* PAGE_FLAGS_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 076a7dc67c2b..34da5230faab 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -384,6 +384,11 @@ static inline void wait_on_page_writeback(struct page *page) extern void end_page_writeback(struct page *page); /* + * Add an arbitrary waiter to a page's wait queue + */ +extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter); + +/* * Fault a userspace page into pagetables. Return non-zero on a fault. * * This assumes that two userspace pages are always sufficient. 
That's diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 8ff25e0e7f7a..594c494ac3f0 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -73,6 +73,8 @@ enum power_supply_property { POWER_SUPPLY_PROP_VOLTAGE_AVG, POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_CURRENT_AVG, + POWER_SUPPLY_PROP_POWER_NOW, + POWER_SUPPLY_PROP_POWER_AVG, POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, POWER_SUPPLY_PROP_CHARGE_EMPTY_DESIGN, POWER_SUPPLY_PROP_CHARGE_FULL, diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 3945f803d514..7c775751392c 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -28,4 +28,4 @@ int pwm_enable(struct pwm_device *pwm); */ void pwm_disable(struct pwm_device *pwm); -#endif /* __ASM_ARCH_PWM_H */ +#endif /* __LINUX_PWM_H */ diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h deleted file mode 100644 index e98900671ca9..000000000000 --- a/include/linux/raid/bitmap.h +++ /dev/null @@ -1,288 +0,0 @@ -/* - * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 - * - * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. - */ -#ifndef BITMAP_H -#define BITMAP_H 1 - -#define BITMAP_MAJOR_LO 3 -/* version 4 insists the bitmap is in little-endian order - * with version 3, it is host-endian which is non-portable - */ -#define BITMAP_MAJOR_HI 4 -#define BITMAP_MAJOR_HOSTENDIAN 3 - -#define BITMAP_MINOR 39 - -/* - * in-memory bitmap: - * - * Use 16 bit block counters to track pending writes to each "chunk". - * The 2 high order bits are special-purpose, the first is a flag indicating - * whether a resync is needed. The second is a flag indicating whether a - * resync is active. 
- * This means that the counter is actually 14 bits: - * - * +--------+--------+------------------------------------------------+ - * | resync | resync | counter | - * | needed | active | | - * | (0-1) | (0-1) | (0-16383) | - * +--------+--------+------------------------------------------------+ - * - * The "resync needed" bit is set when: - * a '1' bit is read from storage at startup. - * a write request fails on some drives - * a resync is aborted on a chunk with 'resync active' set - * It is cleared (and resync-active set) when a resync starts across all drives - * of the chunk. - * - * - * The "resync active" bit is set when: - * a resync is started on all drives, and resync_needed is set. - * resync_needed will be cleared (as long as resync_active wasn't already set). - * It is cleared when a resync completes. - * - * The counter counts pending write requests, plus the on-disk bit. - * When the counter is '1' and the resync bits are clear, the on-disk - * bit can be cleared aswell, thus setting the counter to 0. - * When we set a bit, or in the counter (to start a write), if the fields is - * 0, we first set the disk bit and set the counter to 1. - * - * If the counter is 0, the on-disk bit is clear and the stipe is clean - * Anything that dirties the stipe pushes the counter to 2 (at least) - * and sets the on-disk bit (lazily). - * If a periodic sweep find the counter at 2, it is decremented to 1. - * If the sweep find the counter at 1, the on-disk bit is cleared and the - * counter goes to zero. 
- * - * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block - * counters as a fallback when "page" memory cannot be allocated: - * - * Normal case (page memory allocated): - * - * page pointer (32-bit) - * - * [ ] ------+ - * | - * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) - * c1 c2 c2048 - * - * Hijacked case (page memory allocation failed): - * - * hijacked page pointer (32-bit) - * - * [ ][ ] (no page memory allocated) - * counter #1 (16-bit) counter #2 (16-bit) - * - */ - -#ifdef __KERNEL__ - -#define PAGE_BITS (PAGE_SIZE << 3) -#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) - -typedef __u16 bitmap_counter_t; -#define COUNTER_BITS 16 -#define COUNTER_BIT_SHIFT 4 -#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8) -#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) - -#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) -#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) -#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) -#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) -#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) -#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) - -/* how many counters per page? */ -#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) -/* same, except a shift value for more efficient bitops */ -#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) -/* same, except a mask value for more efficient bitops */ -#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) - -#define BITMAP_BLOCK_SIZE 512 -#define BITMAP_BLOCK_SHIFT 9 - -/* how many blocks per chunk? 
(this is variable) */ -#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT) -#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT) -#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1) - -/* when hijacked, the counters and bits represent even larger "chunks" */ -/* there will be 1024 chunks represented by each counter in the page pointers */ -#define PAGEPTR_BLOCK_RATIO(bitmap) \ - (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1) -#define PAGEPTR_BLOCK_SHIFT(bitmap) \ - (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) -#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) - -/* - * on-disk bitmap: - * - * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap - * file a page at a time. There's a superblock at the start of the file. - */ - -/* map chunks (bits) to file pages - offset by the size of the superblock */ -#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3)) - -#endif - -/* - * bitmap structures: - */ - -#define BITMAP_MAGIC 0x6d746962 - -/* use these for bitmap->flags and bitmap->sb->state bit-fields */ -enum bitmap_state { - BITMAP_STALE = 0x002, /* the bitmap file is out of date or had -EIO */ - BITMAP_WRITE_ERROR = 0x004, /* A write error has occurred */ - BITMAP_HOSTENDIAN = 0x8000, -}; - -/* the superblock at the front of the bitmap file -- little endian */ -typedef struct bitmap_super_s { - __le32 magic; /* 0 BITMAP_MAGIC */ - __le32 version; /* 4 the bitmap major for now, could change... 
*/ - __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ - __le64 events; /* 24 event counter for the bitmap (1)*/ - __le64 events_cleared;/*32 event counter when last bit cleared (2) */ - __le64 sync_size; /* 40 the size of the md device's sync range(3) */ - __le32 state; /* 48 bitmap state information */ - __le32 chunksize; /* 52 the bitmap chunk size in bytes */ - __le32 daemon_sleep; /* 56 seconds between disk flushes */ - __le32 write_behind; /* 60 number of outstanding write-behind writes */ - - __u8 pad[256 - 64]; /* set to zero */ -} bitmap_super_t; - -/* notes: - * (1) This event counter is updated before the eventcounter in the md superblock - * When a bitmap is loaded, it is only accepted if this event counter is equal - * to, or one greater than, the event counter in the superblock. - * (2) This event counter is updated when the other one is *if*and*only*if* the - * array is not degraded. As bits are not cleared when the array is degraded, - * this represents the last time that any bits were cleared. - * If a device is being added that has an event count with this value or - * higher, it is accepted as conforming to the bitmap. - * (3)This is the number of sectors represented by the bitmap, and is the range that - * resync happens across. For raid1 and raid5/6 it is the size of individual - * devices. For raid10 it is the size of the array. 
- */ - -#ifdef __KERNEL__ - -/* the in-memory bitmap is represented by bitmap_pages */ -struct bitmap_page { - /* - * map points to the actual memory page - */ - char *map; - /* - * in emergencies (when map cannot be alloced), hijack the map - * pointer and use it as two counters itself - */ - unsigned int hijacked:1; - /* - * count of dirty bits on the page - */ - unsigned int count:31; -}; - -/* keep track of bitmap file pages that have pending writes on them */ -struct page_list { - struct list_head list; - struct page *page; -}; - -/* the main bitmap structure - one per mddev */ -struct bitmap { - struct bitmap_page *bp; - unsigned long pages; /* total number of pages in the bitmap */ - unsigned long missing_pages; /* number of pages not yet allocated */ - - mddev_t *mddev; /* the md device that the bitmap is for */ - - int counter_bits; /* how many bits per block counter */ - - /* bitmap chunksize -- how much data does each bit represent? */ - unsigned long chunksize; - unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */ - unsigned long chunks; /* total number of data chunks for the array */ - - /* We hold a count on the chunk currently being synced, and drop - * it when the last block is started. If the resync is aborted - * midway, we need to be able to drop that count, so we remember - * the counted chunk.. 
- */ - unsigned long syncchunk; - - __u64 events_cleared; - int need_sync; - - /* bitmap spinlock */ - spinlock_t lock; - - long offset; /* offset from superblock if file is NULL */ - struct file *file; /* backing disk file */ - struct page *sb_page; /* cached copy of the bitmap file superblock */ - struct page **filemap; /* list of cache pages for the file */ - unsigned long *filemap_attr; /* attributes associated w/ filemap pages */ - unsigned long file_pages; /* number of pages in the file */ - int last_page_size; /* bytes in the last page */ - - unsigned long flags; - - int allclean; - - unsigned long max_write_behind; /* write-behind mode */ - atomic_t behind_writes; - - /* - * the bitmap daemon - periodically wakes up and sweeps the bitmap - * file, cleaning up bits and flushing out pages to disk as necessary - */ - unsigned long daemon_lastrun; /* jiffies of last run */ - unsigned long daemon_sleep; /* how many seconds between updates? */ - unsigned long last_end_sync; /* when we lasted called end_sync to - * update bitmap with resync progress */ - - atomic_t pending_writes; /* pending writes to the bitmap file */ - wait_queue_head_t write_wait; - wait_queue_head_t overflow_wait; - -}; - -/* the bitmap API */ - -/* these are used only by md/bitmap */ -int bitmap_create(mddev_t *mddev); -void bitmap_flush(mddev_t *mddev); -void bitmap_destroy(mddev_t *mddev); - -void bitmap_print_sb(struct bitmap *bitmap); -void bitmap_update_sb(struct bitmap *bitmap); - -int bitmap_setallbits(struct bitmap *bitmap); -void bitmap_write_all(struct bitmap *bitmap); - -void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e); - -/* these are exported */ -int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int behind); -void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int success, int behind); -int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int degraded); 
-void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted); -void bitmap_close_sync(struct bitmap *bitmap); -void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); - -void bitmap_unplug(struct bitmap *bitmap); -void bitmap_daemon_work(struct bitmap *bitmap); -#endif - -#endif diff --git a/include/linux/raid/linear.h b/include/linux/raid/linear.h deleted file mode 100644 index f38b9c586afb..000000000000 --- a/include/linux/raid/linear.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef _LINEAR_H -#define _LINEAR_H - -#include <linux/raid/md.h> - -struct dev_info { - mdk_rdev_t *rdev; - sector_t num_sectors; - sector_t start_sector; -}; - -typedef struct dev_info dev_info_t; - -struct linear_private_data -{ - struct linear_private_data *prev; /* earlier version */ - dev_info_t **hash_table; - sector_t spacing; - sector_t array_sectors; - int sector_shift; /* shift before dividing - * by spacing - */ - dev_info_t disks[0]; -}; - - -typedef struct linear_private_data linear_conf_t; - -#define mddev_to_conf(mddev) ((linear_conf_t *) mddev->private) - -#endif diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h deleted file mode 100644 index 82bea14cae1a..000000000000 --- a/include/linux/raid/md.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - md.h : Multiple Devices driver for Linux - Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - Copyright (C) 1994-96 Marc ZYNGIER - <zyngier@ufr-info-p7.ibp.fr> or - <maz@gloups.fdn.fr> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#ifndef _MD_H -#define _MD_H - -#include <linux/blkdev.h> -#include <linux/seq_file.h> - -/* - * 'md_p.h' holds the 'physical' layout of RAID devices - * 'md_u.h' holds the user <=> kernel API - * - * 'md_k.h' holds kernel internal definitions - */ - -#include <linux/raid/md_p.h> -#include <linux/raid/md_u.h> -#include <linux/raid/md_k.h> - -#ifdef CONFIG_MD - -/* - * Different major versions are not compatible. - * Different minor versions are only downward compatible. - * Different patchlevel versions are downward and upward compatible. - */ -#define MD_MAJOR_VERSION 0 -#define MD_MINOR_VERSION 90 -/* - * MD_PATCHLEVEL_VERSION indicates kernel functionality. - * >=1 means different superblock formats are selectable using SET_ARRAY_INFO - * and major_version/minor_version accordingly - * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT - * in the super status byte - * >=3 means that bitmap superblock version 4 is supported, which uses - * little-ending representation rather than host-endian - */ -#define MD_PATCHLEVEL_VERSION 3 - -extern int mdp_major; - -extern int register_md_personality(struct mdk_personality *p); -extern int unregister_md_personality(struct mdk_personality *p); -extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), - mddev_t *mddev, const char *name); -extern void md_unregister_thread(mdk_thread_t *thread); -extern void md_wakeup_thread(mdk_thread_t *thread); -extern void md_check_recovery(mddev_t *mddev); -extern void md_write_start(mddev_t *mddev, struct bio *bi); -extern void md_write_end(mddev_t *mddev); -extern void md_done_sync(mddev_t *mddev, int blocks, int ok); -extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); - -extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, - sector_t sector, int size, struct page *page); -extern void md_super_wait(mddev_t *mddev); -extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, - struct page *page, int rw); 
-extern void md_do_sync(mddev_t *mddev); -extern void md_new_event(mddev_t *mddev); -extern int md_allow_write(mddev_t *mddev); -extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); - -#endif /* CONFIG_MD */ -#endif - diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h deleted file mode 100644 index 9743e4dbc918..000000000000 --- a/include/linux/raid/md_k.h +++ /dev/null @@ -1,402 +0,0 @@ -/* - md_k.h : kernel internal structure of the Linux MD driver - Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _MD_K_H -#define _MD_K_H - -/* and dm-bio-list.h is not under include/linux because.... ??? */ -#include "../../../drivers/md/dm-bio-list.h" - -#ifdef CONFIG_BLOCK - -#define LEVEL_MULTIPATH (-4) -#define LEVEL_LINEAR (-1) -#define LEVEL_FAULTY (-5) - -/* we need a value for 'no level specified' and 0 - * means 'raid0', so we need something else. 
This is - * for internal use only - */ -#define LEVEL_NONE (-1000000) - -#define MaxSector (~(sector_t)0) - -typedef struct mddev_s mddev_t; -typedef struct mdk_rdev_s mdk_rdev_t; - -/* - * options passed in raidrun: - */ - -/* Currently this must fit in an 'int' */ -#define MAX_CHUNK_SIZE (1<<30) - -/* - * MD's 'extended' device - */ -struct mdk_rdev_s -{ - struct list_head same_set; /* RAID devices within the same set */ - - sector_t size; /* Device size (in blocks) */ - mddev_t *mddev; /* RAID array if running */ - long last_events; /* IO event timestamp */ - - struct block_device *bdev; /* block device handle */ - - struct page *sb_page; - int sb_loaded; - __u64 sb_events; - sector_t data_offset; /* start of data in array */ - sector_t sb_start; /* offset of the super block (in 512byte sectors) */ - int sb_size; /* bytes in the superblock */ - int preferred_minor; /* autorun support */ - - struct kobject kobj; - - /* A device can be in one of three states based on two flags: - * Not working: faulty==1 in_sync==0 - * Fully working: faulty==0 in_sync==1 - * Working, but not - * in sync with array - * faulty==0 in_sync==0 - * - * It can never have faulty==1, in_sync==1 - * This reduces the burden of testing multiple flags in many cases - */ - - unsigned long flags; -#define Faulty 1 /* device is known to have a fault */ -#define In_sync 2 /* device is in_sync with rest of array */ -#define WriteMostly 4 /* Avoid reading if at all possible */ -#define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */ -#define AllReserved 6 /* If whole device is reserved for - * one array */ -#define AutoDetected 7 /* added by auto-detect */ -#define Blocked 8 /* An error occured on an externally - * managed array, don't allow writes - * until it is cleared */ -#define StateChanged 9 /* Faulty or Blocked has changed during - * interrupt, so it needs to be - * notified by the thread */ - wait_queue_head_t blocked_wait; - - int desc_nr; /* descriptor index in the superblock */ - 
int raid_disk; /* role of device in array */ - int saved_raid_disk; /* role that device used to have in the - * array and could again if we did a partial - * resync from the bitmap - */ - sector_t recovery_offset;/* If this device has been partially - * recovered, this is where we were - * up to. - */ - - atomic_t nr_pending; /* number of pending requests. - * only maintained for arrays that - * support hot removal - */ - atomic_t read_errors; /* number of consecutive read errors that - * we have tried to ignore. - */ - atomic_t corrected_errors; /* number of corrected read errors, - * for reporting to userspace and storing - * in superblock. - */ - struct work_struct del_work; /* used for delayed sysfs removal */ - - struct sysfs_dirent *sysfs_state; /* handle for 'state' - * sysfs entry */ -}; - -struct mddev_s -{ - void *private; - struct mdk_personality *pers; - dev_t unit; - int md_minor; - struct list_head disks; - unsigned long flags; -#define MD_CHANGE_DEVS 0 /* Some device status has changed */ -#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ -#define MD_CHANGE_PENDING 2 /* superblock update in progress */ - - int ro; - - struct gendisk *gendisk; - - struct kobject kobj; - int hold_active; -#define UNTIL_IOCTL 1 -#define UNTIL_STOP 2 - - /* Superblock information */ - int major_version, - minor_version, - patch_version; - int persistent; - int external; /* metadata is - * managed externally */ - char metadata_type[17]; /* externally set*/ - int chunk_size; - time_t ctime, utime; - int level, layout; - char clevel[16]; - int raid_disks; - int max_disks; - sector_t size; /* used size of component devices */ - sector_t array_sectors; /* exported array size */ - __u64 events; - - char uuid[16]; - - /* If the array is being reshaped, we need to record the - * new shape and an indication of where we are up to. - * This is written to the superblock. - * If reshape_position is MaxSector, then no reshape is happening (yet). 
- */ - sector_t reshape_position; - int delta_disks, new_level, new_layout, new_chunk; - - struct mdk_thread_s *thread; /* management thread */ - struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ - sector_t curr_resync; /* last block scheduled */ - unsigned long resync_mark; /* a recent timestamp */ - sector_t resync_mark_cnt;/* blocks written at resync_mark */ - sector_t curr_mark_cnt; /* blocks scheduled now */ - - sector_t resync_max_sectors; /* may be set by personality */ - - sector_t resync_mismatches; /* count of sectors where - * parity/replica mismatch found - */ - - /* allow user-space to request suspension of IO to regions of the array */ - sector_t suspend_lo; - sector_t suspend_hi; - /* if zero, use the system-wide default */ - int sync_speed_min; - int sync_speed_max; - - /* resync even though the same disks are shared among md-devices */ - int parallel_resync; - - int ok_start_degraded; - /* recovery/resync flags - * NEEDED: we might need to start a resync/recover - * RUNNING: a thread is running, or about to be started - * SYNC: actually doing a resync, not a recovery - * RECOVER: doing recovery, or need to try it. - * INTR: resync needs to be aborted for some reason - * DONE: thread is done and is waiting to be reaped - * REQUEST: user-space has requested a sync (used with SYNC) - * CHECK: user-space request for for check-only, no repair - * RESHAPE: A reshape is happening - * - * If neither SYNC or RESHAPE are set, then it is a recovery. 
- */ -#define MD_RECOVERY_RUNNING 0 -#define MD_RECOVERY_SYNC 1 -#define MD_RECOVERY_RECOVER 2 -#define MD_RECOVERY_INTR 3 -#define MD_RECOVERY_DONE 4 -#define MD_RECOVERY_NEEDED 5 -#define MD_RECOVERY_REQUESTED 6 -#define MD_RECOVERY_CHECK 7 -#define MD_RECOVERY_RESHAPE 8 -#define MD_RECOVERY_FROZEN 9 - - unsigned long recovery; - int recovery_disabled; /* if we detect that recovery - * will always fail, set this - * so we don't loop trying */ - - int in_sync; /* know to not need resync */ - struct mutex reconfig_mutex; - atomic_t active; /* general refcount */ - atomic_t openers; /* number of active opens */ - - int changed; /* true if we might need to reread partition info */ - int degraded; /* whether md should consider - * adding a spare - */ - int barriers_work; /* initialised to true, cleared as soon - * as a barrier request to slave - * fails. Only supported - */ - struct bio *biolist; /* bios that need to be retried - * because BIO_RW_BARRIER is not supported - */ - - atomic_t recovery_active; /* blocks scheduled, but not written */ - wait_queue_head_t recovery_wait; - sector_t recovery_cp; - sector_t resync_min; /* user requested sync - * starts here */ - sector_t resync_max; /* resync should pause - * when it gets here */ - - struct sysfs_dirent *sysfs_state; /* handle for 'array_state' - * file in sysfs. - */ - struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ - - struct work_struct del_work; /* used for delayed sysfs removal */ - - spinlock_t write_lock; - wait_queue_head_t sb_wait; /* for waiting on superblock updates */ - atomic_t pending_writes; /* number of active superblock writes */ - - unsigned int safemode; /* if set, update "clean" superblock - * when no writes pending. - */ - unsigned int safemode_delay; - struct timer_list safemode_timer; - atomic_t writes_pending; - struct request_queue *queue; /* for plugging ... 
*/ - - atomic_t write_behind; /* outstanding async IO */ - unsigned int max_write_behind; /* 0 = sync */ - - struct bitmap *bitmap; /* the bitmap for the device */ - struct file *bitmap_file; /* the bitmap file */ - long bitmap_offset; /* offset from superblock of - * start of bitmap. May be - * negative, but not '0' - */ - long default_bitmap_offset; /* this is the offset to use when - * hot-adding a bitmap. It should - * eventually be settable by sysfs. - */ - - struct list_head all_mddevs; -}; - - -static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev) -{ - int faulty = test_bit(Faulty, &rdev->flags); - if (atomic_dec_and_test(&rdev->nr_pending) && faulty) - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); -} - -static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) -{ - atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); -} - -struct mdk_personality -{ - char *name; - int level; - struct list_head list; - struct module *owner; - int (*make_request)(struct request_queue *q, struct bio *bio); - int (*run)(mddev_t *mddev); - int (*stop)(mddev_t *mddev); - void (*status)(struct seq_file *seq, mddev_t *mddev); - /* error_handler must set ->faulty and clear ->in_sync - * if appropriate, and should abort recovery if needed - */ - void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev); - int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev); - int (*hot_remove_disk) (mddev_t *mddev, int number); - int (*spare_active) (mddev_t *mddev); - sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster); - int (*resize) (mddev_t *mddev, sector_t sectors); - int (*check_reshape) (mddev_t *mddev); - int (*start_reshape) (mddev_t *mddev); - int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); - /* quiesce moves between quiescence states - * 0 - fully active - * 1 - no new requests allowed - * others - reserved - */ - void (*quiesce) (mddev_t *mddev, int state); -}; - - -struct 
md_sysfs_entry { - struct attribute attr; - ssize_t (*show)(mddev_t *, char *); - ssize_t (*store)(mddev_t *, const char *, size_t); -}; - - -static inline char * mdname (mddev_t * mddev) -{ - return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; -} - -/* - * iterates through some rdev ringlist. It's safe to remove the - * current 'rdev'. Dont touch 'tmp' though. - */ -#define rdev_for_each_list(rdev, tmp, head) \ - list_for_each_entry_safe(rdev, tmp, head, same_set) - -/* - * iterates through the 'same array disks' ringlist - */ -#define rdev_for_each(rdev, tmp, mddev) \ - list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) - -#define rdev_for_each_rcu(rdev, mddev) \ - list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) - -typedef struct mdk_thread_s { - void (*run) (mddev_t *mddev); - mddev_t *mddev; - wait_queue_head_t wqueue; - unsigned long flags; - struct task_struct *tsk; - unsigned long timeout; -} mdk_thread_t; - -#define THREAD_WAKEUP 0 - -#define __wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock); \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - if (condition) \ - break; \ - __wait_event_lock_irq(wq, condition, lock, cmd); \ -} while (0) - -static inline void safe_put_page(struct page *p) -{ - if (p) put_page(p); -} - -#endif /* CONFIG_BLOCK */ -#endif - diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h index 7192035fc4b0..fb1abb3367e9 100644 --- a/include/linux/raid/md_u.h +++ b/include/linux/raid/md_u.h @@ -15,6 +15,24 @@ #ifndef _MD_U_H #define _MD_U_H +/* + * Different major versions are not compatible. 
+ * Different minor versions are only downward compatible. + * Different patchlevel versions are downward and upward compatible. + */ +#define MD_MAJOR_VERSION 0 +#define MD_MINOR_VERSION 90 +/* + * MD_PATCHLEVEL_VERSION indicates kernel functionality. + * >=1 means different superblock formats are selectable using SET_ARRAY_INFO + * and major_version/minor_version accordingly + * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT + * in the super status byte + * >=3 means that bitmap superblock version 4 is supported, which uses + * little-ending representation rather than host-endian + */ +#define MD_PATCHLEVEL_VERSION 3 + /* ioctls */ /* status */ @@ -46,6 +64,12 @@ #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) +/* 63 partitions with the alternate major number (mdp) */ +#define MdpMinorShift 6 +#ifdef __KERNEL__ +extern int mdp_major; +#endif + typedef struct mdu_version_s { int major; int minor; @@ -85,6 +109,17 @@ typedef struct mdu_array_info_s { } mdu_array_info_t; +/* non-obvious values for 'level' */ +#define LEVEL_MULTIPATH (-4) +#define LEVEL_LINEAR (-1) +#define LEVEL_FAULTY (-5) + +/* we need a value for 'no level specified' and 0 + * means 'raid0', so we need something else. 
This is + * for internal use only + */ +#define LEVEL_NONE (-1000000) + typedef struct mdu_disk_info_s { /* * configuration/status of one particular disk diff --git a/include/linux/raid/multipath.h b/include/linux/raid/multipath.h deleted file mode 100644 index 6f53fc177a47..000000000000 --- a/include/linux/raid/multipath.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _MULTIPATH_H -#define _MULTIPATH_H - -#include <linux/raid/md.h> - -struct multipath_info { - mdk_rdev_t *rdev; -}; - -struct multipath_private_data { - mddev_t *mddev; - struct multipath_info *multipaths; - int raid_disks; - int working_disks; - spinlock_t device_lock; - struct list_head retry_list; - - mempool_t *pool; -}; - -typedef struct multipath_private_data multipath_conf_t; - -/* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((multipath_conf_t *) mddev->private) - -/* - * this is our 'private' 'collective' MULTIPATH buffer head. - * it contains information about what kind of IO operations were started - * for this MULTIPATH operation, and about their status: - */ - -struct multipath_bh { - mddev_t *mddev; - struct bio *master_bio; - struct bio bio; - int path; - struct list_head retry_list; -}; -#endif diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h new file mode 100644 index 000000000000..d92480f8285c --- /dev/null +++ b/include/linux/raid/pq.h @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2003 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. 
+ * + * ----------------------------------------------------------------------- */ + +#ifndef LINUX_RAID_RAID6_H +#define LINUX_RAID_RAID6_H + +#ifdef __KERNEL__ + +/* Set to 1 to use kernel-wide empty_zero_page */ +#define RAID6_USE_EMPTY_ZERO_PAGE 0 +#include <linux/blkdev.h> + +/* We need a pre-zeroed page... if we don't want to use the kernel-provided + one define it here */ +#if RAID6_USE_EMPTY_ZERO_PAGE +# define raid6_empty_zero_page empty_zero_page +#else +extern const char raid6_empty_zero_page[PAGE_SIZE]; +#endif + +#else /* ! __KERNEL__ */ +/* Used for testing in user space */ + +#include <errno.h> +#include <inttypes.h> +#include <limits.h> +#include <stddef.h> +#include <sys/mman.h> +#include <sys/types.h> + +/* Not standard, but glibc defines it */ +#define BITS_PER_LONG __WORDSIZE + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +#ifndef PAGE_SIZE +# define PAGE_SIZE 4096 +#endif +extern const char raid6_empty_zero_page[PAGE_SIZE]; + +#define __init +#define __exit +#define __attribute_const__ __attribute__((const)) +#define noinline __attribute__((noinline)) + +#define preempt_enable() +#define preempt_disable() +#define cpu_has_feature(x) 1 +#define enable_kernel_altivec() +#define disable_kernel_altivec() + +#define EXPORT_SYMBOL(sym) +#define MODULE_LICENSE(licence) +#define subsys_initcall(x) +#define module_exit(x) +#endif /* __KERNEL__ */ + +/* Routine choices */ +struct raid6_calls { + void (*gen_syndrome)(int, size_t, void **); + int (*valid)(void); /* Returns 1 if this routine set is usable */ + const char *name; /* Name of this routine set */ + int prefer; /* Has special performance attribute */ +}; + +/* Selected algorithm */ +extern struct raid6_calls raid6_call; + +/* Algorithm list */ +extern const struct raid6_calls * const raid6_algos[]; +int raid6_select_algo(void); + +/* Return values from chk_syndrome */ +#define RAID6_OK 0 +#define RAID6_P_BAD 1 +#define RAID6_Q_BAD 2 +#define 
RAID6_PQ_BAD 3 + +/* Galois field tables */ +extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256))); +extern const u8 raid6_gfexp[256] __attribute__((aligned(256))); +extern const u8 raid6_gfinv[256] __attribute__((aligned(256))); +extern const u8 raid6_gfexi[256] __attribute__((aligned(256))); + +/* Recovery routines */ +void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + void **ptrs); +void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs); +void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, + void **ptrs); + +/* Some definitions to allow code to be compiled for testing in userspace */ +#ifndef __KERNEL__ + +# define jiffies raid6_jiffies() +# define printk printf +# define GFP_KERNEL 0 +# define __get_free_pages(x, y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), \ + PROT_READ|PROT_WRITE, \ + MAP_PRIVATE|MAP_ANONYMOUS,\ + 0, 0)) +# define free_pages(x, y) munmap((void *)(x), (y)*PAGE_SIZE) + +static inline void cpu_relax(void) +{ + /* Nothing */ +} + +#undef HZ +#define HZ 1000 +static inline uint32_t raid6_jiffies(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec*1000 + tv.tv_usec/1000; +} + +#endif /* ! 
__KERNEL__ */ + +#endif /* LINUX_RAID_RAID6_H */ diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h deleted file mode 100644 index fd42aa87c391..000000000000 --- a/include/linux/raid/raid0.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef _RAID0_H -#define _RAID0_H - -#include <linux/raid/md.h> - -struct strip_zone -{ - sector_t zone_start; /* Zone offset in md_dev (in sectors) */ - sector_t dev_start; /* Zone offset in real dev (in sectors) */ - sector_t sectors; /* Zone size in sectors */ - int nb_dev; /* # of devices attached to the zone */ - mdk_rdev_t **dev; /* Devices attached to the zone */ -}; - -struct raid0_private_data -{ - struct strip_zone **hash_table; /* Table of indexes into strip_zone */ - struct strip_zone *strip_zone; - mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ - int nr_strip_zones; - - sector_t spacing; - int sector_shift; /* shift this before divide by spacing */ -}; - -typedef struct raid0_private_data raid0_conf_t; - -#define mddev_to_conf(mddev) ((raid0_conf_t *) mddev->private) - -#endif diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h deleted file mode 100644 index 0a9ba7c3302e..000000000000 --- a/include/linux/raid/raid1.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef _RAID1_H -#define _RAID1_H - -#include <linux/raid/md.h> - -typedef struct mirror_info mirror_info_t; - -struct mirror_info { - mdk_rdev_t *rdev; - sector_t head_position; -}; - -/* - * memory pools need a pointer to the mddev, so they can force an unplug - * when memory is tight, and a count of the number of drives that the - * pool was allocated for, so they know how much to allocate and free. - * mddev->raid_disks cannot be used, as it can change while a pool is active - * These two datums are stored in a kmalloced struct. 
- */ - -struct pool_info { - mddev_t *mddev; - int raid_disks; -}; - - -typedef struct r1bio_s r1bio_t; - -struct r1_private_data_s { - mddev_t *mddev; - mirror_info_t *mirrors; - int raid_disks; - int last_used; - sector_t next_seq_sect; - spinlock_t device_lock; - - struct list_head retry_list; - /* queue pending writes and submit them on unplug */ - struct bio_list pending_bio_list; - /* queue of writes that have been unplugged */ - struct bio_list flushing_bio_list; - - /* for use when syncing mirrors: */ - - spinlock_t resync_lock; - int nr_pending; - int nr_waiting; - int nr_queued; - int barrier; - sector_t next_resync; - int fullsync; /* set to 1 if a full sync is needed, - * (fresh device added). - * Cleared when a sync completes. - */ - - wait_queue_head_t wait_barrier; - - struct pool_info *poolinfo; - - struct page *tmppage; - - mempool_t *r1bio_pool; - mempool_t *r1buf_pool; -}; - -typedef struct r1_private_data_s conf_t; - -/* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((conf_t *) mddev->private) - -/* - * this is our 'private' RAID1 bio. - * - * it contains information about what kind of IO operations were started - * for this RAID1 operation, and about their status: - */ - -struct r1bio_s { - atomic_t remaining; /* 'have we finished' count, - * used from IRQ handlers - */ - atomic_t behind_remaining; /* number of write-behind ios remaining - * in this BehindIO request - */ - sector_t sector; - int sectors; - unsigned long state; - mddev_t *mddev; - /* - * original bio going to /dev/mdx - */ - struct bio *master_bio; - /* - * if the IO is in READ direction, then this is where we read - */ - int read_disk; - - struct list_head retry_list; - struct bitmap_update *bitmap_update; - /* - * if the IO is in WRITE direction, then multiple bios are used. - * We choose the number when they are allocated. 
- */ - struct bio *bios[0]; - /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ -}; - -/* when we get a read error on a read-only array, we redirect to another - * device without failing the first device, or trying to over-write to - * correct the read error. To keep track of bad blocks on a per-bio - * level, we store IO_BLOCKED in the appropriate 'bios' pointer - */ -#define IO_BLOCKED ((struct bio*)1) - -/* bits for r1bio.state */ -#define R1BIO_Uptodate 0 -#define R1BIO_IsSync 1 -#define R1BIO_Degraded 2 -#define R1BIO_BehindIO 3 -#define R1BIO_Barrier 4 -#define R1BIO_BarrierRetry 5 -/* For write-behind requests, we call bi_end_io when - * the last non-write-behind device completes, providing - * any write was successful. Otherwise we call when - * any write-behind write succeeds, otherwise we call - * with failure when last write completes (and all failed). - * Record that bi_end_io was called with this flag... - */ -#define R1BIO_Returned 6 - -#endif diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h deleted file mode 100644 index e9091cfeb286..000000000000 --- a/include/linux/raid/raid10.h +++ /dev/null @@ -1,123 +0,0 @@ -#ifndef _RAID10_H -#define _RAID10_H - -#include <linux/raid/md.h> - -typedef struct mirror_info mirror_info_t; - -struct mirror_info { - mdk_rdev_t *rdev; - sector_t head_position; -}; - -typedef struct r10bio_s r10bio_t; - -struct r10_private_data_s { - mddev_t *mddev; - mirror_info_t *mirrors; - int raid_disks; - spinlock_t device_lock; - - /* geometry */ - int near_copies; /* number of copies layed out raid0 style */ - int far_copies; /* number of copies layed out - * at large strides across drives - */ - int far_offset; /* far_copies are offset by 1 stripe - * instead of many - */ - int copies; /* near_copies * far_copies. - * must be <= raid_disks - */ - sector_t stride; /* distance between far copies. - * This is size / far_copies unless - * far_offset, in which case it is - * 1 stripe. 
- */ - - int chunk_shift; /* shift from chunks to sectors */ - sector_t chunk_mask; - - struct list_head retry_list; - /* queue pending writes and submit them on unplug */ - struct bio_list pending_bio_list; - - - spinlock_t resync_lock; - int nr_pending; - int nr_waiting; - int nr_queued; - int barrier; - sector_t next_resync; - int fullsync; /* set to 1 if a full sync is needed, - * (fresh device added). - * Cleared when a sync completes. - */ - - wait_queue_head_t wait_barrier; - - mempool_t *r10bio_pool; - mempool_t *r10buf_pool; - struct page *tmppage; -}; - -typedef struct r10_private_data_s conf_t; - -/* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((conf_t *) mddev->private) - -/* - * this is our 'private' RAID10 bio. - * - * it contains information about what kind of IO operations were started - * for this RAID10 operation, and about their status: - */ - -struct r10bio_s { - atomic_t remaining; /* 'have we finished' count, - * used from IRQ handlers - */ - sector_t sector; /* virtual sector number */ - int sectors; - unsigned long state; - mddev_t *mddev; - /* - * original bio going to /dev/mdx - */ - struct bio *master_bio; - /* - * if the IO is in READ direction, then this is where we read - */ - int read_slot; - - struct list_head retry_list; - /* - * if the IO is in WRITE direction, then multiple bios are used, - * one for each copy. - * When resyncing we also use one for each copy. - * When reconstructing, we use 2 bios, one for read, one for write. - * We choose the number when they are allocated. - */ - struct { - struct bio *bio; - sector_t addr; - int devnum; - } devs[0]; -}; - -/* when we get a read error on a read-only array, we redirect to another - * device without failing the first device, or trying to over-write to - * correct the read error. 
To keep track of bad blocks on a per-bio - * level, we store IO_BLOCKED in the appropriate 'bios' pointer - */ -#define IO_BLOCKED ((struct bio*)1) - -/* bits for r10bio.state */ -#define R10BIO_Uptodate 0 -#define R10BIO_IsSync 1 -#define R10BIO_IsRecover 2 -#define R10BIO_Degraded 3 -#endif diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h deleted file mode 100644 index 3b2672792457..000000000000 --- a/include/linux/raid/raid5.h +++ /dev/null @@ -1,402 +0,0 @@ -#ifndef _RAID5_H -#define _RAID5_H - -#include <linux/raid/md.h> -#include <linux/raid/xor.h> - -/* - * - * Each stripe contains one buffer per disc. Each buffer can be in - * one of a number of states stored in "flags". Changes between - * these states happen *almost* exclusively under a per-stripe - * spinlock. Some very specific changes can happen in bi_end_io, and - * these are not protected by the spin lock. - * - * The flag bits that are used to represent these states are: - * R5_UPTODATE and R5_LOCKED - * - * State Empty == !UPTODATE, !LOCK - * We have no data, and there is no active request - * State Want == !UPTODATE, LOCK - * A read request is being submitted for this block - * State Dirty == UPTODATE, LOCK - * Some new data is in this buffer, and it is being written out - * State Clean == UPTODATE, !LOCK - * We have valid data which is the same as on disc - * - * The possible state transitions are: - * - * Empty -> Want - on read or write to get old data for parity calc - * Empty -> Dirty - on compute_parity to satisfy write/sync request.(RECONSTRUCT_WRITE) - * Empty -> Clean - on compute_block when computing a block for failed drive - * Want -> Empty - on failed read - * Want -> Clean - on successful completion of read request - * Dirty -> Clean - on successful completion of write request - * Dirty -> Clean - on failed write - * Clean -> Dirty - on compute_parity to satisfy write/sync (RECONSTRUCT or RMW) - * - * The Want->Empty, Want->Clean, Dirty->Clean, transitions - * all 
happen in b_end_io at interrupt time. - * Each sets the Uptodate bit before releasing the Lock bit. - * This leaves one multi-stage transition: - * Want->Dirty->Clean - * This is safe because thinking that a Clean buffer is actually dirty - * will at worst delay some action, and the stripe will be scheduled - * for attention after the transition is complete. - * - * There is one possibility that is not covered by these states. That - * is if one drive has failed and there is a spare being rebuilt. We - * can't distinguish between a clean block that has been generated - * from parity calculations, and a clean block that has been - * successfully written to the spare ( or to parity when resyncing). - * To distingush these states we have a stripe bit STRIPE_INSYNC that - * is set whenever a write is scheduled to the spare, or to the parity - * disc if there is no spare. A sync request clears this bit, and - * when we find it set with no buffers locked, we know the sync is - * complete. - * - * Buffers for the md device that arrive via make_request are attached - * to the appropriate stripe in one of two lists linked on b_reqnext. - * One list (bh_read) for read requests, one (bh_write) for write. - * There should never be more than one buffer on the two lists - * together, but we are not guaranteed of that so we allow for more. - * - * If a buffer is on the read list when the associated cache buffer is - * Uptodate, the data is copied into the read buffer and it's b_end_io - * routine is called. This may happen in the end_request routine only - * if the buffer has just successfully been read. end_request should - * remove the buffers from the list and then set the Uptodate bit on - * the buffer. Other threads may do this only if they first check - * that the Uptodate bit is set. Once they have checked that they may - * take buffers off the read queue. 
- * - * When a buffer on the write list is committed for write it is copied - * into the cache buffer, which is then marked dirty, and moved onto a - * third list, the written list (bh_written). Once both the parity - * block and the cached buffer are successfully written, any buffer on - * a written list can be returned with b_end_io. - * - * The write list and read list both act as fifos. The read list is - * protected by the device_lock. The write and written lists are - * protected by the stripe lock. The device_lock, which can be - * claimed while the stipe lock is held, is only for list - * manipulations and will only be held for a very short time. It can - * be claimed from interrupts. - * - * - * Stripes in the stripe cache can be on one of two lists (or on - * neither). The "inactive_list" contains stripes which are not - * currently being used for any request. They can freely be reused - * for another stripe. The "handle_list" contains stripes that need - * to be handled in some way. Both of these are fifo queues. Each - * stripe is also (potentially) linked to a hash bucket in the hash - * table so that it can be found by sector number. Stripes that are - * not hashed must be on the inactive_list, and will normally be at - * the front. All stripes start life this way. - * - * The inactive_list, handle_list and hash bucket lists are all protected by the - * device_lock. - * - stripes on the inactive_list never have their stripe_lock held. - * - stripes have a reference counter. If count==0, they are on a list. - * - If a stripe might need handling, STRIPE_HANDLE is set. 
- * - When refcount reaches zero, then if STRIPE_HANDLE it is put on - * handle_list else inactive_list - * - * This, combined with the fact that STRIPE_HANDLE is only ever - * cleared while a stripe has a non-zero count means that if the - * refcount is 0 and STRIPE_HANDLE is set, then it is on the - * handle_list and if recount is 0 and STRIPE_HANDLE is not set, then - * the stripe is on inactive_list. - * - * The possible transitions are: - * activate an unhashed/inactive stripe (get_active_stripe()) - * lockdev check-hash unlink-stripe cnt++ clean-stripe hash-stripe unlockdev - * activate a hashed, possibly active stripe (get_active_stripe()) - * lockdev check-hash if(!cnt++)unlink-stripe unlockdev - * attach a request to an active stripe (add_stripe_bh()) - * lockdev attach-buffer unlockdev - * handle a stripe (handle_stripe()) - * lockstripe clrSTRIPE_HANDLE ... - * (lockdev check-buffers unlockdev) .. - * change-state .. - * record io/ops needed unlockstripe schedule io/ops - * release an active stripe (release_stripe()) - * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev - * - * The refcount counts each thread that have activated the stripe, - * plus raid5d if it is handling it, plus one for each active request - * on a cached buffer, and plus one if the stripe is undergoing stripe - * operations. - * - * Stripe operations are performed outside the stripe lock, - * the stripe operations are: - * -copying data between the stripe cache and user application buffers - * -computing blocks to save a disk access, or to recover a missing block - * -updating the parity on a write operation (reconstruct write and - * read-modify-write) - * -checking parity correctness - * -running i/o to disk - * These operations are carried out by raid5_run_ops which uses the async_tx - * api to (optionally) offload operations to dedicated hardware engines. 
- * When requesting an operation handle_stripe sets the pending bit for the - * operation and increments the count. raid5_run_ops is then run whenever - * the count is non-zero. - * There are some critical dependencies between the operations that prevent some - * from being requested while another is in flight. - * 1/ Parity check operations destroy the in cache version of the parity block, - * so we prevent parity dependent operations like writes and compute_blocks - * from starting while a check is in progress. Some dma engines can perform - * the check without damaging the parity block, in these cases the parity - * block is re-marked up to date (assuming the check was successful) and is - * not re-read from disk. - * 2/ When a write operation is requested we immediately lock the affected - * blocks, and mark them as not up to date. This causes new read requests - * to be held off, as well as parity checks and compute block operations. - * 3/ Once a compute block operation has been requested handle_stripe treats - * that block as if it is up to date. raid5_run_ops guaruntees that any - * operation that is dependent on the compute block result is initiated after - * the compute block completes. - */ - -/* - * Operations state - intermediate states that are visible outside of sh->lock - * In general _idle indicates nothing is running, _run indicates a data - * processing operation is active, and _result means the data processing result - * is stable and can be acted upon. 
For simple operations like biofill and - * compute that only have an _idle and _run state they are indicated with - * sh->state flags (STRIPE_BIOFILL_RUN and STRIPE_COMPUTE_RUN) - */ -/** - * enum check_states - handles syncing / repairing a stripe - * @check_state_idle - check operations are quiesced - * @check_state_run - check operation is running - * @check_state_result - set outside lock when check result is valid - * @check_state_compute_run - check failed and we are repairing - * @check_state_compute_result - set outside lock when compute result is valid - */ -enum check_states { - check_state_idle = 0, - check_state_run, /* parity check */ - check_state_check_result, - check_state_compute_run, /* parity repair */ - check_state_compute_result, -}; - -/** - * enum reconstruct_states - handles writing or expanding a stripe - */ -enum reconstruct_states { - reconstruct_state_idle = 0, - reconstruct_state_prexor_drain_run, /* prexor-write */ - reconstruct_state_drain_run, /* write */ - reconstruct_state_run, /* expand */ - reconstruct_state_prexor_drain_result, - reconstruct_state_drain_result, - reconstruct_state_result, -}; - -struct stripe_head { - struct hlist_node hash; - struct list_head lru; /* inactive_list or handle_list */ - struct raid5_private_data *raid_conf; - sector_t sector; /* sector of this row */ - int pd_idx; /* parity disk index */ - unsigned long state; /* state flags */ - atomic_t count; /* nr of active thread/requests */ - spinlock_t lock; - int bm_seq; /* sequence number for bitmap flushes */ - int disks; /* disks in stripe */ - enum check_states check_state; - enum reconstruct_states reconstruct_state; - /* stripe_operations - * @target - STRIPE_OP_COMPUTE_BLK target - */ - struct stripe_operations { - int target; - u32 zero_sum_result; - } ops; - struct r5dev { - struct bio req; - struct bio_vec vec; - struct page *page; - struct bio *toread, *read, *towrite, *written; - sector_t sector; /* sector of this page */ - unsigned long flags; 
- } dev[1]; /* allocated with extra space depending of RAID geometry */ -}; - -/* stripe_head_state - collects and tracks the dynamic state of a stripe_head - * for handle_stripe. It is only valid under spin_lock(sh->lock); - */ -struct stripe_head_state { - int syncing, expanding, expanded; - int locked, uptodate, to_read, to_write, failed, written; - int to_fill, compute, req_compute, non_overwrite; - int failed_num; - unsigned long ops_request; -}; - -/* r6_state - extra state data only relevant to r6 */ -struct r6_state { - int p_failed, q_failed, qd_idx, failed_num[2]; -}; - -/* Flags */ -#define R5_UPTODATE 0 /* page contains current data */ -#define R5_LOCKED 1 /* IO has been submitted on "req" */ -#define R5_OVERWRITE 2 /* towrite covers whole page */ -/* and some that are internal to handle_stripe */ -#define R5_Insync 3 /* rdev && rdev->in_sync at start */ -#define R5_Wantread 4 /* want to schedule a read */ -#define R5_Wantwrite 5 -#define R5_Overlap 7 /* There is a pending overlapping request on this block */ -#define R5_ReadError 8 /* seen a read error here recently */ -#define R5_ReWrite 9 /* have tried to over-write the readerror */ - -#define R5_Expanded 10 /* This block now has post-expand data */ -#define R5_Wantcompute 11 /* compute_block in progress treat as - * uptodate - */ -#define R5_Wantfill 12 /* dev->toread contains a bio that needs - * filling - */ -#define R5_Wantdrain 13 /* dev->towrite needs to be drained */ -/* - * Write method - */ -#define RECONSTRUCT_WRITE 1 -#define READ_MODIFY_WRITE 2 -/* not a write method, but a compute_parity mode */ -#define CHECK_PARITY 3 - -/* - * Stripe state - */ -#define STRIPE_HANDLE 2 -#define STRIPE_SYNCING 3 -#define STRIPE_INSYNC 4 -#define STRIPE_PREREAD_ACTIVE 5 -#define STRIPE_DELAYED 6 -#define STRIPE_DEGRADED 7 -#define STRIPE_BIT_DELAY 8 -#define STRIPE_EXPANDING 9 -#define STRIPE_EXPAND_SOURCE 10 -#define STRIPE_EXPAND_READY 11 -#define STRIPE_IO_STARTED 12 /* do not count towards 
'bypass_count' */ -#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */ -#define STRIPE_BIOFILL_RUN 14 -#define STRIPE_COMPUTE_RUN 15 -/* - * Operation request flags - */ -#define STRIPE_OP_BIOFILL 0 -#define STRIPE_OP_COMPUTE_BLK 1 -#define STRIPE_OP_PREXOR 2 -#define STRIPE_OP_BIODRAIN 3 -#define STRIPE_OP_POSTXOR 4 -#define STRIPE_OP_CHECK 5 - -/* - * Plugging: - * - * To improve write throughput, we need to delay the handling of some - * stripes until there has been a chance that several write requests - * for the one stripe have all been collected. - * In particular, any write request that would require pre-reading - * is put on a "delayed" queue until there are no stripes currently - * in a pre-read phase. Further, if the "delayed" queue is empty when - * a stripe is put on it then we "plug" the queue and do not process it - * until an unplug call is made. (the unplug_io_fn() is called). - * - * When preread is initiated on a stripe, we set PREREAD_ACTIVE and add - * it to the count of prereading stripes. - * When write is initiated, or the stripe refcnt == 0 (just in case) we - * clear the PREREAD_ACTIVE flag and decrement the count - * Whenever the 'handle' queue is empty and the device is not plugged, we - * move any strips from delayed to handle and clear the DELAYED flag and set - * PREREAD_ACTIVE. - * In stripe_handle, if we find pre-reading is necessary, we do it if - * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue. - * HANDLE gets cleared if stripe_handle leave nothing locked. 
- */ - - -struct disk_info { - mdk_rdev_t *rdev; -}; - -struct raid5_private_data { - struct hlist_head *stripe_hashtbl; - mddev_t *mddev; - struct disk_info *spare; - int chunk_size, level, algorithm; - int max_degraded; - int raid_disks; - int max_nr_stripes; - - /* used during an expand */ - sector_t expand_progress; /* MaxSector when no expand happening */ - sector_t expand_lo; /* from here up to expand_progress it out-of-bounds - * as we haven't flushed the metadata yet - */ - int previous_raid_disks; - - struct list_head handle_list; /* stripes needing handling */ - struct list_head hold_list; /* preread ready stripes */ - struct list_head delayed_list; /* stripes that have plugged requests */ - struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ - struct bio *retry_read_aligned; /* currently retrying aligned bios */ - struct bio *retry_read_aligned_list; /* aligned bios retry list */ - atomic_t preread_active_stripes; /* stripes with scheduled io */ - atomic_t active_aligned_reads; - atomic_t pending_full_writes; /* full write backlog */ - int bypass_count; /* bypassed prereads */ - int bypass_threshold; /* preread nice */ - struct list_head *last_hold; /* detect hold_list promotions */ - - atomic_t reshape_stripes; /* stripes with pending writes for reshape */ - /* unfortunately we need two cache names as we temporarily have - * two caches. - */ - int active_name; - char cache_name[2][20]; - struct kmem_cache *slab_cache; /* for allocating stripes */ - - int seq_flush, seq_write; - int quiesce; - - int fullsync; /* set to 1 if a full sync is needed, - * (fresh device added). - * Cleared when a sync completes. 
- */ - - struct page *spare_page; /* Used when checking P/Q in raid6 */ - - /* - * Free stripes pool - */ - atomic_t active_stripes; - struct list_head inactive_list; - wait_queue_head_t wait_for_stripe; - wait_queue_head_t wait_for_overlap; - int inactive_blocked; /* release of inactive stripes blocked, - * waiting for 25% to be free - */ - int pool_size; /* number of disks in stripeheads in pool */ - spinlock_t device_lock; - struct disk_info *disks; -}; - -typedef struct raid5_private_data raid5_conf_t; - -#define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private) - -/* - * Our supported algorithms - */ -#define ALGORITHM_LEFT_ASYMMETRIC 0 -#define ALGORITHM_RIGHT_ASYMMETRIC 1 -#define ALGORITHM_LEFT_SYMMETRIC 2 -#define ALGORITHM_RIGHT_SYMMETRIC 3 - -#endif diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 3e120587eada..5a210959e3f8 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -1,8 +1,6 @@ #ifndef _XOR_H #define _XOR_H -#include <linux/raid/md.h> - #define MAX_XOR_BLOCKS 4 extern void xor_blocks(unsigned int count, unsigned int bytes, diff --git a/include/linux/regulator/bq24022.h b/include/linux/regulator/bq24022.h index e84b0a9feda5..a6d014005d49 100644 --- a/include/linux/regulator/bq24022.h +++ b/include/linux/regulator/bq24022.h @@ -10,6 +10,8 @@ * */ +struct regulator_init_data; + /** * bq24022_mach_info - platform data for bq24022 * @gpio_nce: GPIO line connected to the nCE pin, used to enable / disable charging @@ -18,4 +20,5 @@ struct bq24022_mach_info { int gpio_nce; int gpio_iset2; + struct regulator_init_data *init_data; }; diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 801bf77ff4e2..277f4b964df5 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -3,7 +3,7 @@ * * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. 
* - * Author: Liam Girdwood <lg@opensource.wolfsonmicro.com> + * Author: Liam Girdwood <lrg@slimlogic.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -88,6 +88,7 @@ * FAIL Regulator output has failed. * OVER_TEMP Regulator over temp. * FORCE_DISABLE Regulator shut down by software. + * VOLTAGE_CHANGE Regulator voltage changed. * * NOTE: These events can be OR'ed together when passed into handler. */ @@ -98,6 +99,7 @@ #define REGULATOR_EVENT_FAIL 0x08 #define REGULATOR_EVENT_OVER_TEMP 0x10 #define REGULATOR_EVENT_FORCE_DISABLE 0x20 +#define REGULATOR_EVENT_VOLTAGE_CHANGE 0x40 struct regulator; @@ -140,6 +142,8 @@ int regulator_bulk_disable(int num_consumers, void regulator_bulk_free(int num_consumers, struct regulator_bulk_data *consumers); +int regulator_count_voltages(struct regulator *regulator); +int regulator_list_voltage(struct regulator *regulator, unsigned selector); int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV); int regulator_get_voltage(struct regulator *regulator); int regulator_set_current_limit(struct regulator *regulator, diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 2dae05705f13..4848d8dacd90 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -3,7 +3,7 @@ * * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. 
* - * Author: Liam Girdwood <lg@opensource.wolfsonmicro.com> + * Author: Liam Girdwood <lrg@slimlogic.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,25 +21,38 @@ struct regulator_dev; struct regulator_init_data; +enum regulator_status { + REGULATOR_STATUS_OFF, + REGULATOR_STATUS_ON, + REGULATOR_STATUS_ERROR, + /* fast/normal/idle/standby are flavors of "on" */ + REGULATOR_STATUS_FAST, + REGULATOR_STATUS_NORMAL, + REGULATOR_STATUS_IDLE, + REGULATOR_STATUS_STANDBY, +}; + /** * struct regulator_ops - regulator operations. * - * This struct describes regulator operations which can be implemented by - * regulator chip drivers. - * - * @enable: Enable the regulator. - * @disable: Disable the regulator. + * @enable: Configure the regulator as enabled. + * @disable: Configure the regulator as disabled. * @is_enabled: Return 1 if the regulator is enabled, 0 otherwise. * * @set_voltage: Set the voltage for the regulator within the range specified. * The driver should select the voltage closest to min_uV. * @get_voltage: Return the currently configured voltage for the regulator. + * @list_voltage: Return one of the supported voltages, in microvolts; zero + * if the selector indicates a voltage that is unusable on this system; + * or negative errno. Selectors range from zero to one less than + * regulator_desc.n_voltages. Voltages may be reported in any order. * * @set_current_limit: Configure a limit for a current-limited regulator. - * @get_current_limit: Get the limit for a current-limited regulator. + * @get_current_limit: Get the configured limit for a current-limited regulator. * - * @set_mode: Set the operating mode for the regulator. - * @get_mode: Get the current operating mode for the regulator. + * @get_mode: Get the configured operating mode for the regulator. 
+ * @get_status: Return actual (not as-configured) status of regulator, as a + * REGULATOR_STATUS value (or negative errno) * @get_optimum_mode: Get the most efficient operating mode for the regulator * when running with the specified parameters. * @@ -51,9 +64,15 @@ struct regulator_init_data; * suspended. * @set_suspend_mode: Set the operating mode for the regulator when the * system is suspended. + * + * This struct describes regulator operations which can be implemented by + * regulator chip drivers. */ struct regulator_ops { + /* enumerate supported voltages */ + int (*list_voltage) (struct regulator_dev *, unsigned selector); + /* get/set regulator voltage */ int (*set_voltage) (struct regulator_dev *, int min_uV, int max_uV); int (*get_voltage) (struct regulator_dev *); @@ -72,6 +91,13 @@ struct regulator_ops { int (*set_mode) (struct regulator_dev *, unsigned int mode); unsigned int (*get_mode) (struct regulator_dev *); + /* report regulator status ... most other accessors report + * control inputs, this reports results of combining inputs + * from Linux (and other sources) with the actual load. + * returns REGULATOR_STATUS_* or negative errno. + */ + int (*get_status)(struct regulator_dev *); + /* get most efficient regulator operating mode for load */ unsigned int (*get_optimum_mode) (struct regulator_dev *, int input_uV, int output_uV, int load_uA); @@ -106,6 +132,7 @@ enum regulator_type { * * @name: Identifying name for the regulator. * @id: Numerical identifier for the regulator. + * @n_voltages: Number of selectors available for ops.list_voltage(). * @ops: Regulator operations table. * @irq: Interrupt number for the regulator. * @type: Indicates if the regulator is a voltage or current regulator. 
@@ -114,14 +141,48 @@ enum regulator_type { struct regulator_desc { const char *name; int id; + unsigned n_voltages; struct regulator_ops *ops; int irq; enum regulator_type type; struct module *owner; }; +/* + * struct regulator_dev + * + * Voltage / Current regulator class device. One for each + * regulator. + * + * This should *not* be used directly by anything except the regulator + * core and notification injection (which should take the mutex and do + * no other direct access). + */ +struct regulator_dev { + struct regulator_desc *desc; + int use_count; + + /* lists we belong to */ + struct list_head list; /* list of all regulators */ + struct list_head slist; /* list of supplied regulators */ + + /* lists we own */ + struct list_head consumer_list; /* consumers we supply */ + struct list_head supply_list; /* regulators we supply */ + + struct blocking_notifier_head notifier; + struct mutex mutex; /* consumer lock */ + struct module *owner; + struct device dev; + struct regulation_constraints *constraints; + struct regulator_dev *supply; /* for tree */ + + void *reg_data; /* regulator_dev data */ +}; + struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, - struct device *dev, void *driver_data); + struct device *dev, struct regulator_init_data *init_data, + void *driver_data); void regulator_unregister(struct regulator_dev *rdev); int regulator_notifier_call_chain(struct regulator_dev *rdev, diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h index 1387a5d2190e..91b4da31f1b5 100644 --- a/include/linux/regulator/fixed.h +++ b/include/linux/regulator/fixed.h @@ -14,9 +14,12 @@ #ifndef __REGULATOR_FIXED_H #define __REGULATOR_FIXED_H +struct regulator_init_data; + struct fixed_voltage_config { const char *supply_name; int microvolts; + struct regulator_init_data *init_data; }; #endif diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 3794773b23d2..bac64fa390f2 100644 --- 
a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -3,7 +3,7 @@ * * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. * - * Author: Liam Girdwood <lg@opensource.wolfsonmicro.com> + * Author: Liam Girdwood <lrg@slimlogic.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -73,7 +73,9 @@ struct regulator_state { * * @always_on: Set if the regulator should never be disabled. * @boot_on: Set if the regulator is enabled when the system is initially - * started. + * started. If the regulator is not enabled by the hardware or + * bootloader then it will be enabled when the constraints are + * applied. * @apply_uV: Apply the voltage constraint when initialising. * * @input_uV: Input voltage for regulator when supplied by another regulator. @@ -83,6 +85,7 @@ struct regulator_state { * @state_standby: State for regulator when system is suspended in standby * mode. * @initial_state: Suspend state to set by default. + * @initial_mode: Mode to set at startup. */ struct regulation_constraints { @@ -111,6 +114,9 @@ struct regulation_constraints { struct regulator_state state_standby; suspend_state_t initial_state; /* suspend state to set at init */ + /* mode to set on startup */ + unsigned int initial_mode; + /* constriant flags */ unsigned always_on:1; /* regulator never off when system is on */ unsigned boot_on:1; /* bootloader/firmware enabled regulator */ @@ -160,4 +166,6 @@ struct regulator_init_data { int regulator_suspend_prepare(suspend_state_t state); +void regulator_has_full_constraints(void); + #endif diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index b3b359660082..e1b7b2173885 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -8,7 +8,7 @@ struct ring_buffer; struct ring_buffer_iter; /* - * Don't reference this struct directly, use functions below. 
+ * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { u32 type:2, len:3, time_delta:27; @@ -18,10 +18,13 @@ struct ring_buffer_event { /** * enum ring_buffer_type - internal ring buffer types * - * @RINGBUF_TYPE_PADDING: Left over page padding - * array is ignored - * size is variable depending on how much + * @RINGBUF_TYPE_PADDING: Left over page padding or discarded event + * If time_delta is 0: + * array is ignored + * size is variable depending on how much * padding is needed + * If time_delta is non zero: + * everything else same as RINGBUF_TYPE_DATA * * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta * array[0] = time delta (28 .. 59) @@ -65,6 +68,8 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } +void ring_buffer_event_discard(struct ring_buffer_event *event); + /* * size is in bytes for each per CPU buffer. */ @@ -74,13 +79,10 @@ void ring_buffer_free(struct ring_buffer *buffer); int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); -struct ring_buffer_event * -ring_buffer_lock_reserve(struct ring_buffer *buffer, - unsigned long length, - unsigned long *flags); +struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, + unsigned long length); int ring_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags); + struct ring_buffer_event *event); int ring_buffer_write(struct ring_buffer *buffer, unsigned long length, void *data); @@ -121,17 +123,19 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); -u64 ring_buffer_time_stamp(int cpu); -void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); +u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); +void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, + 
int cpu, u64 *ts); +void ring_buffer_set_clock(struct ring_buffer *buffer, + u64 (*clock)(void)); + +size_t ring_buffer_page_len(void *page); -void tracing_on(void); -void tracing_off(void); -void tracing_off_permanent(void); void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); -int ring_buffer_read_page(struct ring_buffer *buffer, - void **data_page, int cpu, int full); +int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, + size_t len, int cpu, int full); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, diff --git a/include/linux/sched.h b/include/linux/sched.h index 9da5aa0771ef..b94f3541f67b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -138,6 +138,8 @@ extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern unsigned long get_parent_ip(unsigned long addr); + struct seq_file; struct cfs_rq; struct task_group; @@ -1405,6 +1407,8 @@ struct task_struct { int curr_ret_stack; /* Stack of return addresses for return function tracing */ struct ftrace_ret_stack *ret_stack; + /* time stamp for last schedule */ + unsigned long long ftrace_timestamp; /* * Number of functions that haven't been traced * because of depth overrun. diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 6ca6a7b66d75..f4523651fa42 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,6 +14,7 @@ #include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ #include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include <linux/compiler.h> +#include <trace/kmemtrace.h> /* Size description struct for general caches. 
*/ struct cache_sizes { @@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[]; void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); -static inline void *kmalloc(size_t size, gfp_t flags) +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags); +extern size_t slab_buffer_size(struct kmem_cache *cachep); +#else +static __always_inline void * +kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags) { + return kmem_cache_alloc(cachep, flags); +} +static inline size_t slab_buffer_size(struct kmem_cache *cachep) +{ + return 0; +} +#endif + +static __always_inline void *kmalloc(size_t size, gfp_t flags) +{ + struct kmem_cache *cachep; + void *ret; + if (__builtin_constant_p(size)) { int i = 0; @@ -47,10 +66,17 @@ static inline void *kmalloc(size_t size, gfp_t flags) found: #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) - return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep, - flags); + cachep = malloc_sizes[i].cs_dmacachep; + else #endif - return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags); + cachep = malloc_sizes[i].cs_cachep; + + ret = kmem_cache_alloc_notrace(cachep, flags); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret, + size, slab_buffer_size(cachep), flags); + + return ret; } return __kmalloc(size, flags); } @@ -59,8 +85,25 @@ found: extern void *__kmalloc_node(size_t size, gfp_t flags, int node); extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); -static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, + gfp_t flags, + int nodeid); +#else +static __always_inline void * +kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, + gfp_t flags, + int nodeid) +{ + return kmem_cache_alloc_node(cachep, flags, nodeid); +} +#endif + +static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { 
+ struct kmem_cache *cachep; + void *ret; + if (__builtin_constant_p(size)) { int i = 0; @@ -78,11 +121,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) found: #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) - return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep, - flags, node); + cachep = malloc_sizes[i].cs_dmacachep; + else #endif - return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep, - flags, node); + cachep = malloc_sizes[i].cs_cachep; + + ret = kmem_cache_alloc_node_notrace(cachep, flags, node); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, + ret, size, slab_buffer_size(cachep), + flags, node); + + return ret; } return __kmalloc_node(size, flags, node); } diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index 59a3fa476ab9..0ec00b39d006 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -3,14 +3,15 @@ void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); -static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) +static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep, + gfp_t flags) { return kmem_cache_alloc_node(cachep, flags, -1); } void *__kmalloc_node(size_t size, gfp_t flags, int node); -static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { return __kmalloc_node(size, flags, node); } @@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) * kmalloc is the normal method of allocating memory * in the kernel. 
*/ -static inline void *kmalloc(size_t size, gfp_t flags) +static __always_inline void *kmalloc(size_t size, gfp_t flags) { return __kmalloc_node(size, flags, -1); } -static inline void *__kmalloc(size_t size, gfp_t flags) +static __always_inline void *__kmalloc(size_t size, gfp_t flags) { return kmalloc(size, flags); } diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h new file mode 100644 index 000000000000..85958277f83d --- /dev/null +++ b/include/linux/slow-work.h @@ -0,0 +1,95 @@ +/* Worker thread pool for slow items, such as filesystem lookups or mkdirs + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + * + * See Documentation/slow-work.txt + */ + +#ifndef _LINUX_SLOW_WORK_H +#define _LINUX_SLOW_WORK_H + +#ifdef CONFIG_SLOW_WORK + +#include <linux/sysctl.h> + +struct slow_work; + +/* + * The operations used to support slow work items + */ +struct slow_work_ops { + /* get a ref on a work item + * - return 0 if successful, -ve if not + */ + int (*get_ref)(struct slow_work *work); + + /* discard a ref to a work item */ + void (*put_ref)(struct slow_work *work); + + /* execute a work item */ + void (*execute)(struct slow_work *work); +}; + +/* + * A slow work item + * - A reference is held on the parent object by the thread pool when it is + * queued + */ +struct slow_work { + unsigned long flags; +#define SLOW_WORK_PENDING 0 /* item pending (further) execution */ +#define SLOW_WORK_EXECUTING 1 /* item currently executing */ +#define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ +#define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ + const struct slow_work_ops *ops; /* operations table for this item */ + struct list_head 
link; /* link in queue */ +}; + +/** + * slow_work_init - Initialise a slow work item + * @work: The work item to initialise + * @ops: The operations to use to handle the slow work item + * + * Initialise a slow work item. + */ +static inline void slow_work_init(struct slow_work *work, + const struct slow_work_ops *ops) +{ + work->flags = 0; + work->ops = ops; + INIT_LIST_HEAD(&work->link); +} + +/** + * vslow_work_init - Initialise a very slow work item + * @work: The work item to initialise + * @ops: The operations to use to handle the slow work item + * + * Initialise a very slow work item. This item will be restricted such that + * only a certain number of the pool threads will be able to execute items of + * this type. + */ +static inline void vslow_work_init(struct slow_work *work, + const struct slow_work_ops *ops) +{ + work->flags = 1 << SLOW_WORK_VERY_SLOW; + work->ops = ops; + INIT_LIST_HEAD(&work->link); +} + +extern int slow_work_enqueue(struct slow_work *work); +extern int slow_work_register_user(void); +extern void slow_work_unregister_user(void); + +#ifdef CONFIG_SYSCTL +extern ctl_table slow_work_sysctls[]; +#endif + +#endif /* CONFIG_SLOW_WORK */ +#endif /* _LINUX_SLOW_WORK_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index e37b6aa8a9fb..a1f90528e70b 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,6 +10,7 @@ #include <linux/gfp.h> #include <linux/workqueue.h> #include <linux/kobject.h> +#include <trace/kmemtrace.h> enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ @@ -217,13 +218,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size) void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags); +#else +static __always_inline void * +kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) +{ + return kmem_cache_alloc(s, 
gfpflags); +} +#endif + static __always_inline void *kmalloc_large(size_t size, gfp_t flags) { - return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size)); + unsigned int order = get_order(size); + void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret, + size, PAGE_SIZE << order, flags); + + return ret; } static __always_inline void *kmalloc(size_t size, gfp_t flags) { + void *ret; + if (__builtin_constant_p(size)) { if (size > SLUB_MAX_SIZE) return kmalloc_large(size, flags); @@ -234,7 +253,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) if (!s) return ZERO_SIZE_PTR; - return kmem_cache_alloc(s, flags); + ret = kmem_cache_alloc_notrace(s, flags); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, + _THIS_IP_, ret, + size, s->size, flags); + + return ret; } } return __kmalloc(size, flags); @@ -244,8 +269,24 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) void *__kmalloc_node(size_t size, gfp_t flags, int node); void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, + gfp_t gfpflags, + int node); +#else +static __always_inline void * +kmem_cache_alloc_node_notrace(struct kmem_cache *s, + gfp_t gfpflags, + int node) +{ + return kmem_cache_alloc_node(s, gfpflags, node); +} +#endif + static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { + void *ret; + if (__builtin_constant_p(size) && size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) { struct kmem_cache *s = kmalloc_slab(size); @@ -253,7 +294,13 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) if (!s) return ZERO_SIZE_PTR; - return kmem_cache_alloc_node(s, flags, node); + ret = kmem_cache_alloc_node_notrace(s, flags, node); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, + _THIS_IP_, ret, + size, s->size, flags, node); + + return 
ret; } return __kmalloc_node(size, flags, node); } diff --git a/include/linux/smp.h b/include/linux/smp.h index bbacb7baa446..a69db820eed6 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -38,7 +38,7 @@ int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, /* * main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc. * (defined in asm header): - */ + */ /* * stops all CPUs but the current one: @@ -82,7 +82,8 @@ smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, return 0; } -void __smp_call_function_single(int cpuid, struct call_single_data *data); +void __smp_call_function_single(int cpuid, struct call_single_data *data, + int wait); /* * Generic and arch helpers @@ -121,6 +122,8 @@ extern unsigned int setup_max_cpus; #else /* !SMP */ +static inline void smp_send_stop(void) { } + /* * These macros fold the SMP functionality into a single CPU system */ diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h index f41ffd7c2dd9..34c4475ac4a2 100644 --- a/include/linux/sonypi.h +++ b/include/linux/sonypi.h @@ -103,6 +103,14 @@ #define SONYPI_EVENT_WIRELESS_OFF 61 #define SONYPI_EVENT_ZOOM_IN_PRESSED 62 #define SONYPI_EVENT_ZOOM_OUT_PRESSED 63 +#define SONYPI_EVENT_CD_EJECT_PRESSED 64 +#define SONYPI_EVENT_MODEKEY_PRESSED 65 +#define SONYPI_EVENT_PKEY_P4 66 +#define SONYPI_EVENT_PKEY_P5 67 +#define SONYPI_EVENT_SETTINGKEY_PRESSED 68 +#define SONYPI_EVENT_VOLUME_INC_PRESSED 69 +#define SONYPI_EVENT_VOLUME_DEC_PRESSED 70 +#define SONYPI_EVENT_BRIGHTNESS_PRESSED 71 /* get/set brightness */ #define SONYPI_IOCGBRT _IOR('v', 0, __u8) diff --git a/include/linux/string.h b/include/linux/string.h index 8852739f36df..489019ef1694 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -10,6 +10,7 @@ #include <linux/compiler.h> /* for inline */ #include <linux/types.h> /* for size_t */ #include <linux/stddef.h> /* for NULL */ +#include <stdarg.h> extern char *strndup_user(const char __user *, 
long); extern void *memdup_user(const void __user *, size_t); @@ -112,8 +113,23 @@ extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); +#ifdef CONFIG_BINARY_PRINTF +int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); +int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); +int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); +#endif + extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, const void *from, size_t available); +/** + * strstarts - does @str start with @prefix? + * @str: string to examine + * @prefix: prefix to look for. + */ +static inline bool strstarts(const char *str, const char *prefix) +{ + return strncmp(str, prefix, strlen(prefix)) == 0; +} #endif #endif /* _LINUX_STRING_H_ */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b299a82a05e7..6470f74074af 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -65,6 +65,7 @@ struct old_linux_dirent; #include <asm/signal.h> #include <linux/quota.h> #include <linux/key.h> +#include <linux/ftrace.h> #define __SC_DECL1(t1, a1) t1 a1 #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__) @@ -95,7 +96,46 @@ struct old_linux_dirent; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) +#ifdef CONFIG_FTRACE_SYSCALLS +#define __SC_STR_ADECL1(t, a) #a +#define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) +#define __SC_STR_ADECL3(t, a, ...) #a, __SC_STR_ADECL2(__VA_ARGS__) +#define __SC_STR_ADECL4(t, a, ...) #a, __SC_STR_ADECL3(__VA_ARGS__) +#define __SC_STR_ADECL5(t, a, ...) #a, __SC_STR_ADECL4(__VA_ARGS__) +#define __SC_STR_ADECL6(t, a, ...) #a, __SC_STR_ADECL5(__VA_ARGS__) + +#define __SC_STR_TDECL1(t, a) #t +#define __SC_STR_TDECL2(t, a, ...) #t, __SC_STR_TDECL1(__VA_ARGS__) +#define __SC_STR_TDECL3(t, a, ...) 
#t, __SC_STR_TDECL2(__VA_ARGS__) +#define __SC_STR_TDECL4(t, a, ...) #t, __SC_STR_TDECL3(__VA_ARGS__) +#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) +#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) + +#define SYSCALL_METADATA(sname, nb) \ + static const struct syscall_metadata __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("__syscalls_metadata"))) \ + __syscall_meta_##sname = { \ + .name = "sys"#sname, \ + .nb_args = nb, \ + .types = types_##sname, \ + .args = args_##sname, \ + } + +#define SYSCALL_DEFINE0(sname) \ + static const struct syscall_metadata __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("__syscalls_metadata"))) \ + __syscall_meta_##sname = { \ + .name = "sys_"#sname, \ + .nb_args = 0, \ + }; \ + asmlinkage long sys_##sname(void) + +#else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) +#endif + #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) @@ -117,10 +157,26 @@ struct old_linux_dirent; #endif #endif +#ifdef CONFIG_FTRACE_SYSCALLS +#define SYSCALL_DEFINEx(x, sname, ...) \ + static const char *types_##sname[] = { \ + __SC_STR_TDECL##x(__VA_ARGS__) \ + }; \ + static const char *args_##sname[] = { \ + __SC_STR_ADECL##x(__VA_ARGS__) \ + }; \ + SYSCALL_METADATA(sname, x); \ + __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) +#else +#define SYSCALL_DEFINEx(x, sname, ...) \ + __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) +#endif + #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS #define SYSCALL_DEFINE(name) static inline long SYSC_##name -#define SYSCALL_DEFINEx(x, name, ...) \ + +#define __SYSCALL_DEFINEx(x, name, ...) 
\ asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \ static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \ @@ -134,7 +190,7 @@ struct old_linux_dirent; #else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ #define SYSCALL_DEFINE(name) asmlinkage long sys_##name -#define SYSCALL_DEFINEx(x, name, ...) \ +#define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)) #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ @@ -462,9 +518,9 @@ asmlinkage long sys_pread64(unsigned int fd, char __user *buf, asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, u32 pos_high, u32 pos_low); + unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, u32 pos_high, u32 pos_low); + unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, int mode); asmlinkage long sys_chdir(const char __user *filename); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 917707e6151d..1de8b9eb841b 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -27,27 +27,46 @@ #include <linux/idr.h> #include <linux/device.h> +#include <linux/workqueue.h> struct thermal_zone_device; struct thermal_cooling_device; +enum thermal_device_mode { + THERMAL_DEVICE_DISABLED = 0, + THERMAL_DEVICE_ENABLED, +}; + +enum thermal_trip_type { + THERMAL_TRIP_ACTIVE = 0, + THERMAL_TRIP_PASSIVE, + THERMAL_TRIP_HOT, + THERMAL_TRIP_CRITICAL, +}; + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); - int (*get_temp) 
(struct thermal_zone_device *, char *); - int (*get_mode) (struct thermal_zone_device *, char *); - int (*set_mode) (struct thermal_zone_device *, const char *); - int (*get_trip_type) (struct thermal_zone_device *, int, char *); - int (*get_trip_temp) (struct thermal_zone_device *, int, char *); + int (*get_temp) (struct thermal_zone_device *, unsigned long *); + int (*get_mode) (struct thermal_zone_device *, + enum thermal_device_mode *); + int (*set_mode) (struct thermal_zone_device *, + enum thermal_device_mode); + int (*get_trip_type) (struct thermal_zone_device *, int, + enum thermal_trip_type *); + int (*get_trip_temp) (struct thermal_zone_device *, int, + unsigned long *); int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *); + int (*notify) (struct thermal_zone_device *, int, + enum thermal_trip_type); }; struct thermal_cooling_device_ops { - int (*get_max_state) (struct thermal_cooling_device *, char *); - int (*get_cur_state) (struct thermal_cooling_device *, char *); - int (*set_cur_state) (struct thermal_cooling_device *, unsigned int); + int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); + int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); + int (*set_cur_state) (struct thermal_cooling_device *, unsigned long); }; #define THERMAL_TRIPS_NONE -1 @@ -88,11 +107,19 @@ struct thermal_zone_device { struct device device; void *devdata; int trips; + int tc1; + int tc2; + int passive_delay; + int polling_delay; + int last_temperature; + bool passive; + unsigned int forced_passive; struct thermal_zone_device_ops *ops; struct list_head cooling_devices; struct idr idr; struct mutex lock; /* protect cooling devices list */ struct list_head node; + struct delayed_work poll_queue; #if defined(CONFIG_THERMAL_HWMON) struct list_head hwmon_node; struct thermal_hwmon_device *hwmon; @@ -104,13 +131,16 @@ struct thermal_zone_device { struct thermal_zone_device *thermal_zone_device_register(char *, int, void *, 
struct thermal_zone_device_ops - *); + *, int tc1, int tc2, + int passive_freq, + int polling_freq); void thermal_zone_device_unregister(struct thermal_zone_device *); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); +void thermal_zone_device_update(struct thermal_zone_device *); struct thermal_cooling_device *thermal_cooling_device_register(char *, void *, struct thermal_cooling_device_ops diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h index dd253177f65f..3e08a1c86830 100644 --- a/include/linux/timeriomem-rng.h +++ b/include/linux/timeriomem-rng.h @@ -14,7 +14,7 @@ struct timeriomem_rng_data { struct completion completion; unsigned int present:1; - u32 __iomem *address; + void __iomem *address; /* measures in usecs */ unsigned int period; diff --git a/include/linux/topology.h b/include/linux/topology.h index a16b9e06f2e5..7402c1a27c4f 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -38,11 +38,7 @@ #endif #ifndef nr_cpus_node -#define nr_cpus_node(node) \ - ({ \ - node_to_cpumask_ptr(__tmp__, node); \ - cpus_weight(*__tmp__); \ - }) +#define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node)) #endif #define for_each_node_with_cpus(node) \ @@ -200,4 +196,9 @@ int arch_update_cpu_topology(void); #define topology_core_cpumask(cpu) cpumask_of(cpu) #endif +/* Returns the number of the current Node. 
*/ +#ifndef numa_node_id +#define numa_node_id() (cpu_to_node(raw_smp_processor_id())) +#endif + #endif /* _LINUX_TOPOLOGY_H */ diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h new file mode 100644 index 000000000000..7a8130384087 --- /dev/null +++ b/include/linux/trace_clock.h @@ -0,0 +1,19 @@ +#ifndef _LINUX_TRACE_CLOCK_H +#define _LINUX_TRACE_CLOCK_H + +/* + * 3 trace clock variants, with differing scalability/precision + * tradeoffs: + * + * - local: CPU-local trace clock + * - medium: scalable global clock with some jitter + * - global: globally monotonic, serialized clock + */ +#include <linux/compiler.h> +#include <linux/types.h> + +extern u64 notrace trace_clock_local(void); +extern u64 notrace trace_clock(void); +extern u64 notrace trace_clock_global(void); + +#endif /* _LINUX_TRACE_CLOCK_H */ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 757005458366..d35a7ee7611f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -31,8 +31,8 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ -#define TPPROTO(args...) args -#define TPARGS(args...) args +#define TP_PROTO(args...) args +#define TP_ARGS(args...) args #ifdef CONFIG_TRACEPOINTS @@ -65,7 +65,7 @@ struct tracepoint { { \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ - TPPROTO(proto), TPARGS(args)); \ + TP_PROTO(proto), TP_ARGS(args)); \ } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ @@ -153,4 +153,114 @@ static inline void tracepoint_synchronize_unregister(void) synchronize_sched(); } +#define PARAMS(args...) args +#define TRACE_FORMAT(name, proto, args, fmt) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + + +/* + * For use with the TRACE_EVENT macro: + * + * We define a tracepoint, its arguments, its printk format + * and its 'fast binary record' layout. + * + * Firstly, name your tracepoint via TRACE_EVENT(name : the + * 'subsystem_event' notation is fine. 
+ * + * Think about this whole construct as the + * 'trace_sched_switch() function' from now on. + * + * + * TRACE_EVENT(sched_switch, + * + * * + * * A function has a regular function arguments + * * prototype, declare it via TP_PROTO(): + * * + * + * TP_PROTO(struct rq *rq, struct task_struct *prev, + * struct task_struct *next), + * + * * + * * Define the call signature of the 'function'. + * * (Design sidenote: we use this instead of a + * * TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.) + * * + * + * TP_ARGS(rq, prev, next), + * + * * + * * Fast binary tracing: define the trace record via + * * TP_STRUCT__entry(). You can think about it like a + * * regular C structure local variable definition. + * * + * * This is how the trace record is structured and will + * * be saved into the ring buffer. These are the fields + * * that will be exposed to user-space in + * * /debug/tracing/events/<*>/format. + * * + * * The declared 'local variable' is called '__entry' + * * + * * __field(pid_t, prev_pid) is equivalent to a standard declaration: + * * + * * pid_t prev_pid; + * * + * * __array(char, prev_comm, TASK_COMM_LEN) is equivalent to: + * * + * * char prev_comm[TASK_COMM_LEN]; + * * + * + * TP_STRUCT__entry( + * __array( char, prev_comm, TASK_COMM_LEN ) + * __field( pid_t, prev_pid ) + * __field( int, prev_prio ) + * __array( char, next_comm, TASK_COMM_LEN ) + * __field( pid_t, next_pid ) + * __field( int, next_prio ) + * ), + * + * * + * * Assign the entry into the trace record, by embedding + * * a full C statement block into TP_fast_assign(). You + * * can refer to the trace record as '__entry' - + * * otherwise you can put arbitrary C code in here. + * * + * * Note: this C code will execute every time a trace event + * * happens, on an active tracepoint. 
+ * * + * + * TP_fast_assign( + * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + * __entry->prev_pid = prev->pid; + * __entry->prev_prio = prev->prio; + * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + * __entry->next_pid = next->pid; + * __entry->next_prio = next->prio; + * ), + * + * * + * * Formatted output of a trace record via TP_printk(). + * * This is how the tracepoint will appear under ftrace + * * plugins that make use of this tracepoint. + * * + * * (raw-binary tracing won't actually perform this step.) + * * + * + * TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + * __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + * __entry->next_comm, __entry->next_pid, __entry->next_prio), + * + * ); + * + * This macro construct is thus used for the regular printk format + * tracing setup, it is used to construct a function pointer based + * tracepoint callback (this is used by programmatic plugins and + * can also be used by generic instrumentation like SystemTap), and + * it is also used to expose a structured trace record in + * /debug/tracing/events/. 
+ */ + +#define TRACE_EVENT(name, proto, args, struct, assign, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #endif diff --git a/include/linux/usb/wusb.h b/include/linux/usb/wusb.h index 5f401b644ed5..429c631d2aad 100644 --- a/include/linux/usb/wusb.h +++ b/include/linux/usb/wusb.h @@ -80,8 +80,7 @@ struct wusb_ckhdid { u8 data[16]; } __attribute__((packed)); -const static -struct wusb_ckhdid wusb_ckhdid_zero = { .data = { 0 } }; +static const struct wusb_ckhdid wusb_ckhdid_zero = { .data = { 0 } }; #define WUSB_CKHDID_STRSIZE (3 * sizeof(struct wusb_ckhdid) + 1) diff --git a/include/trace/block.h b/include/trace/block.h index 25c6a1fd5b77..25b7068b819e 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -5,72 +5,72 @@ #include <linux/tracepoint.h> DECLARE_TRACE(block_rq_abort, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_insert, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_issue, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_requeue, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_complete, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_bio_bounce, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_complete, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct 
request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_backmerge, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_frontmerge, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_queue, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_getrq, - TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + TP_ARGS(q, bio, rw)); DECLARE_TRACE(block_sleeprq, - TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + TP_ARGS(q, bio, rw)); DECLARE_TRACE(block_plug, - TPPROTO(struct request_queue *q), - TPARGS(q)); + TP_PROTO(struct request_queue *q), + TP_ARGS(q)); DECLARE_TRACE(block_unplug_timer, - TPPROTO(struct request_queue *q), - TPARGS(q)); + TP_PROTO(struct request_queue *q), + TP_ARGS(q)); DECLARE_TRACE(block_unplug_io, - TPPROTO(struct request_queue *q), - TPARGS(q)); + TP_PROTO(struct request_queue *q), + TP_ARGS(q)); DECLARE_TRACE(block_split, - TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TPARGS(q, bio, pdu)); + TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), + TP_ARGS(q, bio, pdu)); DECLARE_TRACE(block_remap, - TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from, sector_t to), - TPARGS(q, bio, dev, from, to)); + TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from, sector_t to), + TP_ARGS(q, bio, dev, from, to)); #endif diff --git a/include/trace/irq.h b/include/trace/irq.h new file mode 100644 index 
000000000000..ff5d4495dc37 --- /dev/null +++ b/include/trace/irq.h @@ -0,0 +1,9 @@ +#ifndef _TRACE_IRQ_H +#define _TRACE_IRQ_H + +#include <linux/interrupt.h> +#include <linux/tracepoint.h> + +#include <trace/irq_event_types.h> + +#endif diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h new file mode 100644 index 000000000000..85964ebd47ec --- /dev/null +++ b/include/trace/irq_event_types.h @@ -0,0 +1,55 @@ + +/* use <trace/irq.h> instead */ +#ifndef TRACE_FORMAT +# error Do not include this file directly. +# error Unless you know what you are doing. +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +/* + * Tracepoint for entry of interrupt handler: + */ +TRACE_FORMAT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name) + ); + +/* + * Tracepoint for return of an interrupt handler: + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +TRACE_FORMAT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +TRACE_FORMAT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +#undef TRACE_SYSTEM diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h new file mode 100644 index 000000000000..ad8b7857855a --- /dev/null +++ b/include/trace/kmemtrace.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. 
+ */ + +#ifndef _LINUX_KMEMTRACE_H +#define _LINUX_KMEMTRACE_H + +#ifdef __KERNEL__ + +#include <linux/types.h> +#include <linux/marker.h> + +enum kmemtrace_type_id { + KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ + KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ + KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ +}; + +#ifdef CONFIG_KMEMTRACE + +extern void kmemtrace_init(void); + +extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node); + +extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr); + +#else /* CONFIG_KMEMTRACE */ + +static inline void kmemtrace_init(void) +{ +} + +static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ +} + +static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr) +{ +} + +#endif /* CONFIG_KMEMTRACE */ + +static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags) +{ + kmemtrace_mark_alloc_node(type_id, call_site, ptr, + bytes_req, bytes_alloc, gfp_flags, -1); +} + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_KMEMTRACE_H */ + diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h new file mode 100644 index 000000000000..5ca67df87f2a --- /dev/null +++ b/include/trace/lockdep.h @@ -0,0 +1,9 @@ +#ifndef _TRACE_LOCKDEP_H +#define _TRACE_LOCKDEP_H + +#include <linux/lockdep.h> +#include <linux/tracepoint.h> + +#include <trace/lockdep_event_types.h> + +#endif diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h new file mode 100644 index 000000000000..adccfcd2ec8f --- /dev/null 
+++ b/include/trace/lockdep_event_types.h @@ -0,0 +1,44 @@ + +#ifndef TRACE_FORMAT +# error Do not include this file directly. +# error Unless you know what you are doing. +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lock + +#ifdef CONFIG_LOCKDEP + +TRACE_FORMAT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", + read ? "read " : "", lock->name) + ); + +TRACE_FORMAT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) + ); + +#ifdef CONFIG_LOCK_STAT + +TRACE_FORMAT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) + ); + +TRACE_FORMAT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) + ); + +#endif +#endif + +#undef TRACE_SYSTEM diff --git a/include/trace/power.h b/include/trace/power.h new file mode 100644 index 000000000000..ef204666e983 --- /dev/null +++ b/include/trace/power.h @@ -0,0 +1,32 @@ +#ifndef _TRACE_POWER_H +#define _TRACE_POWER_H + +#include <linux/ktime.h> +#include <linux/tracepoint.h> + +enum { + POWER_NONE = 0, + POWER_CSTATE = 1, + POWER_PSTATE = 2, +}; + +struct power_trace { + ktime_t stamp; + ktime_t end; + int type; + int state; +}; + +DECLARE_TRACE(power_start, + TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), + TP_ARGS(it, type, state)); + +DECLARE_TRACE(power_mark, + TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), + TP_ARGS(it, type, state)); + +DECLARE_TRACE(power_end, + TP_PROTO(struct power_trace *it), + TP_ARGS(it)); + +#endif /* _TRACE_POWER_H */ diff --git a/include/trace/sched.h b/include/trace/sched.h index 0d81098ee9fc..4e372a1a29bf 100644 
--- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -4,53 +4,6 @@ #include <linux/sched.h> #include <linux/tracepoint.h> -DECLARE_TRACE(sched_kthread_stop, - TPPROTO(struct task_struct *t), - TPARGS(t)); - -DECLARE_TRACE(sched_kthread_stop_ret, - TPPROTO(int ret), - TPARGS(ret)); - -DECLARE_TRACE(sched_wait_task, - TPPROTO(struct rq *rq, struct task_struct *p), - TPARGS(rq, p)); - -DECLARE_TRACE(sched_wakeup, - TPPROTO(struct rq *rq, struct task_struct *p, int success), - TPARGS(rq, p, success)); - -DECLARE_TRACE(sched_wakeup_new, - TPPROTO(struct rq *rq, struct task_struct *p, int success), - TPARGS(rq, p, success)); - -DECLARE_TRACE(sched_switch, - TPPROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - TPARGS(rq, prev, next)); - -DECLARE_TRACE(sched_migrate_task, - TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - TPARGS(p, orig_cpu, dest_cpu)); - -DECLARE_TRACE(sched_process_free, - TPPROTO(struct task_struct *p), - TPARGS(p)); - -DECLARE_TRACE(sched_process_exit, - TPPROTO(struct task_struct *p), - TPARGS(p)); - -DECLARE_TRACE(sched_process_wait, - TPPROTO(struct pid *pid), - TPARGS(pid)); - -DECLARE_TRACE(sched_process_fork, - TPPROTO(struct task_struct *parent, struct task_struct *child), - TPARGS(parent, child)); - -DECLARE_TRACE(sched_signal_send, - TPPROTO(int sig, struct task_struct *p), - TPARGS(sig, p)); +#include <trace/sched_event_types.h> #endif diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h new file mode 100644 index 000000000000..63547dc1125f --- /dev/null +++ b/include/trace/sched_event_types.h @@ -0,0 +1,337 @@ + +/* use <trace/sched.h> instead */ +#ifndef TRACE_EVENT +# error Do not include this file directly. +# error Unless you know what you are doing. 
+#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched + +/* + * Tracepoint for calling kthread_stop, performed to end a kthread: + */ +TRACE_EVENT(sched_kthread_stop, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + ), + + TP_printk("task %s:%d", __entry->comm, __entry->pid) +); + +/* + * Tracepoint for the return value of the kthread stopping: + */ +TRACE_EVENT(sched_kthread_stop_ret, + + TP_PROTO(int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( int, ret ) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret %d", __entry->ret) +); + +/* + * Tracepoint for waiting on task to unschedule: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wait_task, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for waking up a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. 
) + */ +TRACE_EVENT(sched_wakeup, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for waking up a new task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup_new, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. 
) + */ +TRACE_EVENT(sched_switch, + + TP_PROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(rq, prev, next), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ), + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio) +); + +/* + * Tracepoint for a task being migrated: + */ +TRACE_EVENT(sched_migrate_task, + + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + + TP_ARGS(p, orig_cpu, dest_cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->orig_cpu = orig_cpu; + __entry->dest_cpu = dest_cpu; + ), + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu) +); + +/* + * Tracepoint for freeing a task: + */ +TRACE_EVENT(sched_process_free, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a task 
exiting: + */ +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a waiting task: + */ +TRACE_EVENT(sched_process_wait, + + TP_PROTO(struct pid *pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); + __entry->prio = current->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for do_fork: + */ +TRACE_EVENT(sched_process_fork, + + TP_PROTO(struct task_struct *parent, struct task_struct *child), + + TP_ARGS(parent, child), + + TP_STRUCT__entry( + __array( char, parent_comm, TASK_COMM_LEN ) + __field( pid_t, parent_pid ) + __array( char, child_comm, TASK_COMM_LEN ) + __field( pid_t, child_pid ) + ), + + TP_fast_assign( + memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __entry->parent_pid = parent->pid; + memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __entry->child_pid = child->pid; + ), + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid) +); + +/* + * Tracepoint for sending a signal: + */ +TRACE_EVENT(sched_signal_send, + + TP_PROTO(int sig, struct task_struct *p), + + TP_ARGS(sig, p), + + TP_STRUCT__entry( + __field( int, sig ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sig = sig; + ), + + TP_printk("sig: %d task 
%s:%d", + __entry->sig, __entry->comm, __entry->pid) +); + +#undef TRACE_SYSTEM diff --git a/include/trace/skb.h b/include/trace/skb.h index a96610f92f69..b66206d9be72 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -5,7 +5,7 @@ #include <linux/tracepoint.h> DECLARE_TRACE(kfree_skb, - TPPROTO(struct sk_buff *skb, void *location), - TPARGS(skb, location)); + TP_PROTO(struct sk_buff *skb, void *location), + TP_ARGS(skb, location)); #endif diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h new file mode 100644 index 000000000000..df56f5694be6 --- /dev/null +++ b/include/trace/trace_event_types.h @@ -0,0 +1,5 @@ +/* trace/<type>_event_types.h here */ + +#include <trace/sched_event_types.h> +#include <trace/irq_event_types.h> +#include <trace/lockdep_event_types.h> diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h new file mode 100644 index 000000000000..fd13750ca4ba --- /dev/null +++ b/include/trace/trace_events.h @@ -0,0 +1,5 @@ +/* trace/<type>.h here */ + +#include <trace/sched.h> +#include <trace/irq.h> +#include <trace/lockdep.h> diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h new file mode 100644 index 000000000000..7626523deeba --- /dev/null +++ b/include/trace/workqueue.h @@ -0,0 +1,25 @@ +#ifndef __TRACE_WORKQUEUE_H +#define __TRACE_WORKQUEUE_H + +#include <linux/tracepoint.h> +#include <linux/workqueue.h> +#include <linux/sched.h> + +DECLARE_TRACE(workqueue_insertion, + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + TP_ARGS(wq_thread, work)); + +DECLARE_TRACE(workqueue_execution, + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + TP_ARGS(wq_thread, work)); + +/* Trace the creation of one workqueue thread on a cpu */ +DECLARE_TRACE(workqueue_creation, + TP_PROTO(struct task_struct *wq_thread, int cpu), + TP_ARGS(wq_thread, cpu)); + +DECLARE_TRACE(workqueue_destruction, + TP_PROTO(struct task_struct *wq_thread), + 
TP_ARGS(wq_thread)); + +#endif /* __TRACE_WORKQUEUE_H */ |