From 7be865ab8634d4ec2a6bdb9459b268cd60e832af Mon Sep 17 00:00:00 2001 From: Kim, Milo Date: Fri, 23 Mar 2012 15:02:01 -0700 Subject: backlight: new backlight driver for LP855x devices This driver supports TI LP8550/LP8551/LP8552/LP8553/LP8556 backlight devices. The brightness can be controlled by the I2C or PWM input. The lp855x driver provides both modes. For the PWM control, pwm-specific functions can be defined in the platform data. And some information can be read via the sysfs(lp855x device attributes). For details, please refer to Documentation/backlight/lp855x-driver.txt. [axel.lin@gmail.com: add missing mutex_unlock in lp855x_read_byte() error path] [axel.lin@gmail.com: check platform data in lp855x_probe()] [axel.lin@gmail.com: small cleanups] [dan.carpenter@oracle.com: silence a compiler warning] [axel.lin@gmail.com: use id->driver_data to differentiate lp855x chips] [akpm@linux-foundation.org: simplify boolean return expression] Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Axel Lin Signed-off-by: Dan Carpenter Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/backlight/lp855x-driver.txt | 78 +++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 Documentation/backlight/lp855x-driver.txt (limited to 'Documentation') diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt new file mode 100644 index 000000000000..f5e4caafab7d --- /dev/null +++ b/Documentation/backlight/lp855x-driver.txt @@ -0,0 +1,78 @@ +Kernel driver lp855x +==================== + +Backlight driver for LP855x ICs + +Supported chips: + Texas Instruments LP8550, LP8551, LP8552, LP8553 and LP8556 + +Author: Milo(Woogyom) Kim + +Description +----------- + +* Brightness control + +Brightness can be controlled by the pwm input or the i2c command. +The lp855x driver supports both cases. + +* Device attributes + +1) bl_ctl_mode +Backlight control mode. 
+Value : pwm based or register based + +2) chip_id +The lp855x chip id. +Value : lp8550/lp8551/lp8552/lp8553/lp8556 + +Platform data for lp855x +------------------------ + +For supporting platform specific data, the lp855x platform data can be used. + +* name : Backlight driver name. If it is not defined, default name is set. +* mode : Brightness control mode. PWM or register based. +* device_control : Value of DEVICE CONTROL register. +* initial_brightness : Initial value of backlight brightness. +* pwm_data : Platform specific pwm generation functions. + Only valid when brightness is pwm input mode. + Functions should be implemented by PWM driver. + - pwm_set_intensity() : set duty of PWM + - pwm_get_intensity() : get current duty of PWM +* load_new_rom_data : + 0 : use default configuration data + 1 : update values of eeprom or eprom registers on loading driver +* size_program : Total size of lp855x_rom_data. +* rom_data : List of new eeprom/eprom registers. + +example 1) lp8552 platform data : i2c register mode with new eeprom data + +#define EEPROM_A5_ADDR 0xA5 +#define EEPROM_A5_VAL 0x4f /* EN_VSYNC=0 */ + +static struct lp855x_rom_data lp8552_eeprom_arr[] = { + {EEPROM_A5_ADDR, EEPROM_A5_VAL}, +}; + +static struct lp855x_platform_data lp8552_pdata = { + .name = "lcd-bl", + .mode = REGISTER_BASED, + .device_control = I2C_CONFIG(LP8552), + .initial_brightness = INITIAL_BRT, + .load_new_rom_data = 1, + .size_program = ARRAY_SIZE(lp8552_eeprom_arr), + .rom_data = lp8552_eeprom_arr, +}; + +example 2) lp8556 platform data : pwm input mode with default rom data + +static struct lp855x_platform_data lp8556_pdata = { + .mode = PWM_BASED, + .device_control = PWM_CONFIG(LP8556), + .initial_brightness = INITIAL_BRT, + .pwm_data = { + .pwm_set_intensity = platform_pwm_set_intensity, + .pwm_get_intensity = platform_pwm_get_intensity, + }, +}; -- cgit v1.2.3 From 5ae4e8a77dc82afcfe8460168ec0b94f4b79a54a Mon Sep 17 00:00:00 2001 From: Kim, Milo Date: Fri, 23 Mar 2012 
15:02:08 -0700 Subject: drivers/leds/leds-lp5521.c: add 'name' in the lp5521_led_config The name of each led channel can be configurable. For the compatibility, the name is set to default value(xx:channelN) when 'name' is not defined. Signed-off-by: Milo(Woogyom) Kim Acked-by: Linus Walleij Cc: Arun MURTHY Cc: Srinidhi Kasagar Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/leds/leds-lp5521.txt | 6 ++++++ drivers/leds/leds-lp5521.c | 11 ++++++++--- include/linux/leds-lp5521.h | 1 + 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index c4d8d151e0fe..f48ab757d120 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -43,17 +43,23 @@ Format: 10x mA i.e 10 means 1.0 mA example platform data: Note: chan_nr can have values between 0 and 2. +The name of each channel can be configurable. +If the name field is not defined, the default name will be set to 'xxxx:channelN' +(XXXX : pdata->label or i2c client name, N : channel number) static struct lp5521_led_config lp5521_led_config[] = { { + .name = "red", .chan_nr = 0, .led_current = 50, .max_current = 130, }, { + .name = "green", .chan_nr = 1, .led_current = 0, .max_current = 130, }, { + .name = "blue", .chan_nr = 2, .led_current = 0, .max_current = 130, diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index c42c8f049565..59feecdfe3a8 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -620,10 +620,15 @@ static int __devinit lp5521_init_led(struct lp5521_led *led, return -EINVAL; } - snprintf(name, sizeof(name), "%s:channel%d", - pdata->label ?: client->name, chan); led->cdev.brightness_set = lp5521_set_brightness; - led->cdev.name = name; + if (pdata->led_config[chan].name) { + led->cdev.name = pdata->led_config[chan].name; + } else { + snprintf(name, sizeof(name), "%s:channel%d", 
+ pdata->label ?: client->name, chan); + led->cdev.name = name; + } + res = led_classdev_register(dev, &led->cdev); if (res < 0) { dev_err(dev, "couldn't register led on channel %d\n", chan); diff --git a/include/linux/leds-lp5521.h b/include/linux/leds-lp5521.h index fd548d2a8775..e675b8d4c7bf 100644 --- a/include/linux/leds-lp5521.h +++ b/include/linux/leds-lp5521.h @@ -26,6 +26,7 @@ /* See Documentation/leds/leds-lp5521.txt */ struct lp5521_led_config { + char *name; u8 chan_nr; u8 led_current; /* mA x10, 0 if led is not connected */ u8 max_current; -- cgit v1.2.3 From 3b49aacd0e56d5bf1b511f6554f17cd65eb8da64 Mon Sep 17 00:00:00 2001 From: Kim, Milo Date: Fri, 23 Mar 2012 15:02:08 -0700 Subject: drivers/leds/leds-lp5521.c: add 'update_config' in the lp5521_platform_data The value of CONFIG register(Addr 08h) is configurable. For supporting this feature, update_config is added in the platform data. If 'update_config' is not defined, the default value is 'LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT'. To define CONFIG register in the platform data, the bit definitions were moved to the header file. Documentation updated : description about 'update_config' and example. 
Signed-off-by: Milo(Woogyom) Kim Acked-by: Linus Walleij Cc: Arun MURTHY Cc: Srinidhi Kasagar Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/leds/leds-lp5521.txt | 19 +++++++++++++++++++ drivers/leds/leds-lp5521.c | 19 ++++--------------- include/linux/leds-lp5521.h | 13 +++++++++++++ 3 files changed, 36 insertions(+), 15 deletions(-) (limited to 'Documentation') diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index f48ab757d120..e3c66c64591d 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -92,3 +92,22 @@ static struct lp5521_platform_data lp5521_platform_data = { If the current is set to 0 in the platform data, that channel is disabled and it is not visible in the sysfs. + +The 'update_config' : CONFIG register (ADDR 08h) +This value is platform-specific data. +If update_config is not defined, the CONFIG register is set with +'LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT'. 
+(Enable auto-powersave, set charge pump to auto, red to battery) + +example of update_config : + +#define LP5521_CONFIGS (LP5521_PWM_HF | LP5521_PWRSAVE_EN | \ + LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT | \ + LP5521_CLK_INT) + +static struct lp5521_platform_data lp5521_pdata = { + .led_config = lp5521_led_config, + .num_channels = ARRAY_SIZE(lp5521_led_config), + .clock_mode = LP5521_CLOCK_INT, + .update_config = LP5521_CONFIGS, +}; diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 59feecdfe3a8..9682ece16011 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -82,18 +82,6 @@ #define LP5521_LOGARITHMIC_PWM 0x80 /* Logarithmic PWM adjustment */ #define LP5521_EXEC_RUN 0x2A -/* Bits in CONFIG register */ -#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ -#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ -#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ -#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ -#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ -#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ -#define LP5521_R_TO_BATT 4 /* R out: 0 = CP, 1 = Vbat */ -#define LP5521_CLK_SRC_EXT 0 /* Ext-clk source (CLK_32K) */ -#define LP5521_CLK_INT 1 /* Internal clock */ -#define LP5521_CLK_AUTO 2 /* Automatic clock selection */ - /* Status */ #define LP5521_EXT_CLK_USED 0x08 @@ -241,15 +229,16 @@ static int lp5521_configure(struct i2c_client *client) { struct lp5521_chip *chip = i2c_get_clientdata(client); int ret; + u8 cfg; lp5521_init_engine(chip); /* Set all PWMs to direct control mode */ ret = lp5521_write(client, LP5521_REG_OP_MODE, 0x3F); - /* Enable auto-powersave, set charge pump to auto, red to battery */ - ret |= lp5521_write(client, LP5521_REG_CONFIG, - LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT); + cfg = chip->pdata->update_config ? 
+ : (LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT); + ret |= lp5521_write(client, LP5521_REG_CONFIG, cfg); /* Initialize all channels PWM to zero -> leds off */ ret |= lp5521_write(client, LP5521_REG_R_PWM, 0); diff --git a/include/linux/leds-lp5521.h b/include/linux/leds-lp5521.h index e675b8d4c7bf..e9ab583cac36 100644 --- a/include/linux/leds-lp5521.h +++ b/include/linux/leds-lp5521.h @@ -36,6 +36,18 @@ struct lp5521_led_config { #define LP5521_CLOCK_INT 1 #define LP5521_CLOCK_EXT 2 +/* Bits in CONFIG register */ +#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ +#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ +#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ +#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ +#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ +#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ +#define LP5521_R_TO_BATT 4 /* R out: 0 = CP, 1 = Vbat */ +#define LP5521_CLK_SRC_EXT 0 /* Ext-clk source (CLK_32K) */ +#define LP5521_CLK_INT 1 /* Internal clock */ +#define LP5521_CLK_AUTO 2 /* Automatic clock selection */ + struct lp5521_platform_data { struct lp5521_led_config *led_config; u8 num_channels; @@ -44,6 +56,7 @@ struct lp5521_platform_data { void (*release_resources)(void); void (*enable)(bool state); const char *label; + u8 update_config; }; #endif /* __LINUX_LP5521_H */ -- cgit v1.2.3 From 011af7bc7cd188a0310e2d26cdc2cc5d90148b0c Mon Sep 17 00:00:00 2001 From: Kim, Milo Date: Fri, 23 Mar 2012 15:02:09 -0700 Subject: drivers/leds/leds-lp5521.c: support led pattern data The lp5521 has autonomous operation mode without external control. Using lp5521_platform_data, various led patterns can be configurable. For supporting this feature, new functions and device attribute are added. Structure of lp5521_led_pattern: 3 channels are supported - red, green and blue. Pattern(s) of each channel and numbers of pattern(s) are defined in the platform data. 
Pattern data are hexa codes which include pattern commands such like set pwm, wait, ramp up/down, branch and so on. Pattern mode functions: * lp5521_clear_program_memory Before running new led pattern, program memory should be cleared. * lp5521_write_program_memory Pattern data updated in the program memory via the i2c. * lp5521_get_pattern Get pattern from predefined in the platform data. * lp5521_run_led_pattern Stop current pattern or run new pattern. Transition time is required between different operation mode. Device attribute - 'led_pattern': To load specific led pattern, new device attribute is added. When the lp5521 driver is unloaded, stop current led pattern mode. Documentation updated : description about how to define the led patterns and example. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Milo(Woogyom) Kim Acked-by: Linus Walleij Cc: Arun MURTHY Cc: Srinidhi Kasagar Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/leds/leds-lp5521.txt | 38 ++++++++++++++ drivers/leds/leds-lp5521.c | 102 ++++++++++++++++++++++++++++++++++++- include/linux/leds-lp5521.h | 11 ++++ 3 files changed, 150 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index e3c66c64591d..0e542ab3d4a0 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -111,3 +111,41 @@ static struct lp5521_platform_data lp5521_pdata = { .clock_mode = LP5521_CLOCK_INT, .update_config = LP5521_CONFIGS, }; + +LED patterns : LP5521 has autonomous operation without external control. +Pattern data can be defined in the platform data. 
+ +example of led pattern data : + +/* RGB(50,5,0) 500ms on, 500ms off, infinite loop */ +static u8 pattern_red[] = { + 0x40, 0x32, 0x60, 0x00, 0x40, 0x00, 0x60, 0x00, + }; + +static u8 pattern_green[] = { + 0x40, 0x05, 0x60, 0x00, 0x40, 0x00, 0x60, 0x00, + }; + +static struct lp5521_led_pattern board_led_patterns[] = { + { + .r = pattern_red, + .g = pattern_green, + .size_r = ARRAY_SIZE(pattern_red), + .size_g = ARRAY_SIZE(pattern_green), + }, +}; + +static struct lp5521_platform_data lp5521_platform_data = { + .led_config = lp5521_led_config, + .num_channels = ARRAY_SIZE(lp5521_led_config), + .clock_mode = LP5521_CLOCK_EXT, + .patterns = board_led_patterns, + .num_patterns = ARRAY_SIZE(board_led_patterns), +}; + +Then predefined led pattern(s) can be executed via the sysfs. +To start the pattern #1, +# echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern +(xxxx : i2c bus & slave address) +To end the pattern, +# echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 9682ece16011..007c7c921e7e 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -88,6 +88,9 @@ /* default R channel current register value */ #define LP5521_REG_R_CURR_DEFAULT 0xAF +/* Pattern Mode */ +#define PATTERN_OFF 0 + struct lp5521_engine { int id; u8 mode; @@ -493,7 +496,7 @@ static ssize_t store_current(struct device *dev, ssize_t ret; unsigned long curr; - if (strict_strtoul(buf, 0, &curr)) + if (kstrtoul(buf, 0, &curr)) return -EINVAL; if (curr > led->max_current) @@ -525,6 +528,100 @@ static ssize_t lp5521_selftest(struct device *dev, return sprintf(buf, "%s\n", ret ? 
"FAIL" : "OK"); } +static void lp5521_clear_program_memory(struct i2c_client *cl) +{ + int i; + u8 rgb_mem[] = { + LP5521_REG_R_PROG_MEM, + LP5521_REG_G_PROG_MEM, + LP5521_REG_B_PROG_MEM, + }; + + for (i = 0; i < ARRAY_SIZE(rgb_mem); i++) { + lp5521_write(cl, rgb_mem[i], 0); + lp5521_write(cl, rgb_mem[i] + 1, 0); + } +} + +static void lp5521_write_program_memory(struct i2c_client *cl, + u8 base, u8 *rgb, int size) +{ + int i; + + if (!rgb || size <= 0) + return; + + for (i = 0; i < size; i++) + lp5521_write(cl, base + i, *(rgb + i)); + + lp5521_write(cl, base + i, 0); + lp5521_write(cl, base + i + 1, 0); +} + +static inline struct lp5521_led_pattern *lp5521_get_pattern + (struct lp5521_chip *chip, u8 offset) +{ + struct lp5521_led_pattern *ptn; + ptn = chip->pdata->patterns + (offset - 1); + return ptn; +} + +static void lp5521_run_led_pattern(int mode, struct lp5521_chip *chip) +{ + struct lp5521_led_pattern *ptn; + struct i2c_client *cl = chip->client; + int num_patterns = chip->pdata->num_patterns; + + if (mode > num_patterns || !(chip->pdata->patterns)) + return; + + if (mode == PATTERN_OFF) { + lp5521_write(cl, LP5521_REG_ENABLE, + LP5521_MASTER_ENABLE | LP5521_LOGARITHMIC_PWM); + usleep_range(1000, 2000); + lp5521_write(cl, LP5521_REG_OP_MODE, LP5521_CMD_DIRECT); + } else { + ptn = lp5521_get_pattern(chip, mode); + if (!ptn) + return; + + lp5521_write(cl, LP5521_REG_OP_MODE, LP5521_CMD_LOAD); + usleep_range(1000, 2000); + + lp5521_clear_program_memory(cl); + + lp5521_write_program_memory(cl, LP5521_REG_R_PROG_MEM, + ptn->r, ptn->size_r); + lp5521_write_program_memory(cl, LP5521_REG_G_PROG_MEM, + ptn->g, ptn->size_g); + lp5521_write_program_memory(cl, LP5521_REG_B_PROG_MEM, + ptn->b, ptn->size_b); + + lp5521_write(cl, LP5521_REG_OP_MODE, LP5521_CMD_RUN); + usleep_range(1000, 2000); + lp5521_write(cl, LP5521_REG_ENABLE, + LP5521_MASTER_ENABLE | LP5521_LOGARITHMIC_PWM | + LP5521_EXEC_RUN); + } +} + +static ssize_t store_led_pattern(struct device *dev, + struct 
device_attribute *attr, + const char *buf, size_t len) +{ + struct lp5521_chip *chip = i2c_get_clientdata(to_i2c_client(dev)); + unsigned long val; + int ret; + + ret = strict_strtoul(buf, 16, &val); + if (ret) + return ret; + + lp5521_run_led_pattern(val, chip); + + return len; +} + /* led class device attributes */ static DEVICE_ATTR(led_current, S_IRUGO | S_IWUSR, show_current, store_current); static DEVICE_ATTR(max_current, S_IRUGO , show_max_current, NULL); @@ -550,6 +647,7 @@ static DEVICE_ATTR(engine1_load, S_IWUSR, NULL, store_engine1_load); static DEVICE_ATTR(engine2_load, S_IWUSR, NULL, store_engine2_load); static DEVICE_ATTR(engine3_load, S_IWUSR, NULL, store_engine3_load); static DEVICE_ATTR(selftest, S_IRUGO, lp5521_selftest, NULL); +static DEVICE_ATTR(led_pattern, S_IWUSR, NULL, store_led_pattern); static struct attribute *lp5521_attributes[] = { &dev_attr_engine1_mode.attr, @@ -559,6 +657,7 @@ static struct attribute *lp5521_attributes[] = { &dev_attr_engine1_load.attr, &dev_attr_engine2_load.attr, &dev_attr_engine3_load.attr, + &dev_attr_led_pattern.attr, NULL }; @@ -761,6 +860,7 @@ static int __devexit lp5521_remove(struct i2c_client *client) struct lp5521_chip *chip = i2c_get_clientdata(client); int i; + lp5521_run_led_pattern(PATTERN_OFF, chip); lp5521_unregister_sysfs(client); for (i = 0; i < chip->num_leds; i++) { diff --git a/include/linux/leds-lp5521.h b/include/linux/leds-lp5521.h index e9ab583cac36..3f071ec019b2 100644 --- a/include/linux/leds-lp5521.h +++ b/include/linux/leds-lp5521.h @@ -32,6 +32,15 @@ struct lp5521_led_config { u8 max_current; }; +struct lp5521_led_pattern { + u8 *r; + u8 *g; + u8 *b; + u8 size_r; + u8 size_g; + u8 size_b; +}; + #define LP5521_CLOCK_AUTO 0 #define LP5521_CLOCK_INT 1 #define LP5521_CLOCK_EXT 2 @@ -57,6 +66,8 @@ struct lp5521_platform_data { void (*enable)(bool state); const char *label; u8 update_config; + struct lp5521_led_pattern *patterns; + int num_patterns; }; #endif /* __LINUX_LP5521_H */ -- cgit 
v1.2.3 From fbedceb10066430b925cf43fbf926e8abb9e2359 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 23 Mar 2012 15:02:22 -0700 Subject: crc32: move long comment about crc32 fundamentals to Documentation/ Move a long comment from lib/crc32.c to Documentation/crc32.txt where it will more likely get read. Edited the resulting document to add an explanation of the slicing-by-n algorithm. [djwong@us.ibm.com: minor changelog tweaks] [akpm@linux-foundation.org: fix typo, per George] Signed-off-by: George Spelvin Signed-off-by: Bob Pearson Signed-off-by: Darrick J. Wong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/00-INDEX | 2 + Documentation/crc32.txt | 182 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/crc32.c | 129 +--------------------------------- 3 files changed, 186 insertions(+), 127 deletions(-) create mode 100644 Documentation/crc32.txt (limited to 'Documentation') diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index a1a643272883..2214f123a976 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -104,6 +104,8 @@ cpuidle/ - info on CPU_IDLE, CPU idle state management subsystem. cputopology.txt - documentation on how CPU topology info is exported via sysfs. +crc32.txt + - brief tutorial on CRC computation cris/ - directory with info about Linux on CRIS architecture. crypto/ diff --git a/Documentation/crc32.txt b/Documentation/crc32.txt new file mode 100644 index 000000000000..a08a7dd9d625 --- /dev/null +++ b/Documentation/crc32.txt @@ -0,0 +1,182 @@ +A brief CRC tutorial. + +A CRC is a long-division remainder. You add the CRC to the message, +and the whole thing (message+CRC) is a multiple of the given +CRC polynomial. To check the CRC, you can either check that the +CRC matches the recomputed value, *or* you can check that the +remainder computed on the message+CRC is 0. 
This latter approach +is used by a lot of hardware implementations, and is why so many +protocols put the end-of-frame flag after the CRC. + +It's actually the same long division you learned in school, except that +- We're working in binary, so the digits are only 0 and 1, and +- When dividing polynomials, there are no carries. Rather than add and + subtract, we just xor. Thus, we tend to get a bit sloppy about + the difference between adding and subtracting. + +Like all division, the remainder is always smaller than the divisor. +To produce a 32-bit CRC, the divisor is actually a 33-bit CRC polynomial. +Since it's 33 bits long, bit 32 is always going to be set, so usually the +CRC is written in hex with the most significant bit omitted. (If you're +familiar with the IEEE 754 floating-point format, it's the same idea.) + +Note that a CRC is computed over a string of *bits*, so you have +to decide on the endianness of the bits within each byte. To get +the best error-detecting properties, this should correspond to the +order they're actually sent. For example, standard RS-232 serial is +little-endian; the most significant bit (sometimes used for parity) +is sent last. And when appending a CRC word to a message, you should +do it in the right order, matching the endianness. + +Just like with ordinary division, you proceed one digit (bit) at a time. +Each step of the division you take one more digit (bit) of the dividend +and append it to the current remainder. Then you figure out the +appropriate multiple of the divisor to subtract to bring the remainder +back into range. In binary, this is easy - it has to be either 0 or 1, +and to make the XOR cancel, it's just a copy of bit 32 of the remainder. + +When computing a CRC, we don't care about the quotient, so we can +throw the quotient bit away, but subtract the appropriate multiple of +the polynomial from the remainder and we're back to where we started, +ready to process the next bit. 
+ +A big-endian CRC written this way would be coded like: +for (i = 0; i < input_bits; i++) { + multiple = remainder & 0x80000000 ? CRCPOLY : 0; + remainder = (remainder << 1 | next_input_bit()) ^ multiple; +} + +Notice how, to get at bit 32 of the shifted remainder, we look +at bit 31 of the remainder *before* shifting it. + +But also notice how the next_input_bit() bits we're shifting into +the remainder don't actually affect any decision-making until +32 bits later. Thus, the first 32 cycles of this are pretty boring. +Also, to add the CRC to a message, we need a 32-bit-long hole for it at +the end, so we have to add 32 extra cycles shifting in zeros at the +end of every message. + +These details lead to a standard trick: postpone merging in the +next_input_bit() until the moment it's needed. Then the first 32 cycles +can be precomputed, and merging in the final 32 zero bits to make room +for the CRC can be skipped entirely. This changes the code to: + +for (i = 0; i < input_bits; i++) { + remainder ^= next_input_bit() << 31; + multiple = (remainder & 0x80000000) ? CRCPOLY : 0; + remainder = (remainder << 1) ^ multiple; +} + +With this optimization, the little-endian code is particularly simple: +for (i = 0; i < input_bits; i++) { + remainder ^= next_input_bit(); + multiple = (remainder & 1) ? CRCPOLY : 0; + remainder = (remainder >> 1) ^ multiple; +} + +The most significant coefficient of the remainder polynomial is stored +in the least significant bit of the binary "remainder" variable. +The other details of endianness have been hidden in CRCPOLY (which must +be bit-reversed) and next_input_bit(). + +As long as next_input_bit is returning the bits in a sensible order, we don't +*have* to wait until the last possible moment to merge in additional bits. +We can do it 8 bits at a time rather than 1 bit at a time: +for (i = 0; i < input_bytes; i++) { + remainder ^= next_input_byte() << 24; + for (j = 0; j < 8; j++) { + multiple = (remainder & 0x80000000) ? 
CRCPOLY : 0; + remainder = (remainder << 1) ^ multiple; + } +} + +Or in little-endian: +for (i = 0; i < input_bytes; i++) { + remainder ^= next_input_byte(); + for (j = 0; j < 8; j++) { + multiple = (remainder & 1) ? CRCPOLY : 0; + remainder = (remainder >> 1) ^ multiple; + } +} + +If the input is a multiple of 32 bits, you can even XOR in a 32-bit +word at a time and increase the inner loop count to 32. + +You can also mix and match the two loop styles, for example doing the +bulk of a message byte-at-a-time and adding bit-at-a-time processing +for any fractional bytes at the end. + +To reduce the number of conditional branches, software commonly uses +the byte-at-a-time table method, popularized by Dilip V. Sarwate, +"Computation of Cyclic Redundancy Checks via Table Look-Up", Comm. ACM +v.31 no.8 (August 1988) p. 1008-1013. + +Here, rather than just shifting one bit of the remainder to decide +on the correct multiple to subtract, we can shift a byte at a time. +This produces a 40-bit (rather than a 33-bit) intermediate remainder, +and the correct multiple of the polynomial to subtract is found using +a 256-entry lookup table indexed by the high 8 bits. + +(The table entries are simply the CRC-32 of the given one-byte messages.) + +When space is more constrained, smaller tables can be used, e.g. two +4-bit shifts followed by a lookup in a 16-entry table. + +It is not practical to process much more than 8 bits at a time using this +technique, because tables larger than 256 entries use too much memory and, +more importantly, too much of the L1 cache. + +To get higher software performance, a "slicing" technique can be used. +See "High Octane CRC Generation with the Intel Slicing-by-8 Algorithm", +ftp://download.intel.com/technology/comms/perfnet/download/slicing-by-8.pdf + +This does not change the number of table lookups, but does increase +the parallelism. 
With the classic Sarwate algorithm, each table lookup +must be completed before the index of the next can be computed. + +A "slicing by 2" technique would shift the remainder 16 bits at a time, +producing a 48-bit intermediate remainder. Rather than doing a single +lookup in a 65536-entry table, the two high bytes are looked up in +two different 256-entry tables. Each contains the remainder required +to cancel out the corresponding byte. The tables are different because the +polynomials to cancel are different. One has non-zero coefficients from +x^32 to x^39, while the other goes from x^40 to x^47. + +Since modern processors can handle many parallel memory operations, this +takes barely longer than a single table look-up and thus performs almost +twice as fast as the basic Sarwate algorithm. + +This can be extended to "slicing by 4" using 4 256-entry tables. +Each step, 32 bits of data is fetched, XORed with the CRC, and the result +broken into bytes and looked up in the tables. Because the 32-bit shift +leaves the low-order bits of the intermediate remainder zero, the +final CRC is simply the XOR of the 4 table look-ups. + +But this still enforces sequential execution: a second group of table +look-ups cannot begin until the previous group's 4 table look-ups have all +been completed. Thus, the processor's load/store unit is sometimes idle. + +To make maximum use of the processor, "slicing by 8" performs 8 look-ups +in parallel. Each step, the 32-bit CRC is shifted 64 bits and XORed +with 64 bits of input data. What is important to note is that 4 of +those 8 bytes are simply copies of the input data; they do not depend +on the previous CRC at all. Thus, those 4 table look-ups may commence +immediately, without waiting for the previous loop iteration. + +By always having 4 loads in flight, a modern superscalar processor can +be kept busy and make full use of its L1 cache. 
+ +Two more details about CRC implementation in the real world: + +Normally, appending zero bits to a message which is already a multiple +of a polynomial produces a larger multiple of that polynomial. Thus, +a basic CRC will not detect appended zero bits (or bytes). To enable +a CRC to detect this condition, it's common to invert the CRC before +appending it. This makes the remainder of the message+crc come out not +as zero, but some fixed non-zero value. (The CRC of the inversion +pattern, 0xffffffff.) + +The same problem applies to zero bits prepended to the message, and a +similar solution is used. Instead of starting the CRC computation with +a remainder of 0, an initial remainder of all ones is used. As long as +you start the same way on decoding, it doesn't make a difference. diff --git a/lib/crc32.c b/lib/crc32.c index ffea0c99a1f3..c3ce94a06db8 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -20,6 +20,8 @@ * Version 2. See the file COPYING for more details. */ +/* see: Documentation/crc32.txt for a description of algorithms */ + #include #include #include @@ -209,133 +211,6 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(crc32_be); -/* - * A brief CRC tutorial. - * - * A CRC is a long-division remainder. You add the CRC to the message, - * and the whole thing (message+CRC) is a multiple of the given - * CRC polynomial. To check the CRC, you can either check that the - * CRC matches the recomputed value, *or* you can check that the - * remainder computed on the message+CRC is 0. This latter approach - * is used by a lot of hardware implementations, and is why so many - * protocols put the end-of-frame flag after the CRC. - * - * It's actually the same long division you learned in school, except that - * - We're working in binary, so the digits are only 0 and 1, and - * - When dividing polynomials, there are no carries. Rather than add and - * subtract, we just xor. 
Thus, we tend to get a bit sloppy about - * the difference between adding and subtracting. - * - * A 32-bit CRC polynomial is actually 33 bits long. But since it's - * 33 bits long, bit 32 is always going to be set, so usually the CRC - * is written in hex with the most significant bit omitted. (If you're - * familiar with the IEEE 754 floating-point format, it's the same idea.) - * - * Note that a CRC is computed over a string of *bits*, so you have - * to decide on the endianness of the bits within each byte. To get - * the best error-detecting properties, this should correspond to the - * order they're actually sent. For example, standard RS-232 serial is - * little-endian; the most significant bit (sometimes used for parity) - * is sent last. And when appending a CRC word to a message, you should - * do it in the right order, matching the endianness. - * - * Just like with ordinary division, the remainder is always smaller than - * the divisor (the CRC polynomial) you're dividing by. Each step of the - * division, you take one more digit (bit) of the dividend and append it - * to the current remainder. Then you figure out the appropriate multiple - * of the divisor to subtract to being the remainder back into range. - * In binary, it's easy - it has to be either 0 or 1, and to make the - * XOR cancel, it's just a copy of bit 32 of the remainder. - * - * When computing a CRC, we don't care about the quotient, so we can - * throw the quotient bit away, but subtract the appropriate multiple of - * the polynomial from the remainder and we're back to where we started, - * ready to process the next bit. - * - * A big-endian CRC written this way would be coded like: - * for (i = 0; i < input_bits; i++) { - * multiple = remainder & 0x80000000 ? CRCPOLY : 0; - * remainder = (remainder << 1 | next_input_bit()) ^ multiple; - * } - * Notice how, to get at bit 32 of the shifted remainder, we look - * at bit 31 of the remainder *before* shifting it. 
- * - * But also notice how the next_input_bit() bits we're shifting into - * the remainder don't actually affect any decision-making until - * 32 bits later. Thus, the first 32 cycles of this are pretty boring. - * Also, to add the CRC to a message, we need a 32-bit-long hole for it at - * the end, so we have to add 32 extra cycles shifting in zeros at the - * end of every message, - * - * So the standard trick is to rearrage merging in the next_input_bit() - * until the moment it's needed. Then the first 32 cycles can be precomputed, - * and merging in the final 32 zero bits to make room for the CRC can be - * skipped entirely. - * This changes the code to: - * for (i = 0; i < input_bits; i++) { - * remainder ^= next_input_bit() << 31; - * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; - * remainder = (remainder << 1) ^ multiple; - * } - * With this optimization, the little-endian code is simpler: - * for (i = 0; i < input_bits; i++) { - * remainder ^= next_input_bit(); - * multiple = (remainder & 1) ? CRCPOLY : 0; - * remainder = (remainder >> 1) ^ multiple; - * } - * - * Note that the other details of endianness have been hidden in CRCPOLY - * (which must be bit-reversed) and next_input_bit(). - * - * However, as long as next_input_bit is returning the bits in a sensible - * order, we can actually do the merging 8 or more bits at a time rather - * than one bit at a time: - * for (i = 0; i < input_bytes; i++) { - * remainder ^= next_input_byte() << 24; - * for (j = 0; j < 8; j++) { - * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; - * remainder = (remainder << 1) ^ multiple; - * } - * } - * Or in little-endian: - * for (i = 0; i < input_bytes; i++) { - * remainder ^= next_input_byte(); - * for (j = 0; j < 8; j++) { - * multiple = (remainder & 1) ? CRCPOLY : 0; - * remainder = (remainder << 1) ^ multiple; - * } - * } - * If the input is a multiple of 32 bits, you can even XOR in a 32-bit - * word at a time and increase the inner loop count to 32. 
- * - * You can also mix and match the two loop styles, for example doing the - * bulk of a message byte-at-a-time and adding bit-at-a-time processing - * for any fractional bytes at the end. - * - * The only remaining optimization is to the byte-at-a-time table method. - * Here, rather than just shifting one bit of the remainder to decide - * in the correct multiple to subtract, we can shift a byte at a time. - * This produces a 40-bit (rather than a 33-bit) intermediate remainder, - * but again the multiple of the polynomial to subtract depends only on - * the high bits, the high 8 bits in this case. - * - * The multiple we need in that case is the low 32 bits of a 40-bit - * value whose high 8 bits are given, and which is a multiple of the - * generator polynomial. This is simply the CRC-32 of the given - * one-byte message. - * - * Two more details: normally, appending zero bits to a message which - * is already a multiple of a polynomial produces a larger multiple of that - * polynomial. To enable a CRC to detect this condition, it's common to - * invert the CRC before appending it. This makes the remainder of the - * message+crc come out not as zero, but some fixed non-zero value. - * - * The same problem applies to zero bits prepended to the message, and - * a similar solution is used. Instead of starting with a remainder of - * 0, an initial remainder of all ones is used. As long as you start - * the same way on decoding, it doesn't make a difference. - */ - #ifdef UNITTEST #include -- cgit v1.2.3