aboutsummaryrefslogtreecommitdiff
path: root/include/charset.h
diff options
context:
space:
mode:
author: Heinrich Schuchardt, 2018-08-31 21:31:27 +0200
committer: Alexander Graf, 2018-09-23 21:55:29 +0200
commit: d8c28232c34d4be9984eda52d69d0920678d937d (patch)
tree: da6d8305f94829b36f163c331eb5e1bf8abc3280 /include/charset.h
parent: 1dde0d57a5d1281aaa949e9bf7b5c476345a56ee (diff)
lib: charset: utility functions for Unicode
utf8_get() - get next UTF-8 code point from buffer
utf8_put() - write UTF-8 code point to buffer
utf8_utf16_strnlen() - length of a utf-8 string after conversion to utf-16
utf8_utf16_strncpy() - copy a utf-8 string to utf-16
utf16_get() - get next UTF-16 code point from buffer
utf16_put() - write UTF-16 code point to buffer
utf16_strnlen() - number of code points in a utf-16 string
utf16_utf8_strnlen() - length of a utf-16 string after conversion to utf-8
utf16_utf8_strncpy() - copy a utf-16 string to utf-8

Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
Diffstat (limited to 'include/charset.h')
-rw-r--r--include/charset.h130
1 files changed, 130 insertions, 0 deletions
diff --git a/include/charset.h b/include/charset.h
index 2c6deb8034f..cf41eb5e5fb 100644
--- a/include/charset.h
+++ b/include/charset.h
@@ -8,11 +8,141 @@
#ifndef __CHARSET_H_
#define __CHARSET_H_
+#include <linux/kernel.h>
#include <linux/types.h>
#define MAX_UTF8_PER_UTF16 3
/**
+ * utf8_get() - get next UTF-8 code point from buffer
+ *
+ * @src: pointer to current byte, updated to point to next byte
+ * Return: code point, or 0 for end of string, or -1 if no legal
+ * code point is found. In case of an error src points to
+ * the incorrect byte.
+ */
+s32 utf8_get(const char **src);
+
+/**
+ * utf8_put() - write UTF-8 code point to buffer
+ *
+ * @code: code point
+ * @dst: pointer to destination buffer, updated to next position
+ * Return: -1 if the input parameters are invalid
+ */
+int utf8_put(s32 code, char **dst);
+
+/**
+ * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion
+ * to utf-16
+ *
+ * @src: utf-8 string
+ * @count: maximum number of code points to convert
+ * Return: length after conversion to utf-16 without the trailing \0;
+ * presumably counted in u16 words rather than bytes (TODO confirm). An
+ * invalid UTF-8 sequence reserves one word for a replacement character.
+ */
+size_t utf8_utf16_strnlen(const char *src, size_t count);
+
+/**
+ * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16
+ *
+ * @a: utf-8 string
+ * Return: result of utf8_utf16_strnlen() with count set to SIZE_MAX. The
+ * value is a size_t, so an invalid utf-8 string is not reported as -1.
+ */
+#define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX)
+
+/**
+ * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string
+ *
+ * @dst: destination buffer
+ * @src: source buffer
+ * @count: maximum number of code points to copy
+ * Return: -1 if the input parameters are invalid
+ */
+int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count);
+
+/**
+ * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string
+ *
+ * @d: destination buffer (dst argument of utf8_utf16_strncpy())
+ * @s: source buffer (src argument; count is set to SIZE_MAX)
+ * Return: -1 if the input parameters are invalid
+ */
+#define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX)
+
+/**
+ * utf16_get() - get next UTF-16 code point from buffer
+ *
+ * @src: pointer to current word, updated to point to next word
+ * Return: code point, or 0 for end of string, or -1 if no legal
+ * code point is found. In case of an error src points to
+ * the incorrect word.
+ */
+s32 utf16_get(const u16 **src);
+
+/**
+ * utf16_put() - write UTF-16 code point to buffer
+ *
+ * @code: code point
+ * @dst: pointer to destination buffer, updated to next position
+ * Return: -1 if the input parameters are invalid
+ */
+int utf16_put(s32 code, u16 **dst);
+
+/**
+ * utf16_strnlen() - length of a truncated utf-16 string
+ *
+ * @src: utf-16 string
+ * @count: maximum number of code points to convert
+ * Return: length in code points. If an invalid UTF-16 sequence is
+ * hit one position will be reserved for a replacement
+ * character.
+ */
+size_t utf16_strnlen(const u16 *src, size_t count);
+
+/**
+ * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion
+ * to utf-8
+ *
+ * @src: utf-16 string
+ * @count: maximum number of code points to convert
+ * Return: length in bytes after conversion to utf-8 without the
+ * trailing \0. If an invalid UTF-16 sequence is hit one
+ * byte will be reserved for a replacement character.
+ */
+size_t utf16_utf8_strnlen(const u16 *src, size_t count);
+
+/**
+ * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8
+ *
+ * @a: utf-16 string
+ * Return: result of utf16_utf8_strnlen() with count set to SIZE_MAX. The
+ * value is a size_t, so an invalid utf-16 string is not reported as -1.
+ */
+#define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX)
+
+/**
+ * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string
+ *
+ * @dst: destination buffer
+ * @src: source buffer
+ * @count: maximum number of code points to copy
+ * Return: -1 if the input parameters are invalid
+ */
+int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count);
+
+/**
+ * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string
+ *
+ * @d: destination buffer (dst argument of utf16_utf8_strncpy())
+ * @s: source buffer (src argument; count is set to SIZE_MAX)
+ * Return: -1 if the input parameters are invalid
+ */
+#define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX)
+
+/**
* u16_strlen - count non-zero words
*
* This function matches wcslen() if the -fshort-wchar compiler flag is set.