Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig            |   2
-rw-r--r--  fs/Makefile           |   1
-rw-r--r--  fs/erofs/Kconfig      |  21
-rw-r--r--  fs/erofs/Makefile     |   9
-rw-r--r--  fs/erofs/data.c       | 311
-rw-r--r--  fs/erofs/decompress.c |  78
-rw-r--r--  fs/erofs/decompress.h |  24
-rw-r--r--  fs/erofs/erofs_fs.h   | 436
-rw-r--r--  fs/erofs/fs.c         | 267
-rw-r--r--  fs/erofs/internal.h   | 313
-rw-r--r--  fs/erofs/namei.c      | 252
-rw-r--r--  fs/erofs/super.c      | 105
-rw-r--r--  fs/erofs/zmap.c       | 601
-rw-r--r--  fs/fs.c               |  22
14 files changed, 2442 insertions, 0 deletions
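Before the full diff, a minimal, hypothetical sketch of how the reader added by this series could be driven from board or command code. Only the erofs_probe(), erofs_size(), erofs_read() and erofs_close() prototypes are taken from fs/erofs/fs.c in the diff below; the wrapper function, the "mmc" interface string, the blk_get_device_part_str() call and the include list are assumptions made for illustration and are not part of the patch.

#include <blk.h>
#include <part.h>
#include <malloc.h>
#include <stdio.h>
#include <linux/errno.h>

/* entry points added in fs/erofs/fs.c (signatures as in the diff below) */
int erofs_probe(struct blk_desc *fs_dev_desc, struct disk_partition *fs_partition);
int erofs_size(const char *filename, loff_t *size);
int erofs_read(const char *filename, void *buf, loff_t offset, loff_t len,
	       loff_t *actread);
void erofs_close(void);

/*
 * Illustrative wrapper only: reads a whole file from an EROFS partition.
 * The function name, the "mmc" interface string and the error handling
 * style are assumptions for this sketch, not part of the patch.
 */
static int example_erofs_read_file(const char *dev_part_str, const char *path)
{
	struct blk_desc *desc;
	struct disk_partition part;
	loff_t size, actread;
	void *buf;
	int ret;

	ret = blk_get_device_part_str("mmc", dev_part_str, &desc, &part, 1);
	if (ret < 0)
		return ret;

	/* read and validate the EROFS superblock on the selected partition */
	ret = erofs_probe(desc, &part);
	if (ret)
		return ret;

	ret = erofs_size(path, &size);
	if (ret)
		goto out;

	buf = malloc(size);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	/* len == 0 means "read the whole file" in erofs_read() */
	ret = erofs_read(path, buf, 0, 0, &actread);
	if (!ret)
		printf("read %lld of %lld bytes from %s\n",
		       (long long)actread, (long long)size, path);

	free(buf);
out:
	erofs_close();
	return ret;
}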
diff --git a/fs/Kconfig b/fs/Kconfig index 620af7f0447..cda9f66cc93 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -24,4 +24,6 @@ source "fs/yaffs2/Kconfig" source "fs/squashfs/Kconfig" +source "fs/erofs/Kconfig" + endmenu diff --git a/fs/Makefile b/fs/Makefile index 937cbcf6e85..f05a21c9e6d 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -25,5 +25,6 @@ obj-$(CONFIG_CMD_UBIFS) += ubifs/ obj-$(CONFIG_YAFFS2) += yaffs2/ obj-$(CONFIG_CMD_ZFS) += zfs/ obj-$(CONFIG_FS_SQUASHFS) += squashfs/ +obj-$(CONFIG_FS_EROFS) += erofs/ endif obj-y += fs_internal.o diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig new file mode 100644 index 00000000000..ee4e777c5c8 --- /dev/null +++ b/fs/erofs/Kconfig @@ -0,0 +1,21 @@ +config FS_EROFS + bool "Enable EROFS filesystem support" + help + This provides support for reading images from EROFS filesystem. + EROFS (Enhanced Read-Only File System) is a lightweight read-only + file system for scenarios which need high-performance read-only + requirements. + + It also provides fixed-sized output compression support, which + improves storage density, keeps relatively higher compression + ratios, which is more useful to achieve high performance for + embedded devices with limited memory. + +config FS_EROFS_ZIP + bool "EROFS Data Compression Support" + depends on FS_EROFS + select LZ4 + default y + help + Enable fixed-sized output compression for EROFS. + If you don't want to enable compression feature, say N. diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile new file mode 100644 index 00000000000..58af6a68e41 --- /dev/null +++ b/fs/erofs/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0+ +# + +obj-$(CONFIG_$(SPL_)FS_EROFS) = fs.o \ + super.o \ + namei.o \ + data.o \ + decompress.o \ + zmap.o diff --git a/fs/erofs/data.c b/fs/erofs/data.c new file mode 100644 index 00000000000..761896054c8 --- /dev/null +++ b/fs/erofs/data.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include "internal.h" +#include "decompress.h" + +static int erofs_map_blocks_flatmode(struct erofs_inode *inode, + struct erofs_map_blocks *map, + int flags) +{ + int err = 0; + erofs_blk_t nblocks, lastblk; + u64 offset = map->m_la; + struct erofs_inode *vi = inode; + bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); + + nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); + lastblk = nblocks - tailendpacking; + + /* there is no hole in flatmode */ + map->m_flags = EROFS_MAP_MAPPED; + + if (offset < blknr_to_addr(lastblk)) { + map->m_pa = blknr_to_addr(vi->u.i_blkaddr) + map->m_la; + map->m_plen = blknr_to_addr(lastblk) - offset; + } else if (tailendpacking) { + /* 2 - inode inline B: inode, [xattrs], inline last blk... 
*/ + map->m_pa = iloc(vi->nid) + vi->inode_isize + + vi->xattr_isize + erofs_blkoff(map->m_la); + map->m_plen = inode->i_size - offset; + + /* inline data should be located in one meta block */ + if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) { + erofs_err("inline data cross block boundary @ nid %" PRIu64, + vi->nid); + DBG_BUGON(1); + err = -EFSCORRUPTED; + goto err_out; + } + + map->m_flags |= EROFS_MAP_META; + } else { + erofs_err("internal error @ nid: %" PRIu64 " (size %llu), m_la 0x%" PRIx64, + vi->nid, (unsigned long long)inode->i_size, map->m_la); + DBG_BUGON(1); + err = -EIO; + goto err_out; + } + + map->m_llen = map->m_plen; +err_out: + return err; +} + +int erofs_map_blocks(struct erofs_inode *inode, + struct erofs_map_blocks *map, int flags) +{ + struct erofs_inode *vi = inode; + struct erofs_inode_chunk_index *idx; + u8 buf[EROFS_BLKSIZ]; + u64 chunknr; + unsigned int unit; + erofs_off_t pos; + int err = 0; + + map->m_deviceid = 0; + if (map->m_la >= inode->i_size) { + /* leave out-of-bound access unmapped */ + map->m_flags = 0; + map->m_plen = 0; + goto out; + } + + if (vi->datalayout != EROFS_INODE_CHUNK_BASED) + return erofs_map_blocks_flatmode(inode, map, flags); + + if (vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES) + unit = sizeof(*idx); /* chunk index */ + else + unit = EROFS_BLOCK_MAP_ENTRY_SIZE; /* block map */ + + chunknr = map->m_la >> vi->u.chunkbits; + pos = roundup(iloc(vi->nid) + vi->inode_isize + + vi->xattr_isize, unit) + unit * chunknr; + + err = erofs_blk_read(buf, erofs_blknr(pos), 1); + if (err < 0) + return -EIO; + + map->m_la = chunknr << vi->u.chunkbits; + map->m_plen = min_t(erofs_off_t, 1UL << vi->u.chunkbits, + roundup(inode->i_size - map->m_la, EROFS_BLKSIZ)); + + /* handle block map */ + if (!(vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) { + __le32 *blkaddr = (void *)buf + erofs_blkoff(pos); + + if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) { + map->m_flags = 0; + } else { + map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr)); + map->m_flags = EROFS_MAP_MAPPED; + } + goto out; + } + /* parse chunk indexes */ + idx = (void *)buf + erofs_blkoff(pos); + switch (le32_to_cpu(idx->blkaddr)) { + case EROFS_NULL_ADDR: + map->m_flags = 0; + break; + default: + map->m_deviceid = le16_to_cpu(idx->device_id) & + sbi.device_id_mask; + map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr)); + map->m_flags = EROFS_MAP_MAPPED; + break; + } +out: + map->m_llen = map->m_plen; + return err; +} + +int erofs_map_dev(struct erofs_sb_info *sbi, struct erofs_map_dev *map) +{ + struct erofs_device_info *dif; + int id; + + if (map->m_deviceid) { + if (sbi->extra_devices < map->m_deviceid) + return -ENODEV; + } else if (sbi->extra_devices) { + for (id = 0; id < sbi->extra_devices; ++id) { + erofs_off_t startoff, length; + + dif = sbi->devs + id; + if (!dif->mapped_blkaddr) + continue; + startoff = blknr_to_addr(dif->mapped_blkaddr); + length = blknr_to_addr(dif->blocks); + + if (map->m_pa >= startoff && + map->m_pa < startoff + length) { + map->m_pa -= startoff; + break; + } + } + } + return 0; +} + +static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer, + erofs_off_t size, erofs_off_t offset) +{ + struct erofs_map_blocks map = { + .index = UINT_MAX, + }; + struct erofs_map_dev mdev; + int ret; + erofs_off_t ptr = offset; + + while (ptr < offset + size) { + char *const estart = buffer + ptr - offset; + erofs_off_t eend; + + map.m_la = ptr; + ret = erofs_map_blocks(inode, &map, 0); + if (ret) + return ret; + + DBG_BUGON(map.m_plen != map.m_llen); + + 
mdev = (struct erofs_map_dev) { + .m_deviceid = map.m_deviceid, + .m_pa = map.m_pa, + }; + ret = erofs_map_dev(&sbi, &mdev); + if (ret) + return ret; + + /* trim extent */ + eend = min(offset + size, map.m_la + map.m_llen); + DBG_BUGON(ptr < map.m_la); + + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + if (!map.m_llen) { + /* reached EOF */ + memset(estart, 0, offset + size - ptr); + ptr = offset + size; + continue; + } + memset(estart, 0, eend - ptr); + ptr = eend; + continue; + } + + if (ptr > map.m_la) { + mdev.m_pa += ptr - map.m_la; + map.m_la = ptr; + } + + ret = erofs_dev_read(mdev.m_deviceid, estart, mdev.m_pa, + eend - map.m_la); + if (ret < 0) + return -EIO; + ptr = eend; + } + return 0; +} + +static int z_erofs_read_data(struct erofs_inode *inode, char *buffer, + erofs_off_t size, erofs_off_t offset) +{ + erofs_off_t end, length, skip; + struct erofs_map_blocks map = { + .index = UINT_MAX, + }; + struct erofs_map_dev mdev; + bool partial; + unsigned int bufsize = 0; + char *raw = NULL; + int ret = 0; + + end = offset + size; + while (end > offset) { + map.m_la = end - 1; + + ret = z_erofs_map_blocks_iter(inode, &map, 0); + if (ret) + break; + + /* no device id here, thus it will always succeed */ + mdev = (struct erofs_map_dev) { + .m_pa = map.m_pa, + }; + ret = erofs_map_dev(&sbi, &mdev); + if (ret) { + DBG_BUGON(1); + break; + } + + /* + * trim to the needed size if the returned extent is quite + * larger than requested, and set up partial flag as well. + */ + if (end < map.m_la + map.m_llen) { + length = end - map.m_la; + partial = true; + } else { + DBG_BUGON(end != map.m_la + map.m_llen); + length = map.m_llen; + partial = !(map.m_flags & EROFS_MAP_FULL_MAPPED); + } + + if (map.m_la < offset) { + skip = offset - map.m_la; + end = offset; + } else { + skip = 0; + end = map.m_la; + } + + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + memset(buffer + end - offset, 0, length); + end = map.m_la; + continue; + } + + if (map.m_plen > bufsize) { + bufsize = map.m_plen; + raw = realloc(raw, bufsize); + if (!raw) { + ret = -ENOMEM; + break; + } + } + ret = erofs_dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen); + if (ret < 0) + break; + + ret = z_erofs_decompress(&(struct z_erofs_decompress_req) { + .in = raw, + .out = buffer + end - offset, + .decodedskip = skip, + .inputsize = map.m_plen, + .decodedlength = length, + .alg = map.m_algorithmformat, + .partial_decoding = partial + }); + if (ret < 0) + break; + } + if (raw) + free(raw); + return ret < 0 ? 
ret : 0; +} + +int erofs_pread(struct erofs_inode *inode, char *buf, + erofs_off_t count, erofs_off_t offset) +{ + switch (inode->datalayout) { + case EROFS_INODE_FLAT_PLAIN: + case EROFS_INODE_FLAT_INLINE: + case EROFS_INODE_CHUNK_BASED: + return erofs_read_raw_data(inode, buf, count, offset); + case EROFS_INODE_FLAT_COMPRESSION_LEGACY: + case EROFS_INODE_FLAT_COMPRESSION: + return z_erofs_read_data(inode, buf, count, offset); + default: + break; + } + return -EINVAL; +} diff --git a/fs/erofs/decompress.c b/fs/erofs/decompress.c new file mode 100644 index 00000000000..2be3b844cfc --- /dev/null +++ b/fs/erofs/decompress.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include "decompress.h" + +#if IS_ENABLED(CONFIG_LZ4) +#include <u-boot/lz4.h> +static int z_erofs_decompress_lz4(struct z_erofs_decompress_req *rq) +{ + int ret = 0; + char *dest = rq->out; + char *src = rq->in; + char *buff = NULL; + bool support_0padding = false; + unsigned int inputmargin = 0; + + if (erofs_sb_has_lz4_0padding()) { + support_0padding = true; + + while (!src[inputmargin & ~PAGE_MASK]) + if (!(++inputmargin & ~PAGE_MASK)) + break; + + if (inputmargin >= rq->inputsize) + return -EIO; + } + + if (rq->decodedskip) { + buff = malloc(rq->decodedlength); + if (!buff) + return -ENOMEM; + dest = buff; + } + + if (rq->partial_decoding || !support_0padding) + ret = LZ4_decompress_safe_partial(src + inputmargin, dest, + rq->inputsize - inputmargin, + rq->decodedlength, rq->decodedlength); + else + ret = LZ4_decompress_safe(src + inputmargin, dest, + rq->inputsize - inputmargin, + rq->decodedlength); + + if (ret != (int)rq->decodedlength) { + ret = -EIO; + goto out; + } + + if (rq->decodedskip) + memcpy(rq->out, dest + rq->decodedskip, + rq->decodedlength - rq->decodedskip); + +out: + if (buff) + free(buff); + + return ret; +} +#endif + +int z_erofs_decompress(struct z_erofs_decompress_req *rq) +{ + if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) { + if (rq->inputsize != EROFS_BLKSIZ) + return -EFSCORRUPTED; + + DBG_BUGON(rq->decodedlength > EROFS_BLKSIZ); + DBG_BUGON(rq->decodedlength < rq->decodedskip); + + memcpy(rq->out, rq->in + rq->decodedskip, + rq->decodedlength - rq->decodedskip); + return 0; + } + +#if IS_ENABLED(CONFIG_LZ4) + if (rq->alg == Z_EROFS_COMPRESSION_LZ4) + return z_erofs_decompress_lz4(rq); +#endif + return -EOPNOTSUPP; +} diff --git a/fs/erofs/decompress.h b/fs/erofs/decompress.h new file mode 100644 index 00000000000..81d5fb84f6c --- /dev/null +++ b/fs/erofs/decompress.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef __EROFS_DECOMPRESS_H +#define __EROFS_DECOMPRESS_H + +#include "internal.h" + +struct z_erofs_decompress_req { + char *in, *out; + + /* + * initial decompressed bytes that need to be skipped + * when finally copying to output buffer + */ + unsigned int decodedskip; + unsigned int inputsize, decodedlength; + + /* indicate the algorithm will be used for decompression */ + unsigned int alg; + bool partial_decoding; +}; + +int z_erofs_decompress(struct z_erofs_decompress_req *rq); + +#endif diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h new file mode 100644 index 00000000000..6b62c7a4f5f --- /dev/null +++ b/fs/erofs/erofs_fs.h @@ -0,0 +1,436 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */ +/* + * EROFS (Enhanced ROM File System) on-disk format definition + * + * Copyright (C) 2017-2018 HUAWEI, Inc. 
+ * http://www.huawei.com/ + * Copyright (C) 2021, Alibaba Cloud + */ +#ifndef __EROFS_FS_H +#define __EROFS_FS_H + +#include <asm/unaligned.h> +#include <fs.h> +#include <part.h> +#include <stdint.h> +#include <compiler.h> + +#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2 +#define EROFS_SUPER_OFFSET 1024 + +#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 + +/* + * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should + * be incompatible with this kernel version. + */ +#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001 +#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002 +#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002 +#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004 +#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008 +#define EROFS_ALL_FEATURE_INCOMPAT \ + (EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \ + EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ + EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \ + EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ + EROFS_FEATURE_INCOMPAT_DEVICE_TABLE) + +#define EROFS_SB_EXTSLOT_SIZE 16 + +struct erofs_deviceslot { + union { + u8 uuid[16]; /* used for device manager later */ + u8 userdata[64]; /* digest(sha256), etc. */ + } u; + __le32 blocks; /* total fs blocks of this device */ + __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */ + u8 reserved[56]; +}; + +#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot) + +/* erofs on-disk super block (currently 128 bytes) */ +struct erofs_super_block { + __le32 magic; /* file system magic number */ + __le32 checksum; /* crc32c(super_block) */ + __le32 feature_compat; + __u8 blkszbits; /* support block_size == PAGE_SIZE only */ + __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */ + + __le16 root_nid; /* nid of root directory */ + __le64 inos; /* total valid ino # (== f_files - f_favail) */ + + __le64 build_time; /* inode v1 time derivation */ + __le32 build_time_nsec; /* inode v1 time derivation in nano scale */ + __le32 blocks; /* used for statfs */ + __le32 meta_blkaddr; /* start block address of metadata area */ + __le32 xattr_blkaddr; /* start block address of shared xattr area */ + __u8 uuid[16]; /* 128-bit uuid for volume */ + __u8 volume_name[16]; /* volume name */ + __le32 feature_incompat; + union { + /* bitmap for available compression algorithms */ + __le16 available_compr_algs; + /* customized sliding window size instead of 64k by default */ + __le16 lz4_max_distance; + } __packed u1; + __le16 extra_devices; /* # of devices besides the primary device */ + __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */ + __u8 reserved2[38]; +}; + +/* + * erofs inode datalayout (i_format in on-disk inode): + * 0 - inode plain without inline data A: + * inode, [xattrs], ... | ... | no-holed data + * 1 - inode VLE compression B (legacy): + * inode, [xattrs], extents ... | ... + * 2 - inode plain with inline data C: + * inode, [xattrs], last_inline_data, ... | ... | no-holed data + * 3 - inode compression D: + * inode, [xattrs], map_header, extents ... | ... + * 4 - inode chunk-based E: + * inode, [xattrs], chunk indexes ... | ... 
+ * 5~7 - reserved + */ +enum { + EROFS_INODE_FLAT_PLAIN = 0, + EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1, + EROFS_INODE_FLAT_INLINE = 2, + EROFS_INODE_FLAT_COMPRESSION = 3, + EROFS_INODE_CHUNK_BASED = 4, + EROFS_INODE_DATALAYOUT_MAX +}; + +static inline bool erofs_inode_is_data_compressed(unsigned int datamode) +{ + return datamode == EROFS_INODE_FLAT_COMPRESSION || + datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY; +} + +/* bit definitions of inode i_advise */ +#define EROFS_I_VERSION_BITS 1 +#define EROFS_I_DATALAYOUT_BITS 3 + +#define EROFS_I_VERSION_BIT 0 +#define EROFS_I_DATALAYOUT_BIT 1 + +#define EROFS_I_ALL \ + ((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1) + +/* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */ +#define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F +/* with chunk indexes or just a 4-byte blkaddr array */ +#define EROFS_CHUNK_FORMAT_INDEXES 0x0020 + +#define EROFS_CHUNK_FORMAT_ALL \ + (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES) + +struct erofs_inode_chunk_info { + __le16 format; /* chunk blkbits, etc. */ + __le16 reserved; +}; + +/* 32-byte reduced form of an ondisk inode */ +struct erofs_inode_compact { + __le16 i_format; /* inode format hints */ + +/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ + __le16 i_xattr_icount; + __le16 i_mode; + __le16 i_nlink; + __le32 i_size; + __le32 i_reserved; + union { + /* file total compressed blocks for data mapping 1 */ + __le32 compressed_blocks; + __le32 raw_blkaddr; + + /* for device files, used to indicate old/new device # */ + __le32 rdev; + + /* for chunk-based files, it contains the summary info */ + struct erofs_inode_chunk_info c; + } i_u; + __le32 i_ino; /* only used for 32-bit stat compatibility */ + __le16 i_uid; + __le16 i_gid; + __le32 i_reserved2; +}; + +/* 32 bytes on-disk inode */ +#define EROFS_INODE_LAYOUT_COMPACT 0 +/* 64 bytes on-disk inode */ +#define EROFS_INODE_LAYOUT_EXTENDED 1 + +/* 64-byte complete form of an ondisk inode */ +struct erofs_inode_extended { + __le16 i_format; /* inode format hints */ + +/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ + __le16 i_xattr_icount; + __le16 i_mode; + __le16 i_reserved; + __le64 i_size; + union { + /* file total compressed blocks for data mapping 1 */ + __le32 compressed_blocks; + __le32 raw_blkaddr; + + /* for device files, used to indicate old/new device # */ + __le32 rdev; + + /* for chunk-based files, it contains the summary info */ + struct erofs_inode_chunk_info c; + } i_u; + + /* only used for 32-bit stat compatibility */ + __le32 i_ino; + + __le32 i_uid; + __le32 i_gid; + __le64 i_ctime; + __le32 i_ctime_nsec; + __le32 i_nlink; + __u8 i_reserved2[16]; +}; + +#define EROFS_MAX_SHARED_XATTRS (128) +/* h_shared_count between 129 ... 
255 are special # */ +#define EROFS_SHARED_XATTR_EXTENT (255) + +/* + * inline xattrs (n == i_xattr_icount): + * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes + * 12 bytes / \ + * / \ + * /-----------------------\ + * | erofs_xattr_entries+ | + * +-----------------------+ + * inline xattrs must starts in erofs_xattr_ibody_header, + * for read-only fs, no need to introduce h_refcount + */ +struct erofs_xattr_ibody_header { + __le32 h_reserved; + __u8 h_shared_count; + __u8 h_reserved2[7]; + __le32 h_shared_xattrs[0]; /* shared xattr id array */ +}; + +/* Name indexes */ +#define EROFS_XATTR_INDEX_USER 1 +#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define EROFS_XATTR_INDEX_TRUSTED 4 +#define EROFS_XATTR_INDEX_LUSTRE 5 +#define EROFS_XATTR_INDEX_SECURITY 6 + +/* xattr entry (for both inline & shared xattrs) */ +struct erofs_xattr_entry { + __u8 e_name_len; /* length of name */ + __u8 e_name_index; /* attribute name index */ + __le16 e_value_size; /* size of attribute value */ + /* followed by e_name and e_value */ + char e_name[0]; /* attribute name */ +}; + +static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount) +{ + if (!i_xattr_icount) + return 0; + + return sizeof(struct erofs_xattr_ibody_header) + + sizeof(__u32) * (le16_to_cpu(i_xattr_icount) - 1); +} + +#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry)) + +static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e) +{ + return EROFS_XATTR_ALIGN(sizeof(struct erofs_xattr_entry) + + e->e_name_len + le16_to_cpu(e->e_value_size)); +} + +/* represent a zeroed chunk (hole) */ +#define EROFS_NULL_ADDR -1 + +/* 4-byte block address array */ +#define EROFS_BLOCK_MAP_ENTRY_SIZE sizeof(__le32) + +/* 8-byte inode chunk indexes */ +struct erofs_inode_chunk_index { + __le16 advise; /* always 0, don't care for now */ + __le16 device_id; /* back-end storage id (with bits masked) */ + __le32 blkaddr; /* start block address of this inode chunk */ +}; + +/* maximum supported size of a physical compression cluster */ +#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024) + +/* available compression algorithm types (for h_algorithmtype) */ +enum { + Z_EROFS_COMPRESSION_LZ4 = 0, + Z_EROFS_COMPRESSION_LZMA = 1, + Z_EROFS_COMPRESSION_MAX +}; + +#define Z_EROFS_ALL_COMPR_ALGS (1 << (Z_EROFS_COMPRESSION_MAX - 1)) + +/* 14 bytes (+ length field = 16 bytes) */ +struct z_erofs_lz4_cfgs { + __le16 max_distance; + __le16 max_pclusterblks; + u8 reserved[10]; +} __packed; + +/* 14 bytes (+ length field = 16 bytes) */ +struct z_erofs_lzma_cfgs { + __le32 dict_size; + __le16 format; + u8 reserved[8]; +} __packed; +#define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE) + +/* + * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) + * e.g. for 4k logical cluster size, 4B if compacted 2B is off; + * (4B) + 2B + (4B) if compacted 2B is on. + * bit 1 : HEAD1 big pcluster (0 - off; 1 - on) + * bit 2 : HEAD2 big pcluster (0 - off; 1 - on) + */ +#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 +#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 +#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 + +struct z_erofs_map_header { + __le32 h_reserved1; + __le16 h_advise; + /* + * bit 0-3 : algorithm type of head 1 (logical cluster type 01); + * bit 4-7 : algorithm type of head 2 (logical cluster type 11). + */ + __u8 h_algorithmtype; + /* + * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; + * bit 3-7 : reserved. 
+ */ + __u8 h_clusterbits; +}; + +#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8 + +/* + * Fixed-sized output compression ondisk Logical Extent cluster type: + * 0 - literal (uncompressed) cluster + * 1 - compressed cluster (for the head logical cluster) + * 2 - compressed cluster (for the other logical clusters) + * + * In detail, + * 0 - literal (uncompressed) cluster, + * di_advise = 0 + * di_clusterofs = the literal data offset of the cluster + * di_blkaddr = the blkaddr of the literal cluster + * + * 1 - compressed cluster (for the head logical cluster) + * di_advise = 1 + * di_clusterofs = the decompressed data offset of the cluster + * di_blkaddr = the blkaddr of the compressed cluster + * + * 2 - compressed cluster (for the other logical clusters) + * di_advise = 2 + * di_clusterofs = + * the decompressed data offset in its own head cluster + * di_u.delta[0] = distance to its corresponding head cluster + * di_u.delta[1] = distance to its corresponding tail cluster + * (di_advise could be 0, 1 or 2) + */ +enum { + Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0, + Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1, + Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2, + Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3, + Z_EROFS_VLE_CLUSTER_TYPE_MAX +}; + +#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 +#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 + +/* + * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the + * compressed block count of a compressed extent (in logical clusters, aka. + * block count of a pcluster). + */ +#define Z_EROFS_VLE_DI_D0_CBLKCNT (1 << 11) + +struct z_erofs_vle_decompressed_index { + __le16 di_advise; + /* where to decompress in the head cluster */ + __le16 di_clusterofs; + + union { + /* for the head cluster */ + __le32 blkaddr; + /* + * for the rest clusters + * eg. 
for 4k page-sized cluster, maximum 4K*64k = 256M) + * [0] - pointing to the head cluster + * [1] - pointing to the tail cluster + */ + __le16 delta[2]; + } di_u; +}; + +#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \ + (round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \ + sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING) + +#define Z_EROFS_VLE_EXTENT_ALIGN(size) round_up(size, \ + sizeof(struct z_erofs_vle_decompressed_index)) + +/* dirent sorts in alphabet order, thus we can do binary search */ +struct erofs_dirent { + __le64 nid; /* node number */ + __le16 nameoff; /* start offset of file name */ + __u8 file_type; /* file type */ + __u8 reserved; /* reserved */ +} __packed; + +/* file types used in inode_info->flags */ +enum { + EROFS_FT_UNKNOWN, + EROFS_FT_REG_FILE, + EROFS_FT_DIR, + EROFS_FT_CHRDEV, + EROFS_FT_BLKDEV, + EROFS_FT_FIFO, + EROFS_FT_SOCK, + EROFS_FT_SYMLINK, + EROFS_FT_MAX +}; + +#define EROFS_NAME_LEN 255 + +/* check the EROFS on-disk layout strictly at compile time */ +static inline void erofs_check_ondisk_layout_definitions(void) +{ + BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); + BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32); + BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); + BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12); + BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4); + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4); + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8); + BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8); + BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8); + BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12); + /* keep in sync between 2 index structures for better extendibility */ + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != + sizeof(struct z_erofs_vle_decompressed_index)); + BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128); + + BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < + Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); +} + +#endif diff --git a/fs/erofs/fs.c b/fs/erofs/fs.c new file mode 100644 index 00000000000..89269750f8b --- /dev/null +++ b/fs/erofs/fs.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include "internal.h" +#include <fs_internal.h> + +struct erofs_sb_info sbi; + +static struct erofs_ctxt { + struct disk_partition cur_part_info; + struct blk_desc *cur_dev; +} ctxt; + +int erofs_dev_read(int device_id, void *buf, u64 offset, size_t len) +{ + lbaint_t sect = offset >> ctxt.cur_dev->log2blksz; + int off = offset & (ctxt.cur_dev->blksz - 1); + + if (!ctxt.cur_dev) + return -EIO; + + if (fs_devread(ctxt.cur_dev, &ctxt.cur_part_info, sect, + off, len, buf)) + return 0; + return -EIO; +} + +int erofs_blk_read(void *buf, erofs_blk_t start, u32 nblocks) +{ + return erofs_dev_read(0, buf, blknr_to_addr(start), + blknr_to_addr(nblocks)); +} + +int erofs_probe(struct blk_desc *fs_dev_desc, + struct disk_partition *fs_partition) +{ + int ret; + + ctxt.cur_dev = fs_dev_desc; + ctxt.cur_part_info = *fs_partition; + + ret = erofs_read_superblock(); + if (ret) + goto error; + + return 0; +error: + ctxt.cur_dev = NULL; + return ret; +} + +struct erofs_dir_stream { + struct fs_dir_stream fs_dirs; + struct fs_dirent dirent; + + struct erofs_inode inode; + char dblk[EROFS_BLKSIZ]; + unsigned int maxsize, de_end; + erofs_off_t pos; +}; + +static int erofs_readlink(struct erofs_inode *vi) +{ + size_t len = vi->i_size; + char *target; + int err; + + target = malloc(len + 1); + if (!target) + return -ENOMEM; + 
target[len] = '\0'; + + err = erofs_pread(vi, target, len, 0); + if (err) + goto err_out; + + err = erofs_ilookup(target, vi); + if (err) + goto err_out; + +err_out: + free(target); + return err; +} + +int erofs_opendir(const char *filename, struct fs_dir_stream **dirsp) +{ + struct erofs_dir_stream *dirs; + int err; + + dirs = calloc(1, sizeof(*dirs)); + if (!dirs) + return -ENOMEM; + + err = erofs_ilookup(filename, &dirs->inode); + if (err) + goto err_out; + + if (S_ISLNK(dirs->inode.i_mode)) { + err = erofs_readlink(&dirs->inode); + if (err) + goto err_out; + } + + if (!S_ISDIR(dirs->inode.i_mode)) { + err = -ENOTDIR; + goto err_out; + } + *dirsp = (struct fs_dir_stream *)dirs; + return 0; +err_out: + free(dirs); + return err; +} + +int erofs_readdir(struct fs_dir_stream *fs_dirs, struct fs_dirent **dentp) +{ + struct erofs_dir_stream *dirs = (struct erofs_dir_stream *)fs_dirs; + struct fs_dirent *dent = &dirs->dirent; + erofs_off_t pos = dirs->pos; + unsigned int nameoff, de_namelen; + struct erofs_dirent *de; + char *de_name; + int err; + + if (pos >= dirs->inode.i_size) + return 1; + + if (!dirs->maxsize) { + dirs->maxsize = min_t(unsigned int, EROFS_BLKSIZ, + dirs->inode.i_size - pos); + + err = erofs_pread(&dirs->inode, dirs->dblk, + dirs->maxsize, pos); + if (err) + return err; + + de = (struct erofs_dirent *)dirs->dblk; + dirs->de_end = le16_to_cpu(de->nameoff); + if (dirs->de_end < sizeof(struct erofs_dirent) || + dirs->de_end >= EROFS_BLKSIZ) { + erofs_err("invalid de[0].nameoff %u @ nid %llu", + dirs->de_end, de->nid | 0ULL); + return -EFSCORRUPTED; + } + } + + de = (struct erofs_dirent *)(dirs->dblk + erofs_blkoff(pos)); + nameoff = le16_to_cpu(de->nameoff); + de_name = (char *)dirs->dblk + nameoff; + + /* the last dirent in the block? 
*/ + if (de + 1 >= (struct erofs_dirent *)(dirs->dblk + dirs->de_end)) + de_namelen = strnlen(de_name, dirs->maxsize - nameoff); + else + de_namelen = le16_to_cpu(de[1].nameoff) - nameoff; + + /* a corrupted entry is found */ + if (nameoff + de_namelen > dirs->maxsize || + de_namelen > EROFS_NAME_LEN) { + erofs_err("bogus dirent @ nid %llu", de->nid | 0ULL); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + + memcpy(dent->name, de_name, de_namelen); + dent->name[de_namelen] = '\0'; + + if (de->file_type == EROFS_FT_DIR) { + dent->type = FS_DT_DIR; + } else if (de->file_type == EROFS_FT_SYMLINK) { + dent->type = FS_DT_LNK; + } else { + struct erofs_inode vi; + + dent->type = FS_DT_REG; + vi.nid = de->nid; + + err = erofs_read_inode_from_disk(&vi); + if (err) + return err; + dent->size = vi.i_size; + } + *dentp = dent; + + pos += sizeof(*de); + if (erofs_blkoff(pos) >= dirs->de_end) { + pos = blknr_to_addr(erofs_blknr(pos) + 1); + dirs->maxsize = 0; + } + dirs->pos = pos; + return 0; +} + +void erofs_closedir(struct fs_dir_stream *fs_dirs) +{ + free(fs_dirs); +} + +int erofs_exists(const char *filename) +{ + struct erofs_inode vi; + int err; + + err = erofs_ilookup(filename, &vi); + return err == 0; +} + +int erofs_size(const char *filename, loff_t *size) +{ + struct erofs_inode vi; + int err; + + err = erofs_ilookup(filename, &vi); + if (err) + return err; + *size = vi.i_size; + return 0; +} + +int erofs_read(const char *filename, void *buf, loff_t offset, loff_t len, + loff_t *actread) +{ + struct erofs_inode vi; + int err; + + err = erofs_ilookup(filename, &vi); + if (err) + return err; + + if (S_ISLNK(vi.i_mode)) { + err = erofs_readlink(&vi); + if (err) + return err; + } + + if (!len) + len = vi.i_size; + + err = erofs_pread(&vi, buf, len, offset); + if (err) { + *actread = 0; + return err; + } + + if (offset >= vi.i_size) + *actread = 0; + else if (offset + len > vi.i_size) + *actread = vi.i_size - offset; + else + *actread = len; + return 0; +} + +void erofs_close(void) +{ + ctxt.cur_dev = NULL; +} + +int erofs_uuid(char *uuid_str) +{ + if (IS_ENABLED(CONFIG_LIB_UUID)) { + if (ctxt.cur_dev) + uuid_bin_to_str(sbi.uuid, uuid_str, + UUID_STR_FORMAT_STD); + return 0; + } + return -ENOSYS; +} diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h new file mode 100644 index 00000000000..4af7c91560c --- /dev/null +++ b/fs/erofs/internal.h @@ -0,0 +1,313 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef __EROFS_INTERNAL_H +#define __EROFS_INTERNAL_H + +#define __packed __attribute__((__packed__)) + +#include <linux/stat.h> +#include <linux/bug.h> +#include <linux/err.h> +#include <linux/printk.h> +#include <linux/log2.h> +#include <inttypes.h> +#include "erofs_fs.h" + +#define erofs_err(fmt, ...) \ + pr_err(fmt "\n", ##__VA_ARGS__) + +#define erofs_info(fmt, ...) \ + pr_info(fmt "\n", ##__VA_ARGS__) + +#define erofs_dbg(fmt, ...) 
\ + pr_debug(fmt "\n", ##__VA_ARGS__) + +#define DBG_BUGON(condition) BUG_ON(condition) + +/* no obvious reason to support explicit PAGE_SIZE != 4096 for now */ +#if PAGE_SIZE != 4096 +#error incompatible PAGE_SIZE is already defined +#endif + +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +#define LOG_BLOCK_SIZE (12) +#define EROFS_BLKSIZ (1U << LOG_BLOCK_SIZE) + +#define EROFS_ISLOTBITS 5 +#define EROFS_SLOTSIZE (1U << EROFS_ISLOTBITS) + +typedef u64 erofs_off_t; +typedef u64 erofs_nid_t; +/* data type for filesystem-wide blocks number */ +typedef u32 erofs_blk_t; + +#define NULL_ADDR ((unsigned int)-1) +#define NULL_ADDR_UL ((unsigned long)-1) + +#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ) +#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ) +#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ) + +#define BLK_ROUND_UP(addr) DIV_ROUND_UP(addr, EROFS_BLKSIZ) + +struct erofs_buffer_head; + +struct erofs_device_info { + u32 blocks; + u32 mapped_blkaddr; +}; + +struct erofs_sb_info { + struct erofs_device_info *devs; + + u64 total_blocks; + u64 primarydevice_blocks; + + erofs_blk_t meta_blkaddr; + erofs_blk_t xattr_blkaddr; + + u32 feature_compat; + u32 feature_incompat; + u64 build_time; + u32 build_time_nsec; + + unsigned char islotbits; + + /* what we really care is nid, rather than ino.. */ + erofs_nid_t root_nid; + /* used for statfs, f_files - f_favail */ + u64 inos; + + u8 uuid[16]; + + u16 available_compr_algs; + u16 lz4_max_distance; + u32 checksum; + u16 extra_devices; + union { + u16 devt_slotoff; /* used for mkfs */ + u16 device_id_mask; /* used for others */ + }; +}; + +/* global sbi */ +extern struct erofs_sb_info sbi; + +static inline erofs_off_t iloc(erofs_nid_t nid) +{ + return blknr_to_addr(sbi.meta_blkaddr) + (nid << sbi.islotbits); +} + +#define EROFS_FEATURE_FUNCS(name, compat, feature) \ +static inline bool erofs_sb_has_##name(void) \ +{ \ + return sbi.feature_##compat & EROFS_FEATURE_##feature; \ +} \ +static inline void erofs_sb_set_##name(void) \ +{ \ + sbi.feature_##compat |= EROFS_FEATURE_##feature; \ +} \ +static inline void erofs_sb_clear_##name(void) \ +{ \ + sbi.feature_##compat &= ~EROFS_FEATURE_##feature; \ +} + +EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING) +EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS) +EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER) +EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE) +EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE) +EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) + +#define EROFS_I_EA_INITED (1 << 0) +#define EROFS_I_Z_INITED (1 << 1) + +struct erofs_inode { + struct list_head i_hash, i_subdirs, i_xattrs; + + union { + /* (erofsfuse) runtime flags */ + unsigned int flags; + /* (mkfs.erofs) device ID containing source file */ + u32 dev; + }; + unsigned int i_count; + struct erofs_inode *i_parent; + + umode_t i_mode; + erofs_off_t i_size; + + u64 i_ino[2]; + u32 i_uid; + u32 i_gid; + u64 i_ctime; + u32 i_ctime_nsec; + u32 i_nlink; + + union { + u32 i_blkaddr; + u32 i_blocks; + u32 i_rdev; + struct { + unsigned short chunkformat; + unsigned char chunkbits; + }; + } u; + + unsigned char datalayout; + unsigned char inode_isize; + /* inline tail-end packing size */ + unsigned short idata_size; + + unsigned int xattr_isize; + unsigned int extent_isize; + + erofs_nid_t nid; + struct erofs_buffer_head *bh; + struct erofs_buffer_head *bh_inline, *bh_data; + + void *idata; + + union { + void *compressmeta; + void *chunkindexes; 
+ struct { + uint16_t z_advise; + uint8_t z_algorithmtype[2]; + uint8_t z_logical_clusterbits; + uint8_t z_physical_clusterblks; + }; + }; +}; + +static inline bool is_inode_layout_compression(struct erofs_inode *inode) +{ + return erofs_inode_is_data_compressed(inode->datalayout); +} + +static inline unsigned int erofs_bitrange(unsigned int value, unsigned int bit, + unsigned int bits) +{ + return (value >> bit) & ((1 << bits) - 1); +} + +static inline unsigned int erofs_inode_version(unsigned int value) +{ + return erofs_bitrange(value, EROFS_I_VERSION_BIT, + EROFS_I_VERSION_BITS); +} + +static inline unsigned int erofs_inode_datalayout(unsigned int value) +{ + return erofs_bitrange(value, EROFS_I_DATALAYOUT_BIT, + EROFS_I_DATALAYOUT_BITS); +} + +#define IS_ROOT(x) ((x) == (x)->i_parent) + +struct erofs_dentry { + struct list_head d_child; /* child of parent list */ + + unsigned int type; + char name[EROFS_NAME_LEN]; + union { + struct erofs_inode *inode; + erofs_nid_t nid; + }; +}; + +static inline bool is_dot_dotdot(const char *name) +{ + if (name[0] != '.') + return false; + + return name[1] == '\0' || (name[1] == '.' && name[2] == '\0'); +} + +enum { + BH_Meta, + BH_Mapped, + BH_Encoded, + BH_FullMapped, +}; + +/* Has a disk mapping */ +#define EROFS_MAP_MAPPED (1 << BH_Mapped) +/* Located in metadata (could be copied from bd_inode) */ +#define EROFS_MAP_META (1 << BH_Meta) +/* The extent is encoded */ +#define EROFS_MAP_ENCODED (1 << BH_Encoded) +/* The length of extent is full */ +#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) + +struct erofs_map_blocks { + char mpage[EROFS_BLKSIZ]; + + erofs_off_t m_pa, m_la; + u64 m_plen, m_llen; + + unsigned short m_deviceid; + char m_algorithmformat; + unsigned int m_flags; + erofs_blk_t index; +}; + +/* + * Used to get the exact decompressed length, e.g. fiemap (consider lookback + * approach instead if possible since it's more metadata lightweight.) + */ +#define EROFS_GET_BLOCKS_FIEMAP 0x0002 + +enum { + Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, + Z_EROFS_COMPRESSION_RUNTIME_MAX +}; + +struct erofs_map_dev { + erofs_off_t m_pa; + unsigned int m_deviceid; +}; + +/* fs.c */ +int erofs_blk_read(void *buf, erofs_blk_t start, u32 nblocks); +int erofs_dev_read(int device_id, void *buf, u64 offset, size_t len); + +/* super.c */ +int erofs_read_superblock(void); + +/* namei.c */ +int erofs_read_inode_from_disk(struct erofs_inode *vi); +int erofs_ilookup(const char *path, struct erofs_inode *vi); +int erofs_read_inode_from_disk(struct erofs_inode *vi); + +/* data.c */ +int erofs_pread(struct erofs_inode *inode, char *buf, + erofs_off_t count, erofs_off_t offset); +int erofs_map_blocks(struct erofs_inode *inode, + struct erofs_map_blocks *map, int flags); +int erofs_map_dev(struct erofs_sb_info *sbi, struct erofs_map_dev *map); +/* zmap.c */ +int z_erofs_fill_inode(struct erofs_inode *vi); +int z_erofs_map_blocks_iter(struct erofs_inode *vi, + struct erofs_map_blocks *map, int flags); + +#ifdef EUCLEAN +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ +#else +#define EFSCORRUPTED EIO +#endif + +#define CRC32C_POLY_LE 0x82F63B78 +static inline u32 erofs_crc32c(u32 crc, const u8 *in, size_t len) +{ + int i; + + while (len--) { + crc ^= *in++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 
CRC32C_POLY_LE : 0); + } + return crc; +} + +#endif diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c new file mode 100644 index 00000000000..d1d4757c507 --- /dev/null +++ b/fs/erofs/namei.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include "internal.h" + +int erofs_read_inode_from_disk(struct erofs_inode *vi) +{ + int ret, ifmt; + char buf[sizeof(struct erofs_inode_extended)]; + struct erofs_inode_compact *dic; + struct erofs_inode_extended *die; + const erofs_off_t inode_loc = iloc(vi->nid); + + ret = erofs_dev_read(0, buf, inode_loc, sizeof(*dic)); + if (ret < 0) + return -EIO; + + dic = (struct erofs_inode_compact *)buf; + ifmt = le16_to_cpu(dic->i_format); + + vi->datalayout = erofs_inode_datalayout(ifmt); + if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) { + erofs_err("unsupported datalayout %u of nid %llu", + vi->datalayout, vi->nid | 0ULL); + return -EOPNOTSUPP; + } + switch (erofs_inode_version(ifmt)) { + case EROFS_INODE_LAYOUT_EXTENDED: + vi->inode_isize = sizeof(struct erofs_inode_extended); + + ret = erofs_dev_read(0, buf + sizeof(*dic), inode_loc + sizeof(*dic), + sizeof(*die) - sizeof(*dic)); + if (ret < 0) + return -EIO; + + die = (struct erofs_inode_extended *)buf; + vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); + vi->i_mode = le16_to_cpu(die->i_mode); + + switch (vi->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + vi->u.i_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr); + break; + case S_IFCHR: + case S_IFBLK: + vi->u.i_rdev = 0; + break; + case S_IFIFO: + case S_IFSOCK: + vi->u.i_rdev = 0; + break; + default: + goto bogusimode; + } + + vi->i_uid = le32_to_cpu(die->i_uid); + vi->i_gid = le32_to_cpu(die->i_gid); + vi->i_nlink = le32_to_cpu(die->i_nlink); + + vi->i_ctime = le64_to_cpu(die->i_ctime); + vi->i_ctime_nsec = le64_to_cpu(die->i_ctime_nsec); + vi->i_size = le64_to_cpu(die->i_size); + if (vi->datalayout == EROFS_INODE_CHUNK_BASED) + /* fill chunked inode summary info */ + vi->u.chunkformat = le16_to_cpu(die->i_u.c.format); + break; + case EROFS_INODE_LAYOUT_COMPACT: + vi->inode_isize = sizeof(struct erofs_inode_compact); + vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount); + vi->i_mode = le16_to_cpu(dic->i_mode); + + switch (vi->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + vi->u.i_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr); + break; + case S_IFCHR: + case S_IFBLK: + vi->u.i_rdev = 0; + break; + case S_IFIFO: + case S_IFSOCK: + vi->u.i_rdev = 0; + break; + default: + goto bogusimode; + } + + vi->i_uid = le16_to_cpu(dic->i_uid); + vi->i_gid = le16_to_cpu(dic->i_gid); + vi->i_nlink = le16_to_cpu(dic->i_nlink); + + vi->i_ctime = sbi.build_time; + vi->i_ctime_nsec = sbi.build_time_nsec; + + vi->i_size = le32_to_cpu(dic->i_size); + if (vi->datalayout == EROFS_INODE_CHUNK_BASED) + vi->u.chunkformat = le16_to_cpu(dic->i_u.c.format); + break; + default: + erofs_err("unsupported on-disk inode version %u of nid %llu", + erofs_inode_version(ifmt), vi->nid | 0ULL); + return -EOPNOTSUPP; + } + + vi->flags = 0; + if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { + if (vi->u.chunkformat & ~EROFS_CHUNK_FORMAT_ALL) { + erofs_err("unsupported chunk format %x of nid %llu", + vi->u.chunkformat, vi->nid | 0ULL); + return -EOPNOTSUPP; + } + vi->u.chunkbits = LOG_BLOCK_SIZE + + (vi->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); + } else if (erofs_inode_is_data_compressed(vi->datalayout)) + z_erofs_fill_inode(vi); + return 0; +bogusimode: + erofs_err("bogus i_mode (%o) @ nid %llu", vi->i_mode, 
vi->nid | 0ULL); + return -EFSCORRUPTED; +} + +struct erofs_dirent *find_target_dirent(erofs_nid_t pnid, + void *dentry_blk, + const char *name, unsigned int len, + unsigned int nameoff, + unsigned int maxsize) +{ + struct erofs_dirent *de = dentry_blk; + const struct erofs_dirent *end = dentry_blk + nameoff; + + while (de < end) { + const char *de_name; + unsigned int de_namelen; + + nameoff = le16_to_cpu(de->nameoff); + de_name = (char *)dentry_blk + nameoff; + + /* the last dirent in the block? */ + if (de + 1 >= end) + de_namelen = strnlen(de_name, maxsize - nameoff); + else + de_namelen = le16_to_cpu(de[1].nameoff) - nameoff; + + /* a corrupted entry is found */ + if (nameoff + de_namelen > maxsize || + de_namelen > EROFS_NAME_LEN) { + erofs_err("bogus dirent @ nid %llu", pnid | 0ULL); + DBG_BUGON(1); + return ERR_PTR(-EFSCORRUPTED); + } + + if (len == de_namelen && !memcmp(de_name, name, de_namelen)) + return de; + ++de; + } + return NULL; +} + +struct nameidata { + erofs_nid_t nid; + unsigned int ftype; +}; + +int erofs_namei(struct nameidata *nd, + const char *name, unsigned int len) +{ + erofs_nid_t nid = nd->nid; + int ret; + char buf[EROFS_BLKSIZ]; + struct erofs_inode vi = { .nid = nid }; + erofs_off_t offset; + + ret = erofs_read_inode_from_disk(&vi); + if (ret) + return ret; + + offset = 0; + while (offset < vi.i_size) { + erofs_off_t maxsize = min_t(erofs_off_t, + vi.i_size - offset, EROFS_BLKSIZ); + struct erofs_dirent *de = (void *)buf; + unsigned int nameoff; + + ret = erofs_pread(&vi, buf, maxsize, offset); + if (ret) + return ret; + + nameoff = le16_to_cpu(de->nameoff); + if (nameoff < sizeof(struct erofs_dirent) || + nameoff >= PAGE_SIZE) { + erofs_err("invalid de[0].nameoff %u @ nid %llu", + nameoff, nid | 0ULL); + return -EFSCORRUPTED; + } + + de = find_target_dirent(nid, buf, name, len, + nameoff, maxsize); + if (IS_ERR(de)) + return PTR_ERR(de); + + if (de) { + nd->nid = le64_to_cpu(de->nid); + return 0; + } + offset += maxsize; + } + return -ENOENT; +} + +static int link_path_walk(const char *name, struct nameidata *nd) +{ + nd->nid = sbi.root_nid; + + while (*name == '/') + name++; + + /* At this point we know we have a real path component. */ + while (*name != '\0') { + const char *p = name; + int ret; + + do { + ++p; + } while (*p != '\0' && *p != '/'); + + DBG_BUGON(p <= name); + ret = erofs_namei(nd, name, p - name); + if (ret) + return ret; + + name = p; + /* Skip until no more slashes. 
*/ + for (name = p; *name == '/'; ++name) + ; + } + return 0; +} + +int erofs_ilookup(const char *path, struct erofs_inode *vi) +{ + int ret; + struct nameidata nd; + + ret = link_path_walk(path, &nd); + if (ret) + return ret; + + vi->nid = nd.nid; + return erofs_read_inode_from_disk(vi); +} diff --git a/fs/erofs/super.c b/fs/erofs/super.c new file mode 100644 index 00000000000..4cca322b9ea --- /dev/null +++ b/fs/erofs/super.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include "internal.h" + +static bool check_layout_compatibility(struct erofs_sb_info *sbi, + struct erofs_super_block *dsb) +{ + const unsigned int feature = le32_to_cpu(dsb->feature_incompat); + + sbi->feature_incompat = feature; + + /* check if current kernel meets all mandatory requirements */ + if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { + erofs_err("unidentified incompatible feature %x, please upgrade kernel version", + feature & ~EROFS_ALL_FEATURE_INCOMPAT); + return false; + } + return true; +} + +static int erofs_init_devices(struct erofs_sb_info *sbi, + struct erofs_super_block *dsb) +{ + unsigned int ondisk_extradevs, i; + erofs_off_t pos; + + sbi->total_blocks = sbi->primarydevice_blocks; + + if (!erofs_sb_has_device_table()) + ondisk_extradevs = 0; + else + ondisk_extradevs = le16_to_cpu(dsb->extra_devices); + + if (ondisk_extradevs != sbi->extra_devices) { + erofs_err("extra devices don't match (ondisk %u, given %u)", + ondisk_extradevs, sbi->extra_devices); + return -EINVAL; + } + if (!ondisk_extradevs) + return 0; + + sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1; + sbi->devs = calloc(ondisk_extradevs, sizeof(*sbi->devs)); + pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE; + for (i = 0; i < ondisk_extradevs; ++i) { + struct erofs_deviceslot dis; + int ret; + + ret = erofs_dev_read(0, &dis, pos, sizeof(dis)); + if (ret < 0) + return ret; + + sbi->devs[i].mapped_blkaddr = dis.mapped_blkaddr; + sbi->total_blocks += dis.blocks; + pos += EROFS_DEVT_SLOT_SIZE; + } + return 0; +} + +int erofs_read_superblock(void) +{ + char data[EROFS_BLKSIZ]; + struct erofs_super_block *dsb; + unsigned int blkszbits; + int ret; + + ret = erofs_blk_read(data, 0, 1); + if (ret < 0) { + erofs_err("cannot read erofs superblock: %d", ret); + return -EIO; + } + dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); + + ret = -EINVAL; + if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { + erofs_err("cannot find valid erofs superblock"); + return ret; + } + + sbi.feature_compat = le32_to_cpu(dsb->feature_compat); + + blkszbits = dsb->blkszbits; + /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */ + if (blkszbits != LOG_BLOCK_SIZE) { + erofs_err("blksize %u isn't supported on this platform", + 1 << blkszbits); + return ret; + } + + if (!check_layout_compatibility(&sbi, dsb)) + return ret; + + sbi.primarydevice_blocks = le32_to_cpu(dsb->blocks); + sbi.meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); + sbi.xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); + sbi.islotbits = EROFS_ISLOTBITS; + sbi.root_nid = le16_to_cpu(dsb->root_nid); + sbi.inos = le64_to_cpu(dsb->inos); + sbi.checksum = le32_to_cpu(dsb->checksum); + + sbi.build_time = le64_to_cpu(dsb->build_time); + sbi.build_time_nsec = le32_to_cpu(dsb->build_time_nsec); + + memcpy(&sbi.uuid, dsb->uuid, sizeof(dsb->uuid)); + return erofs_init_devices(&sbi, dsb); +} diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c new file mode 100644 index 00000000000..be2599ac4f9 --- /dev/null +++ b/fs/erofs/zmap.c @@ -0,0 +1,601 @@ +// 
SPDX-License-Identifier: GPL-2.0+ +#include "internal.h" + +int z_erofs_fill_inode(struct erofs_inode *vi) +{ + if (!erofs_sb_has_big_pcluster() && + vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { + vi->z_advise = 0; + vi->z_algorithmtype[0] = 0; + vi->z_algorithmtype[1] = 0; + vi->z_logical_clusterbits = LOG_BLOCK_SIZE; + + vi->flags |= EROFS_I_Z_INITED; + } + return 0; +} + +static int z_erofs_fill_inode_lazy(struct erofs_inode *vi) +{ + int ret; + erofs_off_t pos; + struct z_erofs_map_header *h; + char buf[sizeof(struct z_erofs_map_header)]; + + if (vi->flags & EROFS_I_Z_INITED) + return 0; + + DBG_BUGON(!erofs_sb_has_big_pcluster() && + vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY); + pos = round_up(iloc(vi->nid) + vi->inode_isize + vi->xattr_isize, 8); + + ret = erofs_dev_read(0, buf, pos, sizeof(buf)); + if (ret < 0) + return -EIO; + + h = (struct z_erofs_map_header *)buf; + vi->z_advise = le16_to_cpu(h->h_advise); + vi->z_algorithmtype[0] = h->h_algorithmtype & 15; + vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; + + if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) { + erofs_err("unknown compression format %u for nid %llu", + vi->z_algorithmtype[0], (unsigned long long)vi->nid); + return -EOPNOTSUPP; + } + + vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7); + if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION && + !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ + !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { + erofs_err("big pcluster head1/2 of compact indexes should be consistent for nid %llu", + vi->nid * 1ULL); + return -EFSCORRUPTED; + } + vi->flags |= EROFS_I_Z_INITED; + return 0; +} + +struct z_erofs_maprecorder { + struct erofs_inode *inode; + struct erofs_map_blocks *map; + void *kaddr; + + unsigned long lcn; + /* compression extent information gathered */ + u8 type, headtype; + u16 clusterofs; + u16 delta[2]; + erofs_blk_t pblk, compressedlcs; +}; + +static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, + erofs_blk_t eblk) +{ + int ret; + struct erofs_map_blocks *const map = m->map; + char *mpage = map->mpage; + + if (map->index == eblk) + return 0; + + ret = erofs_blk_read(mpage, eblk, 1); + if (ret < 0) + return -EIO; + + map->index = eblk; + + return 0; +} + +static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, + unsigned long lcn) +{ + struct erofs_inode *const vi = m->inode; + const erofs_off_t ibase = iloc(vi->nid); + const erofs_off_t pos = + Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize + + vi->xattr_isize) + + lcn * sizeof(struct z_erofs_vle_decompressed_index); + struct z_erofs_vle_decompressed_index *di; + unsigned int advise, type; + int err; + + err = z_erofs_reload_indexes(m, erofs_blknr(pos)); + if (err) + return err; + + m->lcn = lcn; + di = m->kaddr + erofs_blkoff(pos); + + advise = le16_to_cpu(di->di_advise); + type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) & + ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1); + switch (type) { + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + m->clusterofs = 1 << vi->z_logical_clusterbits; + m->delta[0] = le16_to_cpu(di->di_u.delta[0]); + if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) { + if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + m->compressedlcs = m->delta[0] & + ~Z_EROFS_VLE_DI_D0_CBLKCNT; + m->delta[0] = 1; + } + m->delta[1] = le16_to_cpu(di->di_u.delta[1]); + break; + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + m->clusterofs = 
le16_to_cpu(di->di_clusterofs); + m->pblk = le32_to_cpu(di->di_u.blkaddr); + break; + default: + DBG_BUGON(1); + return -EOPNOTSUPP; + } + m->type = type; + return 0; +} + +static unsigned int decode_compactedbits(unsigned int lobits, + unsigned int lomask, + u8 *in, unsigned int pos, u8 *type) +{ + const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7); + const unsigned int lo = v & lomask; + + *type = (v >> lobits) & 3; + return lo; +} + +static int get_compacted_la_distance(unsigned int lclusterbits, + unsigned int encodebits, + unsigned int vcnt, u8 *in, int i) +{ + const unsigned int lomask = (1 << lclusterbits) - 1; + unsigned int lo, d1 = 0; + u8 type; + + DBG_BUGON(i >= vcnt); + + do { + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * i, &type); + + if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) + return d1; + ++d1; + } while (++i < vcnt); + + /* vcnt - 1 (Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) item */ + if (!(lo & Z_EROFS_VLE_DI_D0_CBLKCNT)) + d1 += lo - 1; + return d1; +} + +static int unpack_compacted_index(struct z_erofs_maprecorder *m, + unsigned int amortizedshift, + unsigned int eofs, bool lookahead) +{ + struct erofs_inode *const vi = m->inode; + const unsigned int lclusterbits = vi->z_logical_clusterbits; + const unsigned int lomask = (1 << lclusterbits) - 1; + unsigned int vcnt, base, lo, encodebits, nblk; + int i; + u8 *in, type; + bool big_pcluster; + + if (1 << amortizedshift == 4) + vcnt = 2; + else if (1 << amortizedshift == 2 && lclusterbits == 12) + vcnt = 16; + else + return -EOPNOTSUPP; + + big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; + encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; + base = round_down(eofs, vcnt << amortizedshift); + in = m->kaddr + base; + + i = (eofs - base) >> amortizedshift; + + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * i, &type); + m->type = type; + if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { + m->clusterofs = 1 << lclusterbits; + + /* figure out lookahead_distance: delta[1] if needed */ + if (lookahead) + m->delta[1] = get_compacted_la_distance(lclusterbits, + encodebits, + vcnt, in, i); + if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { + if (!big_pcluster) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; + m->delta[0] = 1; + return 0; + } else if (i + 1 != (int)vcnt) { + m->delta[0] = lo; + return 0; + } + /* + * since the last lcluster in the pack is special, + * of which lo saves delta[1] rather than delta[0]. + * Hence, get delta[0] by the previous lcluster indirectly. 
+ */ + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * (i - 1), &type); + if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) + lo = 0; + else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) + lo = 1; + m->delta[0] = lo + 1; + return 0; + } + m->clusterofs = lo; + m->delta[0] = 0; + /* figout out blkaddr (pblk) for HEAD lclusters */ + if (!big_pcluster) { + nblk = 1; + while (i > 0) { + --i; + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * i, &type); + if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) + i -= lo; + + if (i >= 0) + ++nblk; + } + } else { + nblk = 0; + while (i > 0) { + --i; + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * i, &type); + if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { + if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { + --i; + nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; + continue; + } + if (lo == 1) { + DBG_BUGON(1); + /* --i; ++nblk; continue; */ + return -EFSCORRUPTED; + } + i -= lo - 2; + continue; + } + ++nblk; + } + } + in += (vcnt << amortizedshift) - sizeof(__le32); + m->pblk = le32_to_cpu(*(__le32 *)in) + nblk; + return 0; +} + +static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, + unsigned long lcn, bool lookahead) +{ + struct erofs_inode *const vi = m->inode; + const unsigned int lclusterbits = vi->z_logical_clusterbits; + const erofs_off_t ebase = round_up(iloc(vi->nid) + vi->inode_isize + + vi->xattr_isize, 8) + + sizeof(struct z_erofs_map_header); + const unsigned int totalidx = DIV_ROUND_UP(vi->i_size, EROFS_BLKSIZ); + unsigned int compacted_4b_initial, compacted_2b; + unsigned int amortizedshift; + erofs_off_t pos; + int err; + + if (lclusterbits != 12) + return -EOPNOTSUPP; + + if (lcn >= totalidx) + return -EINVAL; + + m->lcn = lcn; + /* used to align to 32-byte (compacted_2b) alignment */ + compacted_4b_initial = (32 - ebase % 32) / 4; + if (compacted_4b_initial == 32 / 4) + compacted_4b_initial = 0; + + if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) + compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); + else + compacted_2b = 0; + + pos = ebase; + if (lcn < compacted_4b_initial) { + amortizedshift = 2; + goto out; + } + pos += compacted_4b_initial * 4; + lcn -= compacted_4b_initial; + + if (lcn < compacted_2b) { + amortizedshift = 1; + goto out; + } + pos += compacted_2b * 2; + lcn -= compacted_2b; + amortizedshift = 2; +out: + pos += lcn * (1 << amortizedshift); + err = z_erofs_reload_indexes(m, erofs_blknr(pos)); + if (err) + return err; + return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos), + lookahead); +} + +static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m, + unsigned int lcn, bool lookahead) +{ + const unsigned int datamode = m->inode->datalayout; + + if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) + return legacy_load_cluster_from_disk(m, lcn); + + if (datamode == EROFS_INODE_FLAT_COMPRESSION) + return compacted_load_cluster_from_disk(m, lcn, lookahead); + + return -EINVAL; +} + +static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, + unsigned int lookback_distance) +{ + struct erofs_inode *const vi = m->inode; + struct erofs_map_blocks *const map = m->map; + const unsigned int lclusterbits = vi->z_logical_clusterbits; + unsigned long lcn = m->lcn; + int err; + + if (lcn < lookback_distance) { + erofs_err("bogus lookback distance @ nid %llu", + (unsigned long long)vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + + /* load extent head logical cluster if needed */ + lcn -= lookback_distance; + err = 
+	err = z_erofs_load_cluster_from_disk(m, lcn, false);
+	if (err)
+		return err;
+
+	switch (m->type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+		if (!m->delta[0]) {
+			erofs_err("invalid lookback distance 0 @ nid %llu",
+				  (unsigned long long)vi->nid);
+			DBG_BUGON(1);
+			return -EFSCORRUPTED;
+		}
+		return z_erofs_extent_lookback(m, m->delta[0]);
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		m->headtype = m->type;
+		map->m_la = (lcn << lclusterbits) | m->clusterofs;
+		break;
+	default:
+		erofs_err("unknown type %u @ lcn %lu of nid %llu",
+			  m->type, lcn, (unsigned long long)vi->nid);
+		DBG_BUGON(1);
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
+					    unsigned int initial_lcn)
+{
+	struct erofs_inode *const vi = m->inode;
+	struct erofs_map_blocks *const map = m->map;
+	const unsigned int lclusterbits = vi->z_logical_clusterbits;
+	unsigned long lcn;
+	int err;
+
+	DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
+		  m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD);
+	if (m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
+	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
+		map->m_plen = 1 << lclusterbits;
+		return 0;
+	}
+
+	lcn = m->lcn + 1;
+	if (m->compressedlcs)
+		goto out;
+
+	err = z_erofs_load_cluster_from_disk(m, lcn, false);
+	if (err)
+		return err;
+
+	/*
+	 * The 1st NONHEAD lcluster has already been handled in the initial
+	 * pass without a valid compressedlcs, so it must not be a CBLKCNT
+	 * entry here; otherwise an internal implementation error has been
+	 * detected.
+	 *
+	 * The following code can handle that case properly anyway, but let's
+	 * BUG_ON in the debugging mode only so that developers notice it.
+	 */
+	DBG_BUGON(lcn == initial_lcn &&
+		  m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
+
+	switch (m->type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		/*
+		 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
+		 * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
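+		 * (A CBLKCNT entry is only valid together with the big
+		 * pcluster feature, cf. the Z_EROFS_ADVISE_BIG_PCLUSTER_1
+		 * check above.)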
+		 */
+		m->compressedlcs = 1;
+		break;
+	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+		if (m->delta[0] != 1)
+			goto err_bonus_cblkcnt;
+		if (m->compressedlcs)
+			break;
+		/* fallthrough */
+	default:
+		erofs_err("cannot find CBLKCNT @ lcn %lu of nid %llu",
+			  lcn, vi->nid | 0ULL);
+		DBG_BUGON(1);
+		return -EFSCORRUPTED;
+	}
+out:
+	map->m_plen = m->compressedlcs << lclusterbits;
+	return 0;
+err_bonus_cblkcnt:
+	erofs_err("bogus CBLKCNT @ lcn %lu of nid %llu",
+		  lcn, vi->nid | 0ULL);
+	DBG_BUGON(1);
+	return -EFSCORRUPTED;
+}
+
+static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
+{
+	struct erofs_inode *const vi = m->inode;
+	struct erofs_map_blocks *map = m->map;
+	unsigned int lclusterbits = vi->z_logical_clusterbits;
+	u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
+	int err;
+
+	do {
+		/* handle the last EOF pcluster (no next HEAD lcluster) */
+		if ((lcn << lclusterbits) >= vi->i_size) {
+			map->m_llen = vi->i_size - map->m_la;
+			return 0;
+		}
+
+		err = z_erofs_load_cluster_from_disk(m, lcn, true);
+		if (err)
+			return err;
+
+		if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+			DBG_BUGON(!m->delta[1] &&
+				  m->clusterofs != 1 << lclusterbits);
+		} else if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
+			   m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD) {
+			/* go on until the next HEAD lcluster */
+			if (lcn != headlcn)
+				break;
+			m->delta[1] = 1;
+		} else {
+			erofs_err("unknown type %u @ lcn %llu of nid %llu",
+				  m->type, lcn | 0ULL,
+				  (unsigned long long)vi->nid);
+			DBG_BUGON(1);
+			return -EOPNOTSUPP;
+		}
+		lcn += m->delta[1];
+	} while (m->delta[1]);
+
+	map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
+	return 0;
+}
+
+int z_erofs_map_blocks_iter(struct erofs_inode *vi,
+			    struct erofs_map_blocks *map,
+			    int flags)
+{
+	struct z_erofs_maprecorder m = {
+		.inode = vi,
+		.map = map,
+		.kaddr = map->mpage,
+	};
+	int err = 0;
+	unsigned int lclusterbits, endoff;
+	unsigned long initial_lcn;
+	unsigned long long ofs, end;
+
+	/* when trying to read beyond EOF, leave it unmapped */
+	if (map->m_la >= vi->i_size) {
+		map->m_llen = map->m_la + 1 - vi->i_size;
+		map->m_la = vi->i_size;
+		map->m_flags = 0;
+		goto out;
+	}
+
+	err = z_erofs_fill_inode_lazy(vi);
+	if (err)
+		goto out;
+
+	lclusterbits = vi->z_logical_clusterbits;
+	ofs = map->m_la;
+	initial_lcn = ofs >> lclusterbits;
+	endoff = ofs & ((1 << lclusterbits) - 1);
+
+	err = z_erofs_load_cluster_from_disk(&m, initial_lcn, false);
+	if (err)
+		goto out;
+
+	map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
+	end = (m.lcn + 1ULL) << lclusterbits;
+	switch (m.type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		if (endoff >= m.clusterofs) {
+			m.headtype = m.type;
+			map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+			break;
+		}
+		/* m.lcn should be >= 1 if endoff < m.clusterofs */
+		if (!m.lcn) {
+			erofs_err("invalid logical cluster 0 at nid %llu",
+				  (unsigned long long)vi->nid);
+			err = -EFSCORRUPTED;
+			goto out;
+		}
+		end = (m.lcn << lclusterbits) | m.clusterofs;
+		map->m_flags |= EROFS_MAP_FULL_MAPPED;
+		m.delta[0] = 1;
+		/* fallthrough */
+	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+		/* get the corresponding first chunk */
+		err = z_erofs_extent_lookback(&m, m.delta[0]);
+		if (err)
+			goto out;
+		break;
+	default:
+		erofs_err("unknown type %u @ offset %llu of nid %llu",
+			  m.type, ofs, (unsigned long long)vi->nid);
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	map->m_llen = end - map->m_la;
+	map->m_pa = blknr_to_addr(m.pblk);
+
+	err = z_erofs_get_extent_compressedlen(&m,
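+					       /* the lcluster m_la falls in */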
initial_lcn); + if (err) + goto out; + + if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) + map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; + else + map->m_algorithmformat = vi->z_algorithmtype[0]; + + if (flags & EROFS_GET_BLOCKS_FIEMAP) { + err = z_erofs_get_extent_decompressedlen(&m); + if (!err) + map->m_flags |= EROFS_MAP_FULL_MAPPED; + } + +out: + erofs_dbg("m_la %" PRIu64 " m_pa %" PRIu64 " m_llen %" PRIu64 " m_plen %" PRIu64 " m_flags 0%o", + map->m_la, map->m_pa, + map->m_llen, map->m_plen, map->m_flags); + + DBG_BUGON(err < 0 && err != -ENOMEM); + return err; +} @@ -26,6 +26,7 @@ #include <linux/math64.h> #include <efi_loader.h> #include <squashfs.h> +#include <erofs.h> DECLARE_GLOBAL_DATA_PTR; @@ -305,6 +306,27 @@ static struct fstype_info fstypes[] = { .mkdir = fs_mkdir_unsupported, }, #endif +#if IS_ENABLED(CONFIG_FS_EROFS) + { + .fstype = FS_TYPE_EROFS, + .name = "erofs", + .null_dev_desc_ok = false, + .probe = erofs_probe, + .opendir = erofs_opendir, + .readdir = erofs_readdir, + .ls = fs_ls_generic, + .read = erofs_read, + .size = erofs_size, + .close = erofs_close, + .closedir = erofs_closedir, + .exists = erofs_exists, + .uuid = fs_uuid_unsupported, + .write = fs_write_unsupported, + .ln = fs_ln_unsupported, + .unlink = fs_unlink_unsupported, + .mkdir = fs_mkdir_unsupported, + }, +#endif { .fstype = FS_TYPE_ANY, .name = "unsupported", |
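/*
 * The erofs entry above only wires up read-side operations; EROFS is
 * read-only, so write, ln, unlink and mkdir fall back to the generic
 * fs_*_unsupported() stubs.  Once enabled, the generic filesystem commands
 * can be pointed at an EROFS partition, e.g. (device and partition numbers
 * are illustrative):
 *   ls mmc 0:1 /
 *   load mmc 0:1 ${loadaddr} /boot/Image
 */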