aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile1
-rw-r--r--fs/erofs/Kconfig21
-rw-r--r--fs/erofs/Makefile9
-rw-r--r--fs/erofs/data.c311
-rw-r--r--fs/erofs/decompress.c78
-rw-r--r--fs/erofs/decompress.h24
-rw-r--r--fs/erofs/erofs_fs.h436
-rw-r--r--fs/erofs/fs.c267
-rw-r--r--fs/erofs/internal.h313
-rw-r--r--fs/erofs/namei.c252
-rw-r--r--fs/erofs/super.c105
-rw-r--r--fs/erofs/zmap.c601
-rw-r--r--fs/fs.c22
14 files changed, 2442 insertions, 0 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 620af7f0447..cda9f66cc93 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -24,4 +24,6 @@ source "fs/yaffs2/Kconfig"
source "fs/squashfs/Kconfig"
+source "fs/erofs/Kconfig"
+
endmenu
diff --git a/fs/Makefile b/fs/Makefile
index 937cbcf6e85..f05a21c9e6d 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -25,5 +25,6 @@ obj-$(CONFIG_CMD_UBIFS) += ubifs/
obj-$(CONFIG_YAFFS2) += yaffs2/
obj-$(CONFIG_CMD_ZFS) += zfs/
obj-$(CONFIG_FS_SQUASHFS) += squashfs/
+obj-$(CONFIG_FS_EROFS) += erofs/
endif
obj-y += fs_internal.o
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
new file mode 100644
index 00000000000..ee4e777c5c8
--- /dev/null
+++ b/fs/erofs/Kconfig
@@ -0,0 +1,21 @@
+config FS_EROFS
+ bool "Enable EROFS filesystem support"
+ help
+ This provides support for reading images from an EROFS filesystem.
+ EROFS (Enhanced Read-Only File System) is a lightweight read-only
+ file system designed for scenarios which require high-performance
+ read-only access.
+
+ It also provides fixed-sized output compression support, which
+ improves storage density while keeping relatively high compression
+ ratios, and is especially useful for achieving high performance on
+ embedded devices with limited memory.
+
+config FS_EROFS_ZIP
+ bool "EROFS Data Compression Support"
+ depends on FS_EROFS
+ select LZ4
+ default y
+ help
+ Enable fixed-sized output compression for EROFS.
+ If you don't want to enable compression feature, say N.
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
new file mode 100644
index 00000000000..58af6a68e41
--- /dev/null
+++ b/fs/erofs/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+
+obj-$(CONFIG_$(SPL_)FS_EROFS) = fs.o \
+ super.o \
+ namei.o \
+ data.o \
+ decompress.o \
+ zmap.o
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
new file mode 100644
index 00000000000..761896054c8
--- /dev/null
+++ b/fs/erofs/data.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include "internal.h"
+#include "decompress.h"
+
+/*
+ * Map the logical offset map->m_la of a flat-layout inode to a physical
+ * extent.
+ *
+ * Offsets below the last full-block boundary map linearly from the inode's
+ * start block (vi->u.i_blkaddr). For EROFS_INODE_FLAT_INLINE inodes the
+ * remaining tail maps to the bytes stored right after the on-disk inode and
+ * its xattrs, and the extent is flagged EROFS_MAP_META.
+ *
+ * On success m_pa, m_plen, m_llen and m_flags are filled in and 0 is
+ * returned. Returns -EFSCORRUPTED if the inline tail would cross a block
+ * boundary, or -EIO if the offset lies past the mapped blocks of a
+ * non-inline inode. The @flags argument is not used here.
+ */
+static int erofs_map_blocks_flatmode(struct erofs_inode *inode,
+ struct erofs_map_blocks *map,
+ int flags)
+{
+ int err = 0;
+ erofs_blk_t nblocks, lastblk;
+ u64 offset = map->m_la;
+ struct erofs_inode *vi = inode;
+ bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
+
+ nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+ /* with tail packing, the last block lives inline next to the inode */
+ lastblk = nblocks - tailendpacking;
+
+ /* there is no hole in flatmode */
+ map->m_flags = EROFS_MAP_MAPPED;
+
+ if (offset < blknr_to_addr(lastblk)) {
+ /* regular data blocks: linear mapping up to the lastblk boundary */
+ map->m_pa = blknr_to_addr(vi->u.i_blkaddr) + map->m_la;
+ map->m_plen = blknr_to_addr(lastblk) - offset;
+ } else if (tailendpacking) {
+ /* 2 - inode inline B: inode, [xattrs], inline last blk... */
+ map->m_pa = iloc(vi->nid) + vi->inode_isize +
+ vi->xattr_isize + erofs_blkoff(map->m_la);
+ map->m_plen = inode->i_size - offset;
+
+ /* inline data should be located in one meta block */
+ if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
+ erofs_err("inline data cross block boundary @ nid %" PRIu64,
+ vi->nid);
+ DBG_BUGON(1);
+ err = -EFSCORRUPTED;
+ goto err_out;
+ }
+
+ map->m_flags |= EROFS_MAP_META;
+ } else {
+ erofs_err("internal error @ nid: %" PRIu64 " (size %llu), m_la 0x%" PRIx64,
+ vi->nid, (unsigned long long)inode->i_size, map->m_la);
+ DBG_BUGON(1);
+ err = -EIO;
+ goto err_out;
+ }
+
+ /* uncompressed data: logical and physical lengths are the same */
+ map->m_llen = map->m_plen;
+err_out:
+ return err;
+}
+
+/*
+ * Map the logical offset map->m_la of an uncompressed inode to a physical
+ * extent.
+ *
+ * Offsets at or beyond i_size are reported as an unmapped, zero-length
+ * extent. Non chunk-based inodes are handed to erofs_map_blocks_flatmode().
+ * For chunk-based inodes the chunk entry (a 4-byte block address or an
+ * 8-byte chunk index, depending on EROFS_CHUNK_FORMAT_INDEXES) is read from
+ * the metadata following the inode and xattrs; map->m_la is rounded down to
+ * the start of the chunk and m_plen/m_llen cover the chunk, trimmed to the
+ * block-rounded end of file. A block address of EROFS_NULL_ADDR denotes a
+ * hole (m_flags == 0).
+ *
+ * Returns 0 on success, -EIO if the metadata block cannot be read, or the
+ * error reported by the flat-mode mapper.
+ */
+int erofs_map_blocks(struct erofs_inode *inode,
+		     struct erofs_map_blocks *map, int flags)
+{
+	struct erofs_inode *vi = inode;
+	struct erofs_inode_chunk_index *idx;
+	u8 buf[EROFS_BLKSIZ];
+	u64 chunknr;
+	unsigned int unit;
+	erofs_off_t pos;
+	int err = 0;
+
+	map->m_deviceid = 0;
+	if (map->m_la >= inode->i_size) {
+		/* leave out-of-bound access unmapped */
+		map->m_flags = 0;
+		map->m_plen = 0;
+		goto out;
+	}
+
+	if (vi->datalayout != EROFS_INODE_CHUNK_BASED)
+		return erofs_map_blocks_flatmode(inode, map, flags);
+
+	if (vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
+		unit = sizeof(*idx);			/* chunk index */
+	else
+		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;	/* block map */
+
+	chunknr = map->m_la >> vi->u.chunkbits;
+	pos = roundup(iloc(vi->nid) + vi->inode_isize +
+		      vi->xattr_isize, unit) + unit * chunknr;
+
+	err = erofs_blk_read(buf, erofs_blknr(pos), 1);
+	if (err < 0)
+		return -EIO;
+
+	map->m_la = chunknr << vi->u.chunkbits;
+	/*
+	 * Compute the chunk size in a 64-bit type: chunkbits may be 32 or
+	 * more, which would make a shift of "1UL" undefined on targets
+	 * where unsigned long is 32 bits wide.
+	 */
+	map->m_plen = min_t(erofs_off_t, (erofs_off_t)1 << vi->u.chunkbits,
+			    roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));
+
+	/* handle block map */
+	if (!(vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
+		__le32 *blkaddr = (void *)buf + erofs_blkoff(pos);
+
+		if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
+			map->m_flags = 0;
+		} else {
+			map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
+			map->m_flags = EROFS_MAP_MAPPED;
+		}
+		goto out;
+	}
+	/* parse chunk indexes */
+	idx = (void *)buf + erofs_blkoff(pos);
+	switch (le32_to_cpu(idx->blkaddr)) {
+	case EROFS_NULL_ADDR:
+		map->m_flags = 0;
+		break;
+	default:
+		map->m_deviceid = le16_to_cpu(idx->device_id) &
+			sbi.device_id_mask;
+		map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
+		map->m_flags = EROFS_MAP_MAPPED;
+		break;
+	}
+out:
+	map->m_llen = map->m_plen;
+	return err;
+}
+
+/*
+ * Resolve the device-relative physical address for a mapping.
+ *
+ * When map->m_deviceid is non-zero it is only range-checked against
+ * sbi->extra_devices and m_pa is left untouched. When it is zero and extra
+ * devices exist, the device table is searched for a device whose mapped
+ * range [mapped_blkaddr, mapped_blkaddr + blocks) contains m_pa; on a match
+ * m_pa is rebased to the start of that range (m_deviceid itself is not
+ * updated here).
+ *
+ * Returns 0 on success or -ENODEV if m_deviceid exceeds the device count.
+ */
+int erofs_map_dev(struct erofs_sb_info *sbi, struct erofs_map_dev *map)
+{
+ struct erofs_device_info *dif;
+ int id;
+
+ if (map->m_deviceid) {
+ if (sbi->extra_devices < map->m_deviceid)
+ return -ENODEV;
+ } else if (sbi->extra_devices) {
+ for (id = 0; id < sbi->extra_devices; ++id) {
+ erofs_off_t startoff, length;
+
+ dif = sbi->devs + id;
+ /* devices without a mapped start address are skipped */
+ if (!dif->mapped_blkaddr)
+ continue;
+ startoff = blknr_to_addr(dif->mapped_blkaddr);
+ length = blknr_to_addr(dif->blocks);
+
+ if (map->m_pa >= startoff &&
+ map->m_pa < startoff + length) {
+ map->m_pa -= startoff;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * Read @size bytes starting at logical @offset of an uncompressed inode
+ * into @buffer.
+ *
+ * The range is walked front to back, one mapped extent at a time. Unmapped
+ * extents (holes) and anything past the end of the file are zero-filled in
+ * @buffer; mapped extents are read with erofs_dev_read().
+ *
+ * Returns 0 on success, the error from erofs_map_blocks()/erofs_map_dev(),
+ * or -EIO if the device read fails.
+ */
+static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer,
+ erofs_off_t size, erofs_off_t offset)
+{
+ struct erofs_map_blocks map = {
+ .index = UINT_MAX,
+ };
+ struct erofs_map_dev mdev;
+ int ret;
+ erofs_off_t ptr = offset;
+
+ while (ptr < offset + size) {
+ /* destination in @buffer matching the logical position ptr */
+ char *const estart = buffer + ptr - offset;
+ erofs_off_t eend;
+
+ map.m_la = ptr;
+ ret = erofs_map_blocks(inode, &map, 0);
+ if (ret)
+ return ret;
+
+ DBG_BUGON(map.m_plen != map.m_llen);
+
+ mdev = (struct erofs_map_dev) {
+ .m_deviceid = map.m_deviceid,
+ .m_pa = map.m_pa,
+ };
+ ret = erofs_map_dev(&sbi, &mdev);
+ if (ret)
+ return ret;
+
+ /* trim extent */
+ eend = min(offset + size, map.m_la + map.m_llen);
+ DBG_BUGON(ptr < map.m_la);
+
+ if (!(map.m_flags & EROFS_MAP_MAPPED)) {
+ if (!map.m_llen) {
+ /* reached EOF */
+ memset(estart, 0, offset + size - ptr);
+ ptr = offset + size;
+ continue;
+ }
+ /* hole: zero-fill up to the end of the (trimmed) extent */
+ memset(estart, 0, eend - ptr);
+ ptr = eend;
+ continue;
+ }
+
+ /* the extent may start before ptr: skip the leading part */
+ if (ptr > map.m_la) {
+ mdev.m_pa += ptr - map.m_la;
+ map.m_la = ptr;
+ }
+
+ ret = erofs_dev_read(mdev.m_deviceid, estart, mdev.m_pa,
+ eend - map.m_la);
+ if (ret < 0)
+ return -EIO;
+ ptr = eend;
+ }
+ return 0;
+}
+
+/*
+ * Read @size bytes starting at logical @offset of a compressed inode into
+ * @buffer.
+ *
+ * The requested range is walked backwards: each iteration maps the extent
+ * containing the last byte still missing (end - 1), reads its compressed
+ * data into a scratch buffer and decompresses the wanted part into place.
+ * Unmapped extents are zero-filled.
+ *
+ * Returns 0 on success or a negative error code (-ENOMEM, or whatever the
+ * mapping, device read or decompression step reported).
+ */
+static int z_erofs_read_data(struct erofs_inode *inode, char *buffer,
+			     erofs_off_t size, erofs_off_t offset)
+{
+	erofs_off_t end, length, skip;
+	struct erofs_map_blocks map = {
+		.index = UINT_MAX,
+	};
+	struct erofs_map_dev mdev;
+	bool partial;
+	unsigned int bufsize = 0;
+	char *raw = NULL;
+	int ret = 0;
+
+	end = offset + size;
+	while (end > offset) {
+		map.m_la = end - 1;
+
+		ret = z_erofs_map_blocks_iter(inode, &map, 0);
+		if (ret)
+			break;
+
+		/* no device id here, thus it will always succeed */
+		mdev = (struct erofs_map_dev) {
+			.m_pa = map.m_pa,
+		};
+		ret = erofs_map_dev(&sbi, &mdev);
+		if (ret) {
+			DBG_BUGON(1);
+			break;
+		}
+
+		/*
+		 * trim to the needed size if the returned extent is quite
+		 * larger than requested, and set up partial flag as well.
+		 */
+		if (end < map.m_la + map.m_llen) {
+			length = end - map.m_la;
+			partial = true;
+		} else {
+			DBG_BUGON(end != map.m_la + map.m_llen);
+			length = map.m_llen;
+			partial = !(map.m_flags & EROFS_MAP_FULL_MAPPED);
+		}
+
+		/*
+		 * "length" is counted from the start of the extent; when the
+		 * extent begins before @offset the first "skip" bytes are not
+		 * wanted and the destination starts at @offset instead.
+		 */
+		if (map.m_la < offset) {
+			skip = offset - map.m_la;
+			end = offset;
+		} else {
+			skip = 0;
+			end = map.m_la;
+		}
+
+		if (!(map.m_flags & EROFS_MAP_MAPPED)) {
+			/*
+			 * Only length - skip bytes belong to the caller's
+			 * buffer; zeroing "length" bytes would overrun it
+			 * whenever the hole starts before @offset.
+			 */
+			memset(buffer + end - offset, 0, length - skip);
+			end = map.m_la;
+			continue;
+		}
+
+		if (map.m_plen > bufsize) {
+			/* keep the old buffer reachable if realloc() fails */
+			char *newraw = realloc(raw, map.m_plen);
+
+			if (!newraw) {
+				ret = -ENOMEM;
+				break;
+			}
+			raw = newraw;
+			bufsize = map.m_plen;
+		}
+		ret = erofs_dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
+		if (ret < 0)
+			break;
+
+		ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
+					.in = raw,
+					.out = buffer + end - offset,
+					.decodedskip = skip,
+					.inputsize = map.m_plen,
+					.decodedlength = length,
+					.alg = map.m_algorithmformat,
+					.partial_decoding = partial
+					 });
+		if (ret < 0)
+			break;
+	}
+	free(raw);
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Read @count bytes at logical @offset of @inode into @buf, dispatching on
+ * the inode's data layout: flat and chunk-based inodes use the raw reader,
+ * compressed layouts use the decompressing reader. Unknown layouts yield
+ * -EINVAL.
+ */
+int erofs_pread(struct erofs_inode *inode, char *buf,
+		erofs_off_t count, erofs_off_t offset)
+{
+	if (inode->datalayout == EROFS_INODE_FLAT_PLAIN ||
+	    inode->datalayout == EROFS_INODE_FLAT_INLINE ||
+	    inode->datalayout == EROFS_INODE_CHUNK_BASED)
+		return erofs_read_raw_data(inode, buf, count, offset);
+
+	if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY ||
+	    inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
+		return z_erofs_read_data(inode, buf, count, offset);
+
+	return -EINVAL;
+}
diff --git a/fs/erofs/decompress.c b/fs/erofs/decompress.c
new file mode 100644
index 00000000000..2be3b844cfc
--- /dev/null
+++ b/fs/erofs/decompress.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include "decompress.h"
+
+#if IS_ENABLED(CONFIG_LZ4)
+#include <u-boot/lz4.h>
+/*
+ * Decompress one LZ4 extent described by @rq.
+ *
+ * If the image has the lz4 0padding feature, leading zero bytes of the
+ * input (at most up to the next page boundary) are skipped first. When
+ * rq->decodedskip is non-zero the data is decoded into a temporary buffer
+ * and only the bytes after the skipped prefix are copied to rq->out.
+ *
+ * Returns the number of decoded bytes (rq->decodedlength) on success,
+ * -EIO if the input is all padding or the decoder produced a different
+ * length, or -ENOMEM if the temporary buffer cannot be allocated.
+ */
+static int z_erofs_decompress_lz4(struct z_erofs_decompress_req *rq)
+{
+ int ret = 0;
+ char *dest = rq->out;
+ char *src = rq->in;
+ char *buff = NULL;
+ bool support_0padding = false;
+ unsigned int inputmargin = 0;
+
+ if (erofs_sb_has_lz4_0padding()) {
+ support_0padding = true;
+
+ /* count leading zero bytes, stopping at a page boundary */
+ while (!src[inputmargin & ~PAGE_MASK])
+ if (!(++inputmargin & ~PAGE_MASK))
+ break;
+
+ if (inputmargin >= rq->inputsize)
+ return -EIO;
+ }
+
+ if (rq->decodedskip) {
+ /* decode into scratch space so the skipped prefix can be dropped */
+ buff = malloc(rq->decodedlength);
+ if (!buff)
+ return -ENOMEM;
+ dest = buff;
+ }
+
+ if (rq->partial_decoding || !support_0padding)
+ ret = LZ4_decompress_safe_partial(src + inputmargin, dest,
+ rq->inputsize - inputmargin,
+ rq->decodedlength, rq->decodedlength);
+ else
+ ret = LZ4_decompress_safe(src + inputmargin, dest,
+ rq->inputsize - inputmargin,
+ rq->decodedlength);
+
+ /* anything other than exactly decodedlength bytes is a failure */
+ if (ret != (int)rq->decodedlength) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (rq->decodedskip)
+ memcpy(rq->out, dest + rq->decodedskip,
+ rq->decodedlength - rq->decodedskip);
+
+out:
+ if (buff)
+ free(buff);
+
+ return ret;
+}
+#endif
+
+/*
+ * Decode one extent according to rq->alg.
+ *
+ * Z_EROFS_COMPRESSION_SHIFTED extents hold uncompressed data in exactly one
+ * block and are copied out directly (honouring rq->decodedskip); LZ4 extents
+ * go through the LZ4 decoder when CONFIG_LZ4 is enabled. Any other
+ * algorithm yields -EOPNOTSUPP.
+ */
+int z_erofs_decompress(struct z_erofs_decompress_req *rq)
+{
+	if (rq->alg != Z_EROFS_COMPRESSION_SHIFTED) {
+#if IS_ENABLED(CONFIG_LZ4)
+		if (rq->alg == Z_EROFS_COMPRESSION_LZ4)
+			return z_erofs_decompress_lz4(rq);
+#endif
+		return -EOPNOTSUPP;
+	}
+
+	/* a shifted (plain) extent always occupies exactly one block */
+	if (rq->inputsize != EROFS_BLKSIZ)
+		return -EFSCORRUPTED;
+
+	DBG_BUGON(rq->decodedlength > EROFS_BLKSIZ);
+	DBG_BUGON(rq->decodedlength < rq->decodedskip);
+
+	memcpy(rq->out, rq->in + rq->decodedskip,
+	       rq->decodedlength - rq->decodedskip);
+	return 0;
+}
diff --git a/fs/erofs/decompress.h b/fs/erofs/decompress.h
new file mode 100644
index 00000000000..81d5fb84f6c
--- /dev/null
+++ b/fs/erofs/decompress.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef __EROFS_DECOMPRESS_H
+#define __EROFS_DECOMPRESS_H
+
+#include "internal.h"
+
+/* parameters of a single decompression request, see z_erofs_decompress() */
+struct z_erofs_decompress_req {
+ /* in: compressed input data; out: destination for the decoded bytes */
+ char *in, *out;
+
+ /*
+ * initial decompressed bytes that need to be skipped
+ * when finally copying to output buffer
+ */
+ unsigned int decodedskip;
+ /*
+ * inputsize: number of input bytes at @in;
+ * decodedlength: number of bytes to decode, including decodedskip
+ */
+ unsigned int inputsize, decodedlength;
+
+ /* indicate the algorithm will be used for decompression */
+ unsigned int alg;
+ /* true if only a leading part of the extent is to be decoded */
+ bool partial_decoding;
+};
+
+int z_erofs_decompress(struct z_erofs_decompress_req *rq);
+
+#endif
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
new file mode 100644
index 00000000000..6b62c7a4f5f
--- /dev/null
+++ b/fs/erofs/erofs_fs.h
@@ -0,0 +1,436 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
+/*
+ * EROFS (Enhanced ROM File System) on-disk format definition
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Copyright (C) 2021, Alibaba Cloud
+ */
+#ifndef __EROFS_FS_H
+#define __EROFS_FS_H
+
+#include <asm/unaligned.h>
+#include <fs.h>
+#include <part.h>
+#include <stdint.h>
+#include <compiler.h>
+
+#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2
+#define EROFS_SUPER_OFFSET 1024
+
+#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
+
+/*
+ * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
+ * be incompatible with this kernel version.
+ */
+#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001
+#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
+#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
+#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
+#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
+#define EROFS_ALL_FEATURE_INCOMPAT \
+ (EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
+ EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
+ EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
+ EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
+ EROFS_FEATURE_INCOMPAT_DEVICE_TABLE)
+
+#define EROFS_SB_EXTSLOT_SIZE 16
+
+struct erofs_deviceslot {
+ union {
+ u8 uuid[16]; /* used for device manager later */
+ u8 userdata[64]; /* digest(sha256), etc. */
+ } u;
+ __le32 blocks; /* total fs blocks of this device */
+ __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */
+ u8 reserved[56];
+};
+
+#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot)
+
+/* erofs on-disk super block (currently 128 bytes) */
+struct erofs_super_block {
+ __le32 magic; /* file system magic number */
+ __le32 checksum; /* crc32c(super_block) */
+ __le32 feature_compat;
+ __u8 blkszbits; /* support block_size == PAGE_SIZE only */
+ __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */
+
+ __le16 root_nid; /* nid of root directory */
+ __le64 inos; /* total valid ino # (== f_files - f_favail) */
+
+ __le64 build_time; /* inode v1 time derivation */
+ __le32 build_time_nsec; /* inode v1 time derivation in nano scale */
+ __le32 blocks; /* used for statfs */
+ __le32 meta_blkaddr; /* start block address of metadata area */
+ __le32 xattr_blkaddr; /* start block address of shared xattr area */
+ __u8 uuid[16]; /* 128-bit uuid for volume */
+ __u8 volume_name[16]; /* volume name */
+ __le32 feature_incompat;
+ union {
+ /* bitmap for available compression algorithms */
+ __le16 available_compr_algs;
+ /* customized sliding window size instead of 64k by default */
+ __le16 lz4_max_distance;
+ } __packed u1;
+ __le16 extra_devices; /* # of devices besides the primary device */
+ __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */
+ __u8 reserved2[38];
+};
+
+/*
+ * erofs inode datalayout (i_format in on-disk inode):
+ * 0 - inode plain without inline data A:
+ * inode, [xattrs], ... | ... | no-holed data
+ * 1 - inode VLE compression B (legacy):
+ * inode, [xattrs], extents ... | ...
+ * 2 - inode plain with inline data C:
+ * inode, [xattrs], last_inline_data, ... | ... | no-holed data
+ * 3 - inode compression D:
+ * inode, [xattrs], map_header, extents ... | ...
+ * 4 - inode chunk-based E:
+ * inode, [xattrs], chunk indexes ... | ...
+ * 5~7 - reserved
+ */
+enum {
+ EROFS_INODE_FLAT_PLAIN = 0,
+ EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1,
+ EROFS_INODE_FLAT_INLINE = 2,
+ EROFS_INODE_FLAT_COMPRESSION = 3,
+ EROFS_INODE_CHUNK_BASED = 4,
+ EROFS_INODE_DATALAYOUT_MAX
+};
+
+/* tell whether a datalayout value denotes one of the compressed layouts */
+static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
+{
+	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
+		return true;
+
+	return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+}
+
+/* bit definitions of inode i_advise */
+#define EROFS_I_VERSION_BITS 1
+#define EROFS_I_DATALAYOUT_BITS 3
+
+#define EROFS_I_VERSION_BIT 0
+#define EROFS_I_DATALAYOUT_BIT 1
+
+#define EROFS_I_ALL \
+ ((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1)
+
+/* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */
+#define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F
+/* with chunk indexes or just a 4-byte blkaddr array */
+#define EROFS_CHUNK_FORMAT_INDEXES 0x0020
+
+#define EROFS_CHUNK_FORMAT_ALL \
+ (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES)
+
+struct erofs_inode_chunk_info {
+ __le16 format; /* chunk blkbits, etc. */
+ __le16 reserved;
+};
+
+/* 32-byte reduced form of an ondisk inode */
+struct erofs_inode_compact {
+ __le16 i_format; /* inode format hints */
+
+/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
+ __le16 i_xattr_icount;
+ __le16 i_mode;
+ __le16 i_nlink;
+ __le32 i_size;
+ __le32 i_reserved;
+ union {
+ /* file total compressed blocks for data mapping 1 */
+ __le32 compressed_blocks;
+ __le32 raw_blkaddr;
+
+ /* for device files, used to indicate old/new device # */
+ __le32 rdev;
+
+ /* for chunk-based files, it contains the summary info */
+ struct erofs_inode_chunk_info c;
+ } i_u;
+ __le32 i_ino; /* only used for 32-bit stat compatibility */
+ __le16 i_uid;
+ __le16 i_gid;
+ __le32 i_reserved2;
+};
+
+/* 32 bytes on-disk inode */
+#define EROFS_INODE_LAYOUT_COMPACT 0
+/* 64 bytes on-disk inode */
+#define EROFS_INODE_LAYOUT_EXTENDED 1
+
+/* 64-byte complete form of an ondisk inode */
+struct erofs_inode_extended {
+ __le16 i_format; /* inode format hints */
+
+/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
+ __le16 i_xattr_icount;
+ __le16 i_mode;
+ __le16 i_reserved;
+ __le64 i_size;
+ union {
+ /* file total compressed blocks for data mapping 1 */
+ __le32 compressed_blocks;
+ __le32 raw_blkaddr;
+
+ /* for device files, used to indicate old/new device # */
+ __le32 rdev;
+
+ /* for chunk-based files, it contains the summary info */
+ struct erofs_inode_chunk_info c;
+ } i_u;
+
+ /* only used for 32-bit stat compatibility */
+ __le32 i_ino;
+
+ __le32 i_uid;
+ __le32 i_gid;
+ __le64 i_ctime;
+ __le32 i_ctime_nsec;
+ __le32 i_nlink;
+ __u8 i_reserved2[16];
+};
+
+#define EROFS_MAX_SHARED_XATTRS (128)
+/* h_shared_count between 129 ... 255 are special # */
+#define EROFS_SHARED_XATTR_EXTENT (255)
+
+/*
+ * inline xattrs (n == i_xattr_icount):
+ * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes
+ * 12 bytes / \
+ * / \
+ * /-----------------------\
+ * | erofs_xattr_entries+ |
+ * +-----------------------+
+ * inline xattrs must start with erofs_xattr_ibody_header;
+ * for a read-only fs, there is no need to introduce h_refcount
+ */
+struct erofs_xattr_ibody_header {
+ __le32 h_reserved;
+ __u8 h_shared_count;
+ __u8 h_reserved2[7];
+ __le32 h_shared_xattrs[0]; /* shared xattr id array */
+};
+
+/* Name indexes */
+#define EROFS_XATTR_INDEX_USER 1
+#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS 2
+#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
+#define EROFS_XATTR_INDEX_TRUSTED 4
+#define EROFS_XATTR_INDEX_LUSTRE 5
+#define EROFS_XATTR_INDEX_SECURITY 6
+
+/* xattr entry (for both inline & shared xattrs) */
+struct erofs_xattr_entry {
+ __u8 e_name_len; /* length of name */
+ __u8 e_name_index; /* attribute name index */
+ __le16 e_value_size; /* size of attribute value */
+ /* followed by e_name and e_value */
+ char e_name[0]; /* attribute name */
+};
+
+/*
+ * Size in bytes of the inline xattr area for an on-disk i_xattr_icount:
+ * zero when the count is zero, otherwise one ibody header plus
+ * (count - 1) 4-byte units.
+ */
+static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount)
+{
+	const unsigned int icount = le16_to_cpu(i_xattr_icount);
+
+	if (icount == 0)
+		return 0;
+
+	return sizeof(struct erofs_xattr_ibody_header) +
+		(icount - 1) * sizeof(__u32);
+}
+
+#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry))
+
+/*
+ * Total on-disk size of an xattr entry: the entry header, the name and the
+ * value, rounded up to the entry alignment.
+ */
+static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
+{
+	const size_t unaligned = sizeof(struct erofs_xattr_entry) +
+				 e->e_name_len + le16_to_cpu(e->e_value_size);
+
+	return EROFS_XATTR_ALIGN(unaligned);
+}
+
+/* represent a zeroed chunk (hole) */
+#define EROFS_NULL_ADDR -1
+
+/* 4-byte block address array */
+#define EROFS_BLOCK_MAP_ENTRY_SIZE sizeof(__le32)
+
+/* 8-byte inode chunk indexes */
+struct erofs_inode_chunk_index {
+ __le16 advise; /* always 0, don't care for now */
+ __le16 device_id; /* back-end storage id (with bits masked) */
+ __le32 blkaddr; /* start block address of this inode chunk */
+};
+
+/* maximum supported size of a physical compression cluster */
+#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)
+
+/* available compression algorithm types (for h_algorithmtype) */
+enum {
+ Z_EROFS_COMPRESSION_LZ4 = 0,
+ Z_EROFS_COMPRESSION_LZMA = 1,
+ Z_EROFS_COMPRESSION_MAX
+};
+
+#define Z_EROFS_ALL_COMPR_ALGS (1 << (Z_EROFS_COMPRESSION_MAX - 1))
+
+/* 14 bytes (+ length field = 16 bytes) */
+struct z_erofs_lz4_cfgs {
+ __le16 max_distance;
+ __le16 max_pclusterblks;
+ u8 reserved[10];
+} __packed;
+
+/* 14 bytes (+ length field = 16 bytes) */
+struct z_erofs_lzma_cfgs {
+ __le32 dict_size;
+ __le16 format;
+ u8 reserved[8];
+} __packed;
+#define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE)
+
+/*
+ * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
+ * e.g. for 4k logical cluster size, 4B if compacted 2B is off;
+ * (4B) + 2B + (4B) if compacted 2B is on.
+ * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
+ * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
+ */
+#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
+#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
+#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
+
+struct z_erofs_map_header {
+ __le32 h_reserved1;
+ __le16 h_advise;
+ /*
+ * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
+ * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
+ */
+ __u8 h_algorithmtype;
+ /*
+ * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
+ * bit 3-7 : reserved.
+ */
+ __u8 h_clusterbits;
+};
+
+#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8
+
+/*
+ * Fixed-sized output compression ondisk Logical Extent cluster type:
+ * 0 - literal (uncompressed) cluster
+ * 1 - compressed cluster (for the head logical cluster)
+ * 2 - compressed cluster (for the other logical clusters)
+ *
+ * In detail,
+ * 0 - literal (uncompressed) cluster,
+ * di_advise = 0
+ * di_clusterofs = the literal data offset of the cluster
+ * di_blkaddr = the blkaddr of the literal cluster
+ *
+ * 1 - compressed cluster (for the head logical cluster)
+ * di_advise = 1
+ * di_clusterofs = the decompressed data offset of the cluster
+ * di_blkaddr = the blkaddr of the compressed cluster
+ *
+ * 2 - compressed cluster (for the other logical clusters)
+ * di_advise = 2
+ * di_clusterofs =
+ * the decompressed data offset in its own head cluster
+ * di_u.delta[0] = distance to its corresponding head cluster
+ * di_u.delta[1] = distance to its corresponding tail cluster
+ * (di_advise could be 0, 1 or 2)
+ */
+enum {
+ Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0,
+ Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1,
+ Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2,
+ Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3,
+ Z_EROFS_VLE_CLUSTER_TYPE_MAX
+};
+
+#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2
+#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0
+
+/*
+ * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
+ * compressed block count of a compressed extent (in logical clusters, aka.
+ * block count of a pcluster).
+ */
+#define Z_EROFS_VLE_DI_D0_CBLKCNT (1 << 11)
+
+struct z_erofs_vle_decompressed_index {
+ __le16 di_advise;
+ /* where to decompress in the head cluster */
+ __le16 di_clusterofs;
+
+ union {
+ /* for the head cluster */
+ __le32 blkaddr;
+ /*
+ * for the rest clusters
+ * eg. for 4k page-sized cluster, maximum 4K*64k = 256M)
+ * [0] - pointing to the head cluster
+ * [1] - pointing to the tail cluster
+ */
+ __le16 delta[2];
+ } di_u;
+};
+
+#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \
+ (round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \
+ sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)
+
+#define Z_EROFS_VLE_EXTENT_ALIGN(size) round_up(size, \
+ sizeof(struct z_erofs_vle_decompressed_index))
+
+/* dirent sorts in alphabet order, thus we can do binary search */
+struct erofs_dirent {
+ __le64 nid; /* node number */
+ __le16 nameoff; /* start offset of file name */
+ __u8 file_type; /* file type */
+ __u8 reserved; /* reserved */
+} __packed;
+
+/* file types used in inode_info->flags */
+enum {
+ EROFS_FT_UNKNOWN,
+ EROFS_FT_REG_FILE,
+ EROFS_FT_DIR,
+ EROFS_FT_CHRDEV,
+ EROFS_FT_BLKDEV,
+ EROFS_FT_FIFO,
+ EROFS_FT_SOCK,
+ EROFS_FT_SYMLINK,
+ EROFS_FT_MAX
+};
+
+#define EROFS_NAME_LEN 255
+
+/* check the EROFS on-disk layout strictly at compile time */
+/*
+ * This function only hosts BUILD_BUG_ON() checks and has no runtime
+ * effect: each line pins the size of one on-disk structure so that an
+ * accidental layout change breaks the build.
+ */
+static inline void erofs_check_ondisk_layout_definitions(void)
+{
+ BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
+ BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32);
+ BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
+ BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
+ BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
+ BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4);
+ BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8);
+ BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
+ BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
+ BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
+ /* keep in sync between 2 index structures for better extendibility */
+ BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) !=
+ sizeof(struct z_erofs_vle_decompressed_index));
+ BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128);
+
+ /* every cluster type value must fit in the cluster type bit field */
+ BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
+ Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
+}
+
+#endif
diff --git a/fs/erofs/fs.c b/fs/erofs/fs.c
new file mode 100644
index 00000000000..89269750f8b
--- /dev/null
+++ b/fs/erofs/fs.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include "internal.h"
+#include <fs_internal.h>
+
+struct erofs_sb_info sbi;
+
+static struct erofs_ctxt {
+ struct disk_partition cur_part_info;
+ struct blk_desc *cur_dev;
+} ctxt;
+
+/*
+ * Read @len bytes at byte @offset of the current partition into @buf.
+ *
+ * @device_id is accepted for interface compatibility but not used: all
+ * reads go to the block device recorded by erofs_probe().
+ *
+ * Returns 0 on success, or -EIO if no device is set up or the underlying
+ * fs_devread() call reports failure (a zero return).
+ */
+int erofs_dev_read(int device_id, void *buf, u64 offset, size_t len)
+{
+	lbaint_t sect;
+	int off;
+
+	/* check before touching cur_dev: it is NULL when nothing is probed */
+	if (!ctxt.cur_dev)
+		return -EIO;
+
+	/* split the byte offset into a sector number and an in-sector offset */
+	sect = offset >> ctxt.cur_dev->log2blksz;
+	off = offset & (ctxt.cur_dev->blksz - 1);
+
+	if (fs_devread(ctxt.cur_dev, &ctxt.cur_part_info, sect,
+		       off, len, buf))
+		return 0;
+	return -EIO;
+}
+
+/*
+ * Read @nblocks filesystem blocks starting at block @start into @buf.
+ * Thin wrapper converting block units to byte units for erofs_dev_read().
+ */
+int erofs_blk_read(void *buf, erofs_blk_t start, u32 nblocks)
+{
+	const erofs_off_t byte_off = blknr_to_addr(start);
+	const erofs_off_t byte_len = blknr_to_addr(nblocks);
+
+	return erofs_dev_read(0, buf, byte_off, byte_len);
+}
+
+/*
+ * Record @fs_dev_desc/@fs_partition as the current device and try to read
+ * an EROFS superblock from it. On failure the current device is cleared
+ * again and the error from erofs_read_superblock() is returned.
+ */
+int erofs_probe(struct blk_desc *fs_dev_desc,
+		struct disk_partition *fs_partition)
+{
+	int ret;
+
+	ctxt.cur_dev = fs_dev_desc;
+	ctxt.cur_part_info = *fs_partition;
+
+	ret = erofs_read_superblock();
+	if (ret)
+		ctxt.cur_dev = NULL;	/* not an EROFS image: forget the device */
+
+	return ret;
+}
+
+/* per-directory iteration state used by erofs_opendir()/erofs_readdir() */
+struct erofs_dir_stream {
+ /* generic stream header; the whole struct is cast to/from this type */
+ struct fs_dir_stream fs_dirs;
+ /* entry storage handed back to the caller by erofs_readdir() */
+ struct fs_dirent dirent;
+
+ /* inode of the directory being listed */
+ struct erofs_inode inode;
+ /* buffer holding the directory block currently being parsed */
+ char dblk[EROFS_BLKSIZ];
+ /*
+ * maxsize: valid bytes in dblk (0 means the next block must be read);
+ * de_end: nameoff of the first dirent, i.e. end of the dirent array
+ */
+ unsigned int maxsize, de_end;
+ /* byte position of the next dirent within the directory data */
+ erofs_off_t pos;
+};
+
+/*
+ * Follow the symlink described by @vi: read the link target (i_size bytes)
+ * and look it up, overwriting @vi with the resulting inode.
+ *
+ * Returns 0 on success, -ENOMEM if the target buffer cannot be allocated,
+ * or the error from erofs_pread()/erofs_ilookup().
+ */
+static int erofs_readlink(struct erofs_inode *vi)
+{
+	size_t len = vi->i_size;
+	char *target = malloc(len + 1);
+	int err;
+
+	if (!target)
+		return -ENOMEM;
+	target[len] = '\0';
+
+	err = erofs_pread(vi, target, len, 0);
+	if (!err)
+		err = erofs_ilookup(target, vi);
+
+	free(target);
+	return err;
+}
+
+/*
+ * Open the directory @filename for iteration.
+ *
+ * A symlink is followed once before the directory check. On success the
+ * newly allocated stream is stored in *@dirsp and 0 is returned; otherwise
+ * the stream is freed and a negative error (-ENOMEM, -ENOTDIR or a lookup
+ * error) is returned.
+ */
+int erofs_opendir(const char *filename, struct fs_dir_stream **dirsp)
+{
+	struct erofs_dir_stream *dirs = calloc(1, sizeof(*dirs));
+	int err;
+
+	if (!dirs)
+		return -ENOMEM;
+
+	err = erofs_ilookup(filename, &dirs->inode);
+	if (!err && S_ISLNK(dirs->inode.i_mode))
+		err = erofs_readlink(&dirs->inode);
+	if (!err && !S_ISDIR(dirs->inode.i_mode))
+		err = -ENOTDIR;
+
+	if (err) {
+		free(dirs);
+		return err;
+	}
+
+	*dirsp = (struct fs_dir_stream *)dirs;
+	return 0;
+}
+
+/*
+ * Return the next entry of an open directory stream.
+ *
+ * Directory data is consumed one block at a time: when no block is loaded
+ * (dirs->maxsize == 0) the block containing dirs->pos is read and the
+ * nameoff of its first dirent is taken as the end of the dirent array.
+ * The name of the current dirent runs up to the next dirent's name, or, for
+ * the last dirent of the block, up to a NUL byte or the end of the block.
+ *
+ * On success *@dentp points at the stream's embedded fs_dirent and 0 is
+ * returned. Returns 1 at the end of the directory, -EFSCORRUPTED for
+ * malformed directory data, or the error from reading the block/inode.
+ */
+int erofs_readdir(struct fs_dir_stream *fs_dirs, struct fs_dirent **dentp)
+{
+	struct erofs_dir_stream *dirs = (struct erofs_dir_stream *)fs_dirs;
+	struct fs_dirent *dent = &dirs->dirent;
+	erofs_off_t pos = dirs->pos;
+	unsigned int nameoff, de_namelen;
+	struct erofs_dirent *de;
+	char *de_name;
+	int err;
+
+	if (pos >= dirs->inode.i_size)
+		return 1;
+
+	if (!dirs->maxsize) {
+		dirs->maxsize = min_t(unsigned int, EROFS_BLKSIZ,
+				      dirs->inode.i_size - pos);
+
+		err = erofs_pread(&dirs->inode, dirs->dblk,
+				  dirs->maxsize, pos);
+		if (err) {
+			/* do not let a later call parse a half-read block */
+			dirs->maxsize = 0;
+			return err;
+		}
+
+		de = (struct erofs_dirent *)dirs->dblk;
+		dirs->de_end = le16_to_cpu(de->nameoff);
+		if (dirs->de_end < sizeof(struct erofs_dirent) ||
+		    dirs->de_end >= EROFS_BLKSIZ) {
+			erofs_err("invalid de[0].nameoff %u @ nid %llu",
+				  dirs->de_end, le64_to_cpu(de->nid) | 0ULL);
+			dirs->maxsize = 0;
+			return -EFSCORRUPTED;
+		}
+	}
+
+	de = (struct erofs_dirent *)(dirs->dblk + erofs_blkoff(pos));
+	nameoff = le16_to_cpu(de->nameoff);
+
+	/*
+	 * A name offset beyond the loaded data would make the length below
+	 * underflow and let strnlen() run far outside dblk.
+	 */
+	if (nameoff > dirs->maxsize)
+		goto bogus;
+
+	de_name = (char *)dirs->dblk + nameoff;
+
+	/* the last dirent in the block? */
+	if (de + 1 >= (struct erofs_dirent *)(dirs->dblk + dirs->de_end))
+		de_namelen = strnlen(de_name, dirs->maxsize - nameoff);
+	else
+		de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;
+
+	/* a corrupted entry is found */
+	if (nameoff + de_namelen > dirs->maxsize ||
+	    de_namelen > EROFS_NAME_LEN)
+		goto bogus;
+
+	memcpy(dent->name, de_name, de_namelen);
+	dent->name[de_namelen] = '\0';
+
+	if (de->file_type == EROFS_FT_DIR) {
+		dent->type = FS_DT_DIR;
+	} else if (de->file_type == EROFS_FT_SYMLINK) {
+		dent->type = FS_DT_LNK;
+	} else {
+		struct erofs_inode vi;
+
+		dent->type = FS_DT_REG;
+		/* nid is stored little-endian on disk */
+		vi.nid = le64_to_cpu(de->nid);
+
+		err = erofs_read_inode_from_disk(&vi);
+		if (err)
+			return err;
+		dent->size = vi.i_size;
+	}
+	*dentp = dent;
+
+	pos += sizeof(*de);
+	if (erofs_blkoff(pos) >= dirs->de_end) {
+		/* dirent array exhausted: continue with the next block */
+		pos = blknr_to_addr(erofs_blknr(pos) + 1);
+		dirs->maxsize = 0;
+	}
+	dirs->pos = pos;
+	return 0;
+
+bogus:
+	erofs_err("bogus dirent @ nid %llu", le64_to_cpu(de->nid) | 0ULL);
+	DBG_BUGON(1);
+	return -EFSCORRUPTED;
+}
+
+/* release a directory stream allocated by erofs_opendir() */
+void erofs_closedir(struct fs_dir_stream *fs_dirs)
+{
+	struct erofs_dir_stream *dirs = (struct erofs_dir_stream *)fs_dirs;
+
+	free(dirs);
+}
+
+/* return 1 if @filename can be looked up on the current image, 0 otherwise */
+int erofs_exists(const char *filename)
+{
+	struct erofs_inode vi;
+
+	return !erofs_ilookup(filename, &vi);
+}
+
+/*
+ * Store the size of @filename in *@size.
+ * Returns 0 on success or the lookup error, in which case *@size is left
+ * untouched.
+ */
+int erofs_size(const char *filename, loff_t *size)
+{
+	struct erofs_inode vi;
+	int err = erofs_ilookup(filename, &vi);
+
+	if (!err)
+		*size = vi.i_size;
+
+	return err;
+}
+
+/*
+ * Read up to @len bytes at @offset of @filename into @buf.
+ *
+ * A symlink is followed once. A @len of 0 is replaced by the file size.
+ * *@actread receives the number of bytes that lie inside the file: 0 if
+ * @offset is at or past the end, the remainder of the file if the request
+ * extends past the end, @len otherwise. It is also set to 0 when the read
+ * itself fails.
+ *
+ * Returns 0 on success or the error from lookup, readlink or erofs_pread().
+ */
+int erofs_read(const char *filename, void *buf, loff_t offset, loff_t len,
+ loff_t *actread)
+{
+ struct erofs_inode vi;
+ int err;
+
+ err = erofs_ilookup(filename, &vi);
+ if (err)
+ return err;
+
+ if (S_ISLNK(vi.i_mode)) {
+ err = erofs_readlink(&vi);
+ if (err)
+ return err;
+ }
+
+ /* a zero length is taken as the whole file size */
+ if (!len)
+ len = vi.i_size;
+
+ err = erofs_pread(&vi, buf, len, offset);
+ if (err) {
+ *actread = 0;
+ return err;
+ }
+
+ /* report only the part of the request that lies within the file */
+ if (offset >= vi.i_size)
+ *actread = 0;
+ else if (offset + len > vi.i_size)
+ *actread = vi.i_size - offset;
+ else
+ *actread = len;
+ return 0;
+}
+
+/* forget the current device; erofs_dev_read() fails with -EIO afterwards */
+void erofs_close(void)
+{
+ ctxt.cur_dev = NULL;
+}
+
+/*
+ * Format the filesystem UUID from the superblock info into @uuid_str.
+ *
+ * Returns -ENOSYS when CONFIG_LIB_UUID is not enabled, 0 otherwise. Note
+ * that 0 is also returned when no device is currently set up, in which
+ * case @uuid_str is not written.
+ */
+int erofs_uuid(char *uuid_str)
+{
+ if (IS_ENABLED(CONFIG_LIB_UUID)) {
+ if (ctxt.cur_dev)
+ uuid_bin_to_str(sbi.uuid, uuid_str,
+ UUID_STR_FORMAT_STD);
+ return 0;
+ }
+ return -ENOSYS;
+}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
new file mode 100644
index 00000000000..4af7c91560c
--- /dev/null
+++ b/fs/erofs/internal.h
@@ -0,0 +1,313 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef __EROFS_INTERNAL_H
+#define __EROFS_INTERNAL_H
+
+#define __packed __attribute__((__packed__))
+
+#include <linux/stat.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/printk.h>
+#include <linux/log2.h>
+#include <inttypes.h>
+#include "erofs_fs.h"
+
+/* Logging helpers: forward to pr_err/pr_info/pr_debug and append a newline. */
+#define erofs_err(fmt, ...) \
+ pr_err(fmt "\n", ##__VA_ARGS__)
+
+#define erofs_info(fmt, ...) \
+ pr_info(fmt "\n", ##__VA_ARGS__)
+
+#define erofs_dbg(fmt, ...) \
+ pr_debug(fmt "\n", ##__VA_ARGS__)
+
+/*
+ * NOTE(review): this maps directly onto BUG_ON(), so it is not restricted
+ * to debug builds in this port.
+ */
+#define DBG_BUGON(condition) BUG_ON(condition)
+
+/* no obvious reason to support explicit PAGE_SIZE != 4096 for now */
+#if PAGE_SIZE != 4096
+#error incompatible PAGE_SIZE is already defined
+#endif
+
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+/* the only supported block size is 1 << 12 bytes */
+#define LOG_BLOCK_SIZE (12)
+#define EROFS_BLKSIZ (1U << LOG_BLOCK_SIZE)
+
+/* an inode slot is 1 << 5 bytes; see iloc() for how nids map to slots */
+#define EROFS_ISLOTBITS 5
+#define EROFS_SLOTSIZE (1U << EROFS_ISLOTBITS)
+
+typedef u64 erofs_off_t;
+typedef u64 erofs_nid_t;
+/* data type for filesystem-wide blocks number */
+typedef u32 erofs_blk_t;
+
+#define NULL_ADDR ((unsigned int)-1)
+#define NULL_ADDR_UL ((unsigned long)-1)
+
+/* conversions between byte addresses and (block number, in-block offset) */
+#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ)
+#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ)
+#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ)
+
+/* number of blocks needed to hold addr bytes */
+#define BLK_ROUND_UP(addr) DIV_ROUND_UP(addr, EROFS_BLKSIZ)
+
+/* only used through pointers in this header */
+struct erofs_buffer_head;
+
+/*
+ * Per extra-device record kept in erofs_sb_info.devs[].
+ * erofs_init_devices() fills mapped_blkaddr from the on-disk device slot.
+ */
+struct erofs_device_info {
+ u32 blocks;
+ u32 mapped_blkaddr;
+};
+
+/*
+ * In-memory superblock state. erofs_read_superblock() converts the on-disk
+ * superblock fields into this structure and erofs_init_devices() sets up
+ * devs, total_blocks and device_id_mask.
+ */
+struct erofs_sb_info {
+ struct erofs_device_info *devs;
+
+ u64 total_blocks;
+ u64 primarydevice_blocks;
+
+ /* block address where inode slots start; used by iloc() */
+ erofs_blk_t meta_blkaddr;
+ erofs_blk_t xattr_blkaddr;
+
+ u32 feature_compat;
+ u32 feature_incompat;
+ /* also used as the ctime of compact inodes */
+ u64 build_time;
+ u32 build_time_nsec;
+
+ /* log2 of the inode slot size; set to EROFS_ISLOTBITS */
+ unsigned char islotbits;
+
+ /* what we really care is nid, rather than ino.. */
+ erofs_nid_t root_nid;
+ /* used for statfs, f_files - f_favail */
+ u64 inos;
+
+ u8 uuid[16];
+
+ u16 available_compr_algs;
+ u16 lz4_max_distance;
+ u32 checksum;
+ u16 extra_devices;
+ union {
+ u16 devt_slotoff; /* used for mkfs */
+ u16 device_id_mask; /* used for others */
+ };
+};
+
+/* global sbi */
+extern struct erofs_sb_info sbi;
+
+/*
+ * Byte address of the inode with node id @nid: the start of the metadata
+ * area (sbi.meta_blkaddr) plus @nid inode slots of 1 << sbi.islotbits bytes.
+ */
+static inline erofs_off_t iloc(erofs_nid_t nid)
+{
+ return blknr_to_addr(sbi.meta_blkaddr) + (nid << sbi.islotbits);
+}
+
+/*
+ * Generate erofs_sb_has_<name>(), erofs_sb_set_<name>() and
+ * erofs_sb_clear_<name>() for one feature bit. @compat selects which word
+ * of the global sbi is used (feature_compat or feature_incompat) and
+ * @feature is the suffix of the EROFS_FEATURE_* constant.
+ */
+#define EROFS_FEATURE_FUNCS(name, compat, feature) \
+static inline bool erofs_sb_has_##name(void) \
+{ \
+ return sbi.feature_##compat & EROFS_FEATURE_##feature; \
+} \
+static inline void erofs_sb_set_##name(void) \
+{ \
+ sbi.feature_##compat |= EROFS_FEATURE_##feature; \
+} \
+static inline void erofs_sb_clear_##name(void) \
+{ \
+ sbi.feature_##compat &= ~EROFS_FEATURE_##feature; \
+}
+
+EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
+EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
+EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
+EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
+EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
+EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
+
+/* bits of erofs_inode.flags */
+#define EROFS_I_EA_INITED (1 << 0)
+/* the z_* compression fields below have been filled in */
+#define EROFS_I_Z_INITED (1 << 1)
+
+/*
+ * In-memory inode. erofs_read_inode_from_disk() fills datalayout,
+ * inode_isize, xattr_isize, i_mode, the u union, i_uid, i_gid, i_nlink,
+ * i_ctime, i_ctime_nsec, i_size and flags from the on-disk inode found at
+ * iloc(nid).
+ */
+struct erofs_inode {
+ struct list_head i_hash, i_subdirs, i_xattrs;
+
+ union {
+ /* (erofsfuse) runtime flags */
+ unsigned int flags;
+ /* (mkfs.erofs) device ID containing source file */
+ u32 dev;
+ };
+ unsigned int i_count;
+ struct erofs_inode *i_parent;
+
+ umode_t i_mode;
+ erofs_off_t i_size;
+
+ u64 i_ino[2];
+ u32 i_uid;
+ u32 i_gid;
+ u64 i_ctime;
+ u32 i_ctime_nsec;
+ u32 i_nlink;
+
+ union {
+ u32 i_blkaddr;
+ u32 i_blocks;
+ u32 i_rdev;
+ struct {
+ unsigned short chunkformat;
+ unsigned char chunkbits;
+ };
+ } u;
+
+ unsigned char datalayout;
+ /* size of the on-disk inode core (compact or extended) */
+ unsigned char inode_isize;
+ /* inline tail-end packing size */
+ unsigned short idata_size;
+
+ unsigned int xattr_isize;
+ unsigned int extent_isize;
+
+ /* node id; input to iloc() */
+ erofs_nid_t nid;
+ struct erofs_buffer_head *bh;
+ struct erofs_buffer_head *bh_inline, *bh_data;
+
+ void *idata;
+
+ union {
+ void *compressmeta;
+ void *chunkindexes;
+ /* compression parameters, valid once EROFS_I_Z_INITED is set */
+ struct {
+ uint16_t z_advise;
+ uint8_t z_algorithmtype[2];
+ uint8_t z_logical_clusterbits;
+ uint8_t z_physical_clusterblks;
+ };
+ };
+};
+
+/* True when the data layout of @inode is one of the compressed layouts. */
+static inline bool is_inode_layout_compression(struct erofs_inode *inode)
+{
+ return erofs_inode_is_data_compressed(inode->datalayout);
+}
+
+/*
+ * Extract a bit field from @value.
+ *
+ * @value: word to extract from
+ * @bit:   position of the least significant bit of the field
+ * @bits:  width of the field in bits
+ *
+ * The mask is built from an unsigned constant so that a wide field never
+ * shifts into the sign bit of an int, and a width that covers the whole
+ * word selects everything left after the right shift instead of shifting
+ * by the full width of the type (both are undefined in C).
+ */
+static inline unsigned int erofs_bitrange(unsigned int value, unsigned int bit,
+					  unsigned int bits)
+{
+	const unsigned int width = sizeof(value) * 8;
+	const unsigned int mask = bits >= width ? ~0U : (1U << bits) - 1;
+
+	return (value >> bit) & mask;
+}
+
+/*
+ * Extract the inode version field (EROFS_I_VERSION_BITS bits starting at
+ * EROFS_I_VERSION_BIT) from an i_format @value.
+ */
+static inline unsigned int erofs_inode_version(unsigned int value)
+{
+ return erofs_bitrange(value, EROFS_I_VERSION_BIT,
+ EROFS_I_VERSION_BITS);
+}
+
+/*
+ * Extract the data layout field (EROFS_I_DATALAYOUT_BITS bits starting at
+ * EROFS_I_DATALAYOUT_BIT) from an i_format @value.
+ */
+static inline unsigned int erofs_inode_datalayout(unsigned int value)
+{
+ return erofs_bitrange(value, EROFS_I_DATALAYOUT_BIT,
+ EROFS_I_DATALAYOUT_BITS);
+}
+
+/* an inode is treated as the root when it is its own parent */
+#define IS_ROOT(x) ((x) == (x)->i_parent)
+
+/*
+ * In-memory directory entry: a name plus either a pointer to the inode or
+ * its node id.
+ */
+struct erofs_dentry {
+ struct list_head d_child; /* child of parent list */
+
+ unsigned int type;
+ char name[EROFS_NAME_LEN];
+ union {
+ struct erofs_inode *inode;
+ erofs_nid_t nid;
+ };
+};
+
+/*
+ * Return true when the NUL-terminated @name is exactly "." or "..".
+ * Characters are inspected left to right, so nothing past the terminator
+ * is read.
+ */
+static inline bool is_dot_dotdot(const char *name)
+{
+	if (name[0] == '.') {
+		if (name[1] == '\0')
+			return true;
+		if (name[1] == '.' && name[2] == '\0')
+			return true;
+	}
+	return false;
+}
+
+/* bit numbers behind the EROFS_MAP_* flags below */
+enum {
+ BH_Meta,
+ BH_Mapped,
+ BH_Encoded,
+ BH_FullMapped,
+};
+
+/* Has a disk mapping */
+#define EROFS_MAP_MAPPED (1 << BH_Mapped)
+/* Located in metadata (could be copied from bd_inode) */
+#define EROFS_MAP_META (1 << BH_Meta)
+/* The extent is encoded */
+#define EROFS_MAP_ENCODED (1 << BH_Encoded)
+/* The length of extent is full */
+#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
+
+/*
+ * Request/result of a block mapping query. m_la/m_llen describe the logical
+ * extent and m_pa/m_plen the physical one; m_flags holds EROFS_MAP_* bits.
+ * mpage caches one metadata block and index records which block it holds
+ * (see z_erofs_reload_indexes() in zmap.c).
+ */
+struct erofs_map_blocks {
+ char mpage[EROFS_BLKSIZ];
+
+ erofs_off_t m_pa, m_la;
+ u64 m_plen, m_llen;
+
+ unsigned short m_deviceid;
+ char m_algorithmformat;
+ unsigned int m_flags;
+ erofs_blk_t index;
+};
+
+/*
+ * Used to get the exact decompressed length, e.g. fiemap (consider lookback
+ * approach instead if possible since it's more metadata lightweight.)
+ */
+#define EROFS_GET_BLOCKS_FIEMAP 0x0002
+
+/*
+ * Runtime-only algorithm formats, numbered after the on-disk ones.
+ * z_erofs_map_blocks_iter() reports Z_EROFS_COMPRESSION_SHIFTED for extents
+ * whose head lcluster is of the PLAIN type.
+ */
+enum {
+ Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
+ Z_EROFS_COMPRESSION_RUNTIME_MAX
+};
+
+/* argument of erofs_map_dev(): a physical address and a device id */
+struct erofs_map_dev {
+ erofs_off_t m_pa;
+ unsigned int m_deviceid;
+};
+
+/*
+ * Cross-file entry points of the driver, grouped by the source file that
+ * defines them. Each function is declared exactly once.
+ */
+
+/* fs.c */
+int erofs_blk_read(void *buf, erofs_blk_t start, u32 nblocks);
+int erofs_dev_read(int device_id, void *buf, u64 offset, size_t len);
+
+/* super.c */
+int erofs_read_superblock(void);
+
+/* namei.c */
+int erofs_read_inode_from_disk(struct erofs_inode *vi);
+int erofs_ilookup(const char *path, struct erofs_inode *vi);
+
+/* data.c */
+int erofs_pread(struct erofs_inode *inode, char *buf,
+		erofs_off_t count, erofs_off_t offset);
+int erofs_map_blocks(struct erofs_inode *inode,
+		     struct erofs_map_blocks *map, int flags);
+int erofs_map_dev(struct erofs_sb_info *sbi, struct erofs_map_dev *map);
+
+/* zmap.c */
+int z_erofs_fill_inode(struct erofs_inode *vi);
+int z_erofs_map_blocks_iter(struct erofs_inode *vi,
+			    struct erofs_map_blocks *map, int flags);
+
+/* error code for corrupted on-disk data; falls back to EIO without EUCLEAN */
+#ifdef EUCLEAN
+#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
+#else
+#define EFSCORRUPTED EIO
+#endif
+
+#define CRC32C_POLY_LE 0x82F63B78
+/*
+ * Bit-at-a-time CRC using the reflected polynomial CRC32C_POLY_LE.
+ *
+ * @crc: running value to continue from
+ * @in:  data to fold in
+ * @len: number of bytes at @in
+ *
+ * Returns the updated running value. No initial or final inversion is
+ * applied here; that is left to the caller.
+ */
+static inline u32 erofs_crc32c(u32 crc, const u8 *in, size_t len)
+{
+	const u8 *const stop = in + len;
+
+	for (; in != stop; in++) {
+		unsigned int bit;
+
+		crc ^= *in;
+		for (bit = 8; bit; bit--) {
+			if (crc & 1)
+				crc = (crc >> 1) ^ CRC32C_POLY_LE;
+			else
+				crc >>= 1;
+		}
+	}
+	return crc;
+}
+
+#endif
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
new file mode 100644
index 00000000000..d1d4757c507
--- /dev/null
+++ b/fs/erofs/namei.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include "internal.h"
+
+/*
+ * Read the on-disk inode addressed by vi->nid and fill in @vi.
+ *
+ * The compact-sized head is read first to obtain i_format; for the extended
+ * layout the remainder of the larger structure is read afterwards. The
+ * caller must set vi->nid; every other field written here is derived from
+ * disk (or from the superblock build time for compact inodes).
+ *
+ * Returns 0 on success, -EIO when a device read fails, -EOPNOTSUPP for an
+ * unknown data layout, inode version or chunk format, -EFSCORRUPTED for an
+ * i_mode with an unknown file type, or the result of z_erofs_fill_inode()
+ * for compressed inodes.
+ */
+int erofs_read_inode_from_disk(struct erofs_inode *vi)
+{
+	int ret, ifmt;
+	char buf[sizeof(struct erofs_inode_extended)];
+	struct erofs_inode_compact *dic;
+	struct erofs_inode_extended *die;
+	const erofs_off_t inode_loc = iloc(vi->nid);
+
+	ret = erofs_dev_read(0, buf, inode_loc, sizeof(*dic));
+	if (ret < 0)
+		return -EIO;
+
+	dic = (struct erofs_inode_compact *)buf;
+	ifmt = le16_to_cpu(dic->i_format);
+
+	vi->datalayout = erofs_inode_datalayout(ifmt);
+	if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) {
+		erofs_err("unsupported datalayout %u of nid %llu",
+			  vi->datalayout, vi->nid | 0ULL);
+		return -EOPNOTSUPP;
+	}
+	switch (erofs_inode_version(ifmt)) {
+	case EROFS_INODE_LAYOUT_EXTENDED:
+		vi->inode_isize = sizeof(struct erofs_inode_extended);
+
+		/* fetch the part of the extended inode not read yet */
+		ret = erofs_dev_read(0, buf + sizeof(*dic),
+				     inode_loc + sizeof(*dic),
+				     sizeof(*die) - sizeof(*dic));
+		if (ret < 0)
+			return -EIO;
+
+		die = (struct erofs_inode_extended *)buf;
+		vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
+		vi->i_mode = le16_to_cpu(die->i_mode);
+
+		switch (vi->i_mode & S_IFMT) {
+		case S_IFREG:
+		case S_IFDIR:
+		case S_IFLNK:
+			vi->u.i_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr);
+			break;
+		case S_IFCHR:
+		case S_IFBLK:
+			vi->u.i_rdev = 0;
+			break;
+		case S_IFIFO:
+		case S_IFSOCK:
+			vi->u.i_rdev = 0;
+			break;
+		default:
+			goto bogusimode;
+		}
+
+		vi->i_uid = le32_to_cpu(die->i_uid);
+		vi->i_gid = le32_to_cpu(die->i_gid);
+		vi->i_nlink = le32_to_cpu(die->i_nlink);
+
+		vi->i_ctime = le64_to_cpu(die->i_ctime);
+		/*
+		 * The nanosecond part is a 32-bit quantity (vi->i_ctime_nsec
+		 * is u32): a 64-bit conversion would byte-swap across eight
+		 * bytes on big-endian hosts and the truncation would then
+		 * drop the real value.
+		 */
+		vi->i_ctime_nsec = le32_to_cpu(die->i_ctime_nsec);
+		vi->i_size = le64_to_cpu(die->i_size);
+		if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
+			/* fill chunked inode summary info */
+			vi->u.chunkformat = le16_to_cpu(die->i_u.c.format);
+		break;
+	case EROFS_INODE_LAYOUT_COMPACT:
+		vi->inode_isize = sizeof(struct erofs_inode_compact);
+		vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
+		vi->i_mode = le16_to_cpu(dic->i_mode);
+
+		switch (vi->i_mode & S_IFMT) {
+		case S_IFREG:
+		case S_IFDIR:
+		case S_IFLNK:
+			vi->u.i_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr);
+			break;
+		case S_IFCHR:
+		case S_IFBLK:
+			vi->u.i_rdev = 0;
+			break;
+		case S_IFIFO:
+		case S_IFSOCK:
+			vi->u.i_rdev = 0;
+			break;
+		default:
+			goto bogusimode;
+		}
+
+		vi->i_uid = le16_to_cpu(dic->i_uid);
+		vi->i_gid = le16_to_cpu(dic->i_gid);
+		vi->i_nlink = le16_to_cpu(dic->i_nlink);
+
+		/* compact inodes carry no timestamp: use the image build time */
+		vi->i_ctime = sbi.build_time;
+		vi->i_ctime_nsec = sbi.build_time_nsec;
+
+		vi->i_size = le32_to_cpu(dic->i_size);
+		if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
+			vi->u.chunkformat = le16_to_cpu(dic->i_u.c.format);
+		break;
+	default:
+		erofs_err("unsupported on-disk inode version %u of nid %llu",
+			  erofs_inode_version(ifmt), vi->nid | 0ULL);
+		return -EOPNOTSUPP;
+	}
+
+	vi->flags = 0;
+	if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+		if (vi->u.chunkformat & ~EROFS_CHUNK_FORMAT_ALL) {
+			erofs_err("unsupported chunk format %x of nid %llu",
+				  vi->u.chunkformat, vi->nid | 0ULL);
+			return -EOPNOTSUPP;
+		}
+		vi->u.chunkbits = LOG_BLOCK_SIZE +
+			(vi->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
+	} else if (erofs_inode_is_data_compressed(vi->datalayout)) {
+		/* do not silently drop a failure of the compression setup */
+		return z_erofs_fill_inode(vi);
+	}
+	return 0;
+bogusimode:
+	erofs_err("bogus i_mode (%o) @ nid %llu", vi->i_mode, vi->nid | 0ULL);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * Linear search of one directory block for an entry called @name.
+ *
+ * @pnid:       nid of the directory, used only in the error message
+ * @dentry_blk: start of the block: an array of dirents followed by names
+ * @name, @len: name to look for (not NUL-terminated, @len bytes)
+ * @nameoff:    name offset of the first dirent, which marks the end of the
+ *              dirent array
+ * @maxsize:    number of valid bytes in @dentry_blk
+ *
+ * Returns the matching dirent, NULL when the block holds no match, or
+ * ERR_PTR(-EFSCORRUPTED) when an entry points outside the valid data or
+ * has a name longer than EROFS_NAME_LEN.
+ */
+struct erofs_dirent *find_target_dirent(erofs_nid_t pnid,
+					void *dentry_blk,
+					const char *name, unsigned int len,
+					unsigned int nameoff,
+					unsigned int maxsize)
+{
+	struct erofs_dirent *de = dentry_blk;
+	const struct erofs_dirent *end = dentry_blk + nameoff;
+
+	while (de < end) {
+		const char *de_name;
+		unsigned int de_namelen;
+
+		nameoff = le16_to_cpu(de->nameoff);
+		/*
+		 * Validate the offset before it is used: with an offset past
+		 * the valid data, "maxsize - nameoff" below would wrap around
+		 * and let strnlen() scan far beyond the buffer.
+		 */
+		if (nameoff > maxsize)
+			goto corrupted;
+		de_name = (char *)dentry_blk + nameoff;
+
+		/* the last dirent in the block? */
+		if (de + 1 >= end)
+			de_namelen = strnlen(de_name, maxsize - nameoff);
+		else
+			de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;
+
+		/* a corrupted entry is found */
+		if (nameoff + de_namelen > maxsize ||
+		    de_namelen > EROFS_NAME_LEN)
+			goto corrupted;
+
+		if (len == de_namelen && !memcmp(de_name, name, de_namelen))
+			return de;
+		++de;
+	}
+	return NULL;
+
+corrupted:
+	erofs_err("bogus dirent @ nid %llu", pnid | 0ULL);
+	DBG_BUGON(1);
+	return ERR_PTR(-EFSCORRUPTED);
+}
+
+/*
+ * Path walk cursor: nid is the directory to search on entry to
+ * erofs_namei() and the nid of the found entry on success.
+ */
+struct nameidata {
+ erofs_nid_t nid;
+ unsigned int ftype;
+};
+
+/*
+ * Look up one path component in the directory nd->nid.
+ *
+ * The directory is read one block at a time (the last block may be
+ * shorter) and every block is searched with find_target_dirent(). On a
+ * match nd->nid is replaced by the nid of the entry.
+ *
+ * Returns 0 on success, -ENOENT when no block contains @name,
+ * -EFSCORRUPTED when the first name offset of a block is invalid, or the
+ * error of the failing read/search.
+ */
+int erofs_namei(struct nameidata *nd,
+		const char *name, unsigned int len)
+{
+	const erofs_nid_t dir_nid = nd->nid;
+	struct erofs_inode dir = { .nid = dir_nid };
+	char blk[EROFS_BLKSIZ];
+	erofs_off_t off, chunk;
+	int ret;
+
+	ret = erofs_read_inode_from_disk(&dir);
+	if (ret)
+		return ret;
+
+	for (off = 0; off < dir.i_size; off += chunk) {
+		struct erofs_dirent *hit;
+		unsigned int first_nameoff;
+
+		chunk = min_t(erofs_off_t, dir.i_size - off, EROFS_BLKSIZ);
+
+		ret = erofs_pread(&dir, blk, chunk, off);
+		if (ret)
+			return ret;
+
+		/* the first name offset tells where the dirent array ends */
+		first_nameoff =
+			le16_to_cpu(((struct erofs_dirent *)(void *)blk)->nameoff);
+		if (first_nameoff < sizeof(struct erofs_dirent) ||
+		    first_nameoff >= PAGE_SIZE) {
+			erofs_err("invalid de[0].nameoff %u @ nid %llu",
+				  first_nameoff, dir_nid | 0ULL);
+			return -EFSCORRUPTED;
+		}
+
+		hit = find_target_dirent(dir_nid, blk, name, len,
+					 first_nameoff, chunk);
+		if (IS_ERR(hit))
+			return PTR_ERR(hit);
+		if (!hit)
+			continue;
+
+		nd->nid = le64_to_cpu(hit->nid);
+		return 0;
+	}
+	return -ENOENT;
+}
+
+/*
+ * Resolve @name component by component, starting at the root directory.
+ *
+ * Leading and repeated slashes are skipped. On success nd->nid is the nid
+ * of the last component (the root nid for an empty path). Returns 0 or the
+ * first error reported by erofs_namei().
+ */
+static int link_path_walk(const char *name, struct nameidata *nd)
+{
+	const char *cur = name;
+
+	nd->nid = sbi.root_nid;
+
+	for (; *cur == '/'; cur++)
+		;
+
+	/* At this point we know we have a real path component. */
+	while (*cur != '\0') {
+		const char *stop;
+		int err;
+
+		/* a component has at least one character */
+		for (stop = cur + 1; *stop != '\0' && *stop != '/'; ++stop)
+			;
+
+		DBG_BUGON(stop <= cur);
+		err = erofs_namei(nd, cur, stop - cur);
+		if (err)
+			return err;
+
+		/* Skip until no more slashes. */
+		while (*stop == '/')
+			++stop;
+		cur = stop;
+	}
+	return 0;
+}
+
+/*
+ * Resolve @path from the root directory and load the resulting inode into
+ * @vi.
+ *
+ * Returns 0 on success, otherwise the error from the path walk or from
+ * reading the inode.
+ */
+int erofs_ilookup(const char *path, struct erofs_inode *vi)
+{
+	struct nameidata walk;
+	int err = link_path_walk(path, &walk);
+
+	if (err)
+		return err;
+
+	vi->nid = walk.nid;
+	return erofs_read_inode_from_disk(vi);
+}
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
new file mode 100644
index 00000000000..4cca322b9ea
--- /dev/null
+++ b/fs/erofs/super.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include "internal.h"
+
+/*
+ * Record the incompatible feature word of the on-disk superblock in @sbi
+ * and check it against EROFS_ALL_FEATURE_INCOMPAT.
+ *
+ * Returns true when every bit is known, false (after logging the unknown
+ * bits) otherwise. @sbi->feature_incompat is written in both cases.
+ */
+static bool check_layout_compatibility(struct erofs_sb_info *sbi,
+				       struct erofs_super_block *dsb)
+{
+	const unsigned int feature = le32_to_cpu(dsb->feature_incompat);
+	const unsigned int unknown = feature & ~EROFS_ALL_FEATURE_INCOMPAT;
+
+	sbi->feature_incompat = feature;
+
+	/* check if current kernel meets all mandatory requirements */
+	if (!unknown)
+		return true;
+
+	erofs_err("unidentified incompatible feature %x, please upgrade kernel version",
+		  unknown);
+	return false;
+}
+
+/*
+ * Set up the extra-device table described by the superblock.
+ *
+ * total_blocks starts as the size of the primary device and grows by the
+ * size of every extra device. The on-disk device count is only honoured
+ * when the device-table feature is set and must equal sbi->extra_devices.
+ *
+ * Returns 0 on success (including the no-extra-device case), -EINVAL on a
+ * device count mismatch, -ENOMEM when the table cannot be allocated, or
+ * the error of a failing slot read. On failure no table is left behind in
+ * sbi->devs.
+ */
+static int erofs_init_devices(struct erofs_sb_info *sbi,
+			      struct erofs_super_block *dsb)
+{
+	unsigned int ondisk_extradevs, i;
+	erofs_off_t pos;
+	int ret;
+
+	sbi->total_blocks = sbi->primarydevice_blocks;
+
+	if (!erofs_sb_has_device_table())
+		ondisk_extradevs = 0;
+	else
+		ondisk_extradevs = le16_to_cpu(dsb->extra_devices);
+
+	if (ondisk_extradevs != sbi->extra_devices) {
+		erofs_err("extra devices don't match (ondisk %u, given %u)",
+			  ondisk_extradevs, sbi->extra_devices);
+		return -EINVAL;
+	}
+	if (!ondisk_extradevs)
+		return 0;
+
+	sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
+	sbi->devs = calloc(ondisk_extradevs, sizeof(*sbi->devs));
+	if (!sbi->devs)
+		return -ENOMEM;
+
+	pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
+	for (i = 0; i < ondisk_extradevs; ++i) {
+		struct erofs_deviceslot dis;
+
+		ret = erofs_dev_read(0, &dis, pos, sizeof(dis));
+		if (ret < 0)
+			goto err_free;
+
+		/* slot fields are little-endian on disk, like the superblock */
+		sbi->devs[i].blocks = le32_to_cpu(dis.blocks);
+		sbi->devs[i].mapped_blkaddr = le32_to_cpu(dis.mapped_blkaddr);
+		sbi->total_blocks += sbi->devs[i].blocks;
+		pos += EROFS_DEVT_SLOT_SIZE;
+	}
+	return 0;
+
+err_free:
+	/* do not leak (or leave dangling) a half-filled table */
+	free(sbi->devs);
+	sbi->devs = NULL;
+	return ret;
+}
+
+/*
+ * Read block 0, validate the superblock found at EROFS_SUPER_OFFSET and
+ * fill the global sbi from it.
+ *
+ * Returns 0 on success, -EIO when the block cannot be read, -EINVAL for a
+ * wrong magic, an unsupported block size or unknown incompatible features,
+ * or the result of erofs_init_devices().
+ */
+int erofs_read_superblock(void)
+{
+	char data[EROFS_BLKSIZ];
+	struct erofs_super_block *dsb;
+	unsigned int blkszbits;
+	int ret;
+
+	ret = erofs_blk_read(data, 0, 1);
+	if (ret < 0) {
+		erofs_err("cannot read erofs superblock: %d", ret);
+		return -EIO;
+	}
+	dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
+
+	ret = -EINVAL;
+	if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) {
+		erofs_err("cannot find valid erofs superblock");
+		return ret;
+	}
+
+	sbi.feature_compat = le32_to_cpu(dsb->feature_compat);
+
+	blkszbits = dsb->blkszbits;
+	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
+	if (blkszbits != LOG_BLOCK_SIZE) {
+		/*
+		 * blkszbits comes straight from disk: only shift when the
+		 * result fits the type, otherwise report a size of 0 rather
+		 * than invoking an undefined shift.
+		 */
+		erofs_err("blksize %u isn't supported on this platform",
+			  blkszbits < 32 ? 1U << blkszbits : 0U);
+		return ret;
+	}
+
+	if (!check_layout_compatibility(&sbi, dsb))
+		return ret;
+
+	sbi.primarydevice_blocks = le32_to_cpu(dsb->blocks);
+	sbi.meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
+	sbi.xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
+	sbi.islotbits = EROFS_ISLOTBITS;
+	sbi.root_nid = le16_to_cpu(dsb->root_nid);
+	sbi.inos = le64_to_cpu(dsb->inos);
+	sbi.checksum = le32_to_cpu(dsb->checksum);
+
+	sbi.build_time = le64_to_cpu(dsb->build_time);
+	sbi.build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
+
+	memcpy(&sbi.uuid, dsb->uuid, sizeof(dsb->uuid));
+	return erofs_init_devices(&sbi, dsb);
+}
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
new file mode 100644
index 00000000000..be2599ac4f9
--- /dev/null
+++ b/fs/erofs/zmap.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include "internal.h"
+
+/*
+ * Early compression setup for @vi.
+ *
+ * Only legacy-compressed inodes on images without the big pcluster feature
+ * are initialised here, with fixed defaults; all other inodes are left for
+ * z_erofs_fill_inode_lazy(). Always returns 0.
+ */
+int z_erofs_fill_inode(struct erofs_inode *vi)
+{
+	if (erofs_sb_has_big_pcluster() ||
+	    vi->datalayout != EROFS_INODE_FLAT_COMPRESSION_LEGACY)
+		return 0;
+
+	vi->z_advise = 0;
+	vi->z_algorithmtype[0] = 0;
+	vi->z_algorithmtype[1] = 0;
+	vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
+
+	vi->flags |= EROFS_I_Z_INITED;
+	return 0;
+}
+
+/*
+ * Load the compression map header of @vi on first use.
+ *
+ * Does nothing when EROFS_I_Z_INITED is already set. Otherwise a
+ * z_erofs_map_header is read from the 8-byte aligned position following the
+ * inode core and the xattr area, and z_advise, z_algorithmtype[] and
+ * z_logical_clusterbits are taken from it.
+ *
+ * Returns 0 on success, -EIO when the read fails, -EOPNOTSUPP when the
+ * first algorithm type is unknown, or -EFSCORRUPTED when exactly one of the
+ * two big-pcluster advise bits is set on a compact-index inode.
+ */
+static int z_erofs_fill_inode_lazy(struct erofs_inode *vi)
+{
+ int ret;
+ erofs_off_t pos;
+ struct z_erofs_map_header *h;
+ char buf[sizeof(struct z_erofs_map_header)];
+
+ if (vi->flags & EROFS_I_Z_INITED)
+ return 0;
+
+ /* that combination is already initialised by z_erofs_fill_inode() */
+ DBG_BUGON(!erofs_sb_has_big_pcluster() &&
+ vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
+ pos = round_up(iloc(vi->nid) + vi->inode_isize + vi->xattr_isize, 8);
+
+ ret = erofs_dev_read(0, buf, pos, sizeof(buf));
+ if (ret < 0)
+ return -EIO;
+
+ h = (struct z_erofs_map_header *)buf;
+ vi->z_advise = le16_to_cpu(h->h_advise);
+ /* low nibble: algorithm 0, high nibble: algorithm 1 */
+ vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
+ vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
+
+ if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) {
+ erofs_err("unknown compression format %u for nid %llu",
+ vi->z_algorithmtype[0], (unsigned long long)vi->nid);
+ return -EOPNOTSUPP;
+ }
+
+ vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
+ /* '^' binds tighter than '&&': the two advise bits must agree */
+ if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
+ !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
+ !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
+ erofs_err("big pcluster head1/2 of compact indexes should be consistent for nid %llu",
+ vi->nid * 1ULL);
+ return -EFSCORRUPTED;
+ }
+ vi->flags |= EROFS_I_Z_INITED;
+ return 0;
+}
+
+/*
+ * Scratch state shared by the lcluster decoding helpers of one mapping
+ * request. kaddr points at the cached index block (map->mpage), lcn is the
+ * logical cluster number decoded last, and the remaining fields hold what
+ * was decoded for it.
+ */
+struct z_erofs_maprecorder {
+ struct erofs_inode *inode;
+ struct erofs_map_blocks *map;
+ void *kaddr;
+
+ unsigned long lcn;
+ /* compression extent information gathered */
+ u8 type, headtype;
+ u16 clusterofs;
+ /* delta[0]: lookback distance, delta[1]: lookahead distance */
+ u16 delta[2];
+ erofs_blk_t pblk, compressedlcs;
+};
+
+/*
+ * Make sure the index block @eblk is the one cached in map->mpage.
+ *
+ * The block is only read when map->index differs from @eblk; map->index is
+ * updated after a successful read. Returns 0 on success or -EIO when the
+ * block read fails.
+ */
+static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
+				  erofs_blk_t eblk)
+{
+	struct erofs_map_blocks *const map = m->map;
+
+	if (map->index != eblk) {
+		if (erofs_blk_read(map->mpage, eblk, 1) < 0)
+			return -EIO;
+		map->index = eblk;
+	}
+	return 0;
+}
+
+/*
+ * Decode the legacy (full-size) index of logical cluster @lcn into @m.
+ *
+ * The index array starts at the legacy-aligned position after the inode
+ * core and xattr area and has one z_erofs_vle_decompressed_index per
+ * lcluster. For NONHEAD lclusters delta[] is filled (and compressedlcs when
+ * delta[0] carries the CBLKCNT marker); for PLAIN/HEAD lclusters clusterofs
+ * and pblk are filled.
+ *
+ * Returns 0 on success, the error of the index block reload,
+ * -EFSCORRUPTED for a CBLKCNT marker without the big pcluster advise bit,
+ * or -EOPNOTSUPP for an unknown cluster type.
+ */
+static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+ unsigned long lcn)
+{
+ struct erofs_inode *const vi = m->inode;
+ const erofs_off_t ibase = iloc(vi->nid);
+ const erofs_off_t pos =
+ Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
+ vi->xattr_isize) +
+ lcn * sizeof(struct z_erofs_vle_decompressed_index);
+ struct z_erofs_vle_decompressed_index *di;
+ unsigned int advise, type;
+ int err;
+
+ err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+ if (err)
+ return err;
+
+ m->lcn = lcn;
+ di = m->kaddr + erofs_blkoff(pos);
+
+ advise = le16_to_cpu(di->di_advise);
+ type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
+ ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
+ switch (type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ m->clusterofs = 1 << vi->z_logical_clusterbits;
+ m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
+ /* delta[0] holds a compressed block count instead of a distance */
+ if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+ if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ m->compressedlcs = m->delta[0] &
+ ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+ m->delta[0] = 1;
+ }
+ m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
+ break;
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ m->clusterofs = le16_to_cpu(di->di_clusterofs);
+ m->pblk = le32_to_cpu(di->di_u.blkaddr);
+ break;
+ default:
+ DBG_BUGON(1);
+ return -EOPNOTSUPP;
+ }
+ m->type = type;
+ return 0;
+}
+
+/*
+ * Decode one packed index entry that starts at bit position @pos of @in.
+ *
+ * A little-endian 32-bit word is fetched from the byte containing @pos and
+ * shifted so that the entry starts at bit 0. The low part selected by
+ * @lomask is returned and the two bits starting at bit @lobits are stored
+ * in *@type.
+ */
+static unsigned int decode_compactedbits(unsigned int lobits,
+					 unsigned int lomask,
+					 u8 *in, unsigned int pos, u8 *type)
+{
+	const unsigned int word = get_unaligned_le32(in + pos / 8);
+	const unsigned int entry = word >> (pos & 7);
+
+	*type = (entry >> lobits) & 3;
+	return entry & lomask;
+}
+
+/*
+ * Compute the lookahead distance for entry @i of a compacted pack.
+ *
+ * Entries are scanned from @i towards the end of the pack (@vcnt entries of
+ * @encodebits bits each). The number of NONHEAD entries seen before the
+ * first other entry is returned. When the scan runs off the end of the
+ * pack, the low value of the last entry minus one is added, unless that
+ * value carries the CBLKCNT marker.
+ */
+static int get_compacted_la_distance(unsigned int lclusterbits,
+ unsigned int encodebits,
+ unsigned int vcnt, u8 *in, int i)
+{
+ const unsigned int lomask = (1 << lclusterbits) - 1;
+ unsigned int lo, d1 = 0;
+ u8 type;
+
+ DBG_BUGON(i >= vcnt);
+
+ do {
+ lo = decode_compactedbits(lclusterbits, lomask,
+ in, encodebits * i, &type);
+
+ if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+ return d1;
+ ++d1;
+ } while (++i < vcnt);
+
+ /* vcnt - 1 (Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) item */
+ if (!(lo & Z_EROFS_VLE_DI_D0_CBLKCNT))
+ d1 += lo - 1;
+ return d1;
+}
+
+/*
+ * Decode the compacted index entry at in-block offset @eofs into @m.
+ *
+ * @amortizedshift: log2 of the amortized bytes per entry. 2 selects packs
+ *                  of 2 entries; 1 selects packs of 16 entries and is only
+ *                  accepted for 4KiB lclusters.
+ * @eofs:           byte offset of the entry inside the cached index block
+ * @lookahead:      also compute delta[1] for NONHEAD entries
+ *
+ * Each pack ends with a little-endian 32-bit base block address; the
+ * entries in front of it are bit-packed and decoded with
+ * decode_compactedbits(). For HEAD/PLAIN entries the physical block is that
+ * base plus the number of blocks consumed by the earlier entries of the
+ * same pack.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for an unsupported pack geometry, or
+ * -EFSCORRUPTED for inconsistent CBLKCNT usage.
+ */
+static int unpack_compacted_index(struct z_erofs_maprecorder *m,
+ unsigned int amortizedshift,
+ unsigned int eofs, bool lookahead)
+{
+ struct erofs_inode *const vi = m->inode;
+ const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ const unsigned int lomask = (1 << lclusterbits) - 1;
+ unsigned int vcnt, base, lo, encodebits, nblk;
+ int i;
+ u8 *in, type;
+ bool big_pcluster;
+
+ if (1 << amortizedshift == 4)
+ vcnt = 2;
+ else if (1 << amortizedshift == 2 && lclusterbits == 12)
+ vcnt = 16;
+ else
+ return -EOPNOTSUPP;
+
+ big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+ /* bits per entry once the trailing 32-bit base address is set aside */
+ encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
+ base = round_down(eofs, vcnt << amortizedshift);
+ in = m->kaddr + base;
+
+ /* index of the requested entry inside its pack */
+ i = (eofs - base) >> amortizedshift;
+
+ lo = decode_compactedbits(lclusterbits, lomask,
+ in, encodebits * i, &type);
+ m->type = type;
+ if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+ m->clusterofs = 1 << lclusterbits;
+
+ /* figure out lookahead_distance: delta[1] if needed */
+ if (lookahead)
+ m->delta[1] = get_compacted_la_distance(lclusterbits,
+ encodebits,
+ vcnt, in, i);
+ if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+ if (!big_pcluster) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+ m->delta[0] = 1;
+ return 0;
+ } else if (i + 1 != (int)vcnt) {
+ m->delta[0] = lo;
+ return 0;
+ }
+ /*
+ * since the last lcluster in the pack is special,
+ * of which lo saves delta[1] rather than delta[0].
+ * Hence, get delta[0] by the previous lcluster indirectly.
+ */
+ lo = decode_compactedbits(lclusterbits, lomask,
+ in, encodebits * (i - 1), &type);
+ if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+ lo = 0;
+ else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT)
+ lo = 1;
+ m->delta[0] = lo + 1;
+ return 0;
+ }
+ m->clusterofs = lo;
+ m->delta[0] = 0;
+ /* figure out blkaddr (pblk) for HEAD lclusters */
+ if (!big_pcluster) {
+ /* walk backwards, jumping over NONHEAD runs via their distance */
+ nblk = 1;
+ while (i > 0) {
+ --i;
+ lo = decode_compactedbits(lclusterbits, lomask,
+ in, encodebits * i, &type);
+ if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+ i -= lo;
+
+ if (i >= 0)
+ ++nblk;
+ }
+ } else {
+ nblk = 0;
+ while (i > 0) {
+ --i;
+ lo = decode_compactedbits(lclusterbits, lomask,
+ in, encodebits * i, &type);
+ if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+ /* a CBLKCNT entry carries the block count of its pcluster */
+ if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+ --i;
+ nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+ continue;
+ }
+ if (lo == 1) {
+ DBG_BUGON(1);
+ /* --i; ++nblk; continue; */
+ return -EFSCORRUPTED;
+ }
+ i -= lo - 2;
+ continue;
+ }
+ ++nblk;
+ }
+ }
+ /* the base block address sits in the last 4 bytes of the pack */
+ in += (vcnt << amortizedshift) - sizeof(__le32);
+ m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
+ return 0;
+}
+
+/*
+ * Locate and decode the compacted index of logical cluster @lcn.
+ *
+ * The index area starts right after the map header. It consists of an
+ * initial run of 4-byte entries up to the next 32-byte boundary, then
+ * (when Z_EROFS_ADVISE_COMPACTED_2B is set) a run of 2-byte entries in
+ * multiples of 16, then 4-byte entries for the rest. The function works out
+ * which run @lcn falls into, loads the containing block and hands over to
+ * unpack_compacted_index().
+ *
+ * Returns -EOPNOTSUPP unless lclusters are 4KiB, -EINVAL when @lcn is past
+ * the end of the file, otherwise the result of the reload/unpack steps.
+ */
+static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+ unsigned long lcn, bool lookahead)
+{
+ struct erofs_inode *const vi = m->inode;
+ const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ const erofs_off_t ebase = round_up(iloc(vi->nid) + vi->inode_isize +
+ vi->xattr_isize, 8) +
+ sizeof(struct z_erofs_map_header);
+ const unsigned int totalidx = DIV_ROUND_UP(vi->i_size, EROFS_BLKSIZ);
+ unsigned int compacted_4b_initial, compacted_2b;
+ unsigned int amortizedshift;
+ erofs_off_t pos;
+ int err;
+
+ if (lclusterbits != 12)
+ return -EOPNOTSUPP;
+
+ if (lcn >= totalidx)
+ return -EINVAL;
+
+ m->lcn = lcn;
+ /* used to align to 32-byte (compacted_2b) alignment */
+ compacted_4b_initial = (32 - ebase % 32) / 4;
+ if (compacted_4b_initial == 32 / 4)
+ compacted_4b_initial = 0;
+
+ if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B)
+ compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
+ else
+ compacted_2b = 0;
+
+ pos = ebase;
+ /* initial 4-byte run */
+ if (lcn < compacted_4b_initial) {
+ amortizedshift = 2;
+ goto out;
+ }
+ pos += compacted_4b_initial * 4;
+ lcn -= compacted_4b_initial;
+
+ /* 2-byte run */
+ if (lcn < compacted_2b) {
+ amortizedshift = 1;
+ goto out;
+ }
+ /* trailing 4-byte run */
+ pos += compacted_2b * 2;
+ lcn -= compacted_2b;
+ amortizedshift = 2;
+out:
+ pos += lcn * (1 << amortizedshift);
+ err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+ if (err)
+ return err;
+ return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos),
+ lookahead);
+}
+
+/*
+ * Decode the index of logical cluster @lcn with the decoder matching the
+ * inode's data layout.
+ *
+ * @lookahead is only meaningful for the compacted layout. Returns the
+ * decoder's result, or -EINVAL when the layout is not a compressed one.
+ */
+static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+					  unsigned int lcn, bool lookahead)
+{
+	switch (m->inode->datalayout) {
+	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+		return legacy_load_cluster_from_disk(m, lcn);
+	case EROFS_INODE_FLAT_COMPRESSION:
+		return compacted_load_cluster_from_disk(m, lcn, lookahead);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Step back @lookback_distance lclusters from m->lcn to find the head
+ * lcluster of the current extent.
+ *
+ * If the lcluster reached is itself NONHEAD the function recurses with its
+ * delta[0]. When a HEAD or PLAIN lcluster is reached, m->headtype is set
+ * and map->m_la becomes the logical start of the extent.
+ *
+ * Returns 0 on success, -EFSCORRUPTED for a distance that reaches before
+ * lcluster 0 or a NONHEAD lcluster with a zero distance, -EOPNOTSUPP for an
+ * unknown type, or the error of the index load.
+ */
+static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
+ unsigned int lookback_distance)
+{
+ struct erofs_inode *const vi = m->inode;
+ struct erofs_map_blocks *const map = m->map;
+ const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ unsigned long lcn = m->lcn;
+ int err;
+
+ if (lcn < lookback_distance) {
+ erofs_err("bogus lookback distance @ nid %llu",
+ (unsigned long long)vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+
+ /* load extent head logical cluster if needed */
+ lcn -= lookback_distance;
+ err = z_erofs_load_cluster_from_disk(m, lcn, false);
+ if (err)
+ return err;
+
+ switch (m->type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ /* a zero distance would recurse on the same lcluster forever */
+ if (!m->delta[0]) {
+ erofs_err("invalid lookback distance 0 @ nid %llu",
+ (unsigned long long)vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ return z_erofs_extent_lookback(m, m->delta[0]);
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ m->headtype = m->type;
+ map->m_la = (lcn << lclusterbits) | m->clusterofs;
+ break;
+ default:
+ erofs_err("unknown type %u @ lcn %lu of nid %llu",
+ m->type, lcn, (unsigned long long)vi->nid);
+ DBG_BUGON(1);
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+/*
+ * Work out the physical (compressed) length of the extent whose head has
+ * just been decoded into @m and store it in map->m_plen.
+ *
+ * Extents with a PLAIN head, and all extents of inodes without the big
+ * pcluster advise bit, occupy exactly one lcluster. Otherwise the block
+ * count is taken from m->compressedlcs, loading the lcluster that follows
+ * the head when that count has not been seen yet.
+ *
+ * Returns 0 on success, -EFSCORRUPTED when the expected CBLKCNT entry is
+ * missing or malformed, or the error of the index load.
+ */
+static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
+ unsigned int initial_lcn)
+{
+ struct erofs_inode *const vi = m->inode;
+ struct erofs_map_blocks *const map = m->map;
+ const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ unsigned long lcn;
+ int err;
+
+ DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
+ m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD);
+ if (m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
+ !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
+ map->m_plen = 1 << lclusterbits;
+ return 0;
+ }
+
+ lcn = m->lcn + 1;
+ /* block count already collected while decoding earlier lclusters */
+ if (m->compressedlcs)
+ goto out;
+
+ err = z_erofs_load_cluster_from_disk(m, lcn, false);
+ if (err)
+ return err;
+
+ /*
+ * If the 1st NONHEAD lcluster has already been handled initially w/o
+ * valid compressedlcs, which means at least it mustn't be CBLKCNT, or
+ * an internal implementation error is detected.
+ *
+ * The following code can also handle it properly anyway, but let's
+ * BUG_ON in the debugging mode only for developers to notice that.
+ */
+ DBG_BUGON(lcn == initial_lcn &&
+ m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
+
+ switch (m->type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ /*
+ * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
+ * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
+ */
+ m->compressedlcs = 1;
+ break;
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ if (m->delta[0] != 1)
+ goto err_bonus_cblkcnt;
+ if (m->compressedlcs)
+ break;
+ /* fallthrough */
+ default:
+ erofs_err("cannot found CBLKCNT @ lcn %lu of nid %llu",
+ lcn, vi->nid | 0ULL);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+out:
+ map->m_plen = m->compressedlcs << lclusterbits;
+ return 0;
+err_bonus_cblkcnt:
+ erofs_err("bogus CBLKCNT @ lcn %lu of nid %llu",
+ lcn, vi->nid | 0ULL);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+}
+
+/*
+ * Compute the exact logical (decompressed) length of the current extent
+ * and store it in map->m_llen.
+ *
+ * Starting at m->lcn, lclusters are loaded with lookahead enabled and
+ * skipped by their delta[1] until a HEAD/PLAIN lcluster other than the
+ * extent's own head is found, or the end of the file is reached.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for an unknown lcluster type, or the
+ * error of the index load.
+ */
+static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
+{
+ struct erofs_inode *const vi = m->inode;
+ struct erofs_map_blocks *map = m->map;
+ unsigned int lclusterbits = vi->z_logical_clusterbits;
+ u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
+ int err;
+
+ do {
+ /* handle the last EOF pcluster (no next HEAD lcluster) */
+ if ((lcn << lclusterbits) >= vi->i_size) {
+ map->m_llen = vi->i_size - map->m_la;
+ return 0;
+ }
+
+ err = z_erofs_load_cluster_from_disk(m, lcn, true);
+ if (err)
+ return err;
+
+ if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+ DBG_BUGON(!m->delta[1] &&
+ m->clusterofs != 1 << lclusterbits);
+ } else if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
+ m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD) {
+ /* go on until the next HEAD lcluster */
+ if (lcn != headlcn)
+ break;
+ /* this is the extent's own head: step past it */
+ m->delta[1] = 1;
+ } else {
+ erofs_err("unknown type %u @ lcn %llu of nid %llu",
+ m->type, lcn | 0ULL,
+ (unsigned long long)vi->nid);
+ DBG_BUGON(1);
+ return -EOPNOTSUPP;
+ }
+ lcn += m->delta[1];
+ } while (m->delta[1]);
+
+ map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
+ return 0;
+}
+
+/*
+ * Map the logical offset map->m_la of the compressed inode @vi to its
+ * extent.
+ *
+ * On success map->m_la/m_llen describe the logical extent covering the
+ * requested offset, map->m_pa/m_plen the physical one,
+ * map->m_algorithmformat the format to decode it with and map->m_flags the
+ * EROFS_MAP_* state. With EROFS_GET_BLOCKS_FIEMAP in @flags the exact
+ * decompressed length is computed as well. An offset at or beyond i_size is
+ * reported as unmapped (m_flags == 0).
+ *
+ * Returns 0 on success or a negative error from header/index decoding.
+ * NOTE(review): DBG_BUGON() is plain BUG_ON() in internal.h, so the final
+ * check trips on every negative error other than -ENOMEM - confirm that is
+ * intended for I/O errors and corrupted images.
+ */
+int z_erofs_map_blocks_iter(struct erofs_inode *vi,
+ struct erofs_map_blocks *map,
+ int flags)
+{
+ struct z_erofs_maprecorder m = {
+ .inode = vi,
+ .map = map,
+ .kaddr = map->mpage,
+ };
+ int err = 0;
+ unsigned int lclusterbits, endoff;
+ unsigned long initial_lcn;
+ unsigned long long ofs, end;
+
+ /* when trying to read beyond EOF, leave it unmapped */
+ if (map->m_la >= vi->i_size) {
+ map->m_llen = map->m_la + 1 - vi->i_size;
+ map->m_la = vi->i_size;
+ map->m_flags = 0;
+ goto out;
+ }
+
+ err = z_erofs_fill_inode_lazy(vi);
+ if (err)
+ goto out;
+
+ lclusterbits = vi->z_logical_clusterbits;
+ ofs = map->m_la;
+ initial_lcn = ofs >> lclusterbits;
+ /* offset of the request inside its lcluster */
+ endoff = ofs & ((1 << lclusterbits) - 1);
+
+ err = z_erofs_load_cluster_from_disk(&m, initial_lcn, false);
+ if (err)
+ goto out;
+
+ map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
+ end = (m.lcn + 1ULL) << lclusterbits;
+ switch (m.type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ /* the request lies in the extent that starts in this lcluster */
+ if (endoff >= m.clusterofs) {
+ m.headtype = m.type;
+ map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+ break;
+ }
+ /* m.lcn should be >= 1 if endoff < m.clusterofs */
+ if (!m.lcn) {
+ erofs_err("invalid logical cluster 0 at nid %llu",
+ (unsigned long long)vi->nid);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+ /* the request belongs to the previous extent, which ends here */
+ end = (m.lcn << lclusterbits) | m.clusterofs;
+ map->m_flags |= EROFS_MAP_FULL_MAPPED;
+ m.delta[0] = 1;
+ /* fallthrough */
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ /* get the corresponding first chunk */
+ err = z_erofs_extent_lookback(&m, m.delta[0]);
+ if (err)
+ goto out;
+ break;
+ default:
+ erofs_err("unknown type %u @ offset %llu of nid %llu",
+ m.type, ofs, (unsigned long long)vi->nid);
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ map->m_llen = end - map->m_la;
+ map->m_pa = blknr_to_addr(m.pblk);
+
+ err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
+ if (err)
+ goto out;
+
+ if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
+ map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
+ else
+ map->m_algorithmformat = vi->z_algorithmtype[0];
+
+ if (flags & EROFS_GET_BLOCKS_FIEMAP) {
+ err = z_erofs_get_extent_decompressedlen(&m);
+ if (!err)
+ map->m_flags |= EROFS_MAP_FULL_MAPPED;
+ }
+
+out:
+ /* NOTE(review): on early exits m_pa/m_plen may not have been set yet */
+ erofs_dbg("m_la %" PRIu64 " m_pa %" PRIu64 " m_llen %" PRIu64 " m_plen %" PRIu64 " m_flags 0%o",
+ map->m_la, map->m_pa,
+ map->m_llen, map->m_plen, map->m_flags);
+
+ DBG_BUGON(err < 0 && err != -ENOMEM);
+ return err;
+}
diff --git a/fs/fs.c b/fs/fs.c
index 023f89cafec..99dac0fd79f 100644
--- a/fs/fs.c
+++ b/fs/fs.c
@@ -26,6 +26,7 @@
#include <linux/math64.h>
#include <efi_loader.h>
#include <squashfs.h>
+#include <erofs.h>
DECLARE_GLOBAL_DATA_PTR;
@@ -305,6 +306,27 @@ static struct fstype_info fstypes[] = {
.mkdir = fs_mkdir_unsupported,
},
#endif
+#if IS_ENABLED(CONFIG_FS_EROFS)
+ {
+ .fstype = FS_TYPE_EROFS,
+ .name = "erofs",
+ .null_dev_desc_ok = false,
+ .probe = erofs_probe,
+ .opendir = erofs_opendir,
+ .readdir = erofs_readdir,
+ .ls = fs_ls_generic,
+ .read = erofs_read,
+ .size = erofs_size,
+ .close = erofs_close,
+ .closedir = erofs_closedir,
+ .exists = erofs_exists,
+ .uuid = fs_uuid_unsupported,
+ .write = fs_write_unsupported,
+ .ln = fs_ln_unsupported,
+ .unlink = fs_unlink_unsupported,
+ .mkdir = fs_mkdir_unsupported,
+ },
+#endif
{
.fstype = FS_TYPE_ANY,
.name = "unsupported",