You've already forked linux-apfs-rw
mirror of
https://github.com/linux-apfs/linux-apfs-rw.git
synced 2026-05-01 15:01:34 -07:00
61672f2caf
As all the in-tree modules have been ported to the new Kernel 7 fs mount API, the old API has been removed. This patch adds support for the new set of APIs, while keeping previous kernel support intact. The code is also in review on the Canonical Launchpad bug https://launchpad.net/bugs/2142837. Signed-off-by: Alessio Faina <alessio.faina@canonical.com>
2599 lines
72 KiB
C
2599 lines
72 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (C) 2018 Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
|
|
*/
|
|
|
|
#include <linux/slab.h>
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/mpage.h>
|
|
#include <linux/blk_types.h>
|
|
#include "apfs.h"
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) || RHEL_VERSION_GE(9, 3)
|
|
#include <linux/sched/mm.h>
|
|
#endif
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 13, 0)
|
|
#include <linux/fileattr.h>
|
|
#endif
|
|
|
|
#define MAX_PFK_LEN 512
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 15, 0)
|
|
/*
 * Local grab_cache_page_write_begin(), only built for 6.15+ kernels: looks up
 * the page at @index in @mapping with the FGP_WRITEBEGIN flags and the
 * mapping's own gfp mask.
 */
static struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index)
{
	return pagecache_get_page(mapping, index, FGP_WRITEBEGIN,
				  mapping_gfp_mask(mapping));
}
|
|
#endif
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) || RHEL_VERSION_GE(9, 3)
|
|
|
|
static int apfs_read_folio(struct file *file, struct folio *folio)
|
|
{
|
|
return mpage_read_folio(folio, apfs_get_block);
|
|
}
|
|
|
|
#else
|
|
|
|
static int apfs_readpage(struct file *file, struct page *page)
|
|
{
|
|
return mpage_readpage(page, apfs_get_block);
|
|
}
|
|
|
|
#endif
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) /* Misses mpage_readpages() */
|
|
|
|
static void apfs_readahead(struct readahead_control *rac)
|
|
{
|
|
mpage_readahead(rac, apfs_get_block);
|
|
}
|
|
|
|
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) */
|
|
|
|
static int apfs_readpages(struct file *file, struct address_space *mapping,
|
|
struct list_head *pages, unsigned int nr_pages)
|
|
{
|
|
return mpage_readpages(mapping, pages, nr_pages, apfs_get_block);
|
|
}
|
|
|
|
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) */
|
|
|
|
/**
|
|
* apfs_create_dstream_rec - Create a data stream record
|
|
* @dstream: data stream info
|
|
*
|
|
* Does nothing if the record already exists. TODO: support cloned files.
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_create_dstream_rec(struct apfs_dstream_info *dstream)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query;
|
|
struct apfs_dstream_id_key raw_key;
|
|
struct apfs_dstream_id_val raw_val;
|
|
int ret;
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_dstream_id_key(dstream->ds_id, &query->key);
|
|
query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret != -ENODATA) /* Either an error, or the record already exists */
|
|
goto out;
|
|
|
|
apfs_key_set_hdr(APFS_TYPE_DSTREAM_ID, dstream->ds_id, &raw_key);
|
|
raw_val.refcnt = cpu_to_le32(1);
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret) {
|
|
apfs_err(sb, "insertion failed for id 0x%llx", dstream->ds_id);
|
|
goto out;
|
|
}
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
static int apfs_check_dstream_refcnt(struct inode *inode);
|
|
static int apfs_put_dstream_rec(struct apfs_dstream_info *dstream);
|
|
|
|
/**
|
|
* apfs_inode_create_exclusive_dstream - Make an inode's dstream not shared
|
|
* @inode: the vfs inode
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
int apfs_inode_create_exclusive_dstream(struct inode *inode)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw;
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
struct apfs_dstream_info *dstream = &ai->i_dstream;
|
|
u64 new_id;
|
|
int err;
|
|
|
|
if (!ai->i_has_dstream || !dstream->ds_shared)
|
|
return 0;
|
|
|
|
/*
|
|
* The ds_shared field is not updated when the other user of the
|
|
* dstream puts it, so it could be a false positive. Check it again
|
|
* before actually putting the dstream. The double query is wasteful,
|
|
* but I don't know if it makes sense to optimize this (TODO).
|
|
*/
|
|
err = apfs_check_dstream_refcnt(inode);
|
|
if (err) {
|
|
apfs_err(sb, "failed to check refcnt for ino 0x%llx", apfs_ino(inode));
|
|
return err;
|
|
}
|
|
if (!dstream->ds_shared)
|
|
return 0;
|
|
err = apfs_put_dstream_rec(dstream);
|
|
if (err) {
|
|
apfs_err(sb, "failed to put dstream for ino 0x%llx", apfs_ino(inode));
|
|
return err;
|
|
}
|
|
|
|
apfs_assert_in_transaction(sb, &vsb_raw->apfs_o);
|
|
new_id = le64_to_cpu(vsb_raw->apfs_next_obj_id);
|
|
le64_add_cpu(&vsb_raw->apfs_next_obj_id, 1);
|
|
|
|
err = apfs_clone_extents(dstream, new_id);
|
|
if (err) {
|
|
apfs_err(sb, "failed clone extents for ino 0x%llx", apfs_ino(inode));
|
|
return err;
|
|
}
|
|
|
|
dstream->ds_id = new_id;
|
|
err = apfs_create_dstream_rec(dstream);
|
|
if (err) {
|
|
apfs_err(sb, "failed to create dstream for ino 0x%llx", apfs_ino(inode));
|
|
return err;
|
|
}
|
|
|
|
dstream->ds_shared = false;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_inode_create_dstream_rec - Create the data stream record for an inode
|
|
* @inode: the vfs inode
|
|
*
|
|
* Does nothing if the record already exists. TODO: support cloned files.
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_inode_create_dstream_rec(struct inode *inode)
|
|
{
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
int err;
|
|
|
|
if (ai->i_has_dstream)
|
|
return apfs_inode_create_exclusive_dstream(inode);
|
|
|
|
err = apfs_create_dstream_rec(&ai->i_dstream);
|
|
if (err)
|
|
return err;
|
|
|
|
ai->i_has_dstream = true;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_dstream_adj_refcnt - Adjust dstream record refcount
|
|
* @dstream: data stream info
|
|
* @delta: desired change in reference count
|
|
*
|
|
* Deletes the record if the reference count goes to zero. Returns 0 on success
|
|
* or a negative error code in case of failure.
|
|
*/
|
|
int apfs_dstream_adj_refcnt(struct apfs_dstream_info *dstream, u32 delta)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query;
|
|
struct apfs_dstream_id_val raw_val;
|
|
void *raw = NULL;
|
|
u32 refcnt;
|
|
int ret;
|
|
|
|
ASSERT(APFS_I(dstream->ds_inode)->i_has_dstream);
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_dstream_id_key(dstream->ds_id, &query->key);
|
|
query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret) {
|
|
apfs_err(sb, "query failed for id 0x%llx", dstream->ds_id);
|
|
if (ret == -ENODATA)
|
|
ret = -EFSCORRUPTED;
|
|
goto out;
|
|
}
|
|
|
|
if (query->len != sizeof(raw_val)) {
|
|
apfs_err(sb, "bad value length (%d)", query->len);
|
|
ret = -EFSCORRUPTED;
|
|
goto out;
|
|
}
|
|
raw = query->node->object.data;
|
|
raw_val = *(struct apfs_dstream_id_val *)(raw + query->off);
|
|
refcnt = le32_to_cpu(raw_val.refcnt);
|
|
|
|
refcnt += delta;
|
|
if (refcnt == 0) {
|
|
ret = apfs_btree_remove(query);
|
|
if (ret)
|
|
apfs_err(sb, "removal failed for id 0x%llx", dstream->ds_id);
|
|
goto out;
|
|
}
|
|
|
|
raw_val.refcnt = cpu_to_le32(refcnt);
|
|
ret = apfs_btree_replace(query, NULL /* key */, 0 /* key_len */, &raw_val, sizeof(raw_val));
|
|
if (ret)
|
|
apfs_err(sb, "update failed for id 0x%llx", dstream->ds_id);
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_put_dstream_rec - Put a reference for a data stream record
|
|
* @dstream: data stream info
|
|
*
|
|
* Deletes the record if the reference count goes to zero. Returns 0 on success
|
|
* or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_put_dstream_rec(struct apfs_dstream_info *dstream)
|
|
{
|
|
struct apfs_inode_info *ai = APFS_I(dstream->ds_inode);
|
|
|
|
if (!ai->i_has_dstream)
|
|
return 0;
|
|
return apfs_dstream_adj_refcnt(dstream, -1);
|
|
}
|
|
|
|
/**
|
|
* apfs_create_crypto_rec - Create the crypto state record for an inode
|
|
* @inode: the vfs inode
|
|
*
|
|
* Does nothing if the record already exists. TODO: support cloned files.
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_create_crypto_rec(struct inode *inode)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream;
|
|
struct apfs_query *query;
|
|
struct apfs_crypto_state_key raw_key;
|
|
int ret;
|
|
|
|
if (inode->i_size || inode->i_blocks) /* Already has a dstream */
|
|
return 0;
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_crypto_state_key(dstream->ds_id, &query->key);
|
|
query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret != -ENODATA) /* Either an error, or the record already exists */
|
|
goto out;
|
|
|
|
apfs_key_set_hdr(APFS_TYPE_CRYPTO_STATE, dstream->ds_id, &raw_key);
|
|
if (sbi->s_dflt_pfk) {
|
|
struct apfs_crypto_state_val *raw_val = sbi->s_dflt_pfk;
|
|
unsigned int key_len = le16_to_cpu(raw_val->state.key_len);
|
|
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), raw_val, sizeof(*raw_val) + key_len);
|
|
if (ret)
|
|
apfs_err(sb, "insertion failed for id 0x%llx", dstream->ds_id);
|
|
} else {
|
|
struct apfs_crypto_state_val raw_val;
|
|
|
|
raw_val.refcnt = cpu_to_le32(1);
|
|
raw_val.state.major_version = cpu_to_le16(APFS_WMCS_MAJOR_VERSION);
|
|
raw_val.state.minor_version = cpu_to_le16(APFS_WMCS_MINOR_VERSION);
|
|
raw_val.state.cpflags = 0;
|
|
raw_val.state.persistent_class = cpu_to_le32(APFS_PROTECTION_CLASS_F);
|
|
raw_val.state.key_os_version = 0;
|
|
raw_val.state.key_revision = cpu_to_le16(1);
|
|
raw_val.state.key_len = cpu_to_le16(0);
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret)
|
|
apfs_err(sb, "insertion failed for id 0x%llx", dstream->ds_id);
|
|
}
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_dflt_key_class - Returns default key class for files in volume
|
|
* @sb: volume superblock
|
|
*/
|
|
static unsigned int apfs_dflt_key_class(struct super_block *sb)
|
|
{
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
|
|
if (!sbi->s_dflt_pfk)
|
|
return APFS_PROTECTION_CLASS_F;
|
|
|
|
return le32_to_cpu(sbi->s_dflt_pfk->state.persistent_class);
|
|
}
|
|
|
|
/**
|
|
* apfs_create_crypto_rec - Adjust crypto state record refcount
|
|
* @sb: volume superblock
|
|
* @crypto_id: crypto_id to adjust
|
|
* @delta: desired change in reference count
|
|
*
|
|
* This function is used when adding or removing extents, as each extent holds
|
|
* a reference to the crypto ID. It should also be used when removing inodes,
|
|
* and in that case it should also remove the crypto record (TODO).
|
|
*/
|
|
int apfs_crypto_adj_refcnt(struct super_block *sb, u64 crypto_id, int delta)
|
|
{
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query;
|
|
struct apfs_crypto_state_val *raw_val;
|
|
char *raw;
|
|
int ret;
|
|
|
|
if (!crypto_id)
|
|
return 0;
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_crypto_state_key(crypto_id, &query->key);
|
|
query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret) {
|
|
apfs_err(sb, "query failed for id 0x%llx", crypto_id);
|
|
goto out;
|
|
}
|
|
|
|
ret = apfs_query_join_transaction(query);
|
|
if (ret) {
|
|
apfs_err(sb, "query join failed");
|
|
return ret;
|
|
}
|
|
raw = query->node->object.data;
|
|
raw_val = (void *)raw + query->off;
|
|
|
|
le32_add_cpu(&raw_val->refcnt, delta);
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_crypto_set_key - Modify content of crypto state record
|
|
* @sb: volume superblock
|
|
* @crypto_id: crypto_id to modify
|
|
* @new_val: new crypto state data; new_val->refcnt is overridden
|
|
*
|
|
* This function does not alter the inode's default protection class field.
|
|
* It needs to be done separately if the class changes.
|
|
*/
|
|
static int apfs_crypto_set_key(struct super_block *sb, u64 crypto_id, struct apfs_crypto_state_val *new_val)
|
|
{
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query;
|
|
struct apfs_crypto_state_val *raw_val;
|
|
char *raw;
|
|
int ret;
|
|
unsigned int pfk_len;
|
|
|
|
if (!crypto_id)
|
|
return 0;
|
|
|
|
pfk_len = le16_to_cpu(new_val->state.key_len);
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_crypto_state_key(crypto_id, &query->key);
|
|
query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret) {
|
|
apfs_err(sb, "query failed for id 0x%llx", crypto_id);
|
|
goto out;
|
|
}
|
|
raw = query->node->object.data;
|
|
raw_val = (void *)raw + query->off;
|
|
|
|
new_val->refcnt = raw_val->refcnt;
|
|
|
|
ret = apfs_btree_replace(query, NULL /* key */, 0 /* key_len */, new_val, sizeof(*new_val) + pfk_len);
|
|
if (ret)
|
|
apfs_err(sb, "update failed for id 0x%llx", crypto_id);
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_crypto_get_key - Retrieve content of crypto state record
|
|
* @sb: volume superblock
|
|
* @crypto_id: crypto_id to modify
|
|
* @val: result crypto state data
|
|
* @max_len: maximum allowed value of val->state.key_len
|
|
*/
|
|
static int apfs_crypto_get_key(struct super_block *sb, u64 crypto_id, struct apfs_crypto_state_val *val,
|
|
unsigned int max_len)
|
|
{
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query;
|
|
struct apfs_crypto_state_val *raw_val;
|
|
char *raw;
|
|
int ret;
|
|
unsigned int pfk_len;
|
|
|
|
if (!crypto_id)
|
|
return -ENOENT;
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_crypto_state_key(crypto_id, &query->key);
|
|
query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret)
|
|
goto out;
|
|
raw = query->node->object.data;
|
|
raw_val = (void *)raw + query->off;
|
|
|
|
pfk_len = le16_to_cpu(raw_val->state.key_len);
|
|
if (pfk_len > max_len) {
|
|
ret = -ENOSPC;
|
|
goto out;
|
|
}
|
|
|
|
memcpy(val, raw_val, sizeof(*val) + pfk_len);
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 17, 0)
|
|
int __apfs_write_begin(const struct kiocb *file, struct address_space *mapping, loff_t pos, unsigned int len, unsigned int flags, struct page **pagep, void **fsdata)
|
|
#else
|
|
int __apfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned int len, unsigned int flags, struct page **pagep, void **fsdata)
|
|
#endif
|
|
{
|
|
struct inode *inode = mapping->host;
|
|
struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream;
|
|
struct super_block *sb = inode->i_sb;
|
|
struct page *page;
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 7, 0)
|
|
struct folio *folio;
|
|
#endif
|
|
struct buffer_head *bh, *head;
|
|
unsigned int blocksize, block_start, block_end, from, to;
|
|
pgoff_t index = pos >> PAGE_SHIFT;
|
|
sector_t iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
|
|
loff_t i_blks_end;
|
|
int err;
|
|
|
|
apfs_inode_join_transaction(sb, inode);
|
|
|
|
err = apfs_inode_create_dstream_rec(inode);
|
|
if (err) {
|
|
apfs_err(sb, "failed to create dstream for ino 0x%llx", apfs_ino(inode));
|
|
return err;
|
|
}
|
|
|
|
if (apfs_vol_is_encrypted(sb)) {
|
|
err = apfs_create_crypto_rec(inode);
|
|
if (err) {
|
|
apfs_err(sb, "crypto creation failed for ino 0x%llx", apfs_ino(inode));
|
|
return err;
|
|
}
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) || RHEL_VERSION_GE(9, 3)
|
|
flags = memalloc_nofs_save();
|
|
page = grab_cache_page_write_begin(mapping, index);
|
|
memalloc_nofs_restore(flags);
|
|
#else
|
|
page = grab_cache_page_write_begin(mapping, index, flags | AOP_FLAG_NOFS);
|
|
#endif
|
|
if (!page)
|
|
return -ENOMEM;
|
|
if (!page_has_buffers(page)) {
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0)
|
|
create_empty_buffers(page, sb->s_blocksize, 0);
|
|
#else
|
|
folio = page_folio(page);
|
|
bh = folio_buffers(folio);
|
|
if (!bh)
|
|
bh = create_empty_buffers(folio, sb->s_blocksize, 0);
|
|
#endif
|
|
}
|
|
|
|
/* CoW moves existing blocks, so read them but mark them as unmapped */
|
|
head = page_buffers(page);
|
|
blocksize = head->b_size;
|
|
i_blks_end = (inode->i_size + sb->s_blocksize - 1) >> inode->i_blkbits;
|
|
i_blks_end <<= inode->i_blkbits;
|
|
if (i_blks_end >= pos) {
|
|
from = pos & (PAGE_SIZE - 1);
|
|
to = from + min(i_blks_end - pos, (loff_t)len);
|
|
} else {
|
|
/* TODO: deal with preallocated tail blocks */
|
|
from = UINT_MAX;
|
|
to = 0;
|
|
}
|
|
for (bh = head, block_start = 0; bh != head || !block_start;
|
|
block_start = block_end, bh = bh->b_this_page, ++iblock) {
|
|
block_end = block_start + blocksize;
|
|
if (to > block_start && from < block_end) {
|
|
if (buffer_trans(bh))
|
|
continue;
|
|
if (!buffer_mapped(bh)) {
|
|
err = __apfs_get_block(dstream, iblock, bh,
|
|
false /* create */);
|
|
if (err) {
|
|
apfs_err(sb, "failed to map block for ino 0x%llx", apfs_ino(inode));
|
|
goto out_put_page;
|
|
}
|
|
}
|
|
if (buffer_mapped(bh) && !buffer_uptodate(bh)) {
|
|
get_bh(bh);
|
|
lock_buffer(bh);
|
|
bh->b_end_io = end_buffer_read_sync;
|
|
apfs_submit_bh(REQ_OP_READ, 0, bh);
|
|
wait_on_buffer(bh);
|
|
if (!buffer_uptodate(bh)) {
|
|
apfs_err(sb, "failed to read block for ino 0x%llx", apfs_ino(inode));
|
|
err = -EIO;
|
|
goto out_put_page;
|
|
}
|
|
}
|
|
clear_buffer_mapped(bh);
|
|
}
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 12, 0)
|
|
err = __block_write_begin(page_folio(page), pos, len, apfs_get_new_block);
|
|
#else
|
|
err = __block_write_begin(page, pos, len, apfs_get_new_block);
|
|
#endif
|
|
if (err) {
|
|
apfs_err(sb, "CoW failed in inode 0x%llx", apfs_ino(inode));
|
|
goto out_put_page;
|
|
}
|
|
|
|
*pagep = page;
|
|
return 0;
|
|
|
|
out_put_page:
|
|
unlock_page(page);
|
|
put_page(page);
|
|
return err;
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 17, 0)
|
|
static int apfs_write_begin(const struct kiocb *file, struct address_space *mapping,
|
|
loff_t pos, unsigned int len,
|
|
struct folio **foliop, void **fsdata)
|
|
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(6, 12, 0)
|
|
static int apfs_write_begin(struct file *file, struct address_space *mapping,
|
|
loff_t pos, unsigned int len,
|
|
struct folio **foliop, void **fsdata)
|
|
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) || RHEL_VERSION_GE(9, 3)
|
|
static int apfs_write_begin(struct file *file, struct address_space *mapping,
|
|
loff_t pos, unsigned int len,
|
|
struct page **pagep, void **fsdata)
|
|
#else
|
|
static int apfs_write_begin(struct file *file, struct address_space *mapping,
|
|
loff_t pos, unsigned int len, unsigned int flags,
|
|
struct page **pagep, void **fsdata)
|
|
#endif
|
|
{
|
|
struct inode *inode = mapping->host;
|
|
struct super_block *sb = inode->i_sb;
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 12, 0)
|
|
struct page *page = NULL;
|
|
struct page **pagep = &page;
|
|
#endif
|
|
int err;
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) || RHEL_VERSION_GE(9, 3)
|
|
unsigned int flags = 0;
|
|
#endif
|
|
|
|
if (unlikely(pos >= APFS_MAX_FILE_SIZE))
|
|
return -EFBIG;
|
|
|
|
err = apfs_transaction_start(sb, APFS_TRANS_REG);
|
|
if (err)
|
|
return err;
|
|
|
|
err = __apfs_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
|
|
if (err)
|
|
goto fail;
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 12, 0)
|
|
*foliop = page_folio(page);
|
|
#endif
|
|
return 0;
|
|
|
|
fail:
|
|
apfs_transaction_abort(sb);
|
|
return err;
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 17, 0)
|
|
int __apfs_write_end(const struct kiocb *file, struct address_space *mapping, loff_t pos, unsigned int len, unsigned int copied, struct page *page, void *fsdata)
|
|
#else
|
|
int __apfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned int len, unsigned int copied, struct page *page, void *fsdata)
|
|
#endif
|
|
{
|
|
struct inode *inode = mapping->host;
|
|
struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream;
|
|
int ret, err;
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 12, 0)
|
|
ret = generic_write_end(file, mapping, pos, len, copied, page_folio(page), fsdata);
|
|
#else
|
|
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
|
|
#endif
|
|
dstream->ds_size = i_size_read(inode);
|
|
if (ret < len && pos + len > inode->i_size) {
|
|
truncate_pagecache(inode, inode->i_size);
|
|
err = apfs_truncate(dstream, inode->i_size);
|
|
if (err) {
|
|
apfs_err(inode->i_sb, "truncation failed for ino 0x%llx", apfs_ino(inode));
|
|
return err;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 17, 0)
|
|
static int apfs_write_end(const struct kiocb *file, struct address_space *mapping,
|
|
loff_t pos, unsigned int len, unsigned int copied,
|
|
struct folio *folio, void *fsdata)
|
|
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(6, 12, 0)
|
|
static int apfs_write_end(struct file *file, struct address_space *mapping,
|
|
loff_t pos, unsigned int len, unsigned int copied,
|
|
struct folio *folio, void *fsdata)
|
|
#else
|
|
static int apfs_write_end(struct file *file, struct address_space *mapping,
|
|
loff_t pos, unsigned int len, unsigned int copied,
|
|
struct page *page, void *fsdata)
|
|
#endif
|
|
{
|
|
struct inode *inode = mapping->host;
|
|
struct super_block *sb = inode->i_sb;
|
|
struct apfs_nx_transaction *trans = &APFS_NXI(sb)->nx_transaction;
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 12, 0)
|
|
struct page *page = &folio->page;
|
|
#endif
|
|
int ret, err;
|
|
|
|
ret = __apfs_write_end(file, mapping, pos, len, copied, page, fsdata);
|
|
if (ret < 0) {
|
|
err = ret;
|
|
goto fail;
|
|
}
|
|
|
|
if ((pos + ret) & (sb->s_blocksize - 1))
|
|
trans->t_state |= APFS_NX_TRANS_INCOMPLETE_BLOCK;
|
|
else
|
|
trans->t_state &= ~APFS_NX_TRANS_INCOMPLETE_BLOCK;
|
|
|
|
err = apfs_transaction_commit(sb);
|
|
if (!err)
|
|
return ret;
|
|
|
|
fail:
|
|
apfs_transaction_abort(sb);
|
|
return err;
|
|
}
|
|
|
|
/* Invalidation callback for apfs_aops that deliberately does nothing */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) || RHEL_VERSION_GE(9, 3)
static void apfs_noop_invalidate_folio(struct folio *folio, size_t offset, size_t length)
#else
static void apfs_noop_invalidatepage(struct page *page, unsigned int offset, unsigned int length)
#endif
{
	/* Intentionally empty */
}
|
|
|
|
/* bmap is not implemented to avoid issues with CoW on swapfiles */
|
|
static const struct address_space_operations apfs_aops = {
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) || RHEL_VERSION_GE(9, 2)
|
|
.dirty_folio = block_dirty_folio,
|
|
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0)
|
|
.set_page_dirty = __set_page_dirty_buffers,
|
|
#endif
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) || RHEL_VERSION_GE(9, 3)
|
|
.read_folio = apfs_read_folio,
|
|
#else
|
|
.readpage = apfs_readpage,
|
|
#endif
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0)
|
|
.readahead = apfs_readahead,
|
|
#else
|
|
.readpages = apfs_readpages,
|
|
#endif
|
|
|
|
.write_begin = apfs_write_begin,
|
|
.write_end = apfs_write_end,
|
|
|
|
/* The intention is to keep bhs around until the transaction is over */
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) || RHEL_VERSION_GE(9, 3)
|
|
.invalidate_folio = apfs_noop_invalidate_folio,
|
|
#else
|
|
.invalidatepage = apfs_noop_invalidatepage,
|
|
#endif
|
|
};
|
|
|
|
/**
|
|
* apfs_inode_set_ops - Set up an inode's operations
|
|
* @inode: vfs inode to set up
|
|
* @rdev: device id (0 if not a device file)
|
|
* @compressed: is this a compressed inode?
|
|
*
|
|
* For device files, also sets the device id to @rdev.
|
|
*/
|
|
static void apfs_inode_set_ops(struct inode *inode, dev_t rdev, bool compressed)
|
|
{
|
|
/* A lot of operations still missing, of course */
|
|
switch (inode->i_mode & S_IFMT) {
|
|
case S_IFREG:
|
|
inode->i_op = &apfs_file_inode_operations;
|
|
if (compressed) {
|
|
inode->i_fop = &apfs_compress_file_operations;
|
|
inode->i_mapping->a_ops = &apfs_compress_aops;
|
|
} else {
|
|
inode->i_fop = &apfs_file_operations;
|
|
inode->i_mapping->a_ops = &apfs_aops;
|
|
}
|
|
break;
|
|
case S_IFDIR:
|
|
inode->i_op = &apfs_dir_inode_operations;
|
|
inode->i_fop = &apfs_dir_operations;
|
|
break;
|
|
case S_IFLNK:
|
|
inode->i_op = &apfs_symlink_inode_operations;
|
|
break;
|
|
default:
|
|
inode->i_op = &apfs_special_inode_operations;
|
|
init_special_inode(inode, inode->i_mode, rdev);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* apfs_inode_from_query - Read the inode found by a successful query
|
|
* @query: the query that found the record
|
|
* @inode: vfs inode to be filled with the read data
|
|
*
|
|
* Reads the inode record into @inode and performs some basic sanity checks,
|
|
* mostly as a protection against crafted filesystems. Returns 0 on success
|
|
* or a negative error code otherwise.
|
|
*/
|
|
static int apfs_inode_from_query(struct apfs_query *query, struct inode *inode)
|
|
{
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
struct apfs_dstream_info *dstream = &ai->i_dstream;
|
|
struct apfs_inode_val *inode_val;
|
|
char *raw = query->node->object.data;
|
|
char *xval = NULL;
|
|
int xlen;
|
|
u32 rdev = 0, bsd_flags;
|
|
bool compressed = false;
|
|
|
|
if (query->len < sizeof(*inode_val))
|
|
goto corrupted;
|
|
|
|
inode_val = (struct apfs_inode_val *)(raw + query->off);
|
|
|
|
ai->i_parent_id = le64_to_cpu(inode_val->parent_id);
|
|
dstream->ds_id = le64_to_cpu(inode_val->private_id);
|
|
inode->i_mode = le16_to_cpu(inode_val->mode);
|
|
ai->i_key_class = le32_to_cpu(inode_val->default_protection_class);
|
|
ai->i_int_flags = le64_to_cpu(inode_val->internal_flags);
|
|
|
|
ai->i_saved_uid = le32_to_cpu(inode_val->owner);
|
|
i_uid_write(inode, ai->i_saved_uid);
|
|
ai->i_saved_gid = le32_to_cpu(inode_val->group);
|
|
i_gid_write(inode, ai->i_saved_gid);
|
|
|
|
ai->i_bsd_flags = bsd_flags = le32_to_cpu(inode_val->bsd_flags);
|
|
if (bsd_flags & APFS_INOBSD_IMMUTABLE)
|
|
inode->i_flags |= S_IMMUTABLE;
|
|
if (bsd_flags & APFS_INOBSD_APPEND)
|
|
inode->i_flags |= S_APPEND;
|
|
|
|
if (!S_ISDIR(inode->i_mode)) {
|
|
/*
|
|
* Directory inodes don't store their link count, so to provide
|
|
* it we would have to actually count the subdirectories. The
|
|
* HFS/HFS+ modules just leave it at 1, and so do we, for now.
|
|
*/
|
|
set_nlink(inode, le32_to_cpu(inode_val->nlink));
|
|
} else {
|
|
ai->i_nchildren = le32_to_cpu(inode_val->nchildren);
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0)
|
|
inode->i_ctime = ns_to_timespec64(le64_to_cpu(inode_val->change_time));
|
|
#else
|
|
inode_set_ctime_to_ts(inode, ns_to_timespec64(le64_to_cpu(inode_val->change_time)));
|
|
#endif
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0)
|
|
inode->i_atime = ns_to_timespec64(le64_to_cpu(inode_val->access_time));
|
|
inode->i_mtime = ns_to_timespec64(le64_to_cpu(inode_val->mod_time));
|
|
#else
|
|
inode_set_atime_to_ts(inode, ns_to_timespec64(le64_to_cpu(inode_val->access_time)));
|
|
inode_set_mtime_to_ts(inode, ns_to_timespec64(le64_to_cpu(inode_val->mod_time)));
|
|
#endif
|
|
ai->i_crtime = ns_to_timespec64(le64_to_cpu(inode_val->create_time));
|
|
|
|
dstream->ds_size = inode->i_size = inode->i_blocks = 0;
|
|
ai->i_has_dstream = false;
|
|
if ((bsd_flags & APFS_INOBSD_COMPRESSED) && !S_ISDIR(inode->i_mode)) {
|
|
if (!apfs_compress_get_size(inode, &inode->i_size)) {
|
|
inode->i_blocks = (inode->i_size + 511) >> 9;
|
|
compressed = true;
|
|
}
|
|
} else {
|
|
xlen = apfs_find_xfield(inode_val->xfields,
|
|
query->len - sizeof(*inode_val),
|
|
APFS_INO_EXT_TYPE_DSTREAM, &xval);
|
|
if (xlen >= sizeof(struct apfs_dstream)) {
|
|
struct apfs_dstream *dstream_raw = (struct apfs_dstream *)xval;
|
|
|
|
dstream->ds_size = inode->i_size = le64_to_cpu(dstream_raw->size);
|
|
inode->i_blocks = le64_to_cpu(dstream_raw->alloced_size) >> 9;
|
|
ai->i_has_dstream = true;
|
|
}
|
|
}
|
|
xval = NULL;
|
|
|
|
/* TODO: move each xfield read to its own function */
|
|
dstream->ds_sparse_bytes = 0;
|
|
xlen = apfs_find_xfield(inode_val->xfields, query->len - sizeof(*inode_val), APFS_INO_EXT_TYPE_SPARSE_BYTES, &xval);
|
|
if (xlen >= sizeof(__le64)) {
|
|
__le64 *sparse_bytes_p = (__le64 *)xval;
|
|
|
|
dstream->ds_sparse_bytes = le64_to_cpup(sparse_bytes_p);
|
|
}
|
|
xval = NULL;
|
|
|
|
rdev = 0;
|
|
xlen = apfs_find_xfield(inode_val->xfields,
|
|
query->len - sizeof(*inode_val),
|
|
APFS_INO_EXT_TYPE_RDEV, &xval);
|
|
if (xlen >= sizeof(__le32)) {
|
|
__le32 *rdev_p = (__le32 *)xval;
|
|
|
|
rdev = le32_to_cpup(rdev_p);
|
|
}
|
|
|
|
apfs_inode_set_ops(inode, rdev, compressed);
|
|
return 0;
|
|
|
|
corrupted:
|
|
apfs_err(inode->i_sb, "bad inode record for inode 0x%llx", apfs_ino(inode));
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
/**
|
|
* apfs_inode_lookup - Lookup an inode record in the catalog b-tree
|
|
* @inode: vfs inode to lookup
|
|
*
|
|
* Runs a catalog query for the apfs_ino(@inode) inode record; returns a pointer
|
|
* to the query structure on success, or an error pointer in case of failure.
|
|
*/
|
|
static struct apfs_query *apfs_inode_lookup(const struct inode *inode)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query;
|
|
int ret;
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return ERR_PTR(-ENOMEM);
|
|
apfs_init_inode_key(apfs_ino(inode), &query->key);
|
|
query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (!ret)
|
|
return query;
|
|
|
|
/* Don't complain if an orphan is already gone */
|
|
if (!current_work() || ret != -ENODATA)
|
|
apfs_err(sb, "query failed for id 0x%llx", apfs_ino(inode));
|
|
apfs_free_query(query);
|
|
return ERR_PTR(ret);
|
|
}
|
|
|
|
/**
|
|
* apfs_test_inode - Check if the inode matches a 64-bit inode number
|
|
* @inode: inode to test
|
|
* @cnid: pointer to the inode number
|
|
*/
|
|
static int apfs_test_inode(struct inode *inode, void *cnid)
|
|
{
|
|
u64 *ino = cnid;
|
|
|
|
return apfs_ino(inode) == *ino;
|
|
}
|
|
|
|
/**
|
|
* apfs_set_inode - Set a 64-bit inode number on the given inode
|
|
* @inode: inode to set
|
|
* @cnid: pointer to the inode number
|
|
*/
|
|
static int apfs_set_inode(struct inode *inode, void *cnid)
|
|
{
|
|
apfs_set_ino(inode, *(u64 *)cnid);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_iget_locked - Wrapper for iget5_locked()
|
|
* @sb: filesystem superblock
|
|
* @cnid: 64-bit inode number
|
|
*
|
|
* Works the same as iget_locked(), but can handle 64-bit inode numbers on
|
|
* 32-bit architectures.
|
|
*/
|
|
static struct inode *apfs_iget_locked(struct super_block *sb, u64 cnid)
|
|
{
|
|
return iget5_locked(sb, cnid, apfs_test_inode, apfs_set_inode, &cnid);
|
|
}
|
|
|
|
/**
|
|
* apfs_check_dstream_refcnt - Check if an inode's dstream is shared
|
|
* @inode: the inode to check
|
|
*
|
|
* Sets the value of ds_shared for the inode's dstream. Returns 0 on success,
|
|
* or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_check_dstream_refcnt(struct inode *inode)
|
|
{
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
struct apfs_dstream_info *dstream = &ai->i_dstream;
|
|
struct super_block *sb = inode->i_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query = NULL;
|
|
struct apfs_dstream_id_val raw_val;
|
|
void *raw = NULL;
|
|
u32 refcnt;
|
|
int ret;
|
|
|
|
if (!ai->i_has_dstream) {
|
|
dstream->ds_shared = false;
|
|
return 0;
|
|
}
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_dstream_id_key(dstream->ds_id, &query->key);
|
|
query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret) {
|
|
apfs_err(sb, "query failed for id 0x%llx", dstream->ds_id);
|
|
if (ret == -ENODATA)
|
|
ret = -EFSCORRUPTED;
|
|
goto fail;
|
|
}
|
|
|
|
if (query->len != sizeof(raw_val)) {
|
|
ret = -EFSCORRUPTED;
|
|
goto fail;
|
|
}
|
|
raw = query->node->object.data;
|
|
raw_val = *(struct apfs_dstream_id_val *)(raw + query->off);
|
|
refcnt = le32_to_cpu(raw_val.refcnt);
|
|
|
|
dstream->ds_shared = refcnt > 1;
|
|
fail:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_iget - Populate inode structures with metadata from disk
|
|
* @sb: filesystem superblock
|
|
* @cnid: inode number
|
|
*
|
|
* Populates the vfs inode and the corresponding apfs_inode_info structure.
|
|
* Returns a pointer to the vfs inode in case of success, or an appropriate
|
|
* error pointer otherwise.
|
|
*/
|
|
struct inode *apfs_iget(struct super_block *sb, u64 cnid)
|
|
{
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
|
|
struct inode *inode;
|
|
struct apfs_query *query;
|
|
int err;
|
|
|
|
inode = apfs_iget_locked(sb, cnid);
|
|
if (!inode)
|
|
return ERR_PTR(-ENOMEM);
|
|
if (!(apfs_inode_state_read_once(inode) & I_NEW))
|
|
return inode;
|
|
|
|
down_read(&nxi->nx_big_sem);
|
|
query = apfs_inode_lookup(inode);
|
|
if (IS_ERR(query)) {
|
|
err = PTR_ERR(query);
|
|
/* Don't complain if an orphan is already gone */
|
|
if (!current_work() || err != -ENODATA)
|
|
apfs_err(sb, "lookup failed for ino 0x%llx", cnid);
|
|
goto fail;
|
|
}
|
|
err = apfs_inode_from_query(query, inode);
|
|
apfs_free_query(query);
|
|
if (err)
|
|
goto fail;
|
|
err = apfs_check_dstream_refcnt(inode);
|
|
if (err) {
|
|
apfs_err(sb, "refcnt check failed for ino 0x%llx", cnid);
|
|
goto fail;
|
|
}
|
|
up_read(&nxi->nx_big_sem);
|
|
|
|
/* Allow the user to override the ownership */
|
|
if (uid_valid(sbi->s_uid))
|
|
inode->i_uid = sbi->s_uid;
|
|
if (gid_valid(sbi->s_gid))
|
|
inode->i_gid = sbi->s_gid;
|
|
|
|
/* Inode flags are not important for now, leave them at 0 */
|
|
unlock_new_inode(inode);
|
|
return inode;
|
|
|
|
fail:
|
|
up_read(&nxi->nx_big_sem);
|
|
iget_failed(inode);
|
|
return ERR_PTR(err);
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) /* No statx yet... */
|
|
|
|
int apfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
|
struct kstat *stat)
|
|
{
|
|
struct inode *inode = d_inode(dentry);
|
|
|
|
generic_fillattr(inode, stat);
|
|
stat->dev = APFS_SB(inode->i_sb)->s_anon_dev;
|
|
stat->ino = apfs_ino(inode);
|
|
return 0;
|
|
}
|
|
|
|
#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) */
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)
|
|
int apfs_getattr(const struct path *path, struct kstat *stat,
|
|
u32 request_mask, unsigned int query_flags)
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) && !RHEL_VERSION_GE(9, 6)
|
|
int apfs_getattr(struct user_namespace *mnt_userns,
|
|
const struct path *path, struct kstat *stat, u32 request_mask,
|
|
unsigned int query_flags)
|
|
#else
|
|
int apfs_getattr(struct mnt_idmap *idmap,
|
|
const struct path *path, struct kstat *stat, u32 request_mask,
|
|
unsigned int query_flags)
|
|
#endif
|
|
{
|
|
struct inode *inode = d_inode(path->dentry);
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
|
|
stat->result_mask |= STATX_BTIME;
|
|
stat->btime = ai->i_crtime;
|
|
|
|
if (ai->i_bsd_flags & APFS_INOBSD_APPEND)
|
|
stat->attributes |= STATX_ATTR_APPEND;
|
|
if (ai->i_bsd_flags & APFS_INOBSD_IMMUTABLE)
|
|
stat->attributes |= STATX_ATTR_IMMUTABLE;
|
|
if (ai->i_bsd_flags & APFS_INOBSD_NODUMP)
|
|
stat->attributes |= STATX_ATTR_NODUMP;
|
|
if (ai->i_bsd_flags & APFS_INOBSD_COMPRESSED)
|
|
stat->attributes |= STATX_ATTR_COMPRESSED;
|
|
|
|
stat->attributes_mask |= STATX_ATTR_APPEND;
|
|
stat->attributes_mask |= STATX_ATTR_IMMUTABLE;
|
|
stat->attributes_mask |= STATX_ATTR_NODUMP;
|
|
stat->attributes_mask |= STATX_ATTR_COMPRESSED;
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)
|
|
generic_fillattr(inode, stat);
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) && !RHEL_VERSION_GE(9, 6)
|
|
generic_fillattr(mnt_userns, inode, stat);
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0)
|
|
generic_fillattr(idmap, inode, stat);
|
|
#else
|
|
generic_fillattr(idmap, request_mask, inode, stat);
|
|
#endif
|
|
|
|
stat->dev = APFS_SB(inode->i_sb)->s_anon_dev;
|
|
stat->ino = apfs_ino(inode);
|
|
return 0;
|
|
}
|
|
|
|
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) */
|
|
|
|
/**
|
|
* apfs_build_inode_val - Allocate and initialize the value for an inode record
|
|
* @inode: vfs inode to record
|
|
* @qname: filename for primary link
|
|
* @val_p: on return, a pointer to the new on-disk value structure
|
|
*
|
|
* Returns the length of the value, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_build_inode_val(struct inode *inode, const struct qstr *qname,
|
|
struct apfs_inode_val **val_p)
|
|
{
|
|
struct apfs_inode_val *val;
|
|
struct apfs_x_field xkey;
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 7, 0)
|
|
struct timespec64 ts;
|
|
#endif
|
|
int total_xlen, val_len;
|
|
bool is_device = S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode);
|
|
__le32 rdev;
|
|
|
|
/* The only required xfield is the name, and the id if it's a device */
|
|
total_xlen = sizeof(struct apfs_xf_blob);
|
|
total_xlen += sizeof(xkey) + round_up(qname->len + 1, 8);
|
|
if (is_device)
|
|
total_xlen += sizeof(xkey) + round_up(sizeof(rdev), 8);
|
|
|
|
val_len = sizeof(*val) + total_xlen;
|
|
val = kzalloc(val_len, GFP_KERNEL);
|
|
if (!val)
|
|
return -ENOMEM;
|
|
|
|
val->parent_id = cpu_to_le64(APFS_I(inode)->i_parent_id);
|
|
val->private_id = cpu_to_le64(apfs_ino(inode));
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0)
|
|
val->mod_time = cpu_to_le64(timespec64_to_ns(&inode->i_mtime));
|
|
#else
|
|
ts = inode_get_mtime(inode);
|
|
val->mod_time = cpu_to_le64(timespec64_to_ns(&ts));
|
|
#endif
|
|
val->create_time = val->change_time = val->access_time = val->mod_time;
|
|
|
|
if (S_ISDIR(inode->i_mode))
|
|
val->nchildren = 0;
|
|
else
|
|
val->nlink = cpu_to_le32(1);
|
|
|
|
val->owner = cpu_to_le32(i_uid_read(inode));
|
|
val->group = cpu_to_le32(i_gid_read(inode));
|
|
val->mode = cpu_to_le16(inode->i_mode);
|
|
|
|
/* The buffer was just allocated: none of these functions should fail */
|
|
apfs_init_xfields(val->xfields, total_xlen);
|
|
xkey.x_type = APFS_INO_EXT_TYPE_NAME;
|
|
xkey.x_flags = APFS_XF_DO_NOT_COPY;
|
|
xkey.x_size = cpu_to_le16(qname->len + 1);
|
|
apfs_insert_xfield(val->xfields, total_xlen, &xkey, qname->name);
|
|
if (is_device) {
|
|
rdev = cpu_to_le32(inode->i_rdev);
|
|
xkey.x_type = APFS_INO_EXT_TYPE_RDEV;
|
|
xkey.x_flags = 0; /* TODO: proper flags here? */
|
|
xkey.x_size = cpu_to_le16(sizeof(rdev));
|
|
apfs_insert_xfield(val->xfields, total_xlen, &xkey, &rdev);
|
|
}
|
|
|
|
*val_p = val;
|
|
return val_len;
|
|
}
|
|
|
|
/*
|
|
* apfs_inode_rename - Update the primary name reported in an inode record
|
|
* @inode: the in-memory inode
|
|
* @new_name: name of the new primary link (NULL if unchanged)
|
|
* @query: the query that found the inode record
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_inode_rename(struct inode *inode, char *new_name,
|
|
struct apfs_query *query)
|
|
{
|
|
char *raw = query->node->object.data;
|
|
struct apfs_inode_val *new_val = NULL;
|
|
int buflen, namelen;
|
|
struct apfs_x_field xkey;
|
|
int xlen;
|
|
int err;
|
|
|
|
if (!new_name)
|
|
return 0;
|
|
|
|
namelen = strlen(new_name) + 1; /* Count the null-termination */
|
|
buflen = query->len;
|
|
buflen += sizeof(struct apfs_x_field) + round_up(namelen, 8);
|
|
new_val = kzalloc(buflen, GFP_KERNEL);
|
|
if (!new_val)
|
|
return -ENOMEM;
|
|
memcpy(new_val, raw + query->off, query->len);
|
|
|
|
/* TODO: can we assume that all inode records have an xfield blob? */
|
|
xkey.x_type = APFS_INO_EXT_TYPE_NAME;
|
|
xkey.x_flags = APFS_XF_DO_NOT_COPY;
|
|
xkey.x_size = cpu_to_le16(namelen);
|
|
xlen = apfs_insert_xfield(new_val->xfields, buflen - sizeof(*new_val),
|
|
&xkey, new_name);
|
|
if (!xlen) {
|
|
/* Buffer has enough space, but the metadata claims otherwise */
|
|
apfs_err(inode->i_sb, "bad xfields on inode 0x%llx", apfs_ino(inode));
|
|
err = -EFSCORRUPTED;
|
|
goto fail;
|
|
}
|
|
|
|
/* Just remove the old record and create a new one */
|
|
err = apfs_btree_replace(query, NULL /* key */, 0 /* key_len */, new_val, sizeof(*new_val) + xlen);
|
|
if (err)
|
|
apfs_err(inode->i_sb, "update failed for ino 0x%llx", apfs_ino(inode));
|
|
|
|
fail:
|
|
kfree(new_val);
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* apfs_create_dstream_xfield - Create the inode xfield for a new data stream
|
|
* @inode: the in-memory inode
|
|
* @query: the query that found the inode record
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_create_dstream_xfield(struct inode *inode,
|
|
struct apfs_query *query)
|
|
{
|
|
char *raw = query->node->object.data;
|
|
struct apfs_inode_val *new_val;
|
|
struct apfs_dstream dstream_raw = {0};
|
|
struct apfs_x_field xkey;
|
|
struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream;
|
|
int xlen;
|
|
int buflen;
|
|
int err;
|
|
|
|
buflen = query->len;
|
|
buflen += sizeof(struct apfs_x_field) + sizeof(dstream_raw);
|
|
new_val = kzalloc(buflen, GFP_KERNEL);
|
|
if (!new_val)
|
|
return -ENOMEM;
|
|
memcpy(new_val, raw + query->off, query->len);
|
|
|
|
dstream_raw.size = cpu_to_le64(inode->i_size);
|
|
dstream_raw.alloced_size = cpu_to_le64(apfs_alloced_size(dstream));
|
|
if (apfs_vol_is_encrypted(inode->i_sb))
|
|
dstream_raw.default_crypto_id = cpu_to_le64(dstream->ds_id);
|
|
|
|
/* TODO: can we assume that all inode records have an xfield blob? */
|
|
xkey.x_type = APFS_INO_EXT_TYPE_DSTREAM;
|
|
xkey.x_flags = APFS_XF_SYSTEM_FIELD;
|
|
xkey.x_size = cpu_to_le16(sizeof(dstream_raw));
|
|
xlen = apfs_insert_xfield(new_val->xfields, buflen - sizeof(*new_val),
|
|
&xkey, &dstream_raw);
|
|
if (!xlen) {
|
|
/* Buffer has enough space, but the metadata claims otherwise */
|
|
apfs_err(inode->i_sb, "bad xfields on inode 0x%llx", apfs_ino(inode));
|
|
err = -EFSCORRUPTED;
|
|
goto fail;
|
|
}
|
|
|
|
/* Just remove the old record and create a new one */
|
|
err = apfs_btree_replace(query, NULL /* key */, 0 /* key_len */, new_val, sizeof(*new_val) + xlen);
|
|
if (err)
|
|
apfs_err(inode->i_sb, "update failed for ino 0x%llx", apfs_ino(inode));
|
|
|
|
fail:
|
|
kfree(new_val);
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* apfs_inode_resize - Update the sizes reported in an inode record
|
|
* @inode: the in-memory inode
|
|
* @query: the query that found the inode record
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_inode_resize(struct inode *inode, struct apfs_query *query)
|
|
{
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
char *raw;
|
|
struct apfs_inode_val *inode_raw;
|
|
char *xval;
|
|
int xlen;
|
|
int err;
|
|
|
|
/* All dstream records must have a matching xfield, even if empty */
|
|
if (!ai->i_has_dstream)
|
|
return 0;
|
|
|
|
err = apfs_query_join_transaction(query);
|
|
if (err) {
|
|
apfs_err(inode->i_sb, "query join failed");
|
|
return err;
|
|
}
|
|
raw = query->node->object.data;
|
|
inode_raw = (void *)raw + query->off;
|
|
|
|
xlen = apfs_find_xfield(inode_raw->xfields,
|
|
query->len - sizeof(*inode_raw),
|
|
APFS_INO_EXT_TYPE_DSTREAM, &xval);
|
|
|
|
if (xlen) {
|
|
struct apfs_dstream *dstream;
|
|
|
|
if (xlen != sizeof(*dstream)) {
|
|
apfs_err(inode->i_sb, "bad xlen (%d) on inode 0x%llx", xlen, apfs_ino(inode));
|
|
return -EFSCORRUPTED;
|
|
}
|
|
dstream = (struct apfs_dstream *)xval;
|
|
|
|
/* TODO: count bytes read and written */
|
|
dstream->size = cpu_to_le64(inode->i_size);
|
|
dstream->alloced_size = cpu_to_le64(apfs_alloced_size(&ai->i_dstream));
|
|
return 0;
|
|
}
|
|
/* This inode has no dstream xfield, so we need to create it */
|
|
return apfs_create_dstream_xfield(inode, query);
|
|
}
|
|
|
|
/**
|
|
* apfs_create_sparse_xfield - Create an inode xfield to count sparse bytes
|
|
* @inode: the in-memory inode
|
|
* @query: the query that found the inode record
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_create_sparse_xfield(struct inode *inode, struct apfs_query *query)
|
|
{
|
|
struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream;
|
|
char *raw = query->node->object.data;
|
|
struct apfs_inode_val *new_val;
|
|
__le64 sparse_bytes;
|
|
struct apfs_x_field xkey;
|
|
int xlen;
|
|
int buflen;
|
|
int err;
|
|
|
|
buflen = query->len;
|
|
buflen += sizeof(struct apfs_x_field) + sizeof(sparse_bytes);
|
|
new_val = kzalloc(buflen, GFP_KERNEL);
|
|
if (!new_val)
|
|
return -ENOMEM;
|
|
memcpy(new_val, raw + query->off, query->len);
|
|
|
|
sparse_bytes = cpu_to_le64(dstream->ds_sparse_bytes);
|
|
|
|
/* TODO: can we assume that all inode records have an xfield blob? */
|
|
xkey.x_type = APFS_INO_EXT_TYPE_SPARSE_BYTES;
|
|
xkey.x_flags = APFS_XF_SYSTEM_FIELD | APFS_XF_CHILDREN_INHERIT;
|
|
xkey.x_size = cpu_to_le16(sizeof(sparse_bytes));
|
|
xlen = apfs_insert_xfield(new_val->xfields, buflen - sizeof(*new_val), &xkey, &sparse_bytes);
|
|
if (!xlen) {
|
|
/* Buffer has enough space, but the metadata claims otherwise */
|
|
apfs_err(inode->i_sb, "bad xfields on inode 0x%llx", apfs_ino(inode));
|
|
err = -EFSCORRUPTED;
|
|
goto fail;
|
|
}
|
|
|
|
/* Just remove the old record and create a new one */
|
|
err = apfs_btree_replace(query, NULL /* key */, 0 /* key_len */, new_val, sizeof(*new_val) + xlen);
|
|
if (err)
|
|
apfs_err(inode->i_sb, "update failed for ino 0x%llx", apfs_ino(inode));
|
|
|
|
fail:
|
|
kfree(new_val);
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* apfs_inode_resize_sparse - Update sparse byte count reported in inode record
|
|
* @inode: the in-memory inode
|
|
* @query: the query that found the inode record
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*
|
|
* TODO: should the xfield be removed if the count reaches 0? Should the inode
|
|
* flag change?
|
|
*/
|
|
static int apfs_inode_resize_sparse(struct inode *inode, struct apfs_query *query)
|
|
{
|
|
struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream;
|
|
char *raw;
|
|
struct apfs_inode_val *inode_raw;
|
|
char *xval;
|
|
int xlen;
|
|
int err;
|
|
|
|
err = apfs_query_join_transaction(query);
|
|
if (err) {
|
|
apfs_err(inode->i_sb, "query join failed");
|
|
return err;
|
|
}
|
|
raw = query->node->object.data;
|
|
inode_raw = (void *)raw + query->off;
|
|
|
|
xlen = apfs_find_xfield(inode_raw->xfields,
|
|
query->len - sizeof(*inode_raw),
|
|
APFS_INO_EXT_TYPE_SPARSE_BYTES, &xval);
|
|
if (!xlen && !dstream->ds_sparse_bytes)
|
|
return 0;
|
|
|
|
if (xlen) {
|
|
__le64 *sparse_bytes_p;
|
|
|
|
if (xlen != sizeof(*sparse_bytes_p)) {
|
|
apfs_err(inode->i_sb, "bad xlen (%d) on inode 0x%llx", xlen, apfs_ino(inode));
|
|
return -EFSCORRUPTED;
|
|
}
|
|
sparse_bytes_p = (__le64 *)xval;
|
|
|
|
*sparse_bytes_p = cpu_to_le64(dstream->ds_sparse_bytes);
|
|
return 0;
|
|
}
|
|
return apfs_create_sparse_xfield(inode, query);
|
|
}
|
|
|
|
/**
|
|
* apfs_update_inode - Update an existing inode record
|
|
* @inode: the modified in-memory inode
|
|
* @new_name: name of the new primary link (NULL if unchanged)
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
int apfs_update_inode(struct inode *inode, char *new_name)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
struct apfs_dstream_info *dstream = &ai->i_dstream;
|
|
struct apfs_query *query;
|
|
struct apfs_btree_node_phys *node_raw;
|
|
struct apfs_inode_val *inode_raw;
|
|
int err;
|
|
|
|
err = apfs_flush_extent_cache(dstream);
|
|
if (err) {
|
|
apfs_err(sb, "extent cache flush failed for inode 0x%llx", apfs_ino(inode));
|
|
return err;
|
|
}
|
|
|
|
query = apfs_inode_lookup(inode);
|
|
if (IS_ERR(query)) {
|
|
apfs_err(sb, "lookup failed for ino 0x%llx", apfs_ino(inode));
|
|
return PTR_ERR(query);
|
|
}
|
|
|
|
/* TODO: copy the record to memory and make all xfield changes there */
|
|
err = apfs_inode_rename(inode, new_name, query);
|
|
if (err) {
|
|
apfs_err(sb, "rename failed for ino 0x%llx", apfs_ino(inode));
|
|
goto fail;
|
|
}
|
|
|
|
err = apfs_inode_resize(inode, query);
|
|
if (err) {
|
|
apfs_err(sb, "resize failed for ino 0x%llx", apfs_ino(inode));
|
|
goto fail;
|
|
}
|
|
|
|
err = apfs_inode_resize_sparse(inode, query);
|
|
if (err) {
|
|
apfs_err(sb, "sparse resize failed for ino 0x%llx", apfs_ino(inode));
|
|
goto fail;
|
|
}
|
|
if (dstream->ds_sparse_bytes)
|
|
ai->i_int_flags |= APFS_INODE_IS_SPARSE;
|
|
|
|
/* TODO: just use apfs_btree_replace()? */
|
|
err = apfs_query_join_transaction(query);
|
|
if (err) {
|
|
apfs_err(sb, "query join failed");
|
|
goto fail;
|
|
}
|
|
node_raw = (void *)query->node->object.data;
|
|
apfs_assert_in_transaction(sb, &node_raw->btn_o);
|
|
inode_raw = (void *)node_raw + query->off;
|
|
|
|
inode_raw->parent_id = cpu_to_le64(ai->i_parent_id);
|
|
inode_raw->private_id = cpu_to_le64(dstream->ds_id);
|
|
inode_raw->mode = cpu_to_le16(inode->i_mode);
|
|
inode_raw->owner = cpu_to_le32(i_uid_read(inode));
|
|
inode_raw->group = cpu_to_le32(i_gid_read(inode));
|
|
inode_raw->default_protection_class = cpu_to_le32(ai->i_key_class);
|
|
inode_raw->internal_flags = cpu_to_le64(ai->i_int_flags);
|
|
inode_raw->bsd_flags = cpu_to_le32(ai->i_bsd_flags);
|
|
|
|
/* Don't persist the uid/gid provided by the user on mount */
|
|
if (uid_valid(sbi->s_uid))
|
|
inode_raw->owner = cpu_to_le32(ai->i_saved_uid);
|
|
if (gid_valid(sbi->s_gid))
|
|
inode_raw->group = cpu_to_le32(ai->i_saved_gid);
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0)
|
|
inode_raw->change_time = cpu_to_le64(timespec64_to_ns(&inode->i_ctime));
|
|
#else
|
|
struct timespec64 ictime = inode_get_ctime(inode);
|
|
inode_raw->change_time = cpu_to_le64(timespec64_to_ns(&ictime));
|
|
#endif
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0)
|
|
inode_raw->access_time = cpu_to_le64(timespec64_to_ns(&inode->i_atime));
|
|
inode_raw->mod_time = cpu_to_le64(timespec64_to_ns(&inode->i_mtime));
|
|
#else
|
|
struct timespec64 ts = inode_get_mtime(inode);
|
|
inode_raw->mod_time = cpu_to_le64(timespec64_to_ns(&ts));
|
|
ts = inode_get_atime(inode);
|
|
inode_raw->access_time = cpu_to_le64(timespec64_to_ns(&ts));
|
|
#endif
|
|
inode_raw->create_time = cpu_to_le64(timespec64_to_ns(&ai->i_crtime));
|
|
|
|
if (S_ISDIR(inode->i_mode)) {
|
|
inode_raw->nchildren = cpu_to_le32(ai->i_nchildren);
|
|
} else {
|
|
/* The remaining link for orphan inodes is not counted */
|
|
inode_raw->nlink = cpu_to_le32(inode->i_nlink);
|
|
}
|
|
|
|
fail:
|
|
apfs_free_query(query);
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* apfs_delete_inode - Delete an inode record
|
|
* @inode: the vfs inode to delete
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure, which may
|
|
* be -EAGAIN if the inode was not deleted in full.
|
|
*/
|
|
static int apfs_delete_inode(struct inode *inode)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
struct apfs_dstream_info *dstream = NULL;
|
|
struct apfs_query *query;
|
|
u64 old_dstream_id;
|
|
int ret;
|
|
|
|
ret = apfs_delete_all_xattrs(inode);
|
|
if (ret) {
|
|
apfs_err(sb, "xattr deletion failed for ino 0x%llx", apfs_ino(inode));
|
|
return ret;
|
|
}
|
|
|
|
dstream = &ai->i_dstream;
|
|
old_dstream_id = dstream->ds_id;
|
|
|
|
/*
|
|
* This is very wasteful since all the new extents and references will
|
|
* get deleted right away, but it only affects clones, so I don't see a
|
|
* big reason to improve it (TODO)
|
|
*/
|
|
ret = apfs_inode_create_exclusive_dstream(inode);
|
|
if (ret) {
|
|
apfs_err(sb, "dstream creation failed for ino 0x%llx", apfs_ino(inode));
|
|
return ret;
|
|
}
|
|
|
|
/* TODO: what about partial deletion of xattrs? Is that allowed? */
|
|
ret = apfs_inode_delete_front(inode);
|
|
if (ret) {
|
|
/*
|
|
* If the inode had too many extents, only the first few get
|
|
* deleted and the inode remains in the orphan list for now.
|
|
* I don't know why the deletion starts at the front, but it
|
|
* seems to be what the official driver does.
|
|
*/
|
|
if (ret != -EAGAIN) {
|
|
apfs_err(sb, "head deletion failed for ino 0x%llx", apfs_ino(inode));
|
|
return ret;
|
|
}
|
|
if (dstream->ds_id != old_dstream_id) {
|
|
ret = apfs_update_inode(inode, NULL /* new_name */);
|
|
if (ret) {
|
|
apfs_err(sb, "dstream id update failed for orphan 0x%llx", apfs_ino(inode));
|
|
return ret;
|
|
}
|
|
}
|
|
return -EAGAIN;
|
|
}
|
|
|
|
ret = apfs_put_dstream_rec(dstream);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to put dstream for ino 0x%llx", apfs_ino(inode));
|
|
return ret;
|
|
}
|
|
dstream = NULL;
|
|
ai->i_has_dstream = false;
|
|
|
|
query = apfs_inode_lookup(inode);
|
|
if (IS_ERR(query)) {
|
|
apfs_err(sb, "lookup failed for ino 0x%llx", apfs_ino(inode));
|
|
return PTR_ERR(query);
|
|
}
|
|
ret = apfs_btree_remove(query);
|
|
apfs_free_query(query);
|
|
if (ret) {
|
|
apfs_err(sb, "removal failed for ino 0x%llx", apfs_ino(inode));
|
|
return ret;
|
|
}
|
|
|
|
ai->i_cleaned = true;
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_clean_single_orphan - Clean the given orphan file
|
|
* @inode: inode for the file to clean
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure, which may
|
|
* be -EAGAIN if the file could not be deleted in full.
|
|
*/
|
|
static int apfs_clean_single_orphan(struct inode *inode)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
u64 ino = apfs_ino(inode);
|
|
bool eagain = false;
|
|
int err;
|
|
|
|
err = apfs_transaction_start(sb, APFS_TRANS_DEL);
|
|
if (err)
|
|
return err;
|
|
err = apfs_delete_inode(inode);
|
|
if (err) {
|
|
if (err != -EAGAIN) {
|
|
apfs_err(sb, "failed to delete orphan 0x%llx", ino);
|
|
goto fail;
|
|
}
|
|
eagain = true;
|
|
} else {
|
|
err = apfs_delete_orphan_link(inode);
|
|
if (err) {
|
|
apfs_err(sb, "failed to unlink orphan 0x%llx", ino);
|
|
goto fail;
|
|
}
|
|
}
|
|
err = apfs_transaction_commit(sb);
|
|
if (err)
|
|
goto fail;
|
|
return eagain ? -EAGAIN : 0;
|
|
|
|
fail:
|
|
apfs_transaction_abort(sb);
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* apfs_clean_any_orphan - Pick an orphan and delete as much as reasonable
|
|
* @sb: filesystem superblock
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure, which may
|
|
* be -ENODATA if there are no more orphan files or -EAGAIN if a file could not
|
|
* be deleted in full.
|
|
*/
|
|
static int apfs_clean_any_orphan(struct super_block *sb)
|
|
{
|
|
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
|
|
struct inode *inode = NULL;
|
|
int err;
|
|
u64 ino;
|
|
|
|
down_read(&nxi->nx_big_sem);
|
|
err = apfs_any_orphan_ino(sb, &ino);
|
|
up_read(&nxi->nx_big_sem);
|
|
if (err) {
|
|
if (err == -ENODATA)
|
|
return -ENODATA;
|
|
apfs_err(sb, "failed to find orphan inode numbers");
|
|
return err;
|
|
}
|
|
|
|
inode = apfs_iget(sb, ino);
|
|
if (IS_ERR(inode)) {
|
|
err = PTR_ERR(inode);
|
|
if (err != -ENODATA) {
|
|
apfs_err(sb, "iget failed for orphan 0x%llx", ino);
|
|
return err;
|
|
}
|
|
/*
|
|
* This happens rarely for files with no extents, if we hit a
|
|
* race with ->evict_inode(). Not a problem: the file is gone.
|
|
*/
|
|
apfs_notice(sb, "orphan 0x%llx not found", ino);
|
|
return 0;
|
|
}
|
|
|
|
if (atomic_read(&inode->i_count) > 1)
|
|
goto out;
|
|
err = apfs_clean_single_orphan(inode);
|
|
if (err && err != -EAGAIN) {
|
|
apfs_err(sb, "failed to clean orphan 0x%llx", ino);
|
|
goto out;
|
|
}
|
|
out:
|
|
iput(inode);
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* apfs_schedule_orphan_cleanup - Schedule cleanup for orphan inodes
|
|
* @sb: filesystem superblock
|
|
*/
|
|
void apfs_schedule_orphan_cleanup(struct super_block *sb)
|
|
{
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
|
|
/*
|
|
* Don't schedule cleanups during unmount: completing all of it could
|
|
* take a while so just leave future mounts to handle the orphans.
|
|
*/
|
|
if (atomic_read(&sb->s_active) == 0)
|
|
return;
|
|
|
|
/*
|
|
* Don't keep retrying orphan cleanups nonstop when they run into an
|
|
* unexpected error: it won't do any good and it will flood dmesg. We
|
|
* will retry eventually for ENOSPC, but that's handled elsewhere.
|
|
*/
|
|
if (atomic_read(&sbi->s_orphan_cleanup_err))
|
|
return;
|
|
|
|
schedule_work(&sbi->s_orphan_cleanup_work);
|
|
}
|
|
|
|
/**
|
|
* apfs_clean_orphans - Delete as many orphan files as is reasonable
|
|
* @sb: filesystem superblock
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_clean_orphans(struct super_block *sb)
|
|
{
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
int ret, i;
|
|
|
|
for (i = 0; i < 100; ++i) {
|
|
ret = apfs_clean_any_orphan(sb);
|
|
if (ret == 0)
|
|
continue;
|
|
if (ret == -ENODATA)
|
|
return 0;
|
|
if (ret == -EAGAIN)
|
|
break;
|
|
apfs_err(sb, "failed to delete an orphan file");
|
|
atomic_set(&sbi->s_orphan_cleanup_err, ret);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* If a file is too big, or if there are too many files, take a break
|
|
* and continue later.
|
|
*/
|
|
apfs_schedule_orphan_cleanup(sb);
|
|
return 0;
|
|
}
|
|
|
|
void apfs_evict_inode(struct inode *inode)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
int err;
|
|
|
|
if (is_bad_inode(inode) || inode->i_nlink || ai->i_cleaned)
|
|
goto out;
|
|
|
|
if (!ai->i_has_dstream || ai->i_dstream.ds_size == 0) {
|
|
/* For files with no extents, scheduled cleanup wastes time */
|
|
err = apfs_clean_single_orphan(inode);
|
|
if (err) {
|
|
apfs_err(sb, "failed to clean orphan 0x%llx (err:%d)", apfs_ino(inode), err);
|
|
atomic_set(&APFS_SB(sb)->s_orphan_cleanup_err, err);
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
/* If the inode still has extents then schedule cleanup for the rest */
|
|
apfs_schedule_orphan_cleanup(sb);
|
|
out:
|
|
truncate_inode_pages_final(&inode->i_data);
|
|
clear_inode(inode);
|
|
}
|
|
|
|
void apfs_orphan_cleanup_work(struct work_struct *work)
|
|
{
|
|
struct super_block *sb = NULL;
|
|
struct apfs_sb_info *sbi = NULL;
|
|
struct inode *priv = NULL;
|
|
int err;
|
|
|
|
sbi = container_of(work, struct apfs_sb_info, s_orphan_cleanup_work);
|
|
priv = sbi->s_private_dir;
|
|
sb = priv->i_sb;
|
|
|
|
if (sb->s_flags & SB_RDONLY) {
|
|
apfs_alert(sb, "attempt to flush orphans in read-only mount");
|
|
return;
|
|
}
|
|
|
|
err = apfs_clean_orphans(sb);
|
|
if (err)
|
|
apfs_err(sb, "orphan cleanup failed (err:%d)", err);
|
|
}
|
|
|
|
/**
|
|
* apfs_insert_inode_locked - Wrapper for insert_inode_locked4()
|
|
* @inode: vfs inode to insert in cache
|
|
*
|
|
* Works the same as insert_inode_locked(), but can handle 64-bit inode numbers
|
|
* on 32-bit architectures.
|
|
*/
|
|
static int apfs_insert_inode_locked(struct inode *inode)
|
|
{
|
|
u64 cnid = apfs_ino(inode);
|
|
|
|
return insert_inode_locked4(inode, cnid, apfs_test_inode, &cnid);
|
|
}
|
|
|
|
/**
|
|
* apfs_new_inode - Create a new in-memory inode
|
|
* @dir: parent inode
|
|
* @mode: mode bits for the new inode
|
|
* @rdev: device id (0 if not a device file)
|
|
*
|
|
* Returns a pointer to the new vfs inode on success, or an error pointer in
|
|
* case of failure.
|
|
*/
|
|
struct inode *apfs_new_inode(struct inode *dir, umode_t mode, dev_t rdev)
|
|
{
|
|
struct super_block *sb = dir->i_sb;
|
|
struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw;
|
|
struct inode *inode;
|
|
struct apfs_inode_info *ai;
|
|
struct apfs_dstream_info *dstream;
|
|
u64 cnid;
|
|
struct timespec64 now;
|
|
|
|
/* Updating on-disk structures here is odd, but it works for now */
|
|
apfs_assert_in_transaction(sb, &vsb_raw->apfs_o);
|
|
|
|
inode = new_inode(sb);
|
|
if (!inode)
|
|
return ERR_PTR(-ENOMEM);
|
|
ai = APFS_I(inode);
|
|
dstream = &ai->i_dstream;
|
|
|
|
cnid = le64_to_cpu(vsb_raw->apfs_next_obj_id);
|
|
le64_add_cpu(&vsb_raw->apfs_next_obj_id, 1);
|
|
apfs_set_ino(inode, cnid);
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)
|
|
inode_init_owner(inode, dir, mode);
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) && !RHEL_VERSION_GE(9, 6)
|
|
inode_init_owner(&init_user_ns, inode, dir, mode);
|
|
#else
|
|
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
|
|
#endif
|
|
|
|
ai->i_saved_uid = i_uid_read(inode);
|
|
ai->i_saved_gid = i_gid_read(inode);
|
|
ai->i_parent_id = apfs_ino(dir);
|
|
set_nlink(inode, 1);
|
|
ai->i_nchildren = 0;
|
|
if (apfs_vol_is_encrypted(sb) && S_ISREG(mode))
|
|
ai->i_key_class = apfs_dflt_key_class(sb);
|
|
else
|
|
ai->i_key_class = 0;
|
|
ai->i_int_flags = APFS_INODE_NO_RSRC_FORK;
|
|
ai->i_bsd_flags = 0;
|
|
|
|
ai->i_has_dstream = false;
|
|
dstream->ds_id = cnid;
|
|
dstream->ds_size = 0;
|
|
dstream->ds_sparse_bytes = 0;
|
|
dstream->ds_shared = false;
|
|
|
|
now = current_time(inode);
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0)
|
|
inode->i_atime = inode->i_mtime = inode->i_ctime = ai->i_crtime = now;
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0)
|
|
inode_set_ctime_to_ts(inode, now);
|
|
inode->i_atime = inode->i_mtime = ai->i_crtime = now;
|
|
#else
|
|
ai->i_crtime = simple_inode_init_ts(inode);
|
|
#endif
|
|
vsb_raw->apfs_last_mod_time = cpu_to_le64(timespec64_to_ns(&now));
|
|
|
|
if (S_ISREG(mode))
|
|
le64_add_cpu(&vsb_raw->apfs_num_files, 1);
|
|
else if (S_ISDIR(mode))
|
|
le64_add_cpu(&vsb_raw->apfs_num_directories, 1);
|
|
else if (S_ISLNK(mode))
|
|
le64_add_cpu(&vsb_raw->apfs_num_symlinks, 1);
|
|
else
|
|
le64_add_cpu(&vsb_raw->apfs_num_other_fsobjects, 1);
|
|
|
|
if (apfs_insert_inode_locked(inode)) {
|
|
/* The inode number should have been free, but wasn't */
|
|
apfs_err(sb, "next obj_id (0x%llx) not free", cnid);
|
|
make_bad_inode(inode);
|
|
iput(inode);
|
|
return ERR_PTR(-EFSCORRUPTED);
|
|
}
|
|
|
|
/* No need to dirty the inode, we'll write it to disk right away */
|
|
apfs_inode_set_ops(inode, rdev, false /* compressed */);
|
|
return inode;
|
|
}
|
|
|
|
/**
|
|
* apfs_create_inode_rec - Create an inode record in the catalog b-tree
|
|
* @sb: filesystem superblock
|
|
* @inode: vfs inode to record
|
|
* @dentry: dentry for primary link
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
int apfs_create_inode_rec(struct super_block *sb, struct inode *inode,
|
|
struct dentry *dentry)
|
|
{
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query;
|
|
struct apfs_inode_key raw_key;
|
|
struct apfs_inode_val *raw_val;
|
|
int val_len;
|
|
int ret;
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_inode_key(apfs_ino(inode), &query->key);
|
|
query->flags |= APFS_QUERY_CAT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for ino 0x%llx", apfs_ino(inode));
|
|
goto fail;
|
|
}
|
|
|
|
apfs_key_set_hdr(APFS_TYPE_INODE, apfs_ino(inode), &raw_key);
|
|
|
|
val_len = apfs_build_inode_val(inode, &dentry->d_name, &raw_val);
|
|
if (val_len < 0) {
|
|
ret = val_len;
|
|
goto fail;
|
|
}
|
|
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), raw_val, val_len);
|
|
if (ret)
|
|
apfs_err(sb, "insertion failed for ino 0x%llx", apfs_ino(inode));
|
|
kfree(raw_val);
|
|
|
|
fail:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_setsize - Change the size of a regular file
|
|
* @inode: the vfs inode
|
|
* @new_size: the new size
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_setsize(struct inode *inode, loff_t new_size)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream;
|
|
int err;
|
|
|
|
if (new_size == inode->i_size)
|
|
return 0;
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0)
|
|
inode->i_mtime = inode->i_ctime = current_time(inode);
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0)
|
|
inode->i_mtime = inode_set_ctime_current(inode);
|
|
#else
|
|
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
|
|
#endif
|
|
|
|
err = apfs_inode_create_dstream_rec(inode);
|
|
if (err) {
|
|
apfs_err(sb, "failed to create dstream for ino 0x%llx", apfs_ino(inode));
|
|
return err;
|
|
}
|
|
|
|
/* Must be called before i_size is changed */
|
|
err = apfs_truncate(dstream, new_size);
|
|
if (err) {
|
|
apfs_err(sb, "truncation failed for ino 0x%llx", apfs_ino(inode));
|
|
return err;
|
|
}
|
|
|
|
truncate_setsize(inode, new_size);
|
|
dstream->ds_size = i_size_read(inode);
|
|
return 0;
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)
|
|
int apfs_setattr(struct dentry *dentry, struct iattr *iattr)
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) && !RHEL_VERSION_GE(9, 6)
|
|
int apfs_setattr(struct user_namespace *mnt_userns,
|
|
struct dentry *dentry, struct iattr *iattr)
|
|
#else
|
|
int apfs_setattr(struct mnt_idmap *idmap,
|
|
struct dentry *dentry, struct iattr *iattr)
|
|
#endif
|
|
{
|
|
struct inode *inode = d_inode(dentry);
|
|
struct super_block *sb = inode->i_sb;
|
|
bool resizing = S_ISREG(inode->i_mode) && (iattr->ia_valid & ATTR_SIZE);
|
|
bool shrinking = false;
|
|
int err;
|
|
|
|
if (resizing && iattr->ia_size > APFS_MAX_FILE_SIZE)
|
|
return -EFBIG;
|
|
if (resizing && iattr->ia_size < inode->i_size)
|
|
shrinking = true;
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)
|
|
err = setattr_prepare(dentry, iattr);
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) && !RHEL_VERSION_GE(9, 6)
|
|
err = setattr_prepare(&init_user_ns, dentry, iattr);
|
|
#else
|
|
err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
|
|
#endif
|
|
if (err)
|
|
return err;
|
|
|
|
/* TODO: figure out why ->write_inode() isn't firing */
|
|
err = apfs_transaction_start(sb, shrinking ? APFS_TRANS_DEL : APFS_TRANS_REG);
|
|
if (err)
|
|
return err;
|
|
apfs_inode_join_transaction(sb, inode);
|
|
|
|
if (resizing) {
|
|
err = apfs_setsize(inode, iattr->ia_size);
|
|
if (err) {
|
|
apfs_err(sb, "setsize failed for ino 0x%llx", apfs_ino(inode));
|
|
goto fail;
|
|
}
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)
|
|
setattr_copy(inode, iattr);
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) && !RHEL_VERSION_GE(9, 6)
|
|
setattr_copy(&init_user_ns, inode, iattr);
|
|
#else
|
|
setattr_copy(&nop_mnt_idmap, inode, iattr);
|
|
#endif
|
|
|
|
mark_inode_dirty(inode);
|
|
err = apfs_transaction_commit(sb);
|
|
if (err)
|
|
goto fail;
|
|
return 0;
|
|
|
|
fail:
|
|
apfs_transaction_abort(sb);
|
|
return err;
|
|
}
|
|
|
|
/* TODO: this only seems to be necessary because ->write_inode() isn't firing */
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(7, 0, 0)
|
|
int apfs_update_time(struct inode *inode, enum fs_update_time time, unsigned int flags)
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0)
|
|
int apfs_update_time(struct inode *inode, struct timespec64 *time, int flags)
|
|
#else
|
|
int apfs_update_time(struct inode *inode, int flags)
|
|
#endif
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
int err;
|
|
|
|
err = apfs_transaction_start(sb, APFS_TRANS_REG);
|
|
if (err)
|
|
return err;
|
|
apfs_inode_join_transaction(sb, inode);
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(7, 0, 0)
|
|
generic_update_time(inode, time, flags);
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) && !RHEL_VERSION_GE(9, 6)
|
|
generic_update_time(inode, time, flags);
|
|
#else
|
|
generic_update_time(inode, flags);
|
|
#endif
|
|
|
|
err = apfs_transaction_commit(sb);
|
|
if (err)
|
|
goto fail;
|
|
return 0;
|
|
|
|
fail:
|
|
apfs_transaction_abort(sb);
|
|
return err;
|
|
}
|
|
|
|
/**
 * apfs_ioc_set_dflt_pfk - Ioctl handler for APFS_IOC_SET_DFLT_PFK
 * @file:	affected file
 * @user_pfk:	user buffer holding a wrapped crypto state header followed by
 *		the key_len bytes announced in that header
 *
 * Replaces the crypto state kept in the superblock info (s_dflt_pfk) with a
 * copy of the one supplied by the caller.
 *
 * Returns 0 on success, -EFAULT if the user buffer can't be read, -EFBIG if
 * the key is longer than MAX_PFK_LEN, or -ENOMEM.
 */
static int apfs_ioc_set_dflt_pfk(struct file *file, void __user *user_pfk)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	struct apfs_sb_info *sbi = APFS_SB(sb);
	struct apfs_nxsb_info *nxi = APFS_NXI(sb);
	struct apfs_wrapped_crypto_state pfk_hdr;
	struct apfs_crypto_state_val *pfk;
	unsigned int key_len;

	/*
	 * The pointer comes straight from the ioctl argument, so it has not
	 * been through access_ok(): the checked copy helpers are required.
	 */
	if (copy_from_user(&pfk_hdr, user_pfk, sizeof(pfk_hdr)))
		return -EFAULT;
	key_len = le16_to_cpu(pfk_hdr.key_len);
	if (key_len > MAX_PFK_LEN)
		return -EFBIG;

	pfk = kmalloc(sizeof(*pfk) + key_len, GFP_KERNEL);
	if (!pfk)
		return -ENOMEM;
	if (copy_from_user(&pfk->state, user_pfk, sizeof(pfk_hdr) + key_len)) {
		kfree(pfk);
		return -EFAULT;
	}
	/*
	 * The header was just fetched a second time and userspace may have
	 * changed it in between. Restore the validated copy so that the stored
	 * key_len can never exceed the size of the allocation.
	 */
	memcpy(&pfk->state, &pfk_hdr, sizeof(pfk_hdr));
	pfk->refcnt = cpu_to_le32(1);

	down_write(&nxi->nx_big_sem);
	/* kfree() accepts NULL, so no need to check for a previous key */
	kfree(sbi->s_dflt_pfk);
	sbi->s_dflt_pfk = pfk;
	up_write(&nxi->nx_big_sem);

	return 0;
}
|
|
|
|
/**
 * apfs_ioc_set_dir_class - Ioctl handler for APFS_IOC_SET_DIR_CLASS
 * @file:	affected file
 * @user_class:	user pointer to the new key class
 *
 * Sets the key class of the inode and commits the change in its own
 * transaction.
 *
 * Returns 0 on success, or a negative error code in case of failure.
 */
static int apfs_ioc_set_dir_class(struct file *file, u32 __user *user_class)
{
	struct inode *inode = file_inode(file);
	struct apfs_inode_info *ai = APFS_I(inode);
	struct super_block *sb = inode->i_sb;
	u32 class;
	int err;

	if (get_user(class, user_class))
		return -EFAULT;

	err = apfs_transaction_start(sb, APFS_TRANS_REG);
	if (err)
		return err;

	/*
	 * Only touch the in-memory inode once the transaction is open:
	 * otherwise a failure to start it would leave a class that is reported
	 * by APFS_IOC_GET_CLASS but was never scheduled to be written out.
	 */
	ai->i_key_class = class;
	apfs_inode_join_transaction(sb, inode);

	err = apfs_transaction_commit(sb);
	if (err)
		apfs_transaction_abort(sb);
	return err;
}
|
|
|
|
/**
 * apfs_ioc_set_pfk - Ioctl handler for APFS_IOC_SET_PFK
 * @file:	affected file
 * @user_pfk:	user buffer holding a wrapped crypto state header followed by
 *		the key_len bytes announced in that header
 *
 * Stores the supplied crypto state under the id of the file's data stream
 * and, if it differs, updates the inode's key class to the persistent class
 * found in the header. Both changes happen in a single transaction.
 *
 * Returns 0 on success, -EFAULT if the user buffer can't be read, -EFBIG if
 * the key is longer than MAX_PFK_LEN, or another negative error code.
 */
static int apfs_ioc_set_pfk(struct file *file, void __user *user_pfk)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	struct apfs_wrapped_crypto_state pfk_hdr;
	struct apfs_crypto_state_val *pfk;
	struct apfs_inode_info *ai = APFS_I(inode);
	struct apfs_dstream_info *dstream = &ai->i_dstream;
	unsigned int key_len, key_class;
	int err;

	/*
	 * The pointer comes straight from the ioctl argument, so it has not
	 * been through access_ok(): the checked copy helpers are required.
	 */
	if (copy_from_user(&pfk_hdr, user_pfk, sizeof(pfk_hdr)))
		return -EFAULT;
	key_len = le16_to_cpu(pfk_hdr.key_len);
	if (key_len > MAX_PFK_LEN)
		return -EFBIG;

	pfk = kmalloc(sizeof(*pfk) + key_len, GFP_KERNEL);
	if (!pfk)
		return -ENOMEM;
	if (copy_from_user(&pfk->state, user_pfk, sizeof(pfk_hdr) + key_len)) {
		err = -EFAULT;
		goto out_free;
	}
	/*
	 * The header was just fetched a second time and userspace may have
	 * changed it in between. Restore the validated copy so that the stored
	 * key_len matches the allocation, and the stored class matches the one
	 * assigned to the inode below.
	 */
	memcpy(&pfk->state, &pfk_hdr, sizeof(pfk_hdr));
	pfk->refcnt = cpu_to_le32(1);

	err = apfs_transaction_start(sb, APFS_TRANS_REG);
	if (err)
		goto out_free;

	err = apfs_crypto_set_key(sb, dstream->ds_id, pfk);
	if (err)
		goto out_abort;

	key_class = le32_to_cpu(pfk_hdr.persistent_class);
	if (ai->i_key_class != key_class) {
		/* The inode only needs to be rewritten if its class changes */
		ai->i_key_class = key_class;
		apfs_inode_join_transaction(sb, inode);
	}

	err = apfs_transaction_commit(sb);
	if (!err)
		goto out_free;

out_abort:
	apfs_transaction_abort(sb);
out_free:
	kfree(pfk);
	return err;
}
|
|
|
|
/**
 * apfs_ioc_get_class - Ioctl handler for APFS_IOC_GET_CLASS
 * @file:	affected file
 * @user_class:	user pointer where the inode's key class gets written
 *
 * Returns 0 on success, or -EFAULT if the user pointer can't be written.
 */
static int apfs_ioc_get_class(struct file *file, u32 __user *user_class)
{
	struct apfs_inode_info *ai = APFS_I(file_inode(file));
	u32 key_class = ai->i_key_class;

	return put_user(key_class, user_class) ? -EFAULT : 0;
}
|
|
|
|
/**
 * apfs_ioc_get_pfk - Ioctl handler for APFS_IOC_GET_PFK
 * @file:	affected file
 * @user_pfk:	user buffer; on entry its header's key_len gives the room
 *		available for the key, on success it receives the crypto state
 *		header followed by the key
 *
 * Looks up the crypto state stored under the id of the file's data stream
 * and copies it out to the caller.
 *
 * Returns 0 on success, -EFAULT if the user buffer can't be accessed, -EFBIG
 * if the announced room exceeds MAX_PFK_LEN, or another negative error code.
 */
static int apfs_ioc_get_pfk(struct file *file, void __user *user_pfk)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	struct apfs_nxsb_info *nxi = APFS_NXI(sb);
	struct apfs_wrapped_crypto_state pfk_hdr;
	struct apfs_crypto_state_val *pfk;
	unsigned int max_len, key_len;
	struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream;
	int err;

	/*
	 * The pointer comes straight from the ioctl argument, so it has not
	 * been through access_ok(): the checked copy helpers are required.
	 */
	if (copy_from_user(&pfk_hdr, user_pfk, sizeof(pfk_hdr)))
		return -EFAULT;
	max_len = le16_to_cpu(pfk_hdr.key_len);
	if (max_len > MAX_PFK_LEN)
		return -EFBIG;

	pfk = kmalloc(sizeof(*pfk) + max_len, GFP_KERNEL);
	if (!pfk)
		return -ENOMEM;

	down_read(&nxi->nx_big_sem);
	err = apfs_crypto_get_key(sb, dstream->ds_id, pfk, max_len);
	up_read(&nxi->nx_big_sem);
	if (err)
		goto out;

	key_len = le16_to_cpu(pfk->state.key_len);
	/*
	 * Never copy out more than was allocated, whatever the length field
	 * of the fetched record claims. NOTE(review): apfs_crypto_get_key()
	 * may already guarantee this - confirm; the check is harmless if so.
	 */
	if (key_len > max_len) {
		apfs_err(sb, "bad key length for ino 0x%llx", apfs_ino(inode));
		err = -EFSCORRUPTED;
		goto out;
	}
	if (copy_to_user(user_pfk, &pfk->state, sizeof(pfk_hdr) + key_len))
		err = -EFAULT;

out:
	kfree(pfk);
	return err;
}
|
|
|
|
/*
|
|
* Older kernels have no vfs_ioc_setflags_prepare(), so don't implement the
|
|
* SETFLAGS/GETFLAGS ioctls there. It should be easy to fix, but it's not
|
|
* really needed at all. Be careful with this macro check, because it nests
|
|
* over a few others.
|
|
*/
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0)
|
|
|
|
/**
|
|
* apfs_getflags - Read an inode's bsd flags in FS_IOC_GETFLAGS format
|
|
* @inode: the vfs inode
|
|
*/
|
|
static unsigned int apfs_getflags(struct inode *inode)
|
|
{
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
unsigned int flags = 0;
|
|
|
|
if (ai->i_bsd_flags & APFS_INOBSD_APPEND)
|
|
flags |= FS_APPEND_FL;
|
|
if (ai->i_bsd_flags & APFS_INOBSD_IMMUTABLE)
|
|
flags |= FS_IMMUTABLE_FL;
|
|
if (ai->i_bsd_flags & APFS_INOBSD_NODUMP)
|
|
flags |= FS_NODUMP_FL;
|
|
return flags;
|
|
}
|
|
|
|
/**
|
|
* apfs_setflags - Set an inode's bsd flags
|
|
* @inode: the vfs inode
|
|
* @flags: flags to set, in FS_IOC_SETFLAGS format
|
|
*/
|
|
static void apfs_setflags(struct inode *inode, unsigned int flags)
|
|
{
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
unsigned int i_flags = 0;
|
|
|
|
if (flags & FS_APPEND_FL) {
|
|
ai->i_bsd_flags |= APFS_INOBSD_APPEND;
|
|
i_flags |= S_APPEND;
|
|
} else {
|
|
ai->i_bsd_flags &= ~APFS_INOBSD_APPEND;
|
|
}
|
|
|
|
if (flags & FS_IMMUTABLE_FL) {
|
|
ai->i_bsd_flags |= APFS_INOBSD_IMMUTABLE;
|
|
i_flags |= S_IMMUTABLE;
|
|
} else {
|
|
ai->i_bsd_flags &= ~APFS_INOBSD_IMMUTABLE;
|
|
}
|
|
|
|
if (flags & FS_NODUMP_FL)
|
|
ai->i_bsd_flags |= APFS_INOBSD_NODUMP;
|
|
else
|
|
ai->i_bsd_flags &= ~APFS_INOBSD_NODUMP;
|
|
|
|
inode_set_flags(inode, i_flags, S_IMMUTABLE | S_APPEND);
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0)
|
|
|
|
/**
|
|
* apfs_ioc_getflags - Ioctl handler for FS_IOC_GETFLAGS
|
|
* @file: affected file
|
|
* @arg: ioctl argument
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_ioc_getflags(struct file *file, int __user *arg)
|
|
{
|
|
unsigned int flags = apfs_getflags(file_inode(file));
|
|
|
|
return put_user(flags, arg);
|
|
}
|
|
|
|
/**
|
|
* apfs_do_ioc_setflags - Actual work for apfs_ioc_setflags(), after preparation
|
|
* @inode: affected vfs inode
|
|
* @newflags: inode flags to set, in FS_IOC_SETFLAGS format
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_do_ioc_setflags(struct inode *inode, unsigned int newflags)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
unsigned int oldflags;
|
|
int err;
|
|
|
|
lockdep_assert_held_write(&inode->i_rwsem);
|
|
|
|
oldflags = apfs_getflags(inode);
|
|
err = vfs_ioc_setflags_prepare(inode, oldflags, newflags);
|
|
if (err)
|
|
return err;
|
|
|
|
err = apfs_transaction_start(sb, APFS_TRANS_REG);
|
|
if (err)
|
|
return err;
|
|
|
|
apfs_inode_join_transaction(sb, inode);
|
|
apfs_setflags(inode, newflags);
|
|
inode->i_ctime = current_time(inode);
|
|
|
|
err = apfs_transaction_commit(sb);
|
|
if (err)
|
|
apfs_transaction_abort(sb);
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* apfs_ioc_setflags - Ioctl handler for FS_IOC_SETFLAGS
|
|
* @file: affected file
|
|
* @arg: ioctl argument
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_ioc_setflags(struct file *file, int __user *arg)
|
|
{
|
|
struct inode *inode = file_inode(file);
|
|
struct super_block *sb = inode->i_sb;
|
|
unsigned int newflags;
|
|
int err;
|
|
|
|
if (sb->s_flags & SB_RDONLY)
|
|
return -EROFS;
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)
|
|
if (!inode_owner_or_capable(inode))
|
|
#else
|
|
if (!inode_owner_or_capable(&init_user_ns, inode))
|
|
#endif
|
|
return -EPERM;
|
|
|
|
if (get_user(newflags, arg))
|
|
return -EFAULT;
|
|
|
|
if (newflags & ~(FS_APPEND_FL | FS_IMMUTABLE_FL | FS_NODUMP_FL))
|
|
return -EOPNOTSUPP;
|
|
|
|
err = mnt_want_write_file(file);
|
|
if (err)
|
|
return err;
|
|
|
|
inode_lock(inode);
|
|
err = apfs_do_ioc_setflags(inode, newflags);
|
|
inode_unlock(inode);
|
|
|
|
mnt_drop_write_file(file);
|
|
return err;
|
|
}
|
|
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) && !RHEL_VERSION_GE(9, 6)
|
|
|
|
/**
 * apfs_fileattr_get - Report an inode's flags through a fileattr structure
 * @dentry:	dentry of the affected inode
 * @fa:		structure to fill, in FS_IOC_GETFLAGS format
 *
 * Always returns 0.
 */
int apfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
	struct inode *inode = d_inode(dentry);

	fileattr_fill_flags(fa, apfs_getflags(inode));
	return 0;
}
|
|
|
|
int apfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa)
|
|
{
|
|
struct inode *inode = d_inode(dentry);
|
|
struct super_block *sb = inode->i_sb;
|
|
int err;
|
|
|
|
if (sb->s_flags & SB_RDONLY)
|
|
return -EROFS;
|
|
|
|
if (fa->flags & ~(FS_APPEND_FL | FS_IMMUTABLE_FL | FS_NODUMP_FL))
|
|
return -EOPNOTSUPP;
|
|
if (fileattr_has_fsx(fa))
|
|
return -EOPNOTSUPP;
|
|
|
|
lockdep_assert_held_write(&inode->i_rwsem);
|
|
|
|
err = apfs_transaction_start(sb, APFS_TRANS_REG);
|
|
if (err)
|
|
return err;
|
|
|
|
apfs_inode_join_transaction(sb, inode);
|
|
apfs_setflags(inode, fa->flags);
|
|
inode->i_ctime = current_time(inode);
|
|
|
|
err = apfs_transaction_commit(sb);
|
|
if (err)
|
|
apfs_transaction_abort(sb);
|
|
return err;
|
|
}
|
|
|
|
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0) */
|
|
|
|
/**
 * apfs_fileattr_get - Report an inode's flags through a fileattr structure
 * @dentry:	dentry of the affected inode
 * @fa:		structure to fill, in FS_IOC_GETFLAGS format
 *
 * Always returns 0.
 */
int apfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
	struct inode *inode = d_inode(dentry);

	fileattr_fill_flags(fa, apfs_getflags(inode));
	return 0;
}
|
|
|
|
int apfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa)
|
|
{
|
|
struct inode *inode = d_inode(dentry);
|
|
struct super_block *sb = inode->i_sb;
|
|
int err;
|
|
|
|
if (sb->s_flags & SB_RDONLY)
|
|
return -EROFS;
|
|
|
|
if (fa->flags & ~(FS_APPEND_FL | FS_IMMUTABLE_FL | FS_NODUMP_FL))
|
|
return -EOPNOTSUPP;
|
|
if (fileattr_has_fsx(fa))
|
|
return -EOPNOTSUPP;
|
|
|
|
lockdep_assert_held_write(&inode->i_rwsem);
|
|
|
|
err = apfs_transaction_start(sb, APFS_TRANS_REG);
|
|
if (err)
|
|
return err;
|
|
|
|
apfs_inode_join_transaction(sb, inode);
|
|
apfs_setflags(inode, fa->flags);
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0)
|
|
inode->i_ctime = current_time(inode);
|
|
#else
|
|
inode_set_ctime_current(inode);
|
|
#endif
|
|
|
|
err = apfs_transaction_commit(sb);
|
|
if (err)
|
|
apfs_transaction_abort(sb);
|
|
return err;
|
|
}
|
|
|
|
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0) */
|
|
|
|
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) */
|
|
|
|
long apfs_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
void __user *argp = (void __user *)arg;
|
|
|
|
switch (cmd) {
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0)
|
|
case FS_IOC_GETFLAGS:
|
|
return apfs_ioc_getflags(file, argp);
|
|
case FS_IOC_SETFLAGS:
|
|
return apfs_ioc_setflags(file, argp);
|
|
#endif
|
|
case APFS_IOC_SET_DFLT_PFK:
|
|
return apfs_ioc_set_dflt_pfk(file, argp);
|
|
case APFS_IOC_SET_DIR_CLASS:
|
|
return apfs_ioc_set_dir_class(file, argp);
|
|
case APFS_IOC_GET_CLASS:
|
|
return apfs_ioc_get_class(file, argp);
|
|
case APFS_IOC_TAKE_SNAPSHOT:
|
|
return apfs_ioc_take_snapshot(file, argp);
|
|
default:
|
|
return -ENOTTY;
|
|
}
|
|
}
|
|
|
|
long apfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
void __user *argp = (void __user *)arg;
|
|
|
|
switch (cmd) {
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0)
|
|
case FS_IOC_GETFLAGS:
|
|
return apfs_ioc_getflags(file, argp);
|
|
case FS_IOC_SETFLAGS:
|
|
return apfs_ioc_setflags(file, argp);
|
|
#endif
|
|
case APFS_IOC_SET_PFK:
|
|
return apfs_ioc_set_pfk(file, argp);
|
|
case APFS_IOC_GET_CLASS:
|
|
return apfs_ioc_get_class(file, argp);
|
|
case APFS_IOC_GET_PFK:
|
|
return apfs_ioc_get_pfk(file, argp);
|
|
default:
|
|
return -ENOTTY;
|
|
}
|
|
}
|