You've already forked linux-apfs-rw
mirror of
https://github.com/linux-apfs/linux-apfs-rw.git
synced 2026-05-01 15:01:34 -07:00
3174e363ad
If a function throws ENOSPC halfway through an operation, the
transaction will get aborted so that the filesystem isn't left in an
inconsistent state. This means that the mount will go read-only, and
that the user will lose any uncommitted changes.
So, it's better if we detect ENOSPC before starting an operation. I
tried this before with commit a52b73ed97 ("Check free space before
starting a transaction"), but the code was messy and it would have
required constant updates, which I neglected of course. The problem was
that I wanted to be as accurate as possible with my calculations of
required space, but I'm not sure there is any point.
Instead, require a large fixed amount of space (512 KiB) for most
operations, and a much smaller but still almost certainly sufficient
amount of space (80 KiB) for deletions. Hopefully this will be enough
for users to maneuver around ENOSPC situations. It's not perfect, but no
CoW filesystem can be truly perfect in this regard.
Signed-off-by: Ernesto A. Fernández <ernesto@corellium.com>
2375 lines
71 KiB
C
2375 lines
71 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (C) 2018 Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
|
|
*/
|
|
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/blk_types.h>
|
|
#include "apfs.h"
|
|
|
|
/**
|
|
* apfs_ext_is_hole - Does this extent represent a hole in a sparse file?
|
|
* @extent: the extent to check
|
|
*/
|
|
static inline bool apfs_ext_is_hole(struct apfs_file_extent *extent)
|
|
{
|
|
return extent->phys_block_num == 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_size_to_blocks - Return the block count for a given size, rounded up
|
|
* @sb: filesystem superblock
|
|
* @size: size in bytes
|
|
*
|
|
* TODO: reuse for inode.c
|
|
*/
|
|
static inline u64 apfs_size_to_blocks(struct super_block *sb, u64 size)
|
|
{
|
|
return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
|
|
}
|
|
|
|
/**
|
|
* apfs_extent_from_query - Read the extent found by a successful query
|
|
* @query: the query that found the record
|
|
* @extent: Return parameter. The extent found.
|
|
*
|
|
* Reads the extent record into @extent and performs some basic sanity checks
|
|
* as a protection against crafted filesystems. Returns 0 on success or
|
|
* -EFSCORRUPTED otherwise.
|
|
*/
|
|
int apfs_extent_from_query(struct apfs_query *query,
|
|
struct apfs_file_extent *extent)
|
|
{
|
|
struct super_block *sb = query->node->object.sb;
|
|
char *raw = query->node->object.data;
|
|
u64 ext_len;
|
|
|
|
if (!apfs_is_sealed(sb)) {
|
|
struct apfs_file_extent_val *ext = NULL;
|
|
struct apfs_file_extent_key *ext_key = NULL;
|
|
|
|
if (query->len != sizeof(*ext) || query->key_len != sizeof(*ext_key)) {
|
|
apfs_err(sb, "bad length of key (%d) or value (%d)", query->key_len, query->len);
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
ext = (struct apfs_file_extent_val *)(raw + query->off);
|
|
ext_key = (struct apfs_file_extent_key *)(raw + query->key_off);
|
|
ext_len = le64_to_cpu(ext->len_and_flags) & APFS_FILE_EXTENT_LEN_MASK;
|
|
|
|
extent->logical_addr = le64_to_cpu(ext_key->logical_addr);
|
|
extent->phys_block_num = le64_to_cpu(ext->phys_block_num);
|
|
extent->crypto_id = le64_to_cpu(ext->crypto_id);
|
|
} else {
|
|
struct apfs_fext_tree_val *fext_val = NULL;
|
|
struct apfs_fext_tree_key *fext_key = NULL;
|
|
|
|
if (query->len != sizeof(*fext_val) || query->key_len != sizeof(*fext_key)) {
|
|
apfs_err(sb, "bad length of sealed key (%d) or value (%d)", query->key_len, query->len);
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
fext_val = (struct apfs_fext_tree_val *)(raw + query->off);
|
|
fext_key = (struct apfs_fext_tree_key *)(raw + query->key_off);
|
|
ext_len = le64_to_cpu(fext_val->len_and_flags) & APFS_FILE_EXTENT_LEN_MASK;
|
|
|
|
extent->logical_addr = le64_to_cpu(fext_key->logical_addr);
|
|
extent->phys_block_num = le64_to_cpu(fext_val->phys_block_num);
|
|
extent->crypto_id = 0;
|
|
}
|
|
|
|
/* Extent length must be a multiple of the block size */
|
|
if (ext_len & (sb->s_blocksize - 1)) {
|
|
apfs_err(sb, "invalid length (0x%llx)", ext_len);
|
|
return -EFSCORRUPTED;
|
|
}
|
|
extent->len = ext_len;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_extent_read - Read the extent record that covers a block
|
|
* @dstream: data stream info
|
|
* @dsblock: logical number of the wanted block (must be in range)
|
|
* @extent: Return parameter. The extent found.
|
|
*
|
|
* Finds and caches the extent record. On success, returns a pointer to the
|
|
* cache record; on failure, returns an error code.
|
|
*/
|
|
static int apfs_extent_read(struct apfs_dstream_info *dstream, sector_t dsblock,
|
|
struct apfs_file_extent *extent)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_superblock *vsb_raw = sbi->s_vsb_raw;
|
|
struct apfs_key key;
|
|
struct apfs_query *query;
|
|
struct apfs_file_extent *cache = &dstream->ds_cached_ext;
|
|
u64 iaddr = dsblock << sb->s_blocksize_bits;
|
|
struct apfs_node *root = NULL;
|
|
int ret = 0;
|
|
|
|
spin_lock(&dstream->ds_ext_lock);
|
|
if (iaddr >= cache->logical_addr &&
|
|
iaddr < cache->logical_addr + cache->len) {
|
|
*extent = *cache;
|
|
spin_unlock(&dstream->ds_ext_lock);
|
|
return 0;
|
|
}
|
|
spin_unlock(&dstream->ds_ext_lock);
|
|
|
|
/* We will search for the extent that covers iblock */
|
|
if (!apfs_is_sealed(sb)) {
|
|
apfs_init_file_extent_key(dstream->ds_id, iaddr, &key);
|
|
root = sbi->s_cat_root;
|
|
} else {
|
|
apfs_init_fext_key(dstream->ds_id, iaddr, &key);
|
|
root = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_fext_tree_oid), APFS_OBJ_PHYSICAL, false /* write */);
|
|
if (IS_ERR(root)) {
|
|
apfs_err(sb, "failed to read fext root 0x%llx", le64_to_cpu(vsb_raw->apfs_fext_tree_oid));
|
|
return PTR_ERR(root);
|
|
}
|
|
}
|
|
|
|
query = apfs_alloc_query(root, NULL /* parent */);
|
|
if (!query) {
|
|
ret = -ENOMEM;
|
|
goto done;
|
|
}
|
|
query->key = key;
|
|
query->flags = apfs_is_sealed(sb) ? APFS_QUERY_FEXT : APFS_QUERY_CAT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret) {
|
|
apfs_err(sb, "query failed for id 0x%llx, addr 0x%llx", dstream->ds_id, iaddr);
|
|
if (ret == -ENODATA)
|
|
ret = -EFSCORRUPTED;
|
|
goto done;
|
|
}
|
|
|
|
ret = apfs_extent_from_query(query, extent);
|
|
if (ret) {
|
|
apfs_err(sb, "bad extent record for dstream 0x%llx", dstream->ds_id);
|
|
goto done;
|
|
}
|
|
if (iaddr < extent->logical_addr || iaddr >= extent->logical_addr + extent->len) {
|
|
apfs_err(sb, "no extent for addr 0x%llx in dstream 0x%llx", iaddr, dstream->ds_id);
|
|
ret = -EFSCORRUPTED;
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* For now prioritize the deferral of writes.
|
|
* i_extent_dirty is protected by the read semaphore.
|
|
*/
|
|
if (!dstream->ds_ext_dirty) {
|
|
spin_lock(&dstream->ds_ext_lock);
|
|
*cache = *extent;
|
|
spin_unlock(&dstream->ds_ext_lock);
|
|
}
|
|
|
|
done:
|
|
apfs_free_query(query);
|
|
if (apfs_is_sealed(sb))
|
|
apfs_node_free(root);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_logic_to_phys_bno - Find the physical block number for a dstream block
|
|
* @dstream: data stream info
|
|
* @dsblock: logical number of the wanted block
|
|
* @bno: on return, the physical block number (or zero for holes)
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
int apfs_logic_to_phys_bno(struct apfs_dstream_info *dstream, sector_t dsblock, u64 *bno)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_file_extent ext;
|
|
u64 blk_off;
|
|
int ret;
|
|
|
|
ret = apfs_extent_read(dstream, dsblock, &ext);
|
|
if (ret)
|
|
return ret;
|
|
|
|
if (apfs_ext_is_hole(&ext)) {
|
|
*bno = 0;
|
|
return 0;
|
|
}
|
|
|
|
/* Find the block offset of iblock within the extent */
|
|
blk_off = dsblock - (ext.logical_addr >> sb->s_blocksize_bits);
|
|
*bno = ext.phys_block_num + blk_off;
|
|
return 0;
|
|
}
|
|
|
|
/* This does the same as apfs_get_block(), but without taking any locks */
|
|
int __apfs_get_block(struct apfs_dstream_info *dstream, sector_t dsblock,
|
|
struct buffer_head *bh_result, int create)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_file_extent ext;
|
|
u64 blk_off, bno, map_len;
|
|
int ret;
|
|
|
|
ASSERT(!create);
|
|
|
|
if (dsblock >= apfs_size_to_blocks(sb, dstream->ds_size))
|
|
return 0;
|
|
|
|
ret = apfs_extent_read(dstream, dsblock, &ext);
|
|
if (ret) {
|
|
apfs_err(sb, "extent read failed");
|
|
return ret;
|
|
}
|
|
|
|
/* Find the block offset of iblock within the extent */
|
|
blk_off = dsblock - (ext.logical_addr >> sb->s_blocksize_bits);
|
|
|
|
/* Make sure we don't read past the extent boundaries */
|
|
map_len = ext.len - (blk_off << sb->s_blocksize_bits);
|
|
if (bh_result->b_size > map_len)
|
|
bh_result->b_size = map_len;
|
|
|
|
/*
|
|
* Save the requested mapping length as apfs_map_bh() replaces it with
|
|
* the filesystem block size
|
|
*/
|
|
map_len = bh_result->b_size;
|
|
/* Extents representing holes have block number 0 */
|
|
if (!apfs_ext_is_hole(&ext)) {
|
|
/* Find the block number of iblock within the disk */
|
|
bno = ext.phys_block_num + blk_off;
|
|
apfs_map_bh(bh_result, sb, bno);
|
|
}
|
|
bh_result->b_size = map_len;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Map block @iblock of @inode into @bh_result. Wraps __apfs_get_block() on the
 * inode's dstream while holding the container's big semaphore for reading.
 * Returns whatever __apfs_get_block() returns.
 */
int apfs_get_block(struct inode *inode, sector_t iblock,
		   struct buffer_head *bh_result, int create)
{
	struct apfs_nxsb_info *container = APFS_NXI(inode->i_sb);
	struct apfs_inode_info *info = APFS_I(inode);
	int err;

	down_read(&container->nx_big_sem);
	err = __apfs_get_block(&info->i_dstream, iblock, bh_result, create);
	up_read(&container->nx_big_sem);

	return err;
}
|
|
|
|
/**
|
|
* apfs_set_extent_length - Set a new length in an extent record's value
|
|
* @ext: the extent record's value
|
|
* @len: the new length
|
|
*
|
|
* Preserves the flags, though none are defined yet and I don't know if that
|
|
* will ever be important.
|
|
*/
|
|
static inline void apfs_set_extent_length(struct apfs_file_extent_val *ext, u64 len)
|
|
{
|
|
u64 len_and_flags = le64_to_cpu(ext->len_and_flags);
|
|
u64 flags = len_and_flags & APFS_FILE_EXTENT_FLAG_MASK;
|
|
|
|
ext->len_and_flags = cpu_to_le64(flags | len);
|
|
}
|
|
|
|
static int apfs_range_put_reference(struct super_block *sb, u64 paddr, u64 length);
|
|
|
|
/**
|
|
* apfs_shrink_extent_head - Shrink an extent record in its head
|
|
* @query: the query that found the record
|
|
* @dstream: data stream info
|
|
* @start: new logical start for the extent
|
|
*
|
|
* Also deletes the physical extent records for the head. Returns 0 on success
|
|
* or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_shrink_extent_head(struct apfs_query *query, struct apfs_dstream_info *dstream, u64 start)
|
|
{
|
|
struct super_block *sb = query->node->object.sb;
|
|
struct apfs_file_extent_key key;
|
|
struct apfs_file_extent_val val;
|
|
struct apfs_file_extent extent;
|
|
u64 new_len, head_len;
|
|
void *raw = NULL;
|
|
int err = 0;
|
|
|
|
err = apfs_extent_from_query(query, &extent);
|
|
if (err) {
|
|
apfs_err(sb, "bad extent record for dstream 0x%llx", dstream->ds_id);
|
|
return err;
|
|
}
|
|
raw = query->node->object.data;
|
|
key = *(struct apfs_file_extent_key *)(raw + query->key_off);
|
|
val = *(struct apfs_file_extent_val *)(raw + query->off);
|
|
|
|
new_len = extent.logical_addr + extent.len - start;
|
|
head_len = extent.len - new_len;
|
|
|
|
/* Delete the physical records for the blocks lost in the shrinkage */
|
|
if (!apfs_ext_is_hole(&extent)) {
|
|
err = apfs_range_put_reference(sb, extent.phys_block_num, head_len);
|
|
if (err) {
|
|
apfs_err(sb, "failed to put range 0x%llx-0x%llx", extent.phys_block_num, head_len);
|
|
return err;
|
|
}
|
|
} else {
|
|
dstream->ds_sparse_bytes -= head_len;
|
|
}
|
|
|
|
/* This is the actual shrinkage of the logical extent */
|
|
key.logical_addr = cpu_to_le64(start);
|
|
apfs_set_extent_length(&val, new_len);
|
|
if (!apfs_ext_is_hole(&extent))
|
|
le64_add_cpu(&val.phys_block_num, head_len >> sb->s_blocksize_bits);
|
|
return apfs_btree_replace(query, &key, sizeof(key), &val, sizeof(val));
|
|
}
|
|
|
|
/**
|
|
* apfs_shrink_extent_tail - Shrink an extent record in its tail
|
|
* @query: the query that found the record
|
|
* @dstream: data stream info
|
|
* @end: new logical end for the extent
|
|
*
|
|
* Also puts the physical extent records for the tail. Returns 0 on success or
|
|
* a negative error code in case of failure.
|
|
*/
|
|
static int apfs_shrink_extent_tail(struct apfs_query *query, struct apfs_dstream_info *dstream, u64 end)
|
|
{
|
|
struct super_block *sb = query->node->object.sb;
|
|
struct apfs_file_extent_val *val;
|
|
struct apfs_file_extent extent;
|
|
u64 new_len, new_blkcount, tail_len;
|
|
void *raw;
|
|
int err = 0;
|
|
|
|
ASSERT((end & (sb->s_blocksize - 1)) == 0);
|
|
|
|
err = apfs_query_join_transaction(query);
|
|
if (err) {
|
|
apfs_err(sb, "query join failed");
|
|
return err;
|
|
}
|
|
raw = query->node->object.data;
|
|
|
|
err = apfs_extent_from_query(query, &extent);
|
|
if (err) {
|
|
apfs_err(sb, "bad extent record for dstream 0x%llx", dstream->ds_id);
|
|
return err;
|
|
}
|
|
val = raw + query->off;
|
|
|
|
new_len = end - extent.logical_addr;
|
|
new_blkcount = new_len >> sb->s_blocksize_bits;
|
|
tail_len = extent.len - new_len;
|
|
|
|
/* Delete the physical records for the blocks lost in the shrinkage */
|
|
if (!apfs_ext_is_hole(&extent)) {
|
|
err = apfs_range_put_reference(sb, extent.phys_block_num + new_blkcount, tail_len);
|
|
if (err) {
|
|
apfs_err(sb, "failed to put range 0x%llx-0x%llx", extent.phys_block_num + new_blkcount, tail_len);
|
|
return err;
|
|
}
|
|
} else {
|
|
dstream->ds_sparse_bytes -= tail_len;
|
|
}
|
|
|
|
/* This is the actual shrinkage of the logical extent */
|
|
apfs_set_extent_length(val, new_len);
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* apfs_query_found_extent - Did this query find an extent with the right id?
|
|
* @query: the (successful) query that found the record
|
|
*/
|
|
static inline bool apfs_query_found_extent(struct apfs_query *query)
|
|
{
|
|
void *raw = query->node->object.data;
|
|
struct apfs_key_header *hdr;
|
|
|
|
if (query->key_len < sizeof(*hdr))
|
|
return false;
|
|
hdr = raw + query->key_off;
|
|
|
|
if (apfs_cat_type(hdr) != APFS_TYPE_FILE_EXTENT)
|
|
return false;
|
|
if (apfs_cat_cnid(hdr) != query->key.id)
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* apfs_update_tail_extent - Grow the tail extent for a data stream
|
|
* @dstream: data stream info
|
|
* @extent: new in-memory extent
|
|
*
|
|
* Also takes care of any needed changes to the physical extent records. Returns
|
|
* 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_update_tail_extent(struct apfs_dstream_info *dstream, const struct apfs_file_extent *extent)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query;
|
|
struct apfs_file_extent_key raw_key;
|
|
struct apfs_file_extent_val raw_val;
|
|
u64 extent_id = dstream->ds_id;
|
|
int ret;
|
|
u64 new_crypto;
|
|
|
|
apfs_key_set_hdr(APFS_TYPE_FILE_EXTENT, extent_id, &raw_key);
|
|
raw_key.logical_addr = cpu_to_le64(extent->logical_addr);
|
|
raw_val.len_and_flags = cpu_to_le64(extent->len);
|
|
raw_val.phys_block_num = cpu_to_le64(extent->phys_block_num);
|
|
if (apfs_vol_is_encrypted(sb))
|
|
new_crypto = extent_id;
|
|
else
|
|
new_crypto = 0;
|
|
raw_val.crypto_id = cpu_to_le64(new_crypto);
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
/* We want the last extent record */
|
|
apfs_init_file_extent_key(extent_id, -1, &query->key);
|
|
query->flags = APFS_QUERY_CAT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for last extent of id 0x%llx", extent_id);
|
|
goto out;
|
|
}
|
|
|
|
if (ret == -ENODATA || !apfs_query_found_extent(query)) {
|
|
/* We are creating the first extent for the file */
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret) {
|
|
apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
|
|
goto out;
|
|
}
|
|
} else {
|
|
struct apfs_file_extent tail;
|
|
|
|
ret = apfs_extent_from_query(query, &tail);
|
|
if (ret) {
|
|
apfs_err(sb, "bad extent record for dstream 0x%llx", dstream->ds_id);
|
|
goto out;
|
|
}
|
|
|
|
if (tail.logical_addr > extent->logical_addr) {
|
|
apfs_alert(sb, "extent is not tail - bug!");
|
|
ret = -EOPNOTSUPP;
|
|
goto out;
|
|
} else if (tail.logical_addr == extent->logical_addr) {
|
|
ret = apfs_btree_replace(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret) {
|
|
apfs_err(sb, "update failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
|
|
goto out;
|
|
}
|
|
if (apfs_ext_is_hole(&tail)) {
|
|
dstream->ds_sparse_bytes -= tail.len;
|
|
} else if (tail.phys_block_num != extent->phys_block_num) {
|
|
ret = apfs_range_put_reference(sb, tail.phys_block_num, tail.len);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to put range 0x%llx-0x%llx", tail.phys_block_num, tail.len);
|
|
goto out;
|
|
}
|
|
}
|
|
if (new_crypto == tail.crypto_id)
|
|
goto out;
|
|
ret = apfs_crypto_adj_refcnt(sb, tail.crypto_id, -1);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to put crypto id 0x%llx", tail.crypto_id);
|
|
goto out;
|
|
}
|
|
} else {
|
|
/*
|
|
* TODO: we could actually also continue the tail extent
|
|
* if it's right next to the new one (both logically and
|
|
* physically), even if they don't overlap. Or maybe we
|
|
* should always make sure that the tail extent is in
|
|
* the cache before a write...
|
|
*/
|
|
if (extent->logical_addr < tail.logical_addr + tail.len) {
|
|
ret = apfs_shrink_extent_tail(query, dstream, extent->logical_addr);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to shrink tail of dstream 0x%llx", extent_id);
|
|
goto out;
|
|
}
|
|
}
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret) {
|
|
apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
|
|
goto out;
|
|
}
|
|
}
|
|
}
|
|
|
|
ret = apfs_crypto_adj_refcnt(sb, new_crypto, 1);
|
|
if (ret)
|
|
apfs_err(sb, "failed to take crypto id 0x%llx", new_crypto);
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_split_extent - Break an extent in two
|
|
* @query: query pointing to the extent
|
|
* @div: logical address for the division
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_split_extent(struct apfs_query *query, u64 div)
|
|
{
|
|
struct super_block *sb = query->node->object.sb;
|
|
struct apfs_file_extent_val *val1;
|
|
struct apfs_file_extent_key key2;
|
|
struct apfs_file_extent_val val2;
|
|
struct apfs_file_extent extent;
|
|
u64 len1, len2, blkcount1;
|
|
void *raw;
|
|
int err = 0;
|
|
|
|
err = apfs_query_join_transaction(query);
|
|
if (err) {
|
|
apfs_err(sb, "query join failed");
|
|
return err;
|
|
}
|
|
raw = query->node->object.data;
|
|
|
|
err = apfs_extent_from_query(query, &extent);
|
|
if (err) {
|
|
apfs_err(sb, "bad extent record");
|
|
return err;
|
|
}
|
|
val1 = raw + query->off;
|
|
val2 = *(struct apfs_file_extent_val *)(raw + query->off);
|
|
key2 = *(struct apfs_file_extent_key *)(raw + query->key_off);
|
|
|
|
len1 = div - extent.logical_addr;
|
|
blkcount1 = len1 >> sb->s_blocksize_bits;
|
|
len2 = extent.len - len1;
|
|
|
|
/* Modify the current extent in place to become the first half */
|
|
apfs_set_extent_length(val1, len1);
|
|
|
|
/* Insert the second half right after the first */
|
|
key2.logical_addr = cpu_to_le64(div);
|
|
if (!apfs_ext_is_hole(&extent))
|
|
val2.phys_block_num = cpu_to_le64(extent.phys_block_num + blkcount1);
|
|
apfs_set_extent_length(&val2, len2);
|
|
err = apfs_btree_insert(query, &key2, sizeof(key2), &val2, sizeof(val2));
|
|
if (err) {
|
|
apfs_err(sb, "insertion failed in division 0x%llx", div);
|
|
return err;
|
|
}
|
|
|
|
return apfs_crypto_adj_refcnt(sb, extent.crypto_id, 1);
|
|
}
|
|
|
|
/**
|
|
* apfs_update_mid_extent - Create or update a non-tail extent for a dstream
|
|
* @dstream: data stream info
|
|
* @extent: new in-memory extent
|
|
*
|
|
* Also takes care of any needed changes to the physical extent records. Returns
|
|
* 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_update_mid_extent(struct apfs_dstream_info *dstream, const struct apfs_file_extent *extent)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_key key;
|
|
struct apfs_query *query;
|
|
struct apfs_file_extent_key raw_key;
|
|
struct apfs_file_extent_val raw_val;
|
|
struct apfs_file_extent prev_ext;
|
|
u64 extent_id = dstream->ds_id;
|
|
u64 prev_crypto, new_crypto;
|
|
u64 prev_start, prev_end;
|
|
bool second_run = false;
|
|
int ret;
|
|
|
|
apfs_key_set_hdr(APFS_TYPE_FILE_EXTENT, extent_id, &raw_key);
|
|
raw_key.logical_addr = cpu_to_le64(extent->logical_addr);
|
|
raw_val.len_and_flags = cpu_to_le64(extent->len);
|
|
raw_val.phys_block_num = cpu_to_le64(extent->phys_block_num);
|
|
if (apfs_vol_is_encrypted(sb))
|
|
new_crypto = extent_id;
|
|
else
|
|
new_crypto = 0;
|
|
raw_val.crypto_id = cpu_to_le64(new_crypto);
|
|
|
|
apfs_init_file_extent_key(extent_id, extent->logical_addr, &key);
|
|
|
|
search_and_insert:
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
query->key = key;
|
|
query->flags = APFS_QUERY_CAT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
|
|
goto out;
|
|
}
|
|
|
|
if (ret == -ENODATA || !apfs_query_found_extent(query)) {
|
|
/*
|
|
* The new extent goes in a hole we just made, right at the
|
|
* beginning of the file.
|
|
*/
|
|
if (!second_run) {
|
|
apfs_err(sb, "missing extent on dstream 0x%llx", extent_id);
|
|
ret = -EFSCORRUPTED;
|
|
} else {
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret)
|
|
apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
if (apfs_extent_from_query(query, &prev_ext)) {
|
|
apfs_err(sb, "bad mid extent record on dstream 0x%llx", extent_id);
|
|
ret = -EFSCORRUPTED;
|
|
goto out;
|
|
}
|
|
prev_crypto = prev_ext.crypto_id;
|
|
prev_start = prev_ext.logical_addr;
|
|
prev_end = prev_ext.logical_addr + prev_ext.len;
|
|
|
|
if (prev_end == extent->logical_addr && second_run) {
|
|
/* The new extent goes in the hole we just made */
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret) {
|
|
apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
|
|
goto out;
|
|
}
|
|
} else if (prev_start == extent->logical_addr && prev_ext.len == extent->len) {
|
|
/* The old and new extents are the same logical block */
|
|
ret = apfs_btree_replace(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret) {
|
|
apfs_err(sb, "update failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
|
|
goto out;
|
|
}
|
|
if (apfs_ext_is_hole(&prev_ext)) {
|
|
dstream->ds_sparse_bytes -= prev_ext.len;
|
|
} else if (prev_ext.phys_block_num != extent->phys_block_num) {
|
|
ret = apfs_range_put_reference(sb, prev_ext.phys_block_num, prev_ext.len);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to put range 0x%llx-0x%llx", prev_ext.phys_block_num, prev_ext.len);
|
|
goto out;
|
|
}
|
|
}
|
|
ret = apfs_crypto_adj_refcnt(sb, prev_crypto, -1);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to put crypto id 0x%llx", prev_crypto);
|
|
goto out;
|
|
}
|
|
} else if (prev_start == extent->logical_addr) {
|
|
/* The new extent is the first logical block of the old one */
|
|
if (second_run) {
|
|
/* I don't know if this is possible, but be safe */
|
|
apfs_alert(sb, "recursion shrinking extent head for dstream 0x%llx", extent_id);
|
|
ret = -EFSCORRUPTED;
|
|
goto out;
|
|
}
|
|
ret = apfs_shrink_extent_head(query, dstream, extent->logical_addr + extent->len);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to shrink extent in dstream 0x%llx", extent_id);
|
|
goto out;
|
|
}
|
|
/* The query should point to the previous record, start again */
|
|
apfs_free_query(query);
|
|
second_run = true;
|
|
goto search_and_insert;
|
|
} else if (prev_end == extent->logical_addr + extent->len) {
|
|
/* The new extent is the last logical block of the old one */
|
|
ret = apfs_shrink_extent_tail(query, dstream, extent->logical_addr);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to shrink extent in dstream 0x%llx", extent_id);
|
|
goto out;
|
|
}
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret) {
|
|
apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
|
|
goto out;
|
|
}
|
|
} else if (prev_start < extent->logical_addr && prev_end > extent->logical_addr + extent->len) {
|
|
/* The new extent is logically in the middle of the old one */
|
|
if (second_run) {
|
|
/* I don't know if this is possible, but be safe */
|
|
apfs_alert(sb, "recursion when splitting extents for dstream 0x%llx", extent_id);
|
|
ret = -EFSCORRUPTED;
|
|
goto out;
|
|
}
|
|
ret = apfs_split_extent(query, extent->logical_addr + extent->len);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to split extent in dstream 0x%llx", extent_id);
|
|
goto out;
|
|
}
|
|
/* The split may make the query invalid */
|
|
apfs_free_query(query);
|
|
second_run = true;
|
|
goto search_and_insert;
|
|
} else {
|
|
/* I don't know what this is, be safe */
|
|
apfs_alert(sb, "strange extents for dstream 0x%llx", extent_id);
|
|
ret = -EFSCORRUPTED;
|
|
goto out;
|
|
}
|
|
|
|
ret = apfs_crypto_adj_refcnt(sb, new_crypto, 1);
|
|
if (ret)
|
|
apfs_err(sb, "failed to take crypto id 0x%llx", new_crypto);
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_update_extent - Create or update the extent record for an extent
|
|
* @dstream: data stream info
|
|
* @extent: new in-memory file extent
|
|
*
|
|
* The @extent must either be a new tail for the dstream, or a single block.
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_update_extent(struct apfs_dstream_info *dstream, const struct apfs_file_extent *extent)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
|
|
if (extent->logical_addr + extent->len >= dstream->ds_size)
|
|
return apfs_update_tail_extent(dstream, extent);
|
|
if (extent->len > sb->s_blocksize) {
|
|
apfs_err(sb, "can't create mid extents of length 0x%llx", extent->len);
|
|
return -EOPNOTSUPP;
|
|
}
|
|
return apfs_update_mid_extent(dstream, extent);
|
|
}
|
|
|
|
/*
 * Replace the physical extent record found by @query with one that starts at
 * @bno, spans @blkcnt blocks, is owned by @dstream_id, is of kind
 * APFS_KIND_NEW and has a reference count of one. Returns the result of the
 * replacement.
 */
static int apfs_extend_phys_extent(struct apfs_query *query, u64 bno, u64 blkcnt, u64 dstream_id)
{
	u64 kind_bits = (u64)APFS_KIND_NEW << APFS_PEXT_KIND_SHIFT;
	struct apfs_phys_ext_key pext_key;
	struct apfs_phys_ext_val pext_val;

	apfs_key_set_hdr(APFS_TYPE_EXTENT, bno, &pext_key);

	pext_val.refcnt = cpu_to_le32(1);
	pext_val.owning_obj_id = cpu_to_le64(dstream_id);
	pext_val.len_and_kind = cpu_to_le64(kind_bits | blkcnt);

	return apfs_btree_replace(query, &pext_key, sizeof(pext_key), &pext_val, sizeof(pext_val));
}
|
|
|
|
/*
 * Insert, at the position given by @query, a physical extent record that
 * starts at @bno, spans @blkcnt blocks, is owned by @dstream_id, is of kind
 * APFS_KIND_NEW and has a reference count of one. Returns the result of the
 * insertion.
 */
static int apfs_insert_new_phys_extent(struct apfs_query *query, u64 bno, u64 blkcnt, u64 dstream_id)
{
	u64 kind_bits = (u64)APFS_KIND_NEW << APFS_PEXT_KIND_SHIFT;
	struct apfs_phys_ext_key pext_key;
	struct apfs_phys_ext_val pext_val;

	apfs_key_set_hdr(APFS_TYPE_EXTENT, bno, &pext_key);

	pext_val.refcnt = cpu_to_le32(1);
	pext_val.owning_obj_id = cpu_to_le64(dstream_id);
	pext_val.len_and_kind = cpu_to_le64(kind_bits | blkcnt);

	return apfs_btree_insert(query, &pext_key, sizeof(pext_key), &pext_val, sizeof(pext_val));
}
|
|
|
|
static int apfs_phys_ext_from_query(struct apfs_query *query, struct apfs_phys_extent *pext);
|
|
|
|
/**
|
|
* apfs_insert_phys_extent - Create or grow the physical record for an extent
|
|
* @dstream: data stream info for the extent
|
|
* @extent: new in-memory file extent
|
|
*
|
|
* Only works for appending to extents, for now. TODO: reference counting.
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_insert_phys_extent(struct apfs_dstream_info *dstream, const struct apfs_file_extent *extent)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw;
|
|
struct apfs_node *extref_root;
|
|
struct apfs_query *query = NULL;
|
|
struct apfs_phys_extent pext;
|
|
u64 blkcnt = extent->len >> sb->s_blocksize_bits;
|
|
u64 last_bno, new_base, new_blkcnt;
|
|
int ret;
|
|
|
|
extref_root = apfs_read_node(sb,
|
|
le64_to_cpu(vsb_raw->apfs_extentref_tree_oid),
|
|
APFS_OBJ_PHYSICAL, true /* write */);
|
|
if (IS_ERR(extref_root)) {
|
|
apfs_err(sb, "failed to read extref root 0x%llx", le64_to_cpu(vsb_raw->apfs_extentref_tree_oid));
|
|
return PTR_ERR(extref_root);
|
|
}
|
|
apfs_assert_in_transaction(sb, &vsb_raw->apfs_o);
|
|
vsb_raw->apfs_extentref_tree_oid = cpu_to_le64(extref_root->object.oid);
|
|
|
|
query = apfs_alloc_query(extref_root, NULL /* parent */);
|
|
if (!query) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* The cached logical extent may have been split into multiple physical
|
|
* extents because of clones. If that happens, we want to grow the last
|
|
* one.
|
|
*/
|
|
last_bno = extent->phys_block_num + blkcnt - 1;
|
|
apfs_init_extent_key(last_bno, &query->key);
|
|
query->flags = APFS_QUERY_EXTENTREF;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for paddr 0x%llx", last_bno);
|
|
goto out;
|
|
}
|
|
|
|
if (ret == -ENODATA) {
|
|
/* This is a fresh new physical extent */
|
|
ret = apfs_insert_new_phys_extent(query, extent->phys_block_num, blkcnt, dstream->ds_id);
|
|
if (ret)
|
|
apfs_err(sb, "insertion failed for paddr 0x%llx", extent->phys_block_num);
|
|
goto out;
|
|
}
|
|
|
|
ret = apfs_phys_ext_from_query(query, &pext);
|
|
if (ret) {
|
|
apfs_err(sb, "bad pext record for bno 0x%llx", last_bno);
|
|
goto out;
|
|
}
|
|
if (pext.bno + pext.blkcount <= extent->phys_block_num) {
|
|
/* Also a fresh new physical extent */
|
|
ret = apfs_insert_new_phys_extent(query, extent->phys_block_num, blkcnt, dstream->ds_id);
|
|
if (ret)
|
|
apfs_err(sb, "insertion failed for paddr 0x%llx", extent->phys_block_num);
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* There is an existing physical extent that overlaps the new one. The
|
|
* cache was dirty, so the existing extent can't cover the whole tail.
|
|
*/
|
|
if (pext.bno + pext.blkcount >= extent->phys_block_num + blkcnt) {
|
|
apfs_err(sb, "dirty cache tail covered by existing physical extent 0x%llx-0x%llx", pext.bno, pext.blkcount);
|
|
ret = -EFSCORRUPTED;
|
|
goto out;
|
|
}
|
|
if (pext.refcnt == 1) {
|
|
new_base = pext.bno;
|
|
new_blkcnt = extent->phys_block_num + blkcnt - new_base;
|
|
ret = apfs_extend_phys_extent(query, new_base, new_blkcnt, dstream->ds_id);
|
|
if (ret)
|
|
apfs_err(sb, "update failed for paddr 0x%llx", new_base);
|
|
} else {
|
|
/*
|
|
* We can't extend this one, because it would extend the other
|
|
* references as well.
|
|
*/
|
|
new_base = pext.bno + pext.blkcount;
|
|
new_blkcnt = extent->phys_block_num + blkcnt - new_base;
|
|
ret = apfs_insert_new_phys_extent(query, new_base, new_blkcnt, dstream->ds_id);
|
|
if (ret)
|
|
apfs_err(sb, "insertion failed for paddr 0x%llx", new_base);
|
|
}
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
apfs_node_free(extref_root);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_phys_ext_from_query - Read the physical extent record found by a query
|
|
* @query: the (successful) query that found the record
|
|
* @pext: on return, the physical extent read
|
|
*
|
|
* Reads the physical extent record into @pext and performs some basic sanity
|
|
* checks as a protection against crafted filesystems. Returns 0 on success or
|
|
* -EFSCORRUPTED otherwise.
|
|
*/
|
|
static int apfs_phys_ext_from_query(struct apfs_query *query, struct apfs_phys_extent *pext)
|
|
{
|
|
struct super_block *sb = query->node->object.sb;
|
|
struct apfs_phys_ext_key *key;
|
|
struct apfs_phys_ext_val *val;
|
|
char *raw = query->node->object.data;
|
|
|
|
if (query->len != sizeof(*val) || query->key_len != sizeof(*key)) {
|
|
apfs_err(sb, "bad length of key (%d) or value (%d)", query->key_len, query->len);
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
key = (struct apfs_phys_ext_key *)(raw + query->key_off);
|
|
val = (struct apfs_phys_ext_val *)(raw + query->off);
|
|
|
|
pext->bno = apfs_cat_cnid(&key->hdr);
|
|
pext->blkcount = le64_to_cpu(val->len_and_kind) & APFS_PEXT_LEN_MASK;
|
|
pext->len = pext->blkcount << sb->s_blocksize_bits;
|
|
pext->refcnt = le32_to_cpu(val->refcnt);
|
|
pext->kind = le64_to_cpu(val->len_and_kind) >> APFS_PEXT_KIND_SHIFT;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_free_phys_ext - Add all blocks in a physical extent to the free queue
|
|
* @sb: superblock structure
|
|
* @pext: physical range to free
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_free_phys_ext(struct super_block *sb, struct apfs_phys_extent *pext)
|
|
{
|
|
struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw;
|
|
|
|
apfs_assert_in_transaction(sb, &vsb_raw->apfs_o);
|
|
le64_add_cpu(&vsb_raw->apfs_fs_alloc_count, -pext->blkcount);
|
|
le64_add_cpu(&vsb_raw->apfs_total_blocks_freed, pext->blkcount);
|
|
|
|
return apfs_free_queue_insert(sb, pext->bno, pext->blkcount);
|
|
}
|
|
|
|
/**
|
|
* apfs_put_phys_extent - Reduce the reference count for a physical extent
|
|
* @pext: physical extent data, already read
|
|
* @query: query that found the extent
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_put_phys_extent(struct apfs_phys_extent *pext, struct apfs_query *query)
|
|
{
|
|
struct super_block *sb = query->node->object.sb;
|
|
struct apfs_phys_ext_val *val;
|
|
void *raw;
|
|
int err;
|
|
|
|
if (--pext->refcnt == 0) {
|
|
err = apfs_btree_remove(query);
|
|
if (err) {
|
|
apfs_err(sb, "removal failed for paddr 0x%llx", pext->bno);
|
|
return err;
|
|
}
|
|
return pext->kind == APFS_KIND_NEW ? apfs_free_phys_ext(sb, pext) : 0;
|
|
}
|
|
|
|
err = apfs_query_join_transaction(query);
|
|
if (err) {
|
|
apfs_err(sb, "query join failed");
|
|
return err;
|
|
}
|
|
raw = query->node->object.data;
|
|
val = raw + query->off;
|
|
val->refcnt = cpu_to_le32(pext->refcnt);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_take_phys_extent - Increase the reference count for a physical extent
|
|
* @pext: physical extent data, already read
|
|
* @query: query that found the extent
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_take_phys_extent(struct apfs_phys_extent *pext, struct apfs_query *query)
|
|
{
|
|
struct super_block *sb = query->node->object.sb;
|
|
struct apfs_phys_ext_val *val;
|
|
void *raw;
|
|
int err;
|
|
|
|
/* An update extent may be dropped when a reference is taken */
|
|
if (++pext->refcnt == 0)
|
|
return apfs_btree_remove(query);
|
|
|
|
err = apfs_query_join_transaction(query);
|
|
if (err) {
|
|
apfs_err(sb, "query join failed");
|
|
return err;
|
|
}
|
|
raw = query->node->object.data;
|
|
val = raw + query->off;
|
|
val->refcnt = cpu_to_le32(pext->refcnt);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_set_phys_ext_length - Set new length in a physical extent record's value
|
|
* @pext: the physical extent record's value
|
|
* @len: the new length (in blocks)
|
|
*
|
|
* Preserves the kind, though I doubt that's the right thing to do in general.
|
|
*/
|
|
static inline void apfs_set_phys_ext_length(struct apfs_phys_ext_val *pext, u64 len)
|
|
{
|
|
u64 len_and_kind = le64_to_cpu(pext->len_and_kind);
|
|
u64 kind = len_and_kind & APFS_PEXT_KIND_MASK;
|
|
|
|
pext->len_and_kind = cpu_to_le64(kind | len);
|
|
}
|
|
|
|
/**
|
|
* apfs_split_phys_ext - Break a physical extent in two
|
|
* @query: query pointing to the extent
|
|
* @div: first physical block number to come after the division
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_split_phys_ext(struct apfs_query *query, u64 div)
|
|
{
|
|
struct super_block *sb = query->node->object.sb;
|
|
struct apfs_phys_ext_val *val1;
|
|
struct apfs_phys_ext_key key2;
|
|
struct apfs_phys_ext_val val2;
|
|
struct apfs_phys_extent pextent;
|
|
u64 blkcount1, blkcount2;
|
|
void *raw;
|
|
int err = 0;
|
|
|
|
err = apfs_query_join_transaction(query);
|
|
if (err) {
|
|
apfs_err(sb, "query join failed");
|
|
return err;
|
|
}
|
|
raw = query->node->object.data;
|
|
|
|
err = apfs_phys_ext_from_query(query, &pextent);
|
|
if (err) {
|
|
apfs_err(sb, "bad pext record over div 0x%llx", div);
|
|
return err;
|
|
}
|
|
val1 = raw + query->off;
|
|
val2 = *(struct apfs_phys_ext_val *)(raw + query->off);
|
|
key2 = *(struct apfs_phys_ext_key *)(raw + query->key_off);
|
|
|
|
blkcount1 = div - pextent.bno;
|
|
blkcount2 = pextent.blkcount - blkcount1;
|
|
|
|
/* Modify the current extent in place to become the first half */
|
|
apfs_set_phys_ext_length(val1, blkcount1);
|
|
|
|
/* Insert the second half right after the first */
|
|
apfs_key_set_hdr(APFS_TYPE_EXTENT, div, &key2);
|
|
apfs_set_phys_ext_length(&val2, blkcount2);
|
|
return apfs_btree_insert(query, &key2, sizeof(key2), &val2, sizeof(val2));
|
|
}
|
|
|
|
/**
|
|
* apfs_create_update_pext - Create a reference update physical extent record
|
|
* @query: query that searched for the physical extent
|
|
* @extent: range of physical blocks to update
|
|
* @diff: reference count change
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_create_update_pext(struct apfs_query *query, const struct apfs_file_extent *extent, u32 diff)
|
|
{
|
|
struct super_block *sb = query->node->object.sb;
|
|
struct apfs_phys_ext_key key = {0};
|
|
struct apfs_phys_ext_val val = {0};
|
|
|
|
apfs_key_set_hdr(APFS_TYPE_EXTENT, extent->phys_block_num, &key);
|
|
val.len_and_kind = cpu_to_le64((u64)APFS_KIND_UPDATE << APFS_PEXT_KIND_SHIFT | extent->len >> sb->s_blocksize_bits);
|
|
val.owning_obj_id = cpu_to_le64(APFS_OWNING_OBJ_ID_INVALID);
|
|
val.refcnt = cpu_to_le32(diff);
|
|
return apfs_btree_insert(query, &key, sizeof(key), &val, sizeof(val));
|
|
}
|
|
|
|
/**
|
|
* apfs_dstream_cache_is_tail - Is the tail of this dstream in its extent cache?
|
|
* @dstream: dstream to check
|
|
*/
|
|
static inline bool apfs_dstream_cache_is_tail(struct apfs_dstream_info *dstream)
|
|
{
|
|
struct apfs_file_extent *cache = &dstream->ds_cached_ext;
|
|
|
|
/* nx_big_sem provides the locking for the cache here */
|
|
return cache->len && (dstream->ds_size <= cache->logical_addr + cache->len);
|
|
}
|
|
|
|
/**
|
|
* apfs_flush_extent_cache - Write the cached extent to the catalog, if dirty
|
|
* @dstream: data stream to flush
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
int apfs_flush_extent_cache(struct apfs_dstream_info *dstream)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_file_extent *ext = &dstream->ds_cached_ext;
|
|
int err;
|
|
|
|
if (!dstream->ds_ext_dirty)
|
|
return 0;
|
|
ASSERT(ext->len > 0);
|
|
|
|
err = apfs_update_extent(dstream, ext);
|
|
if (err) {
|
|
apfs_err(sb, "extent update failed");
|
|
return err;
|
|
}
|
|
err = apfs_insert_phys_extent(dstream, ext);
|
|
if (err) {
|
|
apfs_err(sb, "pext insertion failed");
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* TODO: keep track of the byte and block count through the use of
|
|
* inode_add_bytes() and inode_set_bytes(). This hasn't been done with
|
|
* care in the rest of the module and it doesn't seem to matter beyond
|
|
* stat(), so I'm ignoring it for now.
|
|
*/
|
|
|
|
dstream->ds_ext_dirty = false;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_create_hole - Create and insert a hole extent for the dstream
|
|
* @dstream: data stream info
|
|
* @start: first logical block number for the hole
|
|
* @end: first logical block number right after the hole
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
* TODO: what happens to the crypto refcount?
|
|
*/
|
|
static int apfs_create_hole(struct apfs_dstream_info *dstream, u64 start, u64 end)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query;
|
|
struct apfs_file_extent_key raw_key;
|
|
struct apfs_file_extent_val raw_val;
|
|
u64 extent_id = dstream->ds_id;
|
|
int ret;
|
|
|
|
if (start == end)
|
|
return 0;
|
|
|
|
/* File extent records use addresses, not block numbers */
|
|
start <<= sb->s_blocksize_bits;
|
|
end <<= sb->s_blocksize_bits;
|
|
|
|
apfs_key_set_hdr(APFS_TYPE_FILE_EXTENT, extent_id, &raw_key);
|
|
raw_key.logical_addr = cpu_to_le64(start);
|
|
raw_val.len_and_flags = cpu_to_le64(end - start);
|
|
raw_val.phys_block_num = cpu_to_le64(0); /* It's a hole... */
|
|
raw_val.crypto_id = cpu_to_le64(apfs_vol_is_encrypted(sb) ? extent_id : 0);
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_file_extent_key(extent_id, start, &query->key);
|
|
query->flags = APFS_QUERY_CAT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for id 0x%llx, addr 0x%llx", extent_id, start);
|
|
goto out;
|
|
}
|
|
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret)
|
|
apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, start);
|
|
dstream->ds_sparse_bytes += end - start;
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_zero_dstream_tail - Zero out stale bytes in a data stream's last block
|
|
* @dstream: data stream info
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_zero_dstream_tail(struct apfs_dstream_info *dstream)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct inode *inode = NULL;
|
|
struct page *page = NULL;
|
|
void *fsdata = NULL;
|
|
int valid_length;
|
|
int err;
|
|
|
|
/* No stale bytes if no actual content */
|
|
if (dstream->ds_size <= dstream->ds_sparse_bytes)
|
|
return 0;
|
|
|
|
/* No stale tail if the last block is fully used */
|
|
valid_length = dstream->ds_size & (sb->s_blocksize - 1);
|
|
if (valid_length == 0)
|
|
return 0;
|
|
|
|
inode = dstream->ds_inode;
|
|
if (!inode) {
|
|
/* This should never happen, but be safe */
|
|
apfs_alert(sb, "attempt to zero the tail of xattr dstream 0x%llx", dstream->ds_id);
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
/* This will take care of the CoW and zeroing */
|
|
err = __apfs_write_begin(NULL, inode->i_mapping, inode->i_size, 0, 0, &page, &fsdata);
|
|
if (err)
|
|
return err;
|
|
return __apfs_write_end(NULL, inode->i_mapping, inode->i_size, 0, 0, page, fsdata);
|
|
}
|
|
|
|
/**
|
|
* apfs_zero_bh_tail - Zero out stale bytes in a buffer head
|
|
* @sb: filesystem superblock
|
|
* @bh: buffer head to zero
|
|
* @length: length of valid bytes to be left alone
|
|
*/
|
|
static void apfs_zero_bh_tail(struct super_block *sb, struct buffer_head *bh, u64 length)
|
|
{
|
|
ASSERT(buffer_trans(bh));
|
|
if (length < sb->s_blocksize)
|
|
memset(bh->b_data + length, 0, sb->s_blocksize - length);
|
|
}
|
|
|
|
/**
|
|
* apfs_range_in_snap - Check if a given block range overlaps a snapshot
|
|
* @sb: filesystem superblock
|
|
* @bno: first block in the range
|
|
* @blkcnt: block count for the range
|
|
* @in_snap: on return, the result
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_range_in_snap(struct super_block *sb, u64 bno, u64 blkcnt, bool *in_snap)
|
|
{
|
|
struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw;
|
|
struct apfs_node *extref_root = NULL;
|
|
struct apfs_query *query = NULL;
|
|
struct apfs_phys_extent pext = {0};
|
|
int ret;
|
|
|
|
/* Avoid the tree queries when we don't even have snapshots */
|
|
if (vsb_raw->apfs_num_snapshots == 0) {
|
|
*in_snap = false;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Now check if the current physical extent tree has an entry for
|
|
* these blocks
|
|
*/
|
|
extref_root = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_extentref_tree_oid), APFS_OBJ_PHYSICAL, false /* write */);
|
|
if (IS_ERR(extref_root)) {
|
|
apfs_err(sb, "failed to read extref root 0x%llx", le64_to_cpu(vsb_raw->apfs_extentref_tree_oid));
|
|
return PTR_ERR(extref_root);
|
|
}
|
|
|
|
query = apfs_alloc_query(extref_root, NULL /* parent */);
|
|
if (!query) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
apfs_init_extent_key(bno, &query->key);
|
|
query->flags = APFS_QUERY_EXTENTREF;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for paddr 0x%llx", bno);
|
|
goto out;
|
|
}
|
|
if (ret == -ENODATA) {
|
|
*in_snap = false;
|
|
ret = 0;
|
|
goto out;
|
|
}
|
|
|
|
ret = apfs_phys_ext_from_query(query, &pext);
|
|
if (ret) {
|
|
apfs_err(sb, "bad pext record for paddr 0x%llx", bno);
|
|
goto out;
|
|
}
|
|
|
|
if (pext.bno <= bno && pext.bno + pext.blkcount >= bno + blkcnt) {
|
|
if (pext.kind == APFS_KIND_NEW) {
|
|
*in_snap = false;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* I think the file extent could still be covered by two different
|
|
* physical extents from the current tree, but it's easier to just
|
|
* assume the worst here.
|
|
*/
|
|
*in_snap = true;
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
apfs_node_free(extref_root);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_dstream_cache_in_snap - Check if the cached extent overlaps a snapshot
|
|
* @dstream: the data stream to check
|
|
* @in_snap: on return, the result
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_dstream_cache_in_snap(struct apfs_dstream_info *dstream, bool *in_snap)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_file_extent *cache = NULL;
|
|
|
|
/* All changes to extents get flushed when a snaphot is created */
|
|
if (dstream->ds_ext_dirty) {
|
|
*in_snap = false;
|
|
return 0;
|
|
}
|
|
|
|
cache = &dstream->ds_cached_ext;
|
|
return apfs_range_in_snap(sb, cache->phys_block_num, cache->len >> sb->s_blocksize_bits, in_snap);
|
|
}
|
|
|
|
/**
|
|
* apfs_dstream_get_new_block - Like the get_block_t function, but for dstreams
|
|
* @dstream: data stream info
|
|
* @dsblock: logical dstream block to map
|
|
* @bh_result: buffer head to map (NULL if none)
|
|
* @bno: if not NULL, the new block number is returned here
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_dstream_get_new_block(struct apfs_dstream_info *dstream, u64 dsblock, struct buffer_head *bh_result, u64 *bno)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw;
|
|
struct apfs_file_extent *cache = NULL;
|
|
u64 phys_bno, logical_addr, cache_blks, dstream_blks;
|
|
bool in_snap = true;
|
|
int err;
|
|
|
|
/* TODO: preallocate tail blocks */
|
|
logical_addr = dsblock << sb->s_blocksize_bits;
|
|
|
|
err = apfs_spaceman_allocate_block(sb, &phys_bno, false /* backwards */);
|
|
if (err) {
|
|
apfs_err(sb, "block allocation failed");
|
|
return err;
|
|
}
|
|
apfs_assert_in_transaction(sb, &vsb_raw->apfs_o);
|
|
le64_add_cpu(&vsb_raw->apfs_fs_alloc_count, 1);
|
|
le64_add_cpu(&vsb_raw->apfs_total_blocks_alloced, 1);
|
|
if (bno)
|
|
*bno = phys_bno;
|
|
|
|
if (bh_result) {
|
|
apfs_map_bh(bh_result, sb, phys_bno);
|
|
err = apfs_transaction_join(sb, bh_result);
|
|
if (err)
|
|
return err;
|
|
|
|
if (!buffer_uptodate(bh_result)) {
|
|
/*
|
|
* Truly new buffers need to be marked as such, to get
|
|
* zeroed; this also takes care of holes in sparse files
|
|
*/
|
|
set_buffer_new(bh_result);
|
|
} else if (dstream->ds_size > logical_addr) {
|
|
/*
|
|
* The last block may have stale data left from a
|
|
* truncation
|
|
*/
|
|
apfs_zero_bh_tail(sb, bh_result, dstream->ds_size - logical_addr);
|
|
}
|
|
}
|
|
|
|
dstream_blks = apfs_size_to_blocks(sb, dstream->ds_size);
|
|
if (dstream_blks < dsblock) {
|
|
/*
|
|
* This recurses into apfs_dstream_get_new_block() and dirties
|
|
* the extent cache, so it must happen before flushing it.
|
|
*/
|
|
err = apfs_zero_dstream_tail(dstream);
|
|
if (err) {
|
|
apfs_err(sb, "failed to zero tail for dstream 0x%llx", dstream->ds_id);
|
|
return err;
|
|
}
|
|
}
|
|
|
|
err = apfs_dstream_cache_in_snap(dstream, &in_snap);
|
|
if (err)
|
|
return err;
|
|
|
|
cache = &dstream->ds_cached_ext;
|
|
cache_blks = apfs_size_to_blocks(sb, cache->len);
|
|
|
|
/* TODO: allow dirty caches of several blocks in the middle of a file */
|
|
if (!in_snap && apfs_dstream_cache_is_tail(dstream) &&
|
|
logical_addr == cache->logical_addr + cache->len &&
|
|
phys_bno == cache->phys_block_num + cache_blks) {
|
|
cache->len += sb->s_blocksize;
|
|
dstream->ds_ext_dirty = true;
|
|
return 0;
|
|
}
|
|
|
|
err = apfs_flush_extent_cache(dstream);
|
|
if (err) {
|
|
apfs_err(sb, "extent cache flush failed for dstream 0x%llx", dstream->ds_id);
|
|
return err;
|
|
}
|
|
|
|
if (dstream_blks < dsblock) {
|
|
/*
|
|
* This puts new extents after the reported end of the file, so
|
|
* it must happen after the flush to avoid conflict with those
|
|
* extent operations.
|
|
*/
|
|
err = apfs_create_hole(dstream, dstream_blks, dsblock);
|
|
if (err) {
|
|
apfs_err(sb, "hole creation failed for dstream 0x%llx", dstream->ds_id);
|
|
return err;
|
|
}
|
|
}
|
|
|
|
cache->logical_addr = logical_addr;
|
|
cache->phys_block_num = phys_bno;
|
|
cache->len = sb->s_blocksize;
|
|
dstream->ds_ext_dirty = true;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_dstream_get_new_bno - Allocate a new block inside a dstream
|
|
* @dstream: data stream info
|
|
* @dsblock: logical dstream block to allocate
|
|
* @bno: on return, the new block number
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
int apfs_dstream_get_new_bno(struct apfs_dstream_info *dstream, u64 dsblock, u64 *bno)
|
|
{
|
|
return apfs_dstream_get_new_block(dstream, dsblock, NULL /* bh_result */, bno);
|
|
}
|
|
|
|
/**
 * apfs_get_new_block - Map a buffer head to a newly allocated block
 * @inode:	inode that owns the data stream
 * @iblock:	logical block of the inode to map
 * @bh_result:	buffer head to map
 * @create:	must be set, this function always allocates
 *
 * Returns 0 on success, or a negative error code in case of failure.
 */
int apfs_get_new_block(struct inode *inode, sector_t iblock,
		       struct buffer_head *bh_result, int create)
{
	struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream;

	ASSERT(create);
	return apfs_dstream_get_new_block(dstream, iblock, bh_result, NULL /* bno */);
}
|
|
|
|
/**
|
|
* apfs_shrink_dstream_last_extent - Shrink last extent of dstream being resized
|
|
* @dstream: data stream info
|
|
* @new_size: new size for the whole data stream
|
|
*
|
|
* Deletes, shrinks or zeroes the last extent, as needed for the truncation of
|
|
* the data stream.
|
|
*
|
|
* Only works with the last extent, so it needs to be called repeatedly to
|
|
* complete the truncation. Returns -EAGAIN in that case, or 0 when the process
|
|
* is complete. Returns other negative error codes in case of failure.
|
|
*/
|
|
static int apfs_shrink_dstream_last_extent(struct apfs_dstream_info *dstream, loff_t new_size)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query;
|
|
struct apfs_file_extent tail;
|
|
u64 extent_id = dstream->ds_id;
|
|
int ret = 0;
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_file_extent_key(extent_id, -1, &query->key);
|
|
query->flags = APFS_QUERY_CAT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for last extent of id 0x%llx", extent_id);
|
|
goto out;
|
|
}
|
|
|
|
if (!apfs_query_found_extent(query)) {
|
|
/* No more extents, we deleted the whole file already? */
|
|
if (new_size) {
|
|
apfs_err(sb, "missing extent for dstream 0x%llx", extent_id);
|
|
ret = -EFSCORRUPTED;
|
|
} else {
|
|
ret = 0;
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
ret = apfs_extent_from_query(query, &tail);
|
|
if (ret) {
|
|
apfs_err(sb, "bad tail extent record on dstream 0x%llx", extent_id);
|
|
goto out;
|
|
}
|
|
|
|
if (tail.logical_addr + tail.len < new_size) {
|
|
apfs_err(sb, "missing extent for dstream 0x%llx", extent_id);
|
|
ret = -EFSCORRUPTED; /* Tail extent missing */
|
|
} else if (tail.logical_addr + tail.len == new_size) {
|
|
ret = 0; /* Nothing more to be done */
|
|
} else if (tail.logical_addr >= new_size) {
|
|
/* This whole extent needs to go */
|
|
ret = apfs_btree_remove(query);
|
|
if (ret) {
|
|
apfs_err(sb, "removal failed for id 0x%llx, addr 0x%llx", dstream->ds_id, tail.logical_addr);
|
|
goto out;
|
|
}
|
|
if (apfs_ext_is_hole(&tail)) {
|
|
dstream->ds_sparse_bytes -= tail.len;
|
|
} else {
|
|
ret = apfs_range_put_reference(sb, tail.phys_block_num, tail.len);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to put range 0x%llx-0x%llx", tail.phys_block_num, tail.len);
|
|
goto out;
|
|
}
|
|
}
|
|
ret = apfs_crypto_adj_refcnt(sb, tail.crypto_id, -1);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to take crypto id 0x%llx", tail.crypto_id);
|
|
goto out;
|
|
}
|
|
ret = tail.logical_addr == new_size ? 0 : -EAGAIN;
|
|
} else {
|
|
/*
|
|
* The file is being truncated in the middle of this extent.
|
|
* TODO: preserve the physical tail to be overwritten later.
|
|
*/
|
|
new_size = apfs_size_to_blocks(sb, new_size) << sb->s_blocksize_bits;
|
|
ret = apfs_shrink_extent_tail(query, dstream, new_size);
|
|
if (ret)
|
|
apfs_err(sb, "failed to shrink tail of dstream 0x%llx", extent_id);
|
|
}
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_shrink_dstream - Shrink a data stream's extents to a new length
|
|
* @dstream: data stream info
|
|
* @new_size: the new size
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_shrink_dstream(struct apfs_dstream_info *dstream, loff_t new_size)
|
|
{
|
|
int ret;
|
|
|
|
do {
|
|
ret = apfs_shrink_dstream_last_extent(dstream, new_size);
|
|
} while (ret == -EAGAIN);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_truncate - Truncate a data stream's content
|
|
* @dstream: data stream info
|
|
* @new_size: the new size
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
int apfs_truncate(struct apfs_dstream_info *dstream, loff_t new_size)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
u64 old_blks, new_blks;
|
|
struct apfs_file_extent *cache = &dstream->ds_cached_ext;
|
|
int err;
|
|
|
|
/* TODO: don't write the cached extent if it will be deleted */
|
|
err = apfs_flush_extent_cache(dstream);
|
|
if (err) {
|
|
apfs_err(sb, "extent cache flush failed for dstream 0x%llx", dstream->ds_id);
|
|
return err;
|
|
}
|
|
dstream->ds_ext_dirty = false;
|
|
|
|
/* TODO: keep the cache valid on truncation */
|
|
cache->len = 0;
|
|
|
|
/* "<=", because a partial write may have left extents beyond the end */
|
|
if (new_size <= dstream->ds_size)
|
|
return apfs_shrink_dstream(dstream, new_size);
|
|
|
|
err = apfs_zero_dstream_tail(dstream);
|
|
if (err) {
|
|
apfs_err(sb, "failed to zero tail for dstream 0x%llx", dstream->ds_id);
|
|
return err;
|
|
}
|
|
new_blks = apfs_size_to_blocks(sb, new_size);
|
|
old_blks = apfs_size_to_blocks(sb, dstream->ds_size);
|
|
return apfs_create_hole(dstream, old_blks, new_blks);
|
|
}
|
|
|
|
/**
|
|
* apfs_dstream_delete_front - Deletes as many leading extents as possible
|
|
* @sb: filesystem superblock
|
|
* @ds_id: id for the dstream to delete
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure, which may
|
|
* be -ENODATA if there are no more extents, or -EAGAIN if the free queue is
|
|
* getting too full.
|
|
*/
|
|
static int apfs_dstream_delete_front(struct super_block *sb, u64 ds_id)
|
|
{
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_spaceman *sm = APFS_SM(sb);
|
|
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
|
|
struct apfs_spaceman_free_queue *fq = NULL;
|
|
struct apfs_query *query = NULL;
|
|
struct apfs_file_extent head;
|
|
bool first_match = true;
|
|
int ret;
|
|
|
|
fq = &sm_raw->sm_fq[APFS_SFQ_MAIN];
|
|
if (le64_to_cpu(fq->sfq_count) > APFS_TRANS_MAIN_QUEUE_MAX)
|
|
return -EAGAIN;
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_file_extent_key(ds_id, 0, &query->key);
|
|
query->flags = APFS_QUERY_CAT;
|
|
|
|
next_extent:
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for first extent of id 0x%llx", ds_id);
|
|
goto out;
|
|
}
|
|
apfs_query_direct_forward(query);
|
|
if (!apfs_query_found_extent(query)) {
|
|
/*
|
|
* After the original lookup, the query may not be set to the
|
|
* first extent, but instead to the record that comes right
|
|
* before.
|
|
*/
|
|
if (first_match) {
|
|
first_match = false;
|
|
goto next_extent;
|
|
}
|
|
ret = -ENODATA;
|
|
goto out;
|
|
}
|
|
first_match = false;
|
|
|
|
ret = apfs_extent_from_query(query, &head);
|
|
if (ret) {
|
|
apfs_err(sb, "bad head extent record on dstream 0x%llx", ds_id);
|
|
goto out;
|
|
}
|
|
ret = apfs_btree_remove(query);
|
|
if (ret) {
|
|
apfs_err(sb, "removal failed for id 0x%llx, addr 0x%llx", ds_id, head.logical_addr);
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* The official fsck doesn't complain about wrong sparse byte counts
|
|
* for orphans, so I guess we don't need to update them here
|
|
*/
|
|
if (!apfs_ext_is_hole(&head)) {
|
|
ret = apfs_range_put_reference(sb, head.phys_block_num, head.len);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to put range 0x%llx-0x%llx", head.phys_block_num, head.len);
|
|
goto out;
|
|
}
|
|
ret = apfs_crypto_adj_refcnt(sb, head.crypto_id, -1);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to take crypto id 0x%llx", head.crypto_id);
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
if (le64_to_cpu(fq->sfq_count) <= APFS_TRANS_MAIN_QUEUE_MAX)
|
|
goto next_extent;
|
|
ret = -EAGAIN;
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_inode_delete_front - Deletes as many leading extents as possible
|
|
* @inode: inode to delete
|
|
*
|
|
* Tries to delete all extents for @inode, in which case it returns 0. If the
|
|
* free queue is getting too full, deletes as much as is reasonable and returns
|
|
* -EAGAIN. May return other negative error codes as well.
|
|
*/
|
|
int apfs_inode_delete_front(struct inode *inode)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
struct apfs_dstream_info *dstream = NULL;
|
|
struct apfs_inode_info *ai = APFS_I(inode);
|
|
int ret;
|
|
|
|
if (!ai->i_has_dstream)
|
|
return 0;
|
|
|
|
dstream = &ai->i_dstream;
|
|
ret = apfs_flush_extent_cache(dstream);
|
|
if (ret) {
|
|
apfs_err(sb, "extent cache flush failed for dstream 0x%llx", dstream->ds_id);
|
|
return ret;
|
|
}
|
|
|
|
ret = apfs_dstream_delete_front(sb, dstream->ds_id);
|
|
if (ret == -ENODATA)
|
|
return 0;
|
|
return ret;
|
|
}
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0)
|
|
loff_t apfs_remap_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, loff_t len, unsigned int remap_flags)
|
|
#else
|
|
int apfs_clone_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, u64 len)
|
|
#endif
|
|
{
|
|
struct inode *src_inode = file_inode(src_file);
|
|
struct inode *dst_inode = file_inode(dst_file);
|
|
struct apfs_inode_info *src_ai = APFS_I(src_inode);
|
|
struct apfs_inode_info *dst_ai = APFS_I(dst_inode);
|
|
struct apfs_dstream_info *src_ds = &src_ai->i_dstream;
|
|
struct apfs_dstream_info *dst_ds = &dst_ai->i_dstream;
|
|
struct super_block *sb = src_inode->i_sb;
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
const u64 xfield_flags = APFS_INODE_MAINTAIN_DIR_STATS | APFS_INODE_IS_SPARSE | APFS_INODE_HAS_PURGEABLE_FLAGS;
|
|
int err;
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0)
|
|
if (remap_flags & ~(REMAP_FILE_ADVISORY))
|
|
return -EINVAL;
|
|
#endif
|
|
if (src_inode == dst_inode)
|
|
return -EINVAL;
|
|
|
|
/* We only want to clone whole files, like in the official driver */
|
|
if (off != 0 || destoff != 0 || len != 0)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Clones here work in two steps: first the user creates an empty target
|
|
* file, and then the user calls the ioctl, which replaces the file with
|
|
* a clone. This is not atomic, of course.
|
|
*/
|
|
if (dst_ai->i_has_dstream || dst_ai->i_bsd_flags & APFS_INOBSD_COMPRESSED) {
|
|
apfs_warn(sb, "clones can only replace freshly created files");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
if (dst_ai->i_int_flags & xfield_flags) {
|
|
apfs_warn(sb, "clone can't replace a file that has xfields");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
if (!src_ai->i_has_dstream) {
|
|
apfs_warn(sb, "can't clone a file with no dstream");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
err = apfs_transaction_start(sb, APFS_TRANS_REG);
|
|
if (err)
|
|
return err;
|
|
apfs_inode_join_transaction(sb, src_inode);
|
|
apfs_inode_join_transaction(sb, dst_inode);
|
|
|
|
err = apfs_flush_extent_cache(src_ds);
|
|
if (err) {
|
|
apfs_err(sb, "extent cache flush failed for dstream 0x%llx", src_ds->ds_id);
|
|
goto fail;
|
|
}
|
|
err = apfs_dstream_adj_refcnt(src_ds, +1);
|
|
if (err) {
|
|
apfs_err(sb, "failed to take dstream id 0x%llx", src_ds->ds_id);
|
|
goto fail;
|
|
}
|
|
src_ds->ds_shared = true;
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0)
|
|
dst_inode->i_mtime = dst_inode->i_ctime = current_time(dst_inode);
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0)
|
|
dst_inode->i_mtime = inode_set_ctime_current(dst_inode);
|
|
#else
|
|
inode_set_mtime_to_ts(dst_inode, inode_set_ctime_current(dst_inode));
|
|
#endif
|
|
dst_inode->i_size = src_inode->i_size;
|
|
dst_ai->i_key_class = src_ai->i_key_class;
|
|
dst_ai->i_int_flags = src_ai->i_int_flags;
|
|
dst_ai->i_bsd_flags = src_ai->i_bsd_flags;
|
|
dst_ai->i_has_dstream = true;
|
|
|
|
dst_ds->ds_sb = src_ds->ds_sb;
|
|
dst_ds->ds_inode = dst_inode;
|
|
dst_ds->ds_id = src_ds->ds_id;
|
|
dst_ds->ds_size = src_ds->ds_size;
|
|
dst_ds->ds_sparse_bytes = src_ds->ds_sparse_bytes;
|
|
dst_ds->ds_cached_ext = src_ds->ds_cached_ext;
|
|
dst_ds->ds_ext_dirty = false;
|
|
dst_ds->ds_shared = true;
|
|
|
|
dst_ai->i_int_flags |= APFS_INODE_WAS_EVER_CLONED | APFS_INODE_WAS_CLONED;
|
|
src_ai->i_int_flags |= APFS_INODE_WAS_EVER_CLONED;
|
|
|
|
/*
|
|
* The sparse flag is the important one here: if we need it, it will get
|
|
* set later by apfs_update_inode(), after the xfield gets created.
|
|
*/
|
|
dst_ai->i_int_flags &= ~xfield_flags;
|
|
|
|
/*
|
|
* Commit the transaction to make sure all buffers in the source inode
|
|
* go through copy-on-write. This is a bit excessive, but I don't expect
|
|
* clones to be created often enough for it to matter.
|
|
*/
|
|
sbi->s_nxi->nx_transaction.t_state |= APFS_NX_TRANS_FORCE_COMMIT;
|
|
err = apfs_transaction_commit(sb);
|
|
if (err)
|
|
goto fail;
|
|
return dst_ds->ds_size;
|
|
|
|
fail:
|
|
apfs_transaction_abort(sb);
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* apfs_extent_create_record - Create a logical extent record for a dstream id
|
|
* @sb: filesystem superblock
|
|
* @dstream_id: the dstream id
|
|
* @extent: extent info for the record
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_extent_create_record(struct super_block *sb, u64 dstream_id, struct apfs_file_extent *extent)
|
|
{
|
|
struct apfs_sb_info *sbi = APFS_SB(sb);
|
|
struct apfs_query *query = NULL;
|
|
struct apfs_file_extent_val raw_val;
|
|
struct apfs_file_extent_key raw_key;
|
|
int ret = 0;
|
|
|
|
query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
|
|
if (!query)
|
|
return -ENOMEM;
|
|
apfs_init_file_extent_key(dstream_id, extent->logical_addr, &query->key);
|
|
query->flags = APFS_QUERY_CAT | APFS_QUERY_EXACT;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for id 0x%llx, addr 0x%llx", dstream_id, extent->logical_addr);
|
|
goto out;
|
|
}
|
|
|
|
apfs_key_set_hdr(APFS_TYPE_FILE_EXTENT, dstream_id, &raw_key);
|
|
raw_key.logical_addr = cpu_to_le64(extent->logical_addr);
|
|
raw_val.len_and_flags = cpu_to_le64(extent->len);
|
|
raw_val.phys_block_num = cpu_to_le64(extent->phys_block_num);
|
|
raw_val.crypto_id = cpu_to_le64(apfs_vol_is_encrypted(sb) ? dstream_id : 0); /* TODO */
|
|
|
|
ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
|
|
if (ret)
|
|
apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", dstream_id, extent->logical_addr);
|
|
out:
|
|
apfs_free_query(query);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_put_single_extent - Put a reference to a single extent
|
|
* @sb: filesystem superblock
|
|
* @paddr_end: first block after the extent to put
|
|
* @paddr_min: don't put references before this block
|
|
*
|
|
* Puts a reference to the physical extent range that ends in @paddr_end. Sets
|
|
* @paddr_end to the beginning of the extent, so that the caller can continue
|
|
* with the previous one. Returns 0 on success, or a negative error code in
|
|
* case of failure.
|
|
*
|
|
* TODO: unify this with apfs_take_single_extent(), they are almost the same.
|
|
*/
|
|
static int apfs_put_single_extent(struct super_block *sb, u64 *paddr_end, u64 paddr_min)
|
|
{
|
|
struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw;
|
|
struct apfs_node *extref_root = NULL;
|
|
struct apfs_key key;
|
|
struct apfs_query *query = NULL;
|
|
struct apfs_phys_extent prev_ext;
|
|
u64 prev_start, prev_end;
|
|
bool cropped_head = false, cropped_tail = false;
|
|
struct apfs_file_extent tmp = {0}; /* TODO: clean up all the fake extent interfaces? */
|
|
int ret;
|
|
|
|
extref_root = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_extentref_tree_oid), APFS_OBJ_PHYSICAL, true /* write */);
|
|
if (IS_ERR(extref_root)) {
|
|
apfs_err(sb, "failed to read extref root 0x%llx", le64_to_cpu(vsb_raw->apfs_extentref_tree_oid));
|
|
return PTR_ERR(extref_root);
|
|
}
|
|
apfs_assert_in_transaction(sb, &vsb_raw->apfs_o);
|
|
vsb_raw->apfs_extentref_tree_oid = cpu_to_le64(extref_root->object.oid);
|
|
|
|
apfs_init_extent_key(*paddr_end - 1, &key);
|
|
|
|
restart:
|
|
query = apfs_alloc_query(extref_root, NULL /* parent */);
|
|
if (!query) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
query->key = key;
|
|
query->flags = APFS_QUERY_EXTENTREF;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for paddr 0x%llx", *paddr_end - 1);
|
|
goto out;
|
|
}
|
|
|
|
if (ret == -ENODATA) {
|
|
/* The whole range to put is part of a snapshot */
|
|
tmp.phys_block_num = paddr_min;
|
|
tmp.len = (*paddr_end - paddr_min) << sb->s_blocksize_bits;
|
|
ret = apfs_create_update_pext(query, &tmp, -1);
|
|
*paddr_end = paddr_min;
|
|
goto out;
|
|
}
|
|
|
|
ret = apfs_phys_ext_from_query(query, &prev_ext);
|
|
if (ret) {
|
|
apfs_err(sb, "bad pext record over paddr 0x%llx", *paddr_end - 1);
|
|
goto out;
|
|
}
|
|
prev_start = prev_ext.bno;
|
|
prev_end = prev_ext.bno + prev_ext.blkcount;
|
|
if (prev_end < *paddr_end) {
|
|
/* The extent to put is part of a snapshot */
|
|
tmp.phys_block_num = max(prev_end, paddr_min);
|
|
tmp.len = (*paddr_end - tmp.phys_block_num) << sb->s_blocksize_bits;
|
|
ret = apfs_create_update_pext(query, &tmp, -1);
|
|
*paddr_end = tmp.phys_block_num;
|
|
goto out;
|
|
}
|
|
|
|
if ((cropped_tail && prev_end > *paddr_end) || (cropped_head && prev_start < paddr_min)) {
|
|
/* This should never happen, but be safe */
|
|
apfs_alert(sb, "recursion cropping physical extent 0x%llx-0x%llx", prev_start, prev_end);
|
|
ret = -EFSCORRUPTED;
|
|
goto out;
|
|
}
|
|
|
|
if (prev_end > *paddr_end) {
|
|
ret = apfs_split_phys_ext(query, *paddr_end);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to split pext at 0x%llx", *paddr_end);
|
|
goto out;
|
|
}
|
|
/* The split may make the query invalid */
|
|
apfs_free_query(query);
|
|
cropped_tail = true;
|
|
goto restart;
|
|
}
|
|
|
|
if (prev_start < paddr_min) {
|
|
ret = apfs_split_phys_ext(query, paddr_min);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to split pext at 0x%llx", paddr_min);
|
|
goto out;
|
|
}
|
|
/* The split may make the query invalid */
|
|
apfs_free_query(query);
|
|
cropped_head = true;
|
|
goto restart;
|
|
}
|
|
|
|
/* The extent to put already exists */
|
|
ret = apfs_put_phys_extent(&prev_ext, query);
|
|
if (ret)
|
|
apfs_err(sb, "failed to put pext at 0x%llx", prev_ext.bno);
|
|
*paddr_end = prev_start;
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
apfs_node_free(extref_root);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_range_put_reference - Put a reference to a physical range
|
|
* @sb: filesystem superblock
|
|
* @paddr: first block of the range
|
|
* @length: length of the range (in bytes)
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_range_put_reference(struct super_block *sb, u64 paddr, u64 length)
|
|
{
|
|
u64 extent_end;
|
|
int err;
|
|
|
|
ASSERT(paddr);
|
|
|
|
extent_end = paddr + (length >> sb->s_blocksize_bits);
|
|
while (extent_end > paddr) {
|
|
err = apfs_put_single_extent(sb, &extent_end, paddr);
|
|
if (err)
|
|
return err;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_take_single_extent - Take a reference to a single extent
|
|
* @sb: filesystem superblock
|
|
* @paddr_end: first block after the extent to take
|
|
* @paddr_min: don't take references before this block
|
|
*
|
|
* Takes a reference to the physical extent range that ends in @paddr_end. Sets
|
|
* @paddr_end to the beginning of the extent, so that the caller can continue
|
|
* with the previous one. Returns 0 on success, or a negative error code in
|
|
* case of failure.
|
|
*/
|
|
static int apfs_take_single_extent(struct super_block *sb, u64 *paddr_end, u64 paddr_min)
|
|
{
|
|
struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw;
|
|
struct apfs_node *extref_root = NULL;
|
|
struct apfs_key key;
|
|
struct apfs_query *query = NULL;
|
|
struct apfs_phys_extent prev_ext;
|
|
u64 prev_start, prev_end;
|
|
bool cropped_head = false, cropped_tail = false;
|
|
struct apfs_file_extent tmp = {0}; /* TODO: clean up all the fake extent interfaces? */
|
|
int ret;
|
|
|
|
extref_root = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_extentref_tree_oid), APFS_OBJ_PHYSICAL, true /* write */);
|
|
if (IS_ERR(extref_root)) {
|
|
apfs_err(sb, "failed to read extref root 0x%llx", le64_to_cpu(vsb_raw->apfs_extentref_tree_oid));
|
|
return PTR_ERR(extref_root);
|
|
}
|
|
apfs_assert_in_transaction(sb, &vsb_raw->apfs_o);
|
|
vsb_raw->apfs_extentref_tree_oid = cpu_to_le64(extref_root->object.oid);
|
|
|
|
apfs_init_extent_key(*paddr_end - 1, &key);
|
|
|
|
restart:
|
|
query = apfs_alloc_query(extref_root, NULL /* parent */);
|
|
if (!query) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
query->key = key;
|
|
query->flags = APFS_QUERY_EXTENTREF;
|
|
|
|
ret = apfs_btree_query(sb, &query);
|
|
if (ret && ret != -ENODATA) {
|
|
apfs_err(sb, "query failed for paddr 0x%llx", *paddr_end - 1);
|
|
goto out;
|
|
}
|
|
|
|
if (ret == -ENODATA) {
|
|
/* The whole range to take is part of a snapshot */
|
|
tmp.phys_block_num = paddr_min;
|
|
tmp.len = (*paddr_end - paddr_min) << sb->s_blocksize_bits;
|
|
ret = apfs_create_update_pext(query, &tmp, +1);
|
|
*paddr_end = paddr_min;
|
|
goto out;
|
|
}
|
|
|
|
ret = apfs_phys_ext_from_query(query, &prev_ext);
|
|
if (ret) {
|
|
apfs_err(sb, "bad pext record over paddr 0x%llx", *paddr_end - 1);
|
|
goto out;
|
|
}
|
|
prev_start = prev_ext.bno;
|
|
prev_end = prev_ext.bno + prev_ext.blkcount;
|
|
if (prev_end < *paddr_end) {
|
|
/* The extent to take is part of a snapshot */
|
|
tmp.phys_block_num = max(prev_end, paddr_min);
|
|
tmp.len = (*paddr_end - tmp.phys_block_num) << sb->s_blocksize_bits;
|
|
ret = apfs_create_update_pext(query, &tmp, +1);
|
|
*paddr_end = tmp.phys_block_num;
|
|
goto out;
|
|
}
|
|
|
|
if ((cropped_tail && prev_end > *paddr_end) || (cropped_head && prev_start < paddr_min)) {
|
|
/* This should never happen, but be safe */
|
|
apfs_alert(sb, "recursion cropping physical extent 0x%llx-0x%llx", prev_start, prev_end);
|
|
ret = -EFSCORRUPTED;
|
|
goto out;
|
|
}
|
|
|
|
if (prev_end > *paddr_end) {
|
|
ret = apfs_split_phys_ext(query, *paddr_end);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to split pext at 0x%llx", *paddr_end);
|
|
goto out;
|
|
}
|
|
/* The split may make the query invalid */
|
|
apfs_free_query(query);
|
|
cropped_tail = true;
|
|
goto restart;
|
|
}
|
|
|
|
if (prev_start < paddr_min) {
|
|
ret = apfs_split_phys_ext(query, paddr_min);
|
|
if (ret) {
|
|
apfs_err(sb, "failed to split pext at 0x%llx", paddr_min);
|
|
goto out;
|
|
}
|
|
/* The split may make the query invalid */
|
|
apfs_free_query(query);
|
|
cropped_head = true;
|
|
goto restart;
|
|
}
|
|
|
|
/* The extent to take already exists */
|
|
ret = apfs_take_phys_extent(&prev_ext, query);
|
|
if (ret)
|
|
apfs_err(sb, "failed to take pext at 0x%llx", prev_ext.bno);
|
|
*paddr_end = prev_start;
|
|
|
|
out:
|
|
apfs_free_query(query);
|
|
apfs_node_free(extref_root);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_range_take_reference - Take a reference to a physical range
|
|
* @sb: filesystem superblock
|
|
* @paddr: first block of the range
|
|
* @length: length of the range (in bytes)
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
static int apfs_range_take_reference(struct super_block *sb, u64 paddr, u64 length)
|
|
{
|
|
u64 extent_end;
|
|
int err;
|
|
|
|
ASSERT(paddr);
|
|
|
|
extent_end = paddr + (length >> sb->s_blocksize_bits);
|
|
while (extent_end > paddr) {
|
|
err = apfs_take_single_extent(sb, &extent_end, paddr);
|
|
if (err)
|
|
return err;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_clone_single_extent - Make a copy of an extent in a dstream to a new one
|
|
* @dstream: old dstream
|
|
* @new_id: id of the new dstream
|
|
* @log_addr: logical address for the extent
|
|
*
|
|
* Duplicates the logical extent, and updates the references to the physical
|
|
* extents as required. Sets @addr to the end of the extent, so that the caller
|
|
* can continue in the same place. Returns 0 on success, or a negative error
|
|
* code in case of failure.
|
|
*/
|
|
static int apfs_clone_single_extent(struct apfs_dstream_info *dstream, u64 new_id, u64 *log_addr)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
struct apfs_file_extent extent;
|
|
int err;
|
|
|
|
err = apfs_extent_read(dstream, *log_addr >> sb->s_blocksize_bits, &extent);
|
|
if (err) {
|
|
apfs_err(sb, "failed to read an extent to clone for dstream 0x%llx", dstream->ds_id);
|
|
return err;
|
|
}
|
|
err = apfs_extent_create_record(sb, new_id, &extent);
|
|
if (err) {
|
|
apfs_err(sb, "failed to create extent record for clone of dstream 0x%llx", dstream->ds_id);
|
|
return err;
|
|
}
|
|
|
|
if (!apfs_ext_is_hole(&extent)) {
|
|
err = apfs_range_take_reference(sb, extent.phys_block_num, extent.len);
|
|
if (err) {
|
|
apfs_err(sb, "failed to take a reference to physical range 0x%llx-0x%llx", extent.phys_block_num, extent.len);
|
|
return err;
|
|
}
|
|
}
|
|
|
|
*log_addr += extent.len;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_clone_extents - Make a copy of all extents in a dstream to a new one
|
|
* @dstream: old dstream
|
|
* @new_id: id for the new dstream
|
|
*
|
|
* Returns 0 on success, or a negative error code in case of failure.
|
|
*/
|
|
int apfs_clone_extents(struct apfs_dstream_info *dstream, u64 new_id)
|
|
{
|
|
u64 next = 0;
|
|
int err;
|
|
|
|
while (next < dstream->ds_size) {
|
|
err = apfs_clone_single_extent(dstream, new_id, &next);
|
|
if (err)
|
|
return err;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* apfs_nonsparse_dstream_read - Read from a dstream without holes
|
|
* @dstream: dstream to read
|
|
* @buf: destination buffer
|
|
* @count: exact number of bytes to read
|
|
* @offset: dstream offset to read from
|
|
*
|
|
* Returns 0 on success or a negative error code in case of failure.
|
|
*/
|
|
int apfs_nonsparse_dstream_read(struct apfs_dstream_info *dstream, void *buf, size_t count, u64 offset)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
u64 logical_start_block, logical_end_block, log_bno, blkcnt, idx;
|
|
struct buffer_head **bhs = NULL;
|
|
int ret = 0;
|
|
|
|
/* Save myself from thinking about overflow here */
|
|
if (count >= APFS_MAX_FILE_SIZE || offset >= APFS_MAX_FILE_SIZE) {
|
|
apfs_err(sb, "dstream read overflow (0x%llx-0x%llx)", offset, (unsigned long long)count);
|
|
return -EFBIG;
|
|
}
|
|
|
|
if (offset + count > dstream->ds_size) {
|
|
apfs_err(sb, "reading past the end (0x%llx-0x%llx)", offset, (unsigned long long)count);
|
|
/* No caller is expected to legitimately read out-of-bounds */
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
logical_start_block = offset >> sb->s_blocksize_bits;
|
|
logical_end_block = (offset + count + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
|
|
blkcnt = logical_end_block - logical_start_block;
|
|
bhs = kcalloc(blkcnt, sizeof(*bhs), GFP_KERNEL);
|
|
if (!bhs)
|
|
return -ENOMEM;
|
|
|
|
for (log_bno = logical_start_block; log_bno < logical_end_block; log_bno++) {
|
|
struct buffer_head *bh = NULL;
|
|
u64 bno = 0;
|
|
|
|
idx = log_bno - logical_start_block;
|
|
|
|
ret = apfs_logic_to_phys_bno(dstream, log_bno, &bno);
|
|
if (ret)
|
|
goto out;
|
|
if (bno == 0) {
|
|
apfs_err(sb, "nonsparse dstream has a hole");
|
|
ret = -EFSCORRUPTED;
|
|
goto out;
|
|
}
|
|
|
|
bhs[idx] = __apfs_getblk(sb, bno);
|
|
if (!bhs[idx]) {
|
|
apfs_err(sb, "failed to map block 0x%llx", bno);
|
|
ret = -EIO;
|
|
goto out;
|
|
}
|
|
|
|
bh = bhs[idx];
|
|
if (!buffer_uptodate(bh)) {
|
|
get_bh(bh);
|
|
lock_buffer(bh);
|
|
bh->b_end_io = end_buffer_read_sync;
|
|
apfs_submit_bh(REQ_OP_READ, 0, bh);
|
|
}
|
|
}
|
|
|
|
for (log_bno = logical_start_block; log_bno < logical_end_block; log_bno++) {
|
|
int off_in_block, left_in_block;
|
|
|
|
idx = log_bno - logical_start_block;
|
|
wait_on_buffer(bhs[idx]);
|
|
if (!buffer_uptodate(bhs[idx])) {
|
|
apfs_err(sb, "failed to read a block");
|
|
ret = -EIO;
|
|
goto out;
|
|
}
|
|
|
|
if (log_bno == logical_start_block)
|
|
off_in_block = offset & (sb->s_blocksize - 1);
|
|
else
|
|
off_in_block = 0;
|
|
|
|
if (log_bno == logical_end_block - 1)
|
|
left_in_block = count + offset - (log_bno << sb->s_blocksize_bits) - off_in_block;
|
|
else
|
|
left_in_block = sb->s_blocksize - off_in_block;
|
|
|
|
memcpy(buf, bhs[idx]->b_data + off_in_block, left_in_block);
|
|
buf += left_in_block;
|
|
}
|
|
|
|
out:
|
|
if (bhs) {
|
|
for (idx = 0; idx < blkcnt; idx++)
|
|
brelse(bhs[idx]);
|
|
kfree(bhs);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* apfs_nonsparse_dstream_preread - Attempt to preread a dstream without holes
|
|
* @dstream: dstream to preread
|
|
*
|
|
* Requests reads for all blocks of a dstream, but doesn't wait for the result.
|
|
*/
|
|
void apfs_nonsparse_dstream_preread(struct apfs_dstream_info *dstream)
|
|
{
|
|
struct super_block *sb = dstream->ds_sb;
|
|
u64 logical_end_block, log_bno;
|
|
|
|
logical_end_block = (dstream->ds_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
|
|
|
|
for (log_bno = 0; log_bno < logical_end_block; log_bno++) {
|
|
struct buffer_head *bh = NULL;
|
|
u64 bno = 0;
|
|
int ret;
|
|
|
|
ret = apfs_logic_to_phys_bno(dstream, log_bno, &bno);
|
|
if (ret || bno == 0)
|
|
return;
|
|
|
|
bh = __apfs_getblk(sb, bno);
|
|
if (!bh)
|
|
return;
|
|
if (!buffer_uptodate(bh)) {
|
|
get_bh(bh);
|
|
lock_buffer(bh);
|
|
bh->b_end_io = end_buffer_read_sync;
|
|
apfs_submit_bh(REQ_OP_READ, 0, bh);
|
|
}
|
|
brelse(bh);
|
|
bh = NULL;
|
|
}
|
|
}
|