Ernesto A. Fernández c012e6cdb9 Commit when the main free queue has too many nodes
As mentioned in the previous commit message, generic/269 of xfstests
makes the main free queue unbalanced enough that insertions fail.
Clearing the free queue when the transaction starts is not enough to
prevent it, because the whole disaster can happen without a single
commit.

So, start forcing commits when the main queue has too many nodes. I
don't think there is a problem with being too strict here, because a
truly full main queue would be gigantic for most normal filesystems.
Just force a commit when we reach half the limit (rounded up). Of course
make an exception for filesystems with a single main fq node, since
those can't even become unbalanced in the first place and we don't want
to commit nonstop.

Signed-off-by: Ernesto A. Fernández <ernesto@corellium.com>
2025-03-18 18:38:13 -03:00
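
The forced-commit check itself lives in the transaction-handling code rather than in this file, which only tracks the node count of the main free queue (sm_main_fq_nodes below). What follows is a minimal sketch of the policy described above, using the sfq_tree_node_limit field read later in this file; the helper name is an assumption for illustration, not the driver's actual interface.

/*
 * Sketch only: force a commit once the main free queue holds half of its
 * node limit (rounded up), except when the queue can never grow past a
 * single node. The helper name is hypothetical.
 */
static bool apfs_main_fq_needs_commit(struct super_block *sb)
{
	struct apfs_spaceman *sm = APFS_SM(sb);
	struct apfs_spaceman_free_queue *fq = &sm->sm_raw->sm_fq[APFS_SFQ_MAIN];
	u16 node_limit = le16_to_cpu(fq->sfq_tree_node_limit);

	/* A single-node queue can't become unbalanced; don't commit nonstop */
	if (node_limit <= 1)
		return false;
	return sm->sm_main_fq_nodes >= DIV_ROUND_UP(node_limit, 2);
}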


// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2019 Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
*/
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include "apfs.h"
/**
* apfs_spaceman_read_cib_addr - Get the address of a cib from the spaceman
* @sb: superblock structure
* @index: index of the chunk-info block
*
* Returns the block number for the chunk-info block.
*
* This is not described in the official documentation; credit for figuring it
* out should go to Joachim Metz: <https://github.com/libyal/libfsapfs>.
*/
static u64 apfs_spaceman_read_cib_addr(struct super_block *sb, int index)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
u32 offset;
__le64 *addr_p;
offset = sm->sm_addr_offset + index * sizeof(*addr_p);
addr_p = (void *)sm_raw + offset;
return le64_to_cpup(addr_p);
}
/**
* apfs_spaceman_write_cib_addr - Store the address of a cib in the spaceman
* @sb: superblock structure
* @index: index of the chunk-info block
* @addr: address of the chunk-info block
*/
static void apfs_spaceman_write_cib_addr(struct super_block *sb,
int index, u64 addr)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
u32 offset;
__le64 *addr_p;
apfs_assert_in_transaction(sb, &sm_raw->sm_o);
offset = sm->sm_addr_offset + index * sizeof(*addr_p);
addr_p = (void *)sm_raw + offset;
*addr_p = cpu_to_le64(addr);
}
/**
* apfs_max_chunks_per_cib - Find the maximum chunk count for a chunk-info block
* @sb: superblock structure
*/
static inline int apfs_max_chunks_per_cib(struct super_block *sb)
{
return (sb->s_blocksize - sizeof(struct apfs_chunk_info_block)) /
sizeof(struct apfs_chunk_info);
}
/**
* apfs_read_spaceman_dev - Read a space manager device structure
* @sb: superblock structure
* @dev: on-disk device structure
*
* Initializes the in-memory spaceman fields related to the main device; fusion
* drives are not yet supported. Returns 0 on success, or a negative error code
* in case of failure.
*/
static int apfs_read_spaceman_dev(struct super_block *sb,
struct apfs_spaceman_device *dev)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
if (dev->sm_cab_count) {
apfs_err(sb, "large devices are not supported");
return -EINVAL;
}
spaceman->sm_block_count = le64_to_cpu(dev->sm_block_count);
spaceman->sm_chunk_count = le64_to_cpu(dev->sm_chunk_count);
spaceman->sm_cib_count = le32_to_cpu(dev->sm_cib_count);
spaceman->sm_free_count = le64_to_cpu(dev->sm_free_count);
spaceman->sm_addr_offset = le32_to_cpu(dev->sm_addr_offset);
/* Check that all the cib addresses fit in the spaceman object */
if ((long long)spaceman->sm_addr_offset +
(long long)spaceman->sm_cib_count * sizeof(u64) > spaceman->sm_size) {
apfs_err(sb, "too many cibs (%u)", spaceman->sm_cib_count);
return -EFSCORRUPTED;
}
return 0;
}
/**
* apfs_spaceman_get_16 - Get a 16-bit value from an offset in the spaceman
* @sb: superblock structure
* @off: offset for the value
*
* Returns a pointer to the value, or NULL if it doesn't fit.
*/
static __le16 *apfs_spaceman_get_16(struct super_block *sb, size_t off)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
if (off > spaceman->sm_size)
return NULL;
if (off + sizeof(__le16) > spaceman->sm_size)
return NULL;
return (void *)sm_raw + off;
}
/**
* apfs_spaceman_get_64 - Get a 64-bit value from an offset in the spaceman
* @sb: superblock structure
* @off: offset for the value
*
* Returns a pointer to the value, or NULL if it doesn't fit.
*/
static __le64 *apfs_spaceman_get_64(struct super_block *sb, size_t off)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
if (off > spaceman->sm_size)
return NULL;
if (off + sizeof(__le64) > spaceman->sm_size)
return NULL;
return (void *)sm_raw + off;
}
/**
* apfs_allocate_ip_bitmap - Allocate a free ip bitmap block
* @sb: filesystem superblock
* @offset_p: on return, the offset from sm_ip_bm_base of the allocated block
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_allocate_ip_bitmap(struct super_block *sb, u16 *offset_p)
{
struct apfs_spaceman *spaceman = NULL;
struct apfs_spaceman_phys *sm_raw = NULL;
u32 free_next_offset, old_head_off;
u16 free_head, blkcnt;
__le16 *old_head_p = NULL;
spaceman = APFS_SM(sb);
sm_raw = spaceman->sm_raw;
free_next_offset = le32_to_cpu(sm_raw->sm_ip_bm_free_next_offset);
free_head = le16_to_cpu(sm_raw->sm_ip_bm_free_head);
blkcnt = (u16)le32_to_cpu(sm_raw->sm_ip_bm_block_count);
/*
* The "free_next" array is a linked list of free blocks that starts
* with the "free_head". Allocate this head then, and make the next
* block into the new head.
*/
old_head_off = free_next_offset + free_head * sizeof(*old_head_p);
old_head_p = apfs_spaceman_get_16(sb, old_head_off);
if (!old_head_p) {
apfs_err(sb, "free_next head offset out of bounds (%u)", old_head_off);
return -EFSCORRUPTED;
}
*offset_p = free_head;
free_head = le16_to_cpup(old_head_p);
sm_raw->sm_ip_bm_free_head = *old_head_p;
/* No longer free, no longer part of the linked list */
*old_head_p = cpu_to_le16(APFS_SPACEMAN_IP_BM_INDEX_INVALID);
/* Just a little sanity check because I've messed this up before */
if (free_head >= blkcnt || *offset_p >= blkcnt) {
apfs_err(sb, "free next list seems empty or corrupt");
return -EFSCORRUPTED;
}
return 0;
}
/**
* apfs_free_ip_bitmap - Free a used ip bitmap block
* @sb: filesystem superblock
* @offset: the offset from sm_ip_bm_base of the block to free
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_free_ip_bitmap(struct super_block *sb, u16 offset)
{
struct apfs_spaceman *spaceman = NULL;
struct apfs_spaceman_phys *sm_raw = NULL;
u32 free_next_offset, old_tail_off;
u16 free_tail;
__le16 *old_tail_p = NULL;
spaceman = APFS_SM(sb);
sm_raw = spaceman->sm_raw;
free_next_offset = le32_to_cpu(sm_raw->sm_ip_bm_free_next_offset);
free_tail = le16_to_cpu(sm_raw->sm_ip_bm_free_tail);
/*
* The "free_next" array is a linked list of free blocks that ends
* with the "free_tail". The block getting freed will become the new
* tail of the list.
*/
old_tail_off = free_next_offset + free_tail * sizeof(*old_tail_p);
old_tail_p = apfs_spaceman_get_16(sb, old_tail_off);
if (!old_tail_p) {
apfs_err(sb, "free_next tail offset out of bounds (%u)", old_tail_off);
return -EFSCORRUPTED;
}
*old_tail_p = cpu_to_le16(offset);
sm_raw->sm_ip_bm_free_tail = cpu_to_le16(offset);
free_tail = offset;
return 0;
}
/**
* apfs_reallocate_ip_bitmap - Find a new block for an ip bitmap
* @sb: filesystem superblock
* @offset_p: offset from sm_ip_bm_base of the bitmap block to reallocate; updated on return
*
* On success returns 0 and updates @offset_p to the new offset allocated for
* the ip bitmap. Since blocks are allocated at the head of the list and freed
* at the tail, there is no risk of reuse by future reallocations within the
* same transaction (unless there is some serious corruption, of course).
*
* Returns a negative error code in case of failure.
*/
static int apfs_reallocate_ip_bitmap(struct super_block *sb, __le16 *offset_p)
{
int err;
u16 offset;
offset = le16_to_cpup(offset_p);
err = apfs_free_ip_bitmap(sb, offset);
if (err) {
apfs_err(sb, "failed to free ip bitmap %u", offset);
return err;
}
err = apfs_allocate_ip_bitmap(sb, &offset);
if (err) {
apfs_err(sb, "failed to allocate a new ip bitmap block");
return err;
}
*offset_p = cpu_to_le16(offset);
return 0;
}
/**
* apfs_write_single_ip_bitmap - Write a single ip bitmap to disk
* @sb: filesystem superblock
* @bitmap: bitmap to write
* @idx: index of the ip bitmap to write
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_write_single_ip_bitmap(struct super_block *sb, char *bitmap, u32 idx)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_spaceman *spaceman = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
struct buffer_head *bh = NULL;
u64 ip_bm_base, ip_bitmap_bno;
u32 xid_off, ip_bitmap_off;
__le64 *xid_p = NULL;
__le16 *ip_bitmap_p = NULL;
int err;
ip_bm_base = le64_to_cpu(sm_raw->sm_ip_bm_base);
/* First update the xid, which is kept in a separate array */
xid_off = le32_to_cpu(sm_raw->sm_ip_bm_xid_offset) + idx * sizeof(*xid_p);
xid_p = apfs_spaceman_get_64(sb, xid_off);
if (!xid_p) {
apfs_err(sb, "xid out of bounds (%u)", xid_off);
return -EFSCORRUPTED;
}
*xid_p = cpu_to_le64(nxi->nx_xid);
/* Now find a new location for the ip bitmap (and free the old one) */
ip_bitmap_off = le32_to_cpu(sm_raw->sm_ip_bitmap_offset) + idx * sizeof(*ip_bitmap_p);
ip_bitmap_p = apfs_spaceman_get_16(sb, ip_bitmap_off);
if (!ip_bitmap_p) {
apfs_err(sb, "bmap offset out of bounds (%u)", ip_bitmap_off);
return -EFSCORRUPTED;
}
err = apfs_reallocate_ip_bitmap(sb, ip_bitmap_p);
if (err) {
apfs_err(sb, "failed to reallocate ip bitmap %u", le16_to_cpup(ip_bitmap_p));
return err;
}
/* Finally, write the dirty bitmap to the new location */
ip_bitmap_bno = ip_bm_base + le16_to_cpup(ip_bitmap_p);
bh = apfs_getblk(sb, ip_bitmap_bno);
if (!bh) {
apfs_err(sb, "failed to map block for CoW (0x%llx)", ip_bitmap_bno);
return -EIO;
}
memcpy(bh->b_data, bitmap, sb->s_blocksize);
err = apfs_transaction_join(sb, bh);
if (err)
goto fail;
bh = NULL;
spaceman->sm_ip_bmaps[idx].dirty = false;
return 0;
fail:
brelse(bh);
bh = NULL;
return err;
}
/**
* apfs_write_ip_bitmaps - Write all dirty ip bitmaps to disk
* @sb: superblock structure
*
* Returns 0 on success or a negative error code in case of failure.
*/
int apfs_write_ip_bitmaps(struct super_block *sb)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
struct apfs_ip_bitmap_block_info *info = NULL;
u32 bmaps_count = spaceman->sm_ip_bmaps_count;
int err;
u32 i;
apfs_assert_in_transaction(sb, &sm_raw->sm_o);
for (i = 0; i < bmaps_count; ++i) {
info = &spaceman->sm_ip_bmaps[i];
if (!info->dirty)
continue;
err = apfs_write_single_ip_bitmap(sb, info->block, i);
if (err) {
apfs_err(sb, "failed to rotate ip bitmap %u", i);
return err;
}
}
return 0;
}
/**
* apfs_read_single_ip_bitmap - Read a single ip bitmap into memory
* @sb: filesystem superblock
* @idx: index of the ip bitmap to read
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_read_single_ip_bitmap(struct super_block *sb, u32 idx)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
struct buffer_head *bh = NULL;
char *bitmap = NULL;
u64 ip_bm_base, ip_bitmap_bno;
u32 ip_bitmap_off;
__le16 *ip_bitmap_p = NULL;
int err;
ip_bm_base = le64_to_cpu(sm_raw->sm_ip_bm_base);
ip_bitmap_off = le32_to_cpu(sm_raw->sm_ip_bitmap_offset) + idx * sizeof(*ip_bitmap_p);
ip_bitmap_p = apfs_spaceman_get_16(sb, ip_bitmap_off);
if (!ip_bitmap_p) {
apfs_err(sb, "bmap offset out of bounds (%u)", ip_bitmap_off);
return -EFSCORRUPTED;
}
bitmap = kmalloc(sb->s_blocksize, GFP_KERNEL);
if (!bitmap)
return -ENOMEM;
ip_bitmap_bno = ip_bm_base + le16_to_cpup(ip_bitmap_p);
bh = apfs_sb_bread(sb, ip_bitmap_bno);
if (!bh) {
apfs_err(sb, "failed to read ip bitmap (0x%llx)", ip_bitmap_bno);
err = -EIO;
goto fail;
}
memcpy(bitmap, bh->b_data, sb->s_blocksize);
brelse(bh);
bh = NULL;
spaceman->sm_ip_bmaps[idx].dirty = false;
spaceman->sm_ip_bmaps[idx].block = bitmap;
bitmap = NULL;
return 0;
fail:
kfree(bitmap);
bitmap = NULL;
return err;
}
/**
* apfs_read_ip_bitmaps - Read all the ip bitmaps into memory
* @sb: superblock structure
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_read_ip_bitmaps(struct super_block *sb)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
u32 bmaps_count = spaceman->sm_ip_bmaps_count;
int err;
u32 i;
for (i = 0; i < bmaps_count; ++i) {
err = apfs_read_single_ip_bitmap(sb, i);
if (err) {
apfs_err(sb, "failed to read ip bitmap %u", i);
return err;
}
}
return 0;
}
/*
* Free queue record data
*/
struct apfs_fq_rec {
u64 xid;
u64 bno;
u64 len;
};
/**
* apfs_fq_rec_from_query - Read the free queue record found by a query
* @query: the query that found the record
* @fqrec: on return, the free queue record
*
* Reads the free queue record into @fqrec and performs some basic sanity
* checks as a protection against crafted filesystems. Returns 0 on success
* or -EFSCORRUPTED otherwise.
*/
static int apfs_fq_rec_from_query(struct apfs_query *query, struct apfs_fq_rec *fqrec)
{
char *raw = query->node->object.data;
struct apfs_spaceman_free_queue_key *key;
if (query->key_len != sizeof(*key)) {
apfs_err(query->node->object.sb, "bad key length (%d)", query->key_len);
return -EFSCORRUPTED;
}
key = (struct apfs_spaceman_free_queue_key *)(raw + query->key_off);
fqrec->xid = le64_to_cpu(key->sfqk_xid);
fqrec->bno = le64_to_cpu(key->sfqk_paddr);
if (query->len == 0) {
fqrec->len = 1; /* Ghost record */
return 0;
} else if (query->len == sizeof(__le64)) {
fqrec->len = le64_to_cpup((__le64 *)(raw + query->off));
return 0;
}
apfs_err(query->node->object.sb, "bad value length (%d)", query->len);
return -EFSCORRUPTED;
}
/**
* apfs_block_in_ip - Does this block belong to the internal pool?
* @sm: in-memory spaceman structure
* @bno: block number to check
*/
static inline bool apfs_block_in_ip(struct apfs_spaceman *sm, u64 bno)
{
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
u64 start = le64_to_cpu(sm_raw->sm_ip_base);
u64 end = start + le64_to_cpu(sm_raw->sm_ip_block_count);
return bno >= start && bno < end;
}
/**
* apfs_ip_mark_free - Mark a block in the internal pool as free
* @sb: superblock structure
* @bno: block number (must belong to the ip)
*/
static int apfs_ip_mark_free(struct super_block *sb, u64 bno)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_ip_bitmap_block_info *info = NULL;
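/* Locate the in-memory bitmap block that covers this ip block, then clear its bit */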
bno -= le64_to_cpu(sm_raw->sm_ip_base);
info = &sm->sm_ip_bmaps[bno >> sm->sm_ip_bmaps_shift];
__clear_bit_le(bno & sm->sm_ip_bmaps_mask, info->block);
info->dirty = true;
return 0;
}
/*
* apfs_main_free - Mark a regular block as free
*/
static int apfs_main_free(struct super_block *sb, u64 bno);
/**
* apfs_flush_fq_rec - Delete a single fq record and mark its blocks as free
* @root: free queue root node
* @xid: transaction to target
* @len: on return, the number of freed blocks
*
* Returns 0 on success, or a negative error code in case of failure. -ENODATA
* in particular means that there are no matching records left.
*/
static int apfs_flush_fq_rec(struct apfs_node *root, u64 xid, u64 *len)
{
struct super_block *sb = root->object.sb;
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_query *query = NULL;
struct apfs_fq_rec fqrec = {0};
u64 bno;
int err;
query = apfs_alloc_query(root, NULL /* parent */);
if (!query)
return -ENOMEM;
apfs_init_free_queue_key(xid, 0 /* paddr */, &query->key);
query->flags |= APFS_QUERY_FREE_QUEUE | APFS_QUERY_ANY_NUMBER | APFS_QUERY_EXACT;
err = apfs_btree_query(sb, &query);
if (err) {
if (err != -ENODATA)
apfs_err(sb, "query failed for xid 0x%llx, paddr 0x%llx", xid, 0ULL);
goto fail;
}
err = apfs_fq_rec_from_query(query, &fqrec);
if (err) {
apfs_err(sb, "bad free queue rec for xid 0x%llx", xid);
goto fail;
}
for (bno = fqrec.bno; bno < fqrec.bno + fqrec.len; ++bno) {
if (apfs_block_in_ip(sm, bno))
err = apfs_ip_mark_free(sb, bno);
else
err = apfs_main_free(sb, bno);
if (err) {
apfs_err(sb, "freeing block 0x%llx failed (%d)", (unsigned long long)bno, err);
goto fail;
}
}
err = apfs_btree_remove(query);
if (err) {
apfs_err(sb, "removal failed for xid 0x%llx", xid);
goto fail;
}
*len = fqrec.len;
fail:
apfs_free_query(query);
return err;
}
/**
* apfs_free_queue_oldest_xid - Find the oldest xid among the free queue records
* @root: free queue root node
*/
static u64 apfs_free_queue_oldest_xid(struct apfs_node *root)
{
struct apfs_spaceman_free_queue_key *key;
char *raw = root->object.data;
int len, off;
if (root->records == 0)
return 0;
len = apfs_node_locate_key(root, 0, &off);
if (len != sizeof(*key)) {
/* TODO: abort transaction */
apfs_err(root->object.sb, "bad key length (%d)", len);
return 0;
}
key = (struct apfs_spaceman_free_queue_key *)(raw + off);
return le64_to_cpu(key->sfqk_xid);
}
/**
* apfs_flush_free_queue - Free blocks queued by old transactions
* @sb: superblock structure
* @qid: free queue to flush (APFS_SFQ_IP or APFS_SFQ_MAIN)
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_flush_free_queue(struct super_block *sb, unsigned int qid)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_spaceman_free_queue *fq = &sm_raw->sm_fq[qid];
struct apfs_node *fq_root;
struct apfs_btree_info *fq_info = NULL;
u64 oldest = le64_to_cpu(fq->sfq_oldest_xid);
int err;
fq_root = apfs_read_node(sb, le64_to_cpu(fq->sfq_tree_oid),
APFS_OBJ_EPHEMERAL, true /* write */);
if (IS_ERR(fq_root)) {
apfs_err(sb, "failed to read fq root 0x%llx", le64_to_cpu(fq->sfq_tree_oid));
return PTR_ERR(fq_root);
}
while (oldest) {
/*
* Blocks freed in the current transaction can't be reused
* safely until after the commit, but I don't think there is
* any point in preserving old transactions. I'm guessing the official
* driver keeps multiple transactions going at the same time; that
* must be why they need a free queue.
*/
if (oldest == nxi->nx_xid)
break;
while (true) {
u64 count = 0;
/* Probably not very efficient... */
err = apfs_flush_fq_rec(fq_root, oldest, &count);
if (err == -ENODATA) {
err = 0;
break;
} else if (err) {
apfs_err(sb, "failed to flush fq");
goto fail;
} else {
le64_add_cpu(&fq->sfq_count, -count);
}
}
oldest = apfs_free_queue_oldest_xid(fq_root);
fq->sfq_oldest_xid = cpu_to_le64(oldest);
}
if (qid == APFS_SFQ_MAIN) {
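/* The btree info footer sits at the very end of the root node */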
fq_info = (void *)fq_root->object.data + sb->s_blocksize - sizeof(*fq_info);
sm->sm_main_fq_nodes = le64_to_cpu(fq_info->bt_node_count);
if (sm->sm_main_fq_nodes != 1) {
apfs_alert(sb, "main queue wasn't flushed in full - bug!");
err = -EFSCORRUPTED;
goto fail;
}
}
fail:
apfs_node_free(fq_root);
return err;
}
/**
* apfs_allocate_spaceman - Allocate an in-memory spaceman struct, if needed
* @sb: superblock structure
* @raw: on-disk spaceman struct
* @size: size of the on-disk spaceman
*
* Returns the spaceman and sets it in the superblock info. Also performs all
* initializations for the internal pool, including reading all the ip bitmaps.
* This is a bit out of place here, but it's convenient because it has to
* happen only once.
*
* On failure, returns an error pointer.
*/
static struct apfs_spaceman *apfs_allocate_spaceman(struct super_block *sb, struct apfs_spaceman_phys *raw, u32 size)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_spaceman *spaceman = NULL;
int blk_bitcnt = sb->s_blocksize * 8;
size_t sm_size;
u32 bmap_cnt;
int err;
if (nxi->nx_spaceman)
return nxi->nx_spaceman;
/* We don't expect filesystems this big; it would be like 260 TiB */
bmap_cnt = le32_to_cpu(raw->sm_ip_bm_size_in_blocks);
if (bmap_cnt > 200) {
apfs_err(sb, "too many ip bitmap blocks (%u)", bmap_cnt);
return ERR_PTR(-EFSCORRUPTED);
}
sm_size = sizeof(*spaceman) + bmap_cnt * sizeof(spaceman->sm_ip_bmaps[0]);
spaceman = nxi->nx_spaceman = kzalloc(sm_size, GFP_KERNEL);
if (!spaceman)
return ERR_PTR(-ENOMEM);
spaceman->sm_nxi = nxi;
/*
* These two fields must be set before reading the ip bitmaps, since
* that stuff involves several variable-length arrays inside the
* spaceman object itself.
*/
spaceman->sm_raw = raw;
spaceman->sm_size = size;
spaceman->sm_ip_bmaps_count = bmap_cnt;
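/*
* Each ip bitmap block tracks blk_bitcnt blocks, so the shift and mask
* turn an ip-relative block number into a bitmap index and a bit offset.
*/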
spaceman->sm_ip_bmaps_mask = blk_bitcnt - 1;
spaceman->sm_ip_bmaps_shift = order_base_2(blk_bitcnt);
/* This must happen only once, so it's easier to just leave it here */
err = apfs_read_ip_bitmaps(sb);
if (err) {
apfs_err(sb, "failed to read the ip bitmaps");
kfree(spaceman);
nxi->nx_spaceman = spaceman = NULL;
return ERR_PTR(err);
}
return nxi->nx_spaceman;
}
/**
* apfs_read_spaceman - Find and read the space manager
* @sb: superblock structure
*
* Reads the space manager structure from disk and initializes its in-memory
* counterpart; returns 0 on success, or a negative error code in case of
* failure.
*/
int apfs_read_spaceman(struct super_block *sb)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_nx_superblock *raw_sb = nxi->nx_raw;
struct apfs_spaceman *spaceman = NULL;
struct apfs_ephemeral_object_info *sm_eph_info = NULL;
struct apfs_spaceman_phys *sm_raw;
u32 sm_flags;
u64 oid = le64_to_cpu(raw_sb->nx_spaceman_oid);
int err;
if (sb->s_flags & SB_RDONLY) /* The space manager won't be needed */
return 0;
sm_eph_info = apfs_ephemeral_object_lookup(sb, oid);
if (IS_ERR(sm_eph_info)) {
apfs_err(sb, "no spaceman object for oid 0x%llx", oid);
return PTR_ERR(sm_eph_info);
}
sm_raw = (struct apfs_spaceman_phys *)sm_eph_info->object;
sm_raw->sm_o.o_xid = cpu_to_le64(nxi->nx_xid);
spaceman = apfs_allocate_spaceman(sb, sm_raw, sm_eph_info->size);
if (IS_ERR(spaceman)) {
apfs_err(sb, "failed to allocate spaceman");
err = PTR_ERR(spaceman);
goto fail;
}
spaceman->sm_free_cache_base = spaceman->sm_free_cache_blkcnt = 0;
sm_flags = le32_to_cpu(sm_raw->sm_flags);
/* Undocumented feature, but it's too common to refuse to mount */
if (sm_flags & APFS_SM_FLAG_VERSIONED)
pr_warn_once("APFS: space manager is versioned\n");
/* Only read the main device; fusion drives are not yet supported */
err = apfs_read_spaceman_dev(sb, &sm_raw->sm_dev[APFS_SD_MAIN]);
if (err) {
apfs_err(sb, "failed to read main device");
goto fail;
}
spaceman->sm_blocks_per_chunk =
le32_to_cpu(sm_raw->sm_blocks_per_chunk);
spaceman->sm_chunks_per_cib = le32_to_cpu(sm_raw->sm_chunks_per_cib);
if (spaceman->sm_chunks_per_cib > apfs_max_chunks_per_cib(sb)) {
apfs_err(sb, "too many chunks per cib (%u)", spaceman->sm_chunks_per_cib);
err = -EFSCORRUPTED;
goto fail;
}
/*
* We flush the free queues in full when each transaction begins, to make it
* harder for the btrees to become too unbalanced.
*/
err = apfs_flush_free_queue(sb, APFS_SFQ_IP);
if (err) {
apfs_err(sb, "failed to flush ip fq");
goto fail;
}
err = apfs_flush_free_queue(sb, APFS_SFQ_MAIN);
if (err) {
apfs_err(sb, "failed to flush main fq");
goto fail;
}
return 0;
fail:
/* spaceman may be an error pointer if apfs_allocate_spaceman() failed */
if (!IS_ERR(spaceman))
spaceman->sm_raw = NULL;
return err;
}
/**
* apfs_write_spaceman - Write the in-memory spaceman fields to the disk buffer
* @sm: in-memory spaceman structure
*
* Copies the updated in-memory fields of the space manager into the on-disk
* structure; the buffer is not dirtied.
*/
static void apfs_write_spaceman(struct apfs_spaceman *sm)
{
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_spaceman_device *dev_raw = &sm_raw->sm_dev[APFS_SD_MAIN];
struct apfs_nxsb_info *nxi;
nxi = sm->sm_nxi;
ASSERT(le64_to_cpu(sm_raw->sm_o.o_xid) == nxi->nx_xid);
dev_raw->sm_free_count = cpu_to_le64(sm->sm_free_count);
}
/**
* apfs_ip_find_free - Find a free block inside the internal pool
* @sb: superblock structure
*
* Returns the block number for a free block, or 0 in case of corruption.
*/
static u64 apfs_ip_find_free(struct super_block *sb)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
int blk_bitcnt = sb->s_blocksize * 8;
u64 full_bitcnt = le64_to_cpu(sm_raw->sm_ip_block_count);
u32 i;
for (i = 0; i < sm->sm_ip_bmaps_count; ++i) {
char *bitmap = sm->sm_ip_bmaps[i].block;
u64 off_in_bmap_blk, off_in_ip;
off_in_bmap_blk = find_next_zero_bit_le(bitmap, blk_bitcnt, 0 /* offset */);
if (off_in_bmap_blk >= blk_bitcnt) /* No space in this bitmap block */
continue;
/* We found something, confirm that it's not outside the ip */
off_in_ip = (i << sm->sm_ip_bmaps_shift) + off_in_bmap_blk;
if (off_in_ip >= full_bitcnt)
break;
return le64_to_cpu(sm_raw->sm_ip_base) + off_in_ip;
}
apfs_err(sb, "internal pool seems full");
return 0;
}
/**
* apfs_chunk_find_free - Find a free block inside a chunk
* @sb: superblock structure
* @bitmap: allocation bitmap for the chunk, which should have free blocks
* @addr: number of the first block in the chunk
*
* Returns the block number for a free block, or 0 in case of corruption.
*/
static u64 apfs_chunk_find_free(struct super_block *sb, char *bitmap, u64 addr)
{
int bitcount = sb->s_blocksize * 8;
u64 bno;
bno = find_next_zero_bit_le(bitmap, bitcount, 0 /* offset */);
if (bno >= bitcount)
return 0;
return addr + bno;
}
/**
* apfs_ip_mark_used - Mark a block in the internal pool as used
* @sb: superblock structure
* @bno: block number (must belong to the ip)
*/
static void apfs_ip_mark_used(struct super_block *sb, u64 bno)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_ip_bitmap_block_info *info = NULL;
bno -= le64_to_cpu(sm_raw->sm_ip_base);
info = &sm->sm_ip_bmaps[bno >> sm->sm_ip_bmaps_shift];
__set_bit_le(bno & sm->sm_ip_bmaps_mask, info->block);
info->dirty = true;
}
/**
* apfs_chunk_mark_used - Mark a block inside a chunk as used
* @sb: superblock structure
* @bitmap: allocation bitmap for the chunk
* @bno: block number (must belong to the chunk)
*/
static inline void apfs_chunk_mark_used(struct super_block *sb, char *bitmap,
u64 bno)
{
int bitcount = sb->s_blocksize * 8;
__set_bit_le(bno & (bitcount - 1), bitmap);
}
/**
* apfs_chunk_mark_free - Mark a block inside a chunk as free
* @sb: superblock structure
* @bitmap: allocation bitmap for the chunk
* @bno: block number (must belong to the chunk)
*/
static inline int apfs_chunk_mark_free(struct super_block *sb, char *bitmap,
u64 bno)
{
int bitcount = sb->s_blocksize * 8;
return __test_and_clear_bit_le(bno & (bitcount - 1), bitmap);
}
/**
* apfs_free_queue_try_insert - Try to add a block range to its free queue
* @sb: superblock structure
* @bno: first block number to free
* @count: number of consecutive blocks to free
*
* Same as apfs_free_queue_insert_nocache(), except that this one can also fail
* with -ENOSPC if there is no room for the new record.
*/
static int apfs_free_queue_try_insert(struct super_block *sb, u64 bno, u64 count)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_spaceman_free_queue *fq;
struct apfs_node *fq_root = NULL;
struct apfs_btree_info *fq_info = NULL;
struct apfs_query *query = NULL;
struct apfs_spaceman_free_queue_key raw_key;
bool ghost = count == 1;
int needed_room;
__le64 raw_val;
u64 node_count;
u16 node_limit;
unsigned int qid;
int err;
qid = apfs_block_in_ip(sm, bno) ? APFS_SFQ_IP : APFS_SFQ_MAIN;
fq = &sm_raw->sm_fq[qid];
fq_root = apfs_read_node(sb, le64_to_cpu(fq->sfq_tree_oid),
APFS_OBJ_EPHEMERAL, true /* write */);
if (IS_ERR(fq_root)) {
apfs_err(sb, "failed to read fq root 0x%llx", le64_to_cpu(fq->sfq_tree_oid));
return PTR_ERR(fq_root);
}
query = apfs_alloc_query(fq_root, NULL /* parent */);
if (!query) {
err = -ENOMEM;
goto fail;
}
apfs_init_free_queue_key(nxi->nx_xid, bno, &query->key);
query->flags |= APFS_QUERY_FREE_QUEUE;
err = apfs_btree_query(sb, &query);
if (err && err != -ENODATA) {
apfs_err(sb, "query failed for xid 0x%llx, paddr 0x%llx", nxi->nx_xid, bno);
goto fail;
}
fq_info = (void *)fq_root->object.data + sb->s_blocksize - sizeof(*fq_info);
node_count = le64_to_cpu(fq_info->bt_node_count);
node_limit = le16_to_cpu(fq->sfq_tree_node_limit);
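/* At the node limit the tree can't grow, so the record must fit in the current leaf */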
if (node_count == node_limit) {
needed_room = sizeof(raw_key) + (ghost ? 0 : sizeof(raw_val));
if (!apfs_node_has_room(query->node, needed_room, false /* replace */)) {
err = -ENOSPC;
goto fail;
}
}
raw_key.sfqk_xid = cpu_to_le64(nxi->nx_xid);
raw_key.sfqk_paddr = cpu_to_le64(bno);
if (ghost) {
/* A lack of value (ghost record) means single-block extent */
err = apfs_btree_insert(query, &raw_key, sizeof(raw_key), NULL /* val */, 0 /* val_len */);
} else {
raw_val = cpu_to_le64(count);
err = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
}
if (err) {
apfs_err(sb, "insertion failed for xid 0x%llx, paddr 0x%llx", nxi->nx_xid, bno);
goto fail;
}
if (!fq->sfq_oldest_xid)
fq->sfq_oldest_xid = cpu_to_le64(nxi->nx_xid);
le64_add_cpu(&fq->sfq_count, count);
if (qid == APFS_SFQ_MAIN)
sm->sm_main_fq_nodes = le64_to_cpu(fq_info->bt_node_count);
fail:
apfs_free_query(query);
apfs_node_free(fq_root);
return err;
}
/**
* apfs_free_queue_insert_nocache - Add a block range to its free queue
* @sb: superblock structure
* @bno: first block number to free
* @count: number of consecutive blocks to free
*
* Same as apfs_free_queue_insert(), but writes to the free queue directly,
* bypassing the cache of the latest freed block range.
*
* Returns 0 on success or a negative error code in case of failure.
*/
int apfs_free_queue_insert_nocache(struct super_block *sb, u64 bno, u64 count)
{
unsigned int qid;
int err;
err = apfs_free_queue_try_insert(sb, bno, count);
if (err == -ENOSPC) {
qid = apfs_block_in_ip(APFS_SM(sb), bno) ? APFS_SFQ_IP : APFS_SFQ_MAIN;
apfs_alert(sb, "free queue (%u) seems full - bug!", qid);
err = -EFSCORRUPTED;
}
if (err) {
apfs_err(sb, "fq insert failed (0x%llx-0x%llx)", bno, count);
return err;
}
return 0;
}
/**
* apfs_free_queue_insert - Add a block range to its free queue
* @sb: superblock structure
* @bno: first block number to free
* @count: number of consecutive blocks to free
*
* Uses a cache to delay the actual tree operations as much as possible.
*
* Returns 0 on success or a negative error code in case of failure.
*/
int apfs_free_queue_insert(struct super_block *sb, u64 bno, u64 count)
{
struct apfs_spaceman *sm = APFS_SM(sb);
int err;
if (sm->sm_free_cache_base == 0) {
/* Nothing yet cached */
sm->sm_free_cache_base = bno;
sm->sm_free_cache_blkcnt = count;
return 0;
}
/*
* First attempt to extend the cache of freed blocks, but never cache
* a range that doesn't belong to a single free queue.
*/
if (apfs_block_in_ip(sm, bno) == apfs_block_in_ip(sm, sm->sm_free_cache_base)) {
if (bno == sm->sm_free_cache_base + sm->sm_free_cache_blkcnt) {
sm->sm_free_cache_blkcnt += count;
return 0;
}
if (bno + count == sm->sm_free_cache_base) {
sm->sm_free_cache_base -= count;
sm->sm_free_cache_blkcnt += count;
return 0;
}
}
/* Failed to extend the cache, so flush it and replace it */
err = apfs_free_queue_insert_nocache(sb, sm->sm_free_cache_base, sm->sm_free_cache_blkcnt);
if (err) {
apfs_err(sb, "fq cache flush failed (0x%llx-0x%llx)", sm->sm_free_cache_base, sm->sm_free_cache_blkcnt);
return err;
}
sm->sm_free_cache_base = bno;
sm->sm_free_cache_blkcnt = count;
return 0;
}
/**
* apfs_chunk_alloc_free - Allocate or free a block in a given CIB and chunk
* @sb: superblock structure
* @cib_bh: buffer head for the chunk-info block
* @index: index of this chunk's info structure inside @cib
* @bno: block number
* @is_alloc: true to allocate, false to free
*/
static int apfs_chunk_alloc_free(struct super_block *sb,
struct buffer_head **cib_bh,
int index, u64 *bno, bool is_alloc)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_chunk_info_block *cib;
struct apfs_chunk_info *ci;
struct buffer_head *bmap_bh = NULL;
char *bmap = NULL;
bool old_cib = false;
bool old_bmap = false;
int err = 0;
cib = (struct apfs_chunk_info_block *)(*cib_bh)->b_data;
ci = &cib->cib_chunk_info[index];
/* Cibs and bitmaps from old transactions can't be modified in place */
if (le64_to_cpu(cib->cib_o.o_xid) < nxi->nx_xid)
old_cib = true;
if (le64_to_cpu(ci->ci_xid) < nxi->nx_xid)
old_bmap = true;
if (is_alloc && le32_to_cpu(ci->ci_free_count) < 1)
return -ENOSPC;
/* Read the current bitmap, or allocate it if necessary */
if (!ci->ci_bitmap_addr) {
u64 bmap_bno;
if (!is_alloc) {
apfs_err(sb, "attempt to free block in all-free chunk");
return -EFSCORRUPTED;
}
/* All blocks in this chunk are free */
bmap_bno = apfs_ip_find_free(sb);
if (!bmap_bno) {
apfs_err(sb, "no free blocks in ip");
return -EFSCORRUPTED;
}
bmap_bh = apfs_sb_bread(sb, bmap_bno);
} else {
bmap_bh = apfs_sb_bread(sb, le64_to_cpu(ci->ci_bitmap_addr));
}
if (!bmap_bh) {
apfs_err(sb, "failed to read bitmap block");
return -EIO;
}
bmap = bmap_bh->b_data;
if (!ci->ci_bitmap_addr) {
memset(bmap, 0, sb->s_blocksize);
old_bmap = false;
}
/* Write the bitmap to its location for the next transaction */
if (old_bmap) {
struct buffer_head *new_bmap_bh;
u64 new_bmap_bno;
new_bmap_bno = apfs_ip_find_free(sb);
if (!new_bmap_bno) {
apfs_err(sb, "no free blocks in ip");
err = -EFSCORRUPTED;
goto fail;
}
new_bmap_bh = apfs_getblk(sb, new_bmap_bno);
if (!new_bmap_bh) {
apfs_err(sb, "failed to map new bmap block (0x%llx)", new_bmap_bno);
err = -EIO;
goto fail;
}
memcpy(new_bmap_bh->b_data, bmap, sb->s_blocksize);
err = apfs_free_queue_insert(sb, bmap_bh->b_blocknr, 1);
brelse(bmap_bh);
bmap_bh = new_bmap_bh;
if (err) {
apfs_err(sb, "free queue insertion failed");
goto fail;
}
bmap = bmap_bh->b_data;
}
apfs_ip_mark_used(sb, bmap_bh->b_blocknr);
/* Write the cib to its location for the next transaction */
if (old_cib) {
struct buffer_head *new_cib_bh;
u64 new_cib_bno;
new_cib_bno = apfs_ip_find_free(sb);
if (!new_cib_bno) {
apfs_err(sb, "no free blocks in ip");
err = -EFSCORRUPTED;
goto fail;
}
new_cib_bh = apfs_getblk(sb, new_cib_bno);
if (!new_cib_bh) {
apfs_err(sb, "failed to map new cib block (0x%llx)", new_cib_bno);
err = -EIO;
goto fail;
}
memcpy(new_cib_bh->b_data, (*cib_bh)->b_data, sb->s_blocksize);
err = apfs_free_queue_insert(sb, (*cib_bh)->b_blocknr, 1);
brelse(*cib_bh);
*cib_bh = new_cib_bh;
if (err) {
apfs_err(sb, "free queue insertion failed");
goto fail;
}
err = apfs_transaction_join(sb, *cib_bh);
if (err)
goto fail;
cib = (struct apfs_chunk_info_block *)(*cib_bh)->b_data;
ci = &cib->cib_chunk_info[index];
cib->cib_o.o_oid = cpu_to_le64(new_cib_bno);
cib->cib_o.o_xid = cpu_to_le64(nxi->nx_xid);
apfs_ip_mark_used(sb, new_cib_bno);
}
/* The chunk info can be updated now */
apfs_assert_in_transaction(sb, &cib->cib_o);
ci->ci_xid = cpu_to_le64(nxi->nx_xid);
le32_add_cpu(&ci->ci_free_count, is_alloc ? -1 : 1);
ci->ci_bitmap_addr = cpu_to_le64(bmap_bh->b_blocknr);
ASSERT(buffer_trans(*cib_bh));
set_buffer_csum(*cib_bh);
/* Finally, allocate / free the actual block that was requested */
if (is_alloc) {
*bno = apfs_chunk_find_free(sb, bmap, le64_to_cpu(ci->ci_addr));
if (!*bno) {
apfs_err(sb, "no free blocks in chunk");
err = -EFSCORRUPTED;
goto fail;
}
apfs_chunk_mark_used(sb, bmap, *bno);
sm->sm_free_count -= 1;
} else {
if (!apfs_chunk_mark_free(sb, bmap, *bno)) {
apfs_err(sb, "block already marked as free (0x%llx)", *bno);
le32_add_cpu(&ci->ci_free_count, -1);
set_buffer_csum(*cib_bh);
err = -EFSCORRUPTED;
} else
sm->sm_free_count += 1;
}
mark_buffer_dirty(bmap_bh);
fail:
brelse(bmap_bh);
return err;
}
/**
* apfs_chunk_allocate_block - Allocate a single block from a chunk
* @sb: superblock structure
* @cib_bh: buffer head for the chunk-info block
* @index: index of this chunk's info structure inside @cib
* @bno: on return, the allocated block number
*
* Finds a free block in the chunk and marks it as used; the buffer at @cib_bh
* may be replaced if needed for copy-on-write. Returns 0 on success, or a
* negative error code in case of failure.
*/
static int apfs_chunk_allocate_block(struct super_block *sb,
struct buffer_head **cib_bh,
int index, u64 *bno)
{
return apfs_chunk_alloc_free(sb, cib_bh, index, bno, true);
}
/**
* apfs_cib_allocate_block - Allocate a single block from a cib
* @sb: superblock structure
* @cib_bh: buffer head for the chunk-info block
* @bno: on return, the allocated block number
* @backwards: start the search on the last chunk
*
* Finds a free block among all the chunks in the cib and marks it as used; the
* buffer at @cib_bh may be replaced if needed for copy-on-write. Returns 0 on
* success, or a negative error code in case of failure.
*/
static int apfs_cib_allocate_block(struct super_block *sb,
struct buffer_head **cib_bh, u64 *bno, bool backwards)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_chunk_info_block *cib;
u32 chunk_count;
int i;
cib = (struct apfs_chunk_info_block *)(*cib_bh)->b_data;
if (nxi->nx_flags & APFS_CHECK_NODES && !apfs_obj_verify_csum(sb, *cib_bh)) {
apfs_err(sb, "bad checksum for chunk-info block");
return -EFSBADCRC;
}
/* Avoid out-of-bounds operations on corrupted cibs */
chunk_count = le32_to_cpu(cib->cib_chunk_info_count);
if (chunk_count > sm->sm_chunks_per_cib) {
apfs_err(sb, "too many chunks in cib (%u)", chunk_count);
return -EFSCORRUPTED;
}
for (i = 0; i < chunk_count; ++i) {
int index;
int err;
index = backwards ? chunk_count - 1 - i : i;
err = apfs_chunk_allocate_block(sb, cib_bh, index, bno);
if (err == -ENOSPC) /* This chunk is full */
continue;
if (err)
apfs_err(sb, "error during allocation");
return err;
}
return -ENOSPC;
}
/**
* apfs_spaceman_allocate_block - Allocate a single on-disk block
* @sb: superblock structure
* @bno: on return, the allocated block number
* @backwards: start the search on the last chunk
*
* Finds a free block among the spaceman bitmaps and marks it as used. Returns
* 0 on success, or a negative error code in case of failure.
*/
int apfs_spaceman_allocate_block(struct super_block *sb, u64 *bno, bool backwards)
{
struct apfs_spaceman *sm = APFS_SM(sb);
int i;
for (i = 0; i < sm->sm_cib_count; ++i) {
struct buffer_head *cib_bh;
u64 cib_bno;
int index;
int err;
/* Keep extents and metadata separate to limit fragmentation */
index = backwards ? sm->sm_cib_count - 1 - i : i;
cib_bno = apfs_spaceman_read_cib_addr(sb, index);
cib_bh = apfs_sb_bread(sb, cib_bno);
if (!cib_bh) {
apfs_err(sb, "failed to read cib");
return -EIO;
}
err = apfs_cib_allocate_block(sb, &cib_bh, bno, backwards);
if (!err) {
/* The cib may have been moved */
apfs_spaceman_write_cib_addr(sb, index, cib_bh->b_blocknr);
/* The free block count has changed */
apfs_write_spaceman(sm);
}
brelse(cib_bh);
if (err == -ENOSPC) /* This cib is full */
continue;
if (err)
apfs_err(sb, "error during allocation");
return err;
}
/*
* We checked the free space before starting the transaction, so this
* isn't expected to happen.
*/
apfs_err(sb, "ran out of space during transaction");
return -ENOSPC;
}
/**
* apfs_chunk_free - Mark a regular block as free, given its CIB and chunk
* @sb: superblock structure
* @cib_bh: buffer head for the chunk-info block
* @index: index of this chunk's info structure inside @cib
* @bno: block number (must not belong to the ip)
*/
static int apfs_chunk_free(struct super_block *sb,
struct buffer_head **cib_bh,
int index, u64 bno)
{
return apfs_chunk_alloc_free(sb, cib_bh, index, &bno, false);
}
/**
* apfs_main_free - Mark a regular block as free
* @sb: superblock structure
* @bno: block number (must not belong to the ip)
*/
static int apfs_main_free(struct super_block *sb, u64 bno)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_sb_info *sbi = NULL;
u64 cib_idx, chunk_idx;
struct buffer_head *cib_bh;
u64 cib_bno;
int err, orphan_err;
if (!sm_raw->sm_blocks_per_chunk || !sm_raw->sm_chunks_per_cib) {
apfs_err(sb, "block or chunk count not set");
return -EINVAL;
}
/* TODO: use bitshifts instead of do_div() */
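/*
* do_div() divides in place and returns the remainder: first get the
* chunk number for @bno, then split it into a cib index and the chunk's
* index within that cib.
*/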
chunk_idx = bno;
do_div(chunk_idx, sm->sm_blocks_per_chunk);
cib_idx = chunk_idx;
chunk_idx = do_div(cib_idx, sm->sm_chunks_per_cib);
cib_bno = apfs_spaceman_read_cib_addr(sb, cib_idx);
cib_bh = apfs_sb_bread(sb, cib_bno);
if (!cib_bh) {
apfs_err(sb, "failed to read cib");
return -EIO;
}
err = apfs_chunk_free(sb, &cib_bh, chunk_idx, bno);
if (!err) {
/* The cib may have been moved */
apfs_spaceman_write_cib_addr(sb, cib_idx, cib_bh->b_blocknr);
/* The free block count has changed */
apfs_write_spaceman(sm);
}
brelse(cib_bh);
if (err) {
apfs_err(sb, "error during free");
return err;
}
/* It may be time to resume orphan cleanups, if we made enough room */
sbi = APFS_SB(sb);
orphan_err = atomic_read(&sbi->s_orphan_cleanup_err);
if (orphan_err == -ENOSPC && sm->sm_free_count >= 2 * APFS_DEL_ROOM) {
atomic_set(&sbi->s_orphan_cleanup_err, 0);
apfs_schedule_orphan_cleanup(sb);
}
return err;
}
/**
* apfs_spaceman_get_free_blkcnt - Calculate the total number of free blocks
* @sb: filesystem superblock
* @blkcnt: on return, the total number of free blocks for all devices
*
* Can be called even if the spaceman has not been read (for example, on a
* read-only mount). Returns 0 on success, or a negative error code in case of
* failure.
*/
int apfs_spaceman_get_free_blkcnt(struct super_block *sb, u64 *blkcnt)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_nx_superblock *raw_sb = NULL;
struct apfs_spaceman_phys *sm_raw = NULL;
struct apfs_ephemeral_object_info *sm_eph_info = NULL;
struct apfs_spaceman_device *dev = NULL;
u64 oid;
int err;
if (!nxi->nx_eph_list) {
err = apfs_read_ephemeral_objects(sb);
if (err) {
apfs_err(sb, "failed to read the ephemeral objects");
return err;
}
}
raw_sb = nxi->nx_raw;
oid = le64_to_cpu(raw_sb->nx_spaceman_oid);
sm_eph_info = apfs_ephemeral_object_lookup(sb, oid);
if (IS_ERR(sm_eph_info)) {
apfs_err(sb, "no spaceman object for oid 0x%llx", oid);
return PTR_ERR(sm_eph_info);
}
sm_raw = (struct apfs_spaceman_phys *)sm_eph_info->object;
*blkcnt = 0;
dev = &sm_raw->sm_dev[APFS_SD_MAIN];
*blkcnt += le64_to_cpu(dev->sm_free_count);
dev = &sm_raw->sm_dev[APFS_SD_TIER2];
*blkcnt += le64_to_cpu(dev->sm_free_count);
return 0;
}