mirror of
https://github.com/armbian/linux-cix.git
synced 2026-01-06 12:30:45 -08:00
btrfs: add BTRFS_IOC_ENCODED_READ ioctl
There are 4 main cases:
1. Inline extents: we copy the data straight out of the extent buffer.
2. Hole/preallocated extents: we fill in zeroes.
3. Regular, uncompressed extents: we read the sectors we need directly
from disk.
4. Regular, compressed extents: we read the entire compressed extent
from disk and indicate what subset of the decompressed extent is in
the file.
This initial implementation simplifies a few things that can be improved
in the future:
- Cases 1, 3, and 4 allocate temporary memory to read into before
copying out to userspace.
- We don't do read repair, because it turns out that read repair is
currently broken for compressed data.
- We hold the inode lock during the operation.
Note that we don't need to hold the mmap lock. We may race with
btrfs_page_mkwrite() and read the old data from before the page was
dirtied:
btrfs_page_mkwrite btrfs_encoded_read
---------------------------------------------------
(enter) (enter)
btrfs_wait_ordered_range
lock_extent_bits
btrfs_page_set_dirty
unlock_extent_cached
(exit)
lock_extent_bits
read extent (dirty page hasn't been flushed,
so this is the old data)
unlock_extent_cached
(exit)
we read the old data from before the page was dirtied. But, that's true
even if we were to hold the mmap lock:
btrfs_page_mkwrite btrfs_encoded_read
-------------------------------------------------------------------
(enter) (enter)
btrfs_inode_lock(BTRFS_ILOCK_MMAP)
down_read(i_mmap_lock) (blocked)
btrfs_wait_ordered_range
lock_extent_bits
read extent (page hasn't been dirtied,
so this is the old data)
unlock_extent_cached
btrfs_inode_unlock(BTRFS_ILOCK_MMAP)
down_read(i_mmap_lock) returns
lock_extent_bits
btrfs_page_set_dirty
unlock_extent_cached
In other words, this is inherently racy, so it's fine that we return the
old data in this tiny window.
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
committed by
David Sterba
parent
dcb77a9ae8
commit
1881fba89b
@@ -49,6 +49,7 @@ extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
|
||||
struct btrfs_ordered_sum;
|
||||
struct btrfs_ref;
|
||||
struct btrfs_bio;
|
||||
struct btrfs_ioctl_encoded_io_args;
|
||||
|
||||
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
|
||||
|
||||
@@ -3305,6 +3306,9 @@ int btrfs_writepage_cow_fixup(struct page *page);
|
||||
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
|
||||
struct page *page, u64 start,
|
||||
u64 end, bool uptodate);
|
||||
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
|
||||
extern const struct dentry_operations btrfs_dentry_operations;
|
||||
extern const struct iomap_ops btrfs_dio_iomap_ops;
|
||||
extern const struct iomap_dio_ops btrfs_dio_ops;
|
||||
|
||||
498
fs/btrfs/inode.c
498
fs/btrfs/inode.c
File diff suppressed because it is too large
Load Diff
106
fs/btrfs/ioctl.c
106
fs/btrfs/ioctl.c
@@ -28,6 +28,7 @@
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/fileattr.h>
|
||||
#include <linux/fsverity.h>
|
||||
#include <linux/sched/xacct.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "export.h"
|
||||
@@ -88,6 +89,22 @@ struct btrfs_ioctl_send_args_32 {
|
||||
|
||||
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
|
||||
struct btrfs_ioctl_send_args_32)
|
||||
|
||||
struct btrfs_ioctl_encoded_io_args_32 {
|
||||
compat_uptr_t iov;
|
||||
compat_ulong_t iovcnt;
|
||||
__s64 offset;
|
||||
__u64 flags;
|
||||
__u64 len;
|
||||
__u64 unencoded_len;
|
||||
__u64 unencoded_offset;
|
||||
__u32 compression;
|
||||
__u32 encryption;
|
||||
__u8 reserved[64];
|
||||
};
|
||||
|
||||
#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
|
||||
struct btrfs_ioctl_encoded_io_args_32)
|
||||
#endif
|
||||
|
||||
/* Mask out flags that are inappropriate for the given type of inode. */
|
||||
@@ -5195,6 +5212,89 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
|
||||
bool compat)
|
||||
{
|
||||
struct btrfs_ioctl_encoded_io_args args = { 0 };
|
||||
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
|
||||
flags);
|
||||
size_t copy_end;
|
||||
struct iovec iovstack[UIO_FASTIOV];
|
||||
struct iovec *iov = iovstack;
|
||||
struct iov_iter iter;
|
||||
loff_t pos;
|
||||
struct kiocb kiocb;
|
||||
ssize_t ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
if (compat) {
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
struct btrfs_ioctl_encoded_io_args_32 args32;
|
||||
|
||||
copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
|
||||
flags);
|
||||
if (copy_from_user(&args32, argp, copy_end)) {
|
||||
ret = -EFAULT;
|
||||
goto out_acct;
|
||||
}
|
||||
args.iov = compat_ptr(args32.iov);
|
||||
args.iovcnt = args32.iovcnt;
|
||||
args.offset = args32.offset;
|
||||
args.flags = args32.flags;
|
||||
#else
|
||||
return -ENOTTY;
|
||||
#endif
|
||||
} else {
|
||||
copy_end = copy_end_kernel;
|
||||
if (copy_from_user(&args, argp, copy_end)) {
|
||||
ret = -EFAULT;
|
||||
goto out_acct;
|
||||
}
|
||||
}
|
||||
if (args.flags != 0) {
|
||||
ret = -EINVAL;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
|
||||
&iov, &iter);
|
||||
if (ret < 0)
|
||||
goto out_acct;
|
||||
|
||||
if (iov_iter_count(&iter) == 0) {
|
||||
ret = 0;
|
||||
goto out_iov;
|
||||
}
|
||||
pos = args.offset;
|
||||
ret = rw_verify_area(READ, file, &pos, args.len);
|
||||
if (ret < 0)
|
||||
goto out_iov;
|
||||
|
||||
init_sync_kiocb(&kiocb, file);
|
||||
kiocb.ki_pos = pos;
|
||||
|
||||
ret = btrfs_encoded_read(&kiocb, &iter, &args);
|
||||
if (ret >= 0) {
|
||||
fsnotify_access(file);
|
||||
if (copy_to_user(argp + copy_end,
|
||||
(char *)&args + copy_end_kernel,
|
||||
sizeof(args) - copy_end_kernel))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
out_iov:
|
||||
kfree(iov);
|
||||
out_acct:
|
||||
if (ret > 0)
|
||||
add_rchar(current, ret);
|
||||
inc_syscr(current);
|
||||
return ret;
|
||||
}
|
||||
|
||||
long btrfs_ioctl(struct file *file, unsigned int
|
||||
cmd, unsigned long arg)
|
||||
{
|
||||
@@ -5339,6 +5439,12 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
return fsverity_ioctl_enable(file, (const void __user *)argp);
|
||||
case FS_IOC_MEASURE_VERITY:
|
||||
return fsverity_ioctl_measure(file, argp);
|
||||
case BTRFS_IOC_ENCODED_READ:
|
||||
return btrfs_ioctl_encoded_read(file, argp, false);
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
case BTRFS_IOC_ENCODED_READ_32:
|
||||
return btrfs_ioctl_encoded_read(file, argp, true);
|
||||
#endif
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
||||
Reference in New Issue
Block a user