netfs: Speed up buffered reading
Improve the efficiency of buffered reads in a number of ways:
(1) Overhaul the algorithm in general so that it's a lot more compact and
split the read submission code between buffered and unbuffered
versions. The unbuffered version can be vastly simplified.
(2) Read-result collection is handed off to a work queue rather than being
done in the I/O thread. Multiple subrequests can be processed
simultaneously.
(3) When a subrequest is collected, any folios it fully spans are
collected and "spare" data on either side is donated to either the
previous or the next subrequest in the sequence.
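To illustrate point (3), here is a minimal userspace sketch of the donation
bookkeeping (not the kernel implementation; the struct layout, helper names
and the fixed 4KiB folio size are assumptions for illustration only):

    /* Hypothetical model: a finished subrequest unlocks the folios it
     * fully spans and donates the partial edges to its neighbours. */
    #include <stdio.h>

    #define FOLIO_SIZE 4096ULL    /* assumed granularity */

    struct subreq {
        unsigned long long start, len;    /* byte range covered */
        unsigned long long prev_donated;  /* spare bytes from neighbours */
        unsigned long long next_donated;
    };

    static void collect(struct subreq *prev, struct subreq *s,
                        struct subreq *next)
    {
        unsigned long long lo = s->start - s->prev_donated;
        unsigned long long hi = s->start + s->len + s->next_donated;
        unsigned long long first = (lo + FOLIO_SIZE - 1) / FOLIO_SIZE;
        unsigned long long last  = hi / FOLIO_SIZE;

        printf("unlock folios [%llu, %llu)\n", first, last);

        if (prev)
            prev->next_donated += first * FOLIO_SIZE - lo;
        if (next)
            next->prev_donated += hi - last * FOLIO_SIZE;
    }

    int main(void)
    {
        struct subreq a = { 0, 6000, 0, 0 }, b = { 6000, 6000, 0, 0 };

        collect(NULL, &a, &b);  /* a spans folio 0; tail goes to b */
        collect(&a, &b, NULL);  /* b now covers bytes 4096..12000 */
        return 0;
    }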
Notes:
(*) Readahead expansion massively slows down fio, presumably because it
causes a load of extra allocations, both folio and xarray, up front
before RPC requests can be transmitted.
(*) RDMA with cifs does appear to work, both with SIW and RXE.
(*) PG_private_2-based reading and copy-to-cache is split out into its own
file and altered to use folio_queue. Note that the copy to the cache
now creates a new write transaction against the cache and adds the
folios to be copied into it. This allows it to use part of the
writeback I/O code.
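The folio_queue walk used by the new copy-to-cache code can be pictured
with a small userspace model (a sketch only: the one-bitmask-per-segment
layout and helper names stand in for folioq_mark3()/folioq_unmark3(),
which this note assumes behave as shown):

    /* Hypothetical model of scanning "mark 3" bits to find folios that
     * still need copying to the cache. */
    #include <stdio.h>
    #include <strings.h>    /* ffs() */

    struct fq {                  /* stand-in for struct folio_queue */
        unsigned int marks3;     /* one bit per occupied slot */
        struct fq *next;
    };

    int main(void)
    {
        struct fq tail = { 0x5, NULL };   /* slots 0 and 2 marked */
        struct fq head = { 0x2, &tail };  /* slot 1 marked */

        for (struct fq *q = &head; q; ) {
            if (!q->marks3) {
                q = q->next;              /* segment exhausted, move on */
                continue;
            }
            int slot = ffs(q->marks3) - 1;   /* lowest marked slot */
            printf("copy slot %d to the cache\n", slot);
            q->marks3 &= ~(1u << slot);      /* like folioq_unmark3() */
        }
        return 0;
    }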
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2
Signed-off-by: Christian Brauner <brauner@kernel.org>
commit ee4cdf7ba8, parent 2e45b92297, committed by Christian Brauner
@@ -68,17 +68,22 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
{
    struct netfs_io_request *rreq = subreq->rreq;
    struct p9_fid *fid = rreq->netfs_priv;
    unsigned long long pos = subreq->start + subreq->transferred;
    int total, err;

    total = p9_client_read(fid, subreq->start + subreq->transferred,
                           &subreq->io_iter, &err);
    total = p9_client_read(fid, pos, &subreq->io_iter, &err);

    /* if we just extended the file size, any portion not in
     * cache won't be on server and is zeroes */
    if (subreq->rreq->origin != NETFS_DIO_READ)
        __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
    if (pos + total >= i_size_read(rreq->inode))
        __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);

    netfs_subreq_terminated(subreq, err ?: total, false);
    if (!err)
        subreq->transferred += total;

    netfs_read_subreq_terminated(subreq, err, false);
}

/**
@@ -16,6 +16,7 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/netfs.h>
#include <trace/events/netfs.h>
#include "internal.h"

static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
@@ -242,9 +243,10 @@ static void afs_fetch_data_notify(struct afs_operation *op)

    req->error = error;
    if (subreq) {
        if (subreq->rreq->origin != NETFS_DIO_READ)
            __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
        netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
        subreq->rreq->i_size = req->file_size;
        if (req->pos + req->actual_len >= req->file_size)
            __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
        netfs_read_subreq_terminated(subreq, error, false);
        req->subreq = NULL;
    } else if (req->done) {
        req->done(req);
@@ -262,6 +264,12 @@ static void afs_fetch_data_success(struct afs_operation *op)
    afs_fetch_data_notify(op);
}

static void afs_fetch_data_aborted(struct afs_operation *op)
{
    afs_check_for_remote_deletion(op);
    afs_fetch_data_notify(op);
}

static void afs_fetch_data_put(struct afs_operation *op)
{
    op->fetch.req->error = afs_op_error(op);
@@ -272,7 +280,7 @@ static const struct afs_operation_ops afs_fetch_data_operation = {
    .issue_afs_rpc  = afs_fs_fetch_data,
    .issue_yfs_rpc  = yfs_fs_fetch_data,
    .success        = afs_fetch_data_success,
    .aborted        = afs_check_for_remote_deletion,
    .aborted        = afs_fetch_data_aborted,
    .failed         = afs_fetch_data_notify,
    .put            = afs_fetch_data_put,
};
@@ -294,7 +302,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req)
    op = afs_alloc_operation(req->key, vnode->volume);
    if (IS_ERR(op)) {
        if (req->subreq)
            netfs_subreq_terminated(req->subreq, PTR_ERR(op), false);
            netfs_read_subreq_terminated(req->subreq, PTR_ERR(op), false);
        return PTR_ERR(op);
    }
@@ -313,7 +321,7 @@ static void afs_read_worker(struct work_struct *work)

    fsreq = afs_alloc_read(GFP_NOFS);
    if (!fsreq)
        return netfs_subreq_terminated(subreq, -ENOMEM, false);
        return netfs_read_subreq_terminated(subreq, -ENOMEM, false);

    fsreq->subreq = subreq;
    fsreq->pos    = subreq->start + subreq->transferred;
@@ -322,6 +330,7 @@ static void afs_read_worker(struct work_struct *work)
    fsreq->vnode  = vnode;
    fsreq->iter   = &subreq->io_iter;

    trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
    afs_fetch_data(fsreq->vnode, fsreq);
    afs_put_read(fsreq);
}
@@ -304,6 +304,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
    struct afs_vnode_param *vp = &op->file[0];
    struct afs_read *req = op->fetch.req;
    const __be32 *bp;
    size_t count_before;
    int ret;

    _enter("{%u,%zu,%zu/%llu}",
@@ -345,10 +346,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)

        /* extract the returned data */
    case 2:
        _debug("extract data %zu/%llu",
               iov_iter_count(call->iter), req->actual_len);
        count_before = call->iov_len;
        _debug("extract data %zu/%llu", count_before, req->actual_len);

        ret = afs_extract_data(call, true);
        if (req->subreq) {
            req->subreq->transferred += count_before - call->iov_len;
            netfs_read_subreq_progress(req->subreq, false);
        }
        if (ret < 0)
            return ret;

@@ -355,6 +355,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
    struct afs_vnode_param *vp = &op->file[0];
    struct afs_read *req = op->fetch.req;
    const __be32 *bp;
    size_t count_before;
    int ret;

    _enter("{%u,%zu, %zu/%llu}",
@@ -391,10 +392,14 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)

        /* extract the returned data */
    case 2:
        _debug("extract data %zu/%llu",
               iov_iter_count(call->iter), req->actual_len);
        count_before = call->iov_len;
        _debug("extract data %zu/%llu", count_before, req->actual_len);

        ret = afs_extract_data(call, true);
        if (req->subreq) {
            req->subreq->transferred += count_before - call->iov_len;
            netfs_read_subreq_progress(req->subreq, false);
        }
        if (ret < 0)
            return ret;

@@ -13,6 +13,7 @@
#include <linux/iversion.h>
#include <linux/ktime.h>
#include <linux/netfs.h>
#include <trace/events/netfs.h>

#include "super.h"
#include "mds_client.h"
@@ -205,21 +206,6 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
    }
}

static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
{
    struct inode *inode = subreq->rreq->inode;
    struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
    struct ceph_inode_info *ci = ceph_inode(inode);
    u64 objno, objoff;
    u32 xlen;

    /* Truncate the extent at the end of the current block */
    ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
                                  &objno, &objoff, &xlen);
    subreq->len = min(xlen, fsc->mount_options->rsize);
    return true;
}

static void finish_netfs_read(struct ceph_osd_request *req)
{
    struct inode *inode = req->r_inode;
@@ -264,7 +250,12 @@ static void finish_netfs_read(struct ceph_osd_request *req)
                          calc_pages_for(osd_data->alignment,
                                         osd_data->length), false);
    }
    netfs_subreq_terminated(subreq, err, false);
    if (err > 0) {
        subreq->transferred = err;
        err = 0;
    }
    trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress);
    netfs_read_subreq_terminated(subreq, err, false);
    iput(req->r_inode);
    ceph_dec_osd_stopping_blocker(fsc->mdsc);
}
@@ -278,7 +269,6 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
    struct ceph_mds_request *req;
    struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
    struct ceph_inode_info *ci = ceph_inode(inode);
    struct iov_iter iter;
    ssize_t err = 0;
    size_t len;
    int mode;
@@ -301,6 +291,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
    req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA);
    req->r_num_caps = 2;

    trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
    err = ceph_mdsc_do_request(mdsc, NULL, req);
    if (err < 0)
        goto out;
@@ -314,17 +305,36 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
    }

    len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len);
    iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
    err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter);
    if (err == 0)
    err = copy_to_iter(iinfo->inline_data + subreq->start, len, &subreq->io_iter);
    if (err == 0) {
        err = -EFAULT;
    } else {
        subreq->transferred += err;
        err = 0;
    }

    ceph_mdsc_put_request(req);
out:
    netfs_subreq_terminated(subreq, err, false);
    netfs_read_subreq_terminated(subreq, err, false);
    return true;
}

static int ceph_netfs_prepare_read(struct netfs_io_subrequest *subreq)
{
    struct netfs_io_request *rreq = subreq->rreq;
    struct inode *inode = rreq->inode;
    struct ceph_inode_info *ci = ceph_inode(inode);
    struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
    u64 objno, objoff;
    u32 xlen;

    /* Truncate the extent at the end of the current block */
    ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
                                  &objno, &objoff, &xlen);
    rreq->io_streams[0].sreq_max_len = umin(xlen, fsc->mount_options->rsize);
    return 0;
}

static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
{
    struct netfs_io_request *rreq = subreq->rreq;
@@ -334,9 +344,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
    struct ceph_client *cl = fsc->client;
    struct ceph_osd_request *req = NULL;
    struct ceph_vino vino = ceph_vino(inode);
    struct iov_iter iter;
    int err = 0;
    u64 len = subreq->len;
    int err;
    u64 len;
    bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
    u64 off = subreq->start;
    int extent_cnt;
@@ -349,6 +358,12 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
    if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
        return;

    // TODO: This rounding here is slightly dodgy.  It *should* work, for
    // now, as the cache only deals in blocks that are a multiple of
    // PAGE_SIZE and fscrypt blocks are at most PAGE_SIZE.  What needs to
    // happen is for the fscrypt driving to be moved into netfslib and the
    // data in the cache also to be stored encrypted.
    len = subreq->len;
    ceph_fscrypt_adjust_off_and_len(inode, &off, &len);

    req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
@@ -371,8 +386,6 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
    doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n",
          ceph_vinop(inode), subreq->start, subreq->len, len);

    iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);

    /*
     * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
     * encrypted inodes. We'd need infrastructure that handles an iov_iter
@@ -384,7 +397,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        struct page **pages;
        size_t page_off;

        err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
        err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages, len, &page_off);
        if (err < 0) {
            doutc(cl, "%llx.%llx failed to allocate pages, %d\n",
                  ceph_vinop(inode), err);
@@ -399,7 +412,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
                                         false);
    } else {
        osd_req_op_extent_osd_iter(req, 0, &iter);
        osd_req_op_extent_osd_iter(req, 0, &subreq->io_iter);
    }
    if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
        err = -EIO;
@@ -410,17 +423,19 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
    req->r_inode = inode;
    ihold(inode);

    trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
    ceph_osdc_start_request(req->r_osdc, req);
out:
    ceph_osdc_put_request(req);
    if (err)
        netfs_subreq_terminated(subreq, err, false);
        netfs_read_subreq_terminated(subreq, err, false);
    doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err);
}

static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
{
    struct inode *inode = rreq->inode;
    struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
    struct ceph_client *cl = ceph_inode_to_client(inode);
    int got = 0, want = CEPH_CAP_FILE_CACHE;
    struct ceph_netfs_request_data *priv;
@@ -472,6 +487,7 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)

    priv->caps = got;
    rreq->netfs_priv = priv;
    rreq->io_streams[0].sreq_max_len = fsc->mount_options->rsize;

out:
    if (ret < 0)
@@ -496,9 +512,9 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq)
const struct netfs_request_ops ceph_netfs_ops = {
    .init_request       = ceph_init_request,
    .free_request       = ceph_netfs_free_request,
    .prepare_read       = ceph_netfs_prepare_read,
    .issue_read         = ceph_netfs_issue_read,
    .expand_readahead   = ceph_netfs_expand_readahead,
    .clamp_length       = ceph_netfs_clamp_length,
    .check_write_begin  = ceph_netfs_check_write_begin,
};
@@ -5,12 +5,14 @@ netfs-y := \
    buffered_write.o \
    direct_read.o \
    direct_write.o \
    io.o \
    iterator.o \
    locking.o \
    main.o \
    misc.o \
    objects.o \
    read_collect.o \
    read_pgpriv2.o \
    read_retry.o \
    write_collect.o \
    write_issue.o
File diff suppressed because it is too large
@@ -16,6 +16,143 @@
#include <linux/netfs.h>
#include "internal.h"

static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
{
    struct netfs_io_request *rreq = subreq->rreq;
    size_t rsize;

    rsize = umin(subreq->len, rreq->io_streams[0].sreq_max_len);
    subreq->len = rsize;

    if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
        size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
                                        rreq->io_streams[0].sreq_max_segs);

        if (limit < rsize) {
            subreq->len = limit;
            trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
        }
    }

    trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);

    subreq->io_iter = rreq->iter;
    iov_iter_truncate(&subreq->io_iter, subreq->len);
    iov_iter_advance(&rreq->iter, subreq->len);
}

/*
 * Perform a read to a buffer from the server, slicing up the region to be read
 * according to the network rsize.
 */
static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
{
    unsigned long long start = rreq->start;
    ssize_t size = rreq->len;
    int ret = 0;

    atomic_set(&rreq->nr_outstanding, 1);

    do {
        struct netfs_io_subrequest *subreq;
        ssize_t slice;

        subreq = netfs_alloc_subrequest(rreq);
        if (!subreq) {
            ret = -ENOMEM;
            break;
        }

        subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
        subreq->start  = start;
        subreq->len    = size;

        atomic_inc(&rreq->nr_outstanding);
        spin_lock_bh(&rreq->lock);
        list_add_tail(&subreq->rreq_link, &rreq->subrequests);
        subreq->prev_donated = rreq->prev_donated;
        rreq->prev_donated = 0;
        trace_netfs_sreq(subreq, netfs_sreq_trace_added);
        spin_unlock_bh(&rreq->lock);

        netfs_stat(&netfs_n_rh_download);
        if (rreq->netfs_ops->prepare_read) {
            ret = rreq->netfs_ops->prepare_read(subreq);
            if (ret < 0) {
                atomic_dec(&rreq->nr_outstanding);
                netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
                break;
            }
        }

        netfs_prepare_dio_read_iterator(subreq);
        slice = subreq->len;
        rreq->netfs_ops->issue_read(subreq);

        size -= slice;
        start += slice;
        rreq->submitted += slice;

        if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
            test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
            break;
        cond_resched();
    } while (size > 0);

    if (atomic_dec_and_test(&rreq->nr_outstanding))
        netfs_rreq_terminated(rreq, false);
    return ret;
}

/*
 * Perform a read to an application buffer, bypassing the pagecache and the
 * local disk cache.
 */
static int netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
{
    int ret;

    _enter("R=%x %llx-%llx",
           rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);

    if (rreq->len == 0) {
        pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
        return -EIO;
    }

    // TODO: Use bounce buffer if requested

    inode_dio_begin(rreq->inode);

    ret = netfs_dispatch_unbuffered_reads(rreq);

    if (!rreq->submitted) {
        netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
        inode_dio_end(rreq->inode);
        ret = 0;
        goto out;
    }

    if (sync) {
        trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
        wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
                    TASK_UNINTERRUPTIBLE);

        ret = rreq->error;
        if (ret == 0 && rreq->submitted < rreq->len &&
            rreq->origin != NETFS_DIO_READ) {
            trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
            ret = -EIO;
        }
    } else {
        ret = -EIOCBQUEUED;
    }

out:
    _leave(" = %d", ret);
    return ret;
}

/**
 * netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read
 * @iocb: The I/O control descriptor describing the read
@@ -31,7 +168,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
    struct netfs_io_request *rreq;
    ssize_t ret;
    size_t orig_count = iov_iter_count(iter);
    bool async = !is_sync_kiocb(iocb);
    bool sync = is_sync_kiocb(iocb);

    _enter("");

@@ -78,13 +215,13 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i

    // TODO: Set up bounce buffer if needed

    if (async)
    if (!sync)
        rreq->iocb = iocb;

    ret = netfs_begin_read(rreq, is_sync_kiocb(iocb));
    ret = netfs_unbuffered_read(rreq, sync);
    if (ret < 0)
        goto out; /* May be -EIOCBQUEUED */
    if (!async) {
    if (sync) {
        // TODO: Copy from bounce buffer
        iocb->ki_pos += rreq->transferred;
        ret = rreq->transferred;
@@ -94,8 +231,6 @@ out:
    netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
    if (ret > 0)
        orig_count -= ret;
    if (ret != -EIOCBQUEUED)
        iov_iter_revert(iter, orig_count - iov_iter_count(iter));
    return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked);
@@ -23,16 +23,9 @@
/*
 * buffered_read.c
 */
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq);
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
                             size_t offset, size_t len);

/*
 * io.c
 */
void netfs_rreq_work(struct work_struct *work);
int netfs_begin_read(struct netfs_io_request *rreq, bool sync);

/*
 * main.c
 */
@@ -90,6 +83,28 @@ static inline void netfs_see_request(struct netfs_io_request *rreq,
    trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), what);
}

/*
 * read_collect.c
 */
void netfs_read_termination_worker(struct work_struct *work);
void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async);

/*
 * read_pgpriv2.c
 */
void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
                                      struct netfs_io_request *rreq,
                                      struct folio_queue *folioq,
                                      int slot);
void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq);
bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq);

/*
 * read_retry.c
 */
void netfs_retry_reads(struct netfs_io_request *rreq);
void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq);

/*
 * stats.c
 */
@@ -117,6 +132,7 @@ extern atomic_t netfs_n_wh_buffered_write;
extern atomic_t netfs_n_wh_writethrough;
extern atomic_t netfs_n_wh_dio_write;
extern atomic_t netfs_n_wh_writepages;
extern atomic_t netfs_n_wh_copy_to_cache;
extern atomic_t netfs_n_wh_wstream_conflict;
extern atomic_t netfs_n_wh_upload;
extern atomic_t netfs_n_wh_upload_done;
@@ -162,6 +178,11 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
void netfs_reissue_write(struct netfs_io_stream *stream,
                         struct netfs_io_subrequest *subreq,
                         struct iov_iter *source);
void netfs_issue_write(struct netfs_io_request *wreq,
                       struct netfs_io_stream *stream);
int netfs_advance_write(struct netfs_io_request *wreq,
                        struct netfs_io_stream *stream,
                        loff_t start, size_t len, bool to_eof);
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
                               struct folio *folio, size_t copied, bool to_page_end,
@@ -188,9 +188,59 @@ static size_t netfs_limit_xarray(const struct iov_iter *iter, size_t start_offse
    return min(span, max_size);
}

/*
 * Select the span of a folio queue iterator we're going to use.  Limit it by
 * both maximum size and maximum number of segments.  Returns the size of the
 * span in bytes.
 */
static size_t netfs_limit_folioq(const struct iov_iter *iter, size_t start_offset,
                                 size_t max_size, size_t max_segs)
{
    const struct folio_queue *folioq = iter->folioq;
    unsigned int nsegs = 0;
    unsigned int slot = iter->folioq_slot;
    size_t span = 0, n = iter->count;

    if (WARN_ON(!iov_iter_is_folioq(iter)) ||
        WARN_ON(start_offset > n) ||
        n == 0)
        return 0;
    max_size = umin(max_size, n - start_offset);

    if (slot >= folioq_nr_slots(folioq)) {
        folioq = folioq->next;
        slot = 0;
    }

    start_offset += iter->iov_offset;
    do {
        size_t flen = folioq_folio_size(folioq, slot);

        if (start_offset < flen) {
            span += flen - start_offset;
            nsegs++;
            start_offset = 0;
        } else {
            start_offset -= flen;
        }
        if (span >= max_size || nsegs >= max_segs)
            break;

        slot++;
        if (slot >= folioq_nr_slots(folioq)) {
            folioq = folioq->next;
            slot = 0;
        }
    } while (folioq);

    return umin(span, max_size);
}

size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
                        size_t max_size, size_t max_segs)
{
    if (iov_iter_is_folioq(iter))
        return netfs_limit_folioq(iter, start_offset, max_size, max_segs);
    if (iov_iter_is_bvec(iter))
        return netfs_limit_bvec(iter, start_offset, max_size, max_segs);
    if (iov_iter_is_xarray(iter))
@@ -36,12 +36,14 @@ DEFINE_SPINLOCK(netfs_proc_lock);
static const char *netfs_origins[nr__netfs_io_origin] = {
    [NETFS_READAHEAD]             = "RA",
    [NETFS_READPAGE]              = "RP",
    [NETFS_READ_GAPS]             = "RG",
    [NETFS_READ_FOR_WRITE]        = "RW",
    [NETFS_DIO_READ]              = "DR",
    [NETFS_WRITEBACK]             = "WB",
    [NETFS_WRITETHROUGH]          = "WT",
    [NETFS_UNBUFFERED_WRITE]      = "UW",
    [NETFS_DIO_WRITE]             = "DW",
    [NETFS_PGPRIV2_COPY_TO_CACHE] = "2C",
};

/*
@@ -61,7 +63,7 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v)

    rreq = list_entry(v, struct netfs_io_request, proc_link);
    seq_printf(m,
               "%08x %s %3d %2lx %4d %3d @%04llx %llx/%llx",
               "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx",
               rreq->debug_id,
               netfs_origins[rreq->origin],
               refcount_read(&rreq->ref),
@@ -36,7 +36,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
    memset(rreq, 0, kmem_cache_size(cache));
    rreq->start     = start;
    rreq->len       = len;
    rreq->upper_len = len;
    rreq->origin    = origin;
    rreq->netfs_ops = ctx->ops;
    rreq->mapping   = mapping;
@@ -44,6 +43,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
    rreq->i_size    = i_size_read(inode);
    rreq->debug_id  = atomic_inc_return(&debug_ids);
    rreq->wsize     = INT_MAX;
    rreq->io_streams[0].sreq_max_len = ULONG_MAX;
    rreq->io_streams[0].sreq_max_segs = 0;
    spin_lock_init(&rreq->lock);
    INIT_LIST_HEAD(&rreq->io_streams[0].subrequests);
    INIT_LIST_HEAD(&rreq->io_streams[1].subrequests);
@@ -52,9 +53,10 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,

    if (origin == NETFS_READAHEAD ||
        origin == NETFS_READPAGE ||
        origin == NETFS_READ_GAPS ||
        origin == NETFS_READ_FOR_WRITE ||
        origin == NETFS_DIO_READ)
        INIT_WORK(&rreq->work, netfs_rreq_work);
        INIT_WORK(&rreq->work, netfs_read_termination_worker);
    else
        INIT_WORK(&rreq->work, netfs_write_collection_worker);

@@ -163,7 +165,7 @@ void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
    if (was_async) {
        rreq->work.func = netfs_free_request;
        if (!queue_work(system_unbound_wq, &rreq->work))
            BUG();
            WARN_ON(1);
    } else {
        netfs_free_request(&rreq->work);
    }
fs/netfs/read_collect.c (new file, 544 lines)
File diff suppressed because it is too large

fs/netfs/read_pgpriv2.c (new file, 264 lines)
@@ -0,0 +1,264 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Read with PG_private_2 [DEPRECATED].
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * [DEPRECATED] Mark page as requiring copy-to-cache using PG_private_2.  The
 * third mark in the folio queue is used to indicate that this folio needs
 * writing.
 */
void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
                                      struct netfs_io_request *rreq,
                                      struct folio_queue *folioq,
                                      int slot)
{
    struct folio *folio = folioq_folio(folioq, slot);

    trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
    folio_start_private_2(folio);
    folioq_mark3(folioq, slot);
}

/*
 * [DEPRECATED] Cancel PG_private_2 on all marked folios in the event of an
 * unrecoverable error.
 */
static void netfs_pgpriv2_cancel(struct folio_queue *folioq)
{
    struct folio *folio;
    int slot;

    while (folioq) {
        if (!folioq->marks3) {
            folioq = folioq->next;
            continue;
        }

        slot = __ffs(folioq->marks3);
        folio = folioq_folio(folioq, slot);

        trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
        folio_end_private_2(folio);
        folioq_unmark3(folioq, slot);
    }
}

/*
 * [DEPRECATED] Copy a folio to the cache with PG_private_2 set.
 */
static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio *folio)
{
    struct netfs_io_stream *cache = &wreq->io_streams[1];
    size_t fsize = folio_size(folio), flen = fsize;
    loff_t fpos = folio_pos(folio), i_size;
    bool to_eof = false;

    _enter("");

    /* netfs_perform_write() may shift i_size around the page or from out
     * of the page to beyond it, but cannot move i_size into or through the
     * page since we have it locked.
     */
    i_size = i_size_read(wreq->inode);

    if (fpos >= i_size) {
        /* mmap beyond eof. */
        _debug("beyond eof");
        folio_end_private_2(folio);
        return 0;
    }

    if (fpos + fsize > wreq->i_size)
        wreq->i_size = i_size;

    if (flen > i_size - fpos) {
        flen = i_size - fpos;
        to_eof = true;
    } else if (flen == i_size - fpos) {
        to_eof = true;
    }

    _debug("folio %zx %zx", flen, fsize);

    trace_netfs_folio(folio, netfs_folio_trace_store_copy);

    /* Attach the folio to the rolling buffer. */
    if (netfs_buffer_append_folio(wreq, folio, false) < 0)
        return -ENOMEM;

    cache->submit_max_len = fsize;
    cache->submit_off = 0;
    cache->submit_len = flen;

    /* Attach the folio to one or more subrequests.  For a big folio, we
     * could end up with thousands of subrequests if the wsize is small -
     * but we might need to wait during the creation of subrequests for
     * network resources (eg. SMB credits).
     */
    do {
        ssize_t part;

        wreq->io_iter.iov_offset = cache->submit_off;

        atomic64_set(&wreq->issued_to, fpos + cache->submit_off);
        part = netfs_advance_write(wreq, cache, fpos + cache->submit_off,
                                   cache->submit_len, to_eof);
        cache->submit_off += part;
        cache->submit_max_len -= part;
        if (part > cache->submit_len)
            cache->submit_len = 0;
        else
            cache->submit_len -= part;
    } while (cache->submit_len > 0);

    wreq->io_iter.iov_offset = 0;
    iov_iter_advance(&wreq->io_iter, fsize);
    atomic64_set(&wreq->issued_to, fpos + fsize);

    if (flen < fsize)
        netfs_issue_write(wreq, cache);

    _leave(" = 0");
    return 0;
}

/*
 * [DEPRECATED] Go through the buffer and write any folios that are marked with
 * the third mark to the cache.
 */
void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq)
{
    struct netfs_io_request *wreq;
    struct folio_queue *folioq;
    struct folio *folio;
    int error = 0;
    int slot = 0;

    _enter("");

    if (!fscache_resources_valid(&rreq->cache_resources))
        goto couldnt_start;

    /* Need the first folio to be able to set up the op. */
    for (folioq = rreq->buffer; folioq; folioq = folioq->next) {
        if (folioq->marks3) {
            slot = __ffs(folioq->marks3);
            break;
        }
    }
    if (!folioq)
        return;
    folio = folioq_folio(folioq, slot);

    wreq = netfs_create_write_req(rreq->mapping, NULL, folio_pos(folio),
                                  NETFS_PGPRIV2_COPY_TO_CACHE);
    if (IS_ERR(wreq)) {
        kleave(" [create %ld]", PTR_ERR(wreq));
        goto couldnt_start;
    }

    trace_netfs_write(wreq, netfs_write_trace_copy_to_cache);
    netfs_stat(&netfs_n_wh_copy_to_cache);

    for (;;) {
        error = netfs_pgpriv2_copy_folio(wreq, folio);
        if (error < 0)
            break;

        folioq_unmark3(folioq, slot);
        if (!folioq->marks3) {
            folioq = folioq->next;
            if (!folioq)
                break;
        }

        slot = __ffs(folioq->marks3);
        folio = folioq_folio(folioq, slot);
    }

    netfs_issue_write(wreq, &wreq->io_streams[1]);
    smp_wmb(); /* Write lists before ALL_QUEUED. */
    set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);

    netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
    _leave(" = %d", error);
couldnt_start:
    netfs_pgpriv2_cancel(rreq->buffer);
}

/*
 * [DEPRECATED] Remove the PG_private_2 mark from any folios we've finished
 * copying.
 */
bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
{
    struct folio_queue *folioq = wreq->buffer;
    unsigned long long collected_to = wreq->collected_to;
    unsigned int slot = wreq->buffer_head_slot;
    bool made_progress = false;

    if (slot >= folioq_nr_slots(folioq)) {
        folioq = netfs_delete_buffer_head(wreq);
        slot = 0;
    }

    for (;;) {
        struct folio *folio;
        unsigned long long fpos, fend;
        size_t fsize, flen;

        folio = folioq_folio(folioq, slot);
        if (WARN_ONCE(!folio_test_private_2(folio),
                      "R=%08x: folio %lx is not marked private_2\n",
                      wreq->debug_id, folio->index))
            trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);

        fpos = folio_pos(folio);
        fsize = folio_size(folio);
        flen = fsize;

        fend = min_t(unsigned long long, fpos + flen, wreq->i_size);

        trace_netfs_collect_folio(wreq, folio, fend, collected_to);

        /* Unlock any folio we've transferred all of. */
        if (collected_to < fend)
            break;

        trace_netfs_folio(folio, netfs_folio_trace_end_copy);
        folio_end_private_2(folio);
        wreq->cleaned_to = fpos + fsize;
        made_progress = true;

        /* Clean up the head folioq.  If we clear an entire folioq, then
         * we can get rid of it provided it's not also the tail folioq
         * being filled by the issuer.
         */
        folioq_clear(folioq, slot);
        slot++;
        if (slot >= folioq_nr_slots(folioq)) {
            if (READ_ONCE(wreq->buffer_tail) == folioq)
                break;
            folioq = netfs_delete_buffer_head(wreq);
            slot = 0;
        }

        if (fpos + fsize >= collected_to)
            break;
    }

    wreq->buffer = folioq;
    wreq->buffer_head_slot = slot;
    return made_progress;
}
fs/netfs/read_retry.c (new file, 256 lines)
@@ -0,0 +1,256 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include "internal.h"

static void netfs_reissue_read(struct netfs_io_request *rreq,
                               struct netfs_io_subrequest *subreq)
{
    struct iov_iter *io_iter = &subreq->io_iter;

    if (iov_iter_is_folioq(io_iter)) {
        subreq->curr_folioq = (struct folio_queue *)io_iter->folioq;
        subreq->curr_folioq_slot = io_iter->folioq_slot;
        subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
    }

    atomic_inc(&rreq->nr_outstanding);
    __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
    netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
    subreq->rreq->netfs_ops->issue_read(subreq);
}

/*
 * Go through the list of failed/short reads, retrying all retryable ones.  We
 * need to switch failed cache reads to network downloads.
 */
static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
{
    struct netfs_io_subrequest *subreq;
    struct netfs_io_stream *stream0 = &rreq->io_streams[0];
    LIST_HEAD(sublist);
    LIST_HEAD(queue);

    _enter("R=%x", rreq->debug_id);

    if (list_empty(&rreq->subrequests))
        return;

    if (rreq->netfs_ops->retry_request)
        rreq->netfs_ops->retry_request(rreq, NULL);

    /* If there's no renegotiation to do, just resend each retryable subreq
     * up to the first permanently failed one.
     */
    if (!rreq->netfs_ops->prepare_read &&
        !test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags)) {
        struct netfs_io_subrequest *subreq;

        list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
            if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
                break;
            if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
                netfs_reset_iter(subreq);
                netfs_reissue_read(rreq, subreq);
            }
        }
        return;
    }

    /* Okay, we need to renegotiate all the download requests and flip any
     * failed cache reads over to being download requests and negotiate
     * those also.  All fully successful subreqs have been removed from the
     * list and any spare data from those has been donated.
     *
     * What we do is decant the list and rebuild it one subreq at a time so
     * that we don't end up with donations jumping over a gap we're busy
     * populating with smaller subrequests.  In the event that the subreq
     * we just launched finishes before we insert the next subreq, it'll
     * fill in rreq->prev_donated instead.
     *
     * Note: Alternatively, we could split the tail subrequest right before
     * we reissue it and fix up the donations under lock.
     */
    list_splice_init(&rreq->subrequests, &queue);

    do {
        struct netfs_io_subrequest *from;
        struct iov_iter source;
        unsigned long long start, len;
        size_t part, deferred_next_donated = 0;
        bool boundary = false;

        /* Go through the subreqs and find the next span of contiguous
         * buffer that we then rejig (cifs, for example, needs the
         * rsize renegotiating) and reissue.
         */
        from = list_first_entry(&queue, struct netfs_io_subrequest, rreq_link);
        list_move_tail(&from->rreq_link, &sublist);
        start = from->start + from->transferred;
        len   = from->len   - from->transferred;

        _debug("from R=%08x[%x] s=%llx ctl=%zx/%zx/%zx",
               rreq->debug_id, from->debug_index,
               from->start, from->consumed, from->transferred, from->len);

        if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
            !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
            goto abandon;

        deferred_next_donated = from->next_donated;
        while ((subreq = list_first_entry_or_null(
                &queue, struct netfs_io_subrequest, rreq_link))) {
            if (subreq->start != start + len ||
                subreq->transferred > 0 ||
                !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
                break;
            list_move_tail(&subreq->rreq_link, &sublist);
            len += subreq->len;
            deferred_next_donated = subreq->next_donated;
            if (test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags))
                break;
        }

        _debug(" - range: %llx-%llx %llx", start, start + len - 1, len);

        /* Determine the set of buffers we're going to use.  Each
         * subreq gets a subset of a single overall contiguous buffer.
         */
        netfs_reset_iter(from);
        source = from->io_iter;
        source.count = len;

        /* Work through the sublist. */
        while ((subreq = list_first_entry_or_null(
                &sublist, struct netfs_io_subrequest, rreq_link))) {
            list_del(&subreq->rreq_link);

            subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
            subreq->start  = start - subreq->transferred;
            subreq->len    = len   + subreq->transferred;
            stream0->sreq_max_len = subreq->len;

            __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
            __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);

            spin_lock_bh(&rreq->lock);
            list_add_tail(&subreq->rreq_link, &rreq->subrequests);
            subreq->prev_donated += rreq->prev_donated;
            rreq->prev_donated = 0;
            trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
            spin_unlock_bh(&rreq->lock);

            BUG_ON(!len);

            /* Renegotiate max_len (rsize) */
            if (rreq->netfs_ops->prepare_read(subreq) < 0) {
                trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
                __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
            }

            part = umin(len, stream0->sreq_max_len);
            if (unlikely(rreq->io_streams[0].sreq_max_segs))
                part = netfs_limit_iter(&source, 0, part, stream0->sreq_max_segs);
            subreq->len = subreq->transferred + part;
            subreq->io_iter = source;
            iov_iter_truncate(&subreq->io_iter, part);
            iov_iter_advance(&source, part);
            len -= part;
            start += part;
            if (!len) {
                if (boundary)
                    __set_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
                subreq->next_donated = deferred_next_donated;
            } else {
                __clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
                subreq->next_donated = 0;
            }

            netfs_reissue_read(rreq, subreq);
            if (!len)
                break;

            /* If we ran out of subrequests, allocate another. */
            if (list_empty(&sublist)) {
                subreq = netfs_alloc_subrequest(rreq);
                if (!subreq)
                    goto abandon;
                subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
                subreq->start = start;

                /* We get two refs, but need just one. */
                netfs_put_subrequest(subreq, false, netfs_sreq_trace_new);
                trace_netfs_sreq(subreq, netfs_sreq_trace_split);
                list_add_tail(&subreq->rreq_link, &sublist);
            }
        }

        /* If we managed to use fewer subreqs, we can discard the
         * excess.
         */
        while ((subreq = list_first_entry_or_null(
                &sublist, struct netfs_io_subrequest, rreq_link))) {
            trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
            list_del(&subreq->rreq_link);
            netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
        }

    } while (!list_empty(&queue));

    return;

    /* If we hit ENOMEM, fail all remaining subrequests */
abandon:
    list_splice_init(&sublist, &queue);
    list_for_each_entry(subreq, &queue, rreq_link) {
        if (!subreq->error)
            subreq->error = -ENOMEM;
        __clear_bit(NETFS_SREQ_FAILED, &subreq->flags);
        __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
        __clear_bit(NETFS_SREQ_RETRYING, &subreq->flags);
    }
    spin_lock_bh(&rreq->lock);
    list_splice_tail_init(&queue, &rreq->subrequests);
    spin_unlock_bh(&rreq->lock);
}

/*
 * Retry reads.
 */
void netfs_retry_reads(struct netfs_io_request *rreq)
{
    trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);

    atomic_inc(&rreq->nr_outstanding);

    netfs_retry_read_subrequests(rreq);

    if (atomic_dec_and_test(&rreq->nr_outstanding))
        netfs_rreq_terminated(rreq, false);
}

/*
 * Unlock any the pages that haven't been unlocked yet due to abandoned
 * subrequests.
 */
void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq)
{
    struct folio_queue *p;

    for (p = rreq->buffer; p; p = p->next) {
        for (int slot = 0; slot < folioq_count(p); slot++) {
            struct folio *folio = folioq_folio(p, slot);

            if (folio && !folioq_is_marked2(p, slot)) {
                trace_netfs_folio(folio, netfs_folio_trace_abandon);
                folio_unlock(folio);
            }
        }
    }
}
@@ -32,6 +32,7 @@ atomic_t netfs_n_wh_buffered_write;
atomic_t netfs_n_wh_writethrough;
atomic_t netfs_n_wh_dio_write;
atomic_t netfs_n_wh_writepages;
atomic_t netfs_n_wh_copy_to_cache;
atomic_t netfs_n_wh_wstream_conflict;
atomic_t netfs_n_wh_upload;
atomic_t netfs_n_wh_upload_done;
@@ -51,11 +52,12 @@ int netfs_stats_show(struct seq_file *m, void *v)
               atomic_read(&netfs_n_rh_read_folio),
               atomic_read(&netfs_n_rh_write_begin),
               atomic_read(&netfs_n_rh_write_zskip));
    seq_printf(m, "Writes : BW=%u WT=%u DW=%u WP=%u\n",
    seq_printf(m, "Writes : BW=%u WT=%u DW=%u WP=%u 2C=%u\n",
               atomic_read(&netfs_n_wh_buffered_write),
               atomic_read(&netfs_n_wh_writethrough),
               atomic_read(&netfs_n_wh_dio_write),
               atomic_read(&netfs_n_wh_writepages));
               atomic_read(&netfs_n_wh_writepages),
               atomic_read(&netfs_n_wh_copy_to_cache));
    seq_printf(m, "ZeroOps: ZR=%u sh=%u sk=%u\n",
               atomic_read(&netfs_n_rh_zero),
               atomic_read(&netfs_n_rh_short_read),
@@ -87,6 +87,12 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
    unsigned long long collected_to = wreq->collected_to;
    unsigned int slot = wreq->buffer_head_slot;

    if (wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) {
        if (netfs_pgpriv2_unlock_copied_folios(wreq))
            *notes |= MADE_PROGRESS;
        return;
    }

    if (slot >= folioq_nr_slots(folioq)) {
        folioq = netfs_delete_buffer_head(wreq);
        slot = 0;
@@ -383,7 +389,8 @@ reassess_streams:
    smp_rmb();
    collected_to = ULLONG_MAX;
    if (wreq->origin == NETFS_WRITEBACK ||
        wreq->origin == NETFS_WRITETHROUGH)
        wreq->origin == NETFS_WRITETHROUGH ||
        wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE)
        notes = BUFFERED;
    else
        notes = 0;
@@ -95,7 +95,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
    struct netfs_io_request *wreq;
    struct netfs_inode *ictx;
    bool is_buffered = (origin == NETFS_WRITEBACK ||
                        origin == NETFS_WRITETHROUGH);
                        origin == NETFS_WRITETHROUGH ||
                        origin == NETFS_PGPRIV2_COPY_TO_CACHE);

    wreq = netfs_alloc_request(mapping, file, start, 0, origin);
    if (IS_ERR(wreq))
@@ -161,10 +162,6 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,

    _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);

    trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
                         refcount_read(&subreq->ref),
                         netfs_sreq_trace_new);

    trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);

    stream->sreq_max_len = UINT_MAX;
@@ -241,8 +238,8 @@ void netfs_reissue_write(struct netfs_io_stream *stream,
    netfs_do_issue_write(stream, subreq);
}

static void netfs_issue_write(struct netfs_io_request *wreq,
                              struct netfs_io_stream *stream)
void netfs_issue_write(struct netfs_io_request *wreq,
                       struct netfs_io_stream *stream)
{
    struct netfs_io_subrequest *subreq = stream->construct;

@@ -259,9 +256,9 @@ static void netfs_issue_write(struct netfs_io_request *wreq,
 * we can avoid overrunning the credits obtained (cifs) and try to parallelise
 * content-crypto preparation with network writes.
 */
static int netfs_advance_write(struct netfs_io_request *wreq,
                               struct netfs_io_stream *stream,
                               loff_t start, size_t len, bool to_eof)
int netfs_advance_write(struct netfs_io_request *wreq,
                        struct netfs_io_stream *stream,
                        loff_t start, size_t len, bool to_eof)
{
    struct netfs_io_subrequest *subreq = stream->construct;
    size_t part;
@@ -267,6 +267,7 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi
    rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
    /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
    __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
    rreq->io_streams[0].sreq_max_len = NFS_SB(rreq->inode->i_sb)->rsize;

    return 0;
}
@@ -288,14 +289,6 @@ static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sre
    return netfs;
}

static bool nfs_netfs_clamp_length(struct netfs_io_subrequest *sreq)
{
    size_t rsize = NFS_SB(sreq->rreq->inode->i_sb)->rsize;

    sreq->len = min(sreq->len, rsize);
    return true;
}

static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
{
    struct nfs_netfs_io_data *netfs;
@@ -304,17 +297,18 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
    struct nfs_open_context *ctx = sreq->rreq->netfs_priv;
    struct page *page;
    unsigned long idx;
    pgoff_t start, last;
    int err;
    pgoff_t start = (sreq->start + sreq->transferred) >> PAGE_SHIFT;
    pgoff_t last = ((sreq->start + sreq->len -
                     sreq->transferred - 1) >> PAGE_SHIFT);

    start = (sreq->start + sreq->transferred) >> PAGE_SHIFT;
    last = ((sreq->start + sreq->len - sreq->transferred - 1) >> PAGE_SHIFT);

    nfs_pageio_init_read(&pgio, inode, false,
                         &nfs_async_read_completion_ops);

    netfs = nfs_netfs_alloc(sreq);
    if (!netfs)
        return netfs_subreq_terminated(sreq, -ENOMEM, false);
        return netfs_read_subreq_terminated(sreq, -ENOMEM, false);

    pgio.pg_netfs = netfs; /* used in completion */

@@ -380,5 +374,4 @@ const struct netfs_request_ops nfs_netfs_ops = {
    .init_request = nfs_netfs_init_request,
    .free_request = nfs_netfs_free_request,
    .issue_read   = nfs_netfs_issue_read,
    .clamp_length = nfs_netfs_clamp_length
};

@@ -60,8 +60,6 @@ static inline void nfs_netfs_get(struct nfs_netfs_io_data *netfs)

static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
{
    ssize_t final_len;

    /* Only the last RPC completion should call netfs_subreq_terminated() */
    if (!refcount_dec_and_test(&netfs->refcount))
        return;
@@ -74,8 +72,9 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
     * Correct the final length here to be no larger than the netfs subrequest
     * length, and thus avoid netfs's "Subreq overread" warning message.
     */
    final_len = min_t(s64, netfs->sreq->len, atomic64_read(&netfs->transferred));
    netfs_subreq_terminated(netfs->sreq, netfs->error ?: final_len, false);
    netfs->sreq->transferred = min_t(s64, netfs->sreq->len,
                                     atomic64_read(&netfs->transferred));
    netfs_read_subreq_terminated(netfs->sreq, netfs->error, false);
    kfree(netfs);
}
static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
Some files were not shown because too many files have changed in this diff.