sock: Remove ->sendpage*() in favour of sendmsg(MSG_SPLICE_PAGES)

Remove ->sendpage() and ->sendpage_locked().  sendmsg() with
MSG_SPLICE_PAGES should be used instead.  This allows multiple pages and
multipage folios to be passed through.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de> # for net/can
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
cc: linux-afs@lists.infradead.org
cc: mptcp@lists.linux.dev
cc: rds-devel@oss.oracle.com
cc: tipc-discussion@lists.sourceforge.net
cc: virtualization@lists.linux-foundation.org
Link: https://lore.kernel.org/r/20230623225513.2732256-16-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
David Howells
2023-06-23 23:55:12 +01:00
committed by Jakub Kicinski
parent e52828cc01
commit dc97391e66
66 changed files with 20 additions and 442 deletions

View File

@@ -240,11 +240,11 @@ offsets into ``msg``, respectively.
If a program of type ``BPF_PROG_TYPE_SK_MSG`` is run on a ``msg`` it can only
parse data that the (``data``, ``data_end``) pointers have already consumed.
For ``sendmsg()`` hooks this is likely the first scatterlist element. But for
calls relying on the ``sendpage`` handler (e.g., ``sendfile()``) this will be
the range (**0**, **0**) because the data is shared with user space and by
default the objective is to avoid allowing user space to modify data while (or
after) BPF verdict is being decided. This helper can be used to pull in data
and to set the start and end pointers to given values. Data will be copied if
calls relying on MSG_SPLICE_PAGES (e.g., ``sendfile()``) this will be the
range (**0**, **0**) because the data is shared with user space and by default
the objective is to avoid allowing user space to modify data while (or after)
BPF verdict is being decided. This helper can be used to pull in data and to
set the start and end pointers to given values. Data will be copied if
necessary (i.e., if data was not linear and if start and end pointers do not
point to the same chunk).

View File

@@ -521,8 +521,6 @@ prototypes::
int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long,
unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);

View File

@@ -1086,7 +1086,6 @@ This describes how the VFS can manipulate an open file. As of kernel
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);

View File

@@ -269,8 +269,8 @@ a single application thread handles flows with many different flow hashes.
rps_sock_flow_table is a global flow table that contains the *desired* CPU
for flows: the CPU that is currently processing the flow in userspace.
Each table value is a CPU index that is updated during calls to recvmsg
and sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage()
and tcp_splice_read()).
and sendmsg (specifically, inet_recvmsg(), inet_sendmsg() and
tcp_splice_read()).
When the scheduler moves a thread to a new CPU while it has outstanding
receive packets on the old CPU, packets may arrive out of order. To

View File

@@ -482,7 +482,6 @@ static const struct proto_ops alg_proto_ops = {
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
@@ -1106,33 +1105,6 @@ unlock:
}
EXPORT_SYMBOL_GPL(af_alg_sendmsg);
/**
* af_alg_sendpage - sendpage system call handler
* @sock: socket of connection to user space to write to
* @page: data to send
* @offset: offset into page to begin sending
* @size: length of data
* @flags: message send/receive flags
*
* This is a generic implementation of sendpage to fill ctx->tsgl_list.
*/
ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, int flags)
{
struct bio_vec bvec;
struct msghdr msg = {
.msg_flags = flags | MSG_SPLICE_PAGES,
};
if (flags & MSG_SENDPAGE_NOTLAST)
msg.msg_flags |= MSG_MORE;
bvec_set_page(&bvec, page, size, offset);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
return sock_sendmsg(sock, &msg);
}
EXPORT_SYMBOL_GPL(af_alg_sendpage);
/**
* af_alg_free_resources - release resources required for crypto request
* @areq: Request holding the TX and RX SGL

View File

@@ -9,10 +9,10 @@
* The following concept of the memory management is used:
*
* The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
* filled by user space with the data submitted via sendpage. Filling up
* the TX SGL does not cause a crypto operation -- the data will only be
* tracked by the kernel. Upon receipt of one recvmsg call, the caller must
* provide a buffer which is tracked with the RX SGL.
* filled by user space with the data submitted via sendmsg (maybe with
* MSG_SPLICE_PAGES). Filling up the TX SGL does not cause a crypto operation
* -- the data will only be tracked by the kernel. Upon receipt of one recvmsg
* call, the caller must provide a buffer which is tracked with the RX SGL.
*
* During the processing of the recvmsg operation, the cipher request is
* allocated and prepared. As part of the recvmsg operation, the processed
@@ -370,7 +370,6 @@ static struct proto_ops algif_aead_ops = {
.release = af_alg_release,
.sendmsg = aead_sendmsg,
.sendpage = af_alg_sendpage,
.recvmsg = aead_recvmsg,
.poll = af_alg_poll,
};
@@ -422,18 +421,6 @@ static int aead_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
return aead_sendmsg(sock, msg, size);
}
static ssize_t aead_sendpage_nokey(struct socket *sock, struct page *page,
int offset, size_t size, int flags)
{
int err;
err = aead_check_key(sock);
if (err)
return err;
return af_alg_sendpage(sock, page, offset, size, flags);
}
static int aead_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
size_t ignored, int flags)
{
@@ -461,7 +448,6 @@ static struct proto_ops algif_aead_ops_nokey = {
.release = af_alg_release,
.sendmsg = aead_sendmsg_nokey,
.sendpage = aead_sendpage_nokey,
.recvmsg = aead_recvmsg_nokey,
.poll = af_alg_poll,
};

View File

@@ -174,7 +174,6 @@ static struct proto_ops algif_rng_ops = {
.bind = sock_no_bind,
.accept = sock_no_accept,
.sendmsg = sock_no_sendmsg,
.sendpage = sock_no_sendpage,
.release = af_alg_release,
.recvmsg = rng_recvmsg,
@@ -192,7 +191,6 @@ static struct proto_ops __maybe_unused algif_rng_test_ops = {
.mmap = sock_no_mmap,
.bind = sock_no_bind,
.accept = sock_no_accept,
.sendpage = sock_no_sendpage,
.release = af_alg_release,
.recvmsg = rng_test_recvmsg,

View File

@@ -194,7 +194,6 @@ static struct proto_ops algif_skcipher_ops = {
.release = af_alg_release,
.sendmsg = skcipher_sendmsg,
.sendpage = af_alg_sendpage,
.recvmsg = skcipher_recvmsg,
.poll = af_alg_poll,
};
@@ -246,18 +245,6 @@ static int skcipher_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
return skcipher_sendmsg(sock, msg, size);
}
static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
int offset, size_t size, int flags)
{
int err;
err = skcipher_check_key(sock);
if (err)
return err;
return af_alg_sendpage(sock, page, offset, size, flags);
}
static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
size_t ignored, int flags)
{
@@ -285,7 +272,6 @@ static struct proto_ops algif_skcipher_ops_nokey = {
.release = af_alg_release,
.sendmsg = skcipher_sendmsg_nokey,
.sendpage = skcipher_sendpage_nokey,
.recvmsg = skcipher_recvmsg_nokey,
.poll = af_alg_poll,
};

View File

@@ -569,8 +569,6 @@ int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int chtls_recvmsg(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int *addr_len);
void chtls_splice_eof(struct socket *sock);
int chtls_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int send_tx_flowc_wr(struct sock *sk, int compl,
u32 snd_nxt, u32 rcv_nxt);
void chtls_tcp_push(struct sock *sk, int flags);

View File

@@ -1246,20 +1246,6 @@ void chtls_splice_eof(struct socket *sock)
release_sock(sk);
}
int chtls_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
struct bio_vec bvec;
if (flags & MSG_SENDPAGE_NOTLAST)
msg.msg_flags |= MSG_MORE;
bvec_set_page(&bvec, page, size, offset);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
return chtls_sendmsg(sk, &msg, size);
}
static void chtls_select_window(struct sock *sk)
{
struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

View File

@@ -607,7 +607,6 @@ static void __init chtls_init_ulp_ops(void)
chtls_cpl_prot.shutdown = chtls_shutdown;
chtls_cpl_prot.sendmsg = chtls_sendmsg;
chtls_cpl_prot.splice_eof = chtls_splice_eof;
chtls_cpl_prot.sendpage = chtls_sendpage;
chtls_cpl_prot.recvmsg = chtls_recvmsg;
chtls_cpl_prot.setsockopt = chtls_setsockopt;
chtls_cpl_prot.getsockopt = chtls_getsockopt;

View File

@@ -936,7 +936,7 @@ nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
/*
* Grab and keep cached pages associated with a file in the svc_rqst
* so that they can be passed to the network sendmsg/sendpage routines
* so that they can be passed to the network sendmsg routines
* directly. They will be released after the sending has completed.
*
* Return values: Number of bytes consumed, or -EIO if there are no

View File

@@ -229,8 +229,6 @@ void af_alg_wmem_wakeup(struct sock *sk);
int af_alg_wait_for_data(struct sock *sk, unsigned flags, unsigned min);
int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
unsigned int ivsize);
ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, int flags);
void af_alg_free_resources(struct af_alg_async_req *areq);
void af_alg_async_cb(void *data, int err);
__poll_t af_alg_poll(struct file *file, struct socket *sock,

View File

@@ -207,8 +207,6 @@ struct proto_ops {
size_t total_len, int flags);
int (*mmap) (struct file *file, struct socket *sock,
struct vm_area_struct * vma);
ssize_t (*sendpage) (struct socket *sock, struct page *page,
int offset, size_t size, int flags);
ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len, unsigned int flags);
void (*splice_eof)(struct socket *sock);
@@ -222,8 +220,6 @@ struct proto_ops {
sk_read_actor_t recv_actor);
/* This is different from read_sock(), it reads an entire skb at a time. */
int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor);
int (*sendpage_locked)(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
size_t size);
int (*set_rcvlowat)(struct sock *sk, int val);
@@ -341,10 +337,6 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags);
int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
/* Routine returns the IP overhead imposed by a (caller-protected) socket. */

View File

@@ -36,8 +36,6 @@ void __inet_accept(struct socket *sock, struct socket *newsock,
int inet_send_prepare(struct sock *sk);
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
void inet_splice_eof(struct socket *sock);
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int inet_shutdown(struct socket *sock, int how);

View File

@@ -1277,8 +1277,6 @@ struct proto {
size_t len);
int (*recvmsg)(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int *addr_len);
int (*sendpage)(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void (*splice_eof)(struct socket *sock);
int (*bind)(struct sock *sk,
struct sockaddr *addr, int addr_len);
@@ -1919,10 +1917,6 @@ int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len);
int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
int sock_no_mmap(struct file *file, struct socket *sock,
struct vm_area_struct *vma);
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
/*
* Functions to fill in entries in struct proto_ops when a protocol

View File

@@ -329,10 +329,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
size_t size, struct ubuf_info *uarg);
void tcp_splice_eof(struct socket *sock);
int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
int tcp_wmem_schedule(struct sock *sk, int copy);
void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,

View File

@@ -1929,7 +1929,6 @@ static const struct proto_ops atalk_dgram_ops = {
.sendmsg = atalk_sendmsg,
.recvmsg = atalk_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
static struct notifier_block ddp_notifier = {

View File

@@ -126,7 +126,6 @@ static const struct proto_ops pvc_proto_ops = {
.sendmsg = vcc_sendmsg,
.recvmsg = vcc_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

View File

@@ -654,7 +654,6 @@ static const struct proto_ops svc_proto_ops = {
.sendmsg = vcc_sendmsg,
.recvmsg = vcc_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

Some files were not shown because too many files have changed in this diff Show More