aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorArjun Roy2020-12-02 14:53:44 -0800
committerJakub Kicinski2020-12-04 13:40:52 -0800
commit7fba5309efe24e4f0284ef4b8663cdf401035e72 (patch)
tree8ad995ce4fb2d07d49d5f4eb05b0e90106db6672 /net/ipv4
parent2cd81161848daa9c1b5ba13ceb6ff067fbb86aa9 (diff)
net-zerocopy: Refactor skb frag fast-forward op.
Refactor skb frag fast-forwarding for tcp receive zerocopy. This is part of a patch set that introduces short-circuited hybrid copies for small receive operations, which results in roughly 33% fewer syscalls for small RPC scenarios. skb_advance_to_frag(), given a skb and an offset into the skb, iterates from the first frag for the skb until we're at the frag specified by the offset. Assuming the offset provided refers to how many bytes in the skb are already read, the returned frag points to the next frag we may read from, while offset_frag is set to the number of bytes from this frag that we have already read. If frag is not null and offset_frag is equal to 0, then we may be able to map this frag's page into the process address space with vm_insert_page(). However, if offset_frag is not equal to 0, then we cannot do so. Signed-off-by: Arjun Roy <arjunroy@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/tcp.c35
1 files changed, 26 insertions, 9 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 72fe68014a8c..8372d31f4a88 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1758,6 +1758,28 @@ int tcp_mmap(struct file *file, struct socket *sock,
}
EXPORT_SYMBOL(tcp_mmap);
+static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
+ u32 *offset_frag)
+{
+ skb_frag_t *frag;
+
+ offset_skb -= skb_headlen(skb);
+ if ((int)offset_skb < 0 || skb_has_frag_list(skb))
+ return NULL;
+
+ frag = skb_shinfo(skb)->frags;
+ while (offset_skb) {
+ if (skb_frag_size(frag) > offset_skb) {
+ *offset_frag = offset_skb;
+ return frag;
+ }
+ offset_skb -= skb_frag_size(frag);
+ ++frag;
+ }
+ *offset_frag = 0;
+ return frag;
+}
+
static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
struct sk_buff *skb, u32 copylen,
u32 *offset, u32 *seq)
@@ -1884,6 +1906,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
curr_addr = address;
while (length + PAGE_SIZE <= zc->length) {
if (zc->recv_skip_hint < PAGE_SIZE) {
+ u32 offset_frag;
+
/* If we're here, finish the current batch. */
if (pg_idx) {
ret = tcp_zerocopy_vm_insert_batch(vma, pages,
@@ -1904,16 +1928,9 @@ static int tcp_zerocopy_receive(struct sock *sk,
skb = tcp_recv_skb(sk, seq, &offset);
}
zc->recv_skip_hint = skb->len - offset;
- offset -= skb_headlen(skb);
- if ((int)offset < 0 || skb_has_frag_list(skb))
+ frags = skb_advance_to_frag(skb, offset, &offset_frag);
+ if (!frags || offset_frag)
break;
- frags = skb_shinfo(skb)->frags;
- while (offset) {
- if (skb_frag_size(frags) > offset)
- goto out;
- offset -= skb_frag_size(frags);
- frags++;
- }
}
if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
int remaining = zc->recv_skip_hint;