diff options
author | David S. Miller | 2017-12-27 16:44:14 -0500 |
---|---|---|
committer | David S. Miller | 2017-12-27 16:44:14 -0500 |
commit | 55b07a65e15bea6e253a907dacaf89b61fe504ca (patch) | |
tree | 9acc15d07f24c796c13c6d8999de8c206827aa49 | |
parent | 1f119f90382a017dd0dc27340e1c80b83aa3e9cd (diff) | |
parent | 8ddab50839e29e965460b2cf794fd2b06a946893 (diff) |
Merge branch 'net-zerocopy-refine'
Willem de Bruijn says:
====================
zerocopy refinements
1/4 is a small optimization follow-up to the earlier fix to skb_segment:
check skb state once per skb, instead of once per frag.
2/4 makes behavior more consistent between standard and zerocopy send:
set the PSH bit when hitting MAX_SKB_FRAGS. This helps GRO.
3/4 resolves a surprising inconsistency in notification:
because small packets were not stored in frags, they would not set
the copied error code over loopback. This change also optimizes
the path by removing copying and making tso_fragment cheaper.
4/4 follows up on 3/4 by no longer allocating the now-unused memory.
This was actually already in the RFC patches, but was dropped as I pared
down the patch set during revisions.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/core/skbuff.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 24 |
2 files changed, 24 insertions, 14 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a3cb0be4c6f3..00b0757830e2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3656,6 +3656,10 @@ normal: skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & SKBTX_SHARED_FRAG; + if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || + skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) + goto err; + while (pos < offset + len) { if (i >= nfrags) { BUG_ON(skb_headlen(list_skb)); @@ -3667,6 +3671,11 @@ normal: BUG_ON(!nfrags); + if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || + skb_zerocopy_clone(nskb, frag_skb, + GFP_ATOMIC)) + goto err; + list_skb = list_skb->next; } @@ -3678,11 +3687,6 @@ normal: goto err; } - if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) - goto err; - if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) - goto err; - *nskb_frag = *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 67d39b79c801..7ac583a2b9fe 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1104,12 +1104,15 @@ static int linear_payload_sz(bool first_skb) return 0; } -static int select_size(const struct sock *sk, bool sg, bool first_skb) +static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc) { const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { + if (zc) + return 0; + if (sk_can_gso(sk)) { tmp = linear_payload_sz(first_skb); } else { @@ -1186,7 +1189,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; bool process_backlog = false; - bool sg; + bool sg, zc = false; long timeo; flags = msg->msg_flags; @@ -1204,7 +1207,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) goto out_err; } - if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG)) + zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG; + if (!zc) uarg->zerocopy = 0; } @@ -1281,6 +1285,7 @@ restart: if 
(copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; + int linear; new_segment: /* Allocate new segment. If the interface is SG, @@ -1294,9 +1299,8 @@ new_segment: goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); - skb = sk_stream_alloc_skb(sk, - select_size(sk, sg, first_skb), - sk->sk_allocation, + linear = select_size(sk, sg, first_skb, zc); + skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation, first_skb); if (!skb) goto wait_for_memory; @@ -1325,13 +1329,13 @@ new_segment: copy = msg_data_left(msg); /* Where to copy to? */ - if (skb_availroom(skb) > 0) { + if (skb_availroom(skb) > 0 && !zc) { /* We have some space in skb head. Superb! */ copy = min_t(int, copy, skb_availroom(skb)); err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); if (err) goto do_fault; - } else if (!uarg || !uarg->zerocopy) { + } else if (!zc) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@ -1371,8 +1375,10 @@ new_segment: pfrag->offset += copy; } else { err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); - if (err == -EMSGSIZE || err == -EEXIST) + if (err == -EMSGSIZE || err == -EEXIST) { + tcp_mark_push(tp, skb); goto new_segment; + } if (err < 0) goto do_error; copy = err; |