diff options
author | Linus Torvalds | 2017-03-04 17:31:39 -0800 |
---|---|---|
committer | Linus Torvalds | 2017-03-04 17:31:39 -0800 |
commit | 8d70eeb84ab277377c017af6a21d0a337025dede (patch) | |
tree | d6e8a80e1d5e953ab37eef0c7468c77e7735779b /net | |
parent | 2d62e0768d3c28536d4cfe4c40ba1e5e8e442a93 (diff) | |
parent | f78ef7cd9a0686b979679d0de061c6dbfd8d649e (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
1) Fix double-free in batman-adv, from Sven Eckelmann.
2) Fix packet stats for fast-RX path, from Joannes Berg.
3) Netfilter's ip_route_me_harder() doesn't handle request sockets
properly, fix from Florian Westphal.
4) Fix sendmsg deadlock in rxrpc, from David Howells.
5) Add missing RCU locking to transport hashtable scan, from Xin Long.
6) Fix potential packet loss in mlxsw driver, from Ido Schimmel.
7) Fix race in NAPI handling between poll handlers and busy polling,
from Eric Dumazet.
8) TX path in vxlan and geneve need proper RCU locking, from Jakub
Kicinski.
9) SYN processing in DCCP and TCP need to disable BH, from Eric
Dumazet.
10) Properly handle net_enable_timestamp() being invoked from IRQ
context, also from Eric Dumazet.
11) Fix crash on device-tree systems in xgene driver, from Alban Bedel.
12) Do not call sk_free() on a locked socket, from Arnaldo Carvalho de
Melo.
13) Fix use-after-free in netvsc driver, from Dexuan Cui.
14) Fix max MTU setting in bonding driver, from WANG Cong.
15) xen-netback hash table can be allocated from softirq context, so use
GFP_ATOMIC. From Anoob Soman.
16) Fix MAC address change bug in bgmac driver, from Hari Vyas.
17) strparser needs to destroy strp_wq on module exit, from WANG Cong.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (69 commits)
strparser: destroy workqueue on module exit
sfc: fix IPID endianness in TSOv2
sfc: avoid max() in array size
rds: remove unnecessary returned value check
rxrpc: Fix potential NULL-pointer exception
nfp: correct DMA direction in XDP DMA sync
nfp: don't tell FW about the reserved buffer space
net: ethernet: bgmac: mac address change bug
net: ethernet: bgmac: init sequence bug
xen-netback: don't vfree() queues under spinlock
xen-netback: keep a local pointer for vif in backend_disconnect()
netfilter: nf_tables: don't call nfnetlink_set_err() if nfnetlink_send() fails
netfilter: nft_set_rbtree: incorrect assumption on lower interval lookups
netfilter: nf_conntrack_sip: fix wrong memory initialisation
can: flexcan: fix typo in comment
can: usb_8dev: Fix memory leak of priv->cmd_msg_buffer
can: gs_usb: fix coding style
can: gs_usb: Don't use stack memory for USB transfers
ixgbe: Limit use of 2K buffers on architectures with 256B or larger cache lines
ixgbe: update the rss key on h/w, when ethtool ask for it
...
Diffstat (limited to 'net')
42 files changed, 462 insertions, 197 deletions
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index ead18ca836de..11a23fd6e1a0 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -239,8 +239,10 @@ err_unlock: spin_unlock_bh(&chain->lock); err: - if (!ret) + if (!ret) { kfree(frag_entry_new); + kfree_skb(skb); + } return ret; } @@ -313,7 +315,7 @@ free: * * There are three possible outcomes: 1) Packet is merged: Return true and * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb - * to NULL; 3) Error: Return false and leave skb as is. + * to NULL; 3) Error: Return false and free skb. * * Return: true when packet is merged or buffered, false when skb is not not * used. @@ -338,9 +340,9 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb, goto out_err; out: - *skb = skb_out; ret = true; out_err: + *skb = skb_out; return ret; } @@ -499,6 +501,12 @@ int batadv_frag_send_packet(struct sk_buff *skb, /* Eat and send fragments from the tail of skb */ while (skb->len > max_fragment_size) { + /* The initial check in this function should cover this case */ + if (unlikely(frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)) { + ret = -EINVAL; + goto put_primary_if; + } + skb_fragment = batadv_frag_create(skb, &frag_header, mtu); if (!skb_fragment) { ret = -ENOMEM; @@ -515,12 +523,6 @@ int batadv_frag_send_packet(struct sk_buff *skb, } frag_header.no++; - - /* The initial check in this function should cover this case */ - if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) { - ret = -EINVAL; - goto put_primary_if; - } } /* Make room for the fragment header. */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 8f64a5c01345..66b25e410a41 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -402,7 +402,7 @@ struct batadv_gw_node { struct rcu_head rcu; }; -DECLARE_EWMA(throughput, 1024, 8) +DECLARE_EWMA(throughput, 10, 8) /** * struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 6bfac29318f2..902af6ba481c 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -186,8 +186,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood unicast traffic to ports that turn it off */ if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) continue; + /* Do not flood if mc off, except for traffic we originate */ if (pkt_type == BR_PKT_MULTICAST && - !(p->flags & BR_MCAST_FLOOD)) + !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) continue; /* Do not flood to ports that enable proxy ARP */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 62e68c0dc687..b838213c408e 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -997,10 +997,10 @@ err_vlan_add: RCU_INIT_POINTER(p->vlgrp, NULL); synchronize_rcu(); vlan_tunnel_deinit(vg); -err_vlan_enabled: err_tunnel_init: rhashtable_destroy(&vg->vlan_hash); err_rhtbl: +err_vlan_enabled: kfree(vg); goto out; diff --git a/net/core/dev.c b/net/core/dev.c index 304f2deae5f9..8637b2b71f3d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1698,27 +1698,54 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL static atomic_t netstamp_needed_deferred; +static atomic_t netstamp_wanted; static void netstamp_clear(struct work_struct *work) { int deferred = atomic_xchg(&netstamp_needed_deferred, 0); + int wanted; - while (deferred--) - static_key_slow_dec(&netstamp_needed); + wanted = atomic_add_return(deferred, &netstamp_wanted); + if (wanted > 0) + static_key_enable(&netstamp_needed); + else + static_key_disable(&netstamp_needed); } static DECLARE_WORK(netstamp_work, netstamp_clear); #endif void net_enable_timestamp(void) { +#ifdef HAVE_JUMP_LABEL + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 0) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) + return; + } + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_inc(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - /* net_disable_timestamp() can be called from non process context */ - atomic_inc(&netstamp_needed_deferred); + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 1) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) + return; + } + atomic_dec(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else static_key_slow_dec(&netstamp_needed); @@ -4884,6 +4911,39 @@ void __napi_schedule(struct napi_struct *n) EXPORT_SYMBOL(__napi_schedule); /** + * napi_schedule_prep - check if napi can be scheduled + * @n: napi context + * + * Test if NAPI routine is already running, and if not mark + * it as running. This is used as a condition variable + * insure only one NAPI poll instance runs. We also make + * sure there is no pending NAPI disable. + */ +bool napi_schedule_prep(struct napi_struct *n) +{ + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + if (unlikely(val & NAPIF_STATE_DISABLE)) + return false; + new = val | NAPIF_STATE_SCHED; + + /* Sets STATE_MISSED bit if STATE_SCHED was already set + * This was suggested by Alexander Duyck, as compiler + * emits better code than : + * if (val & NAPIF_STATE_SCHED) + * new |= NAPIF_STATE_MISSED; + */ + new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED * + NAPIF_STATE_MISSED; + } while (cmpxchg(&n->state, val, new) != val); + + return !(val & NAPIF_STATE_SCHED); +} +EXPORT_SYMBOL(napi_schedule_prep); + +/** * __napi_schedule_irqoff - schedule for receive * @n: entry to schedule * @@ -4897,7 +4957,7 @@ EXPORT_SYMBOL(__napi_schedule_irqoff); bool napi_complete_done(struct napi_struct *n, int work_done) { - unsigned long flags; + unsigned long flags, val, new; /* * 1) Don't let napi dequeue from the cpu poll list @@ -4927,7 +4987,27 @@ bool napi_complete_done(struct napi_struct *n, int work_done) list_del_init(&n->poll_list); local_irq_restore(flags); } - WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state)); + + do { + val = READ_ONCE(n->state); + + WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); + + new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED); + + /* If STATE_MISSED was set, leave STATE_SCHED set, + * because we will call napi->poll() one more time. + * This C code was suggested by Alexander Duyck to help gcc. + */ + new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED * + NAPIF_STATE_SCHED; + } while (cmpxchg(&n->state, val, new) != val); + + if (unlikely(val & NAPIF_STATE_MISSED)) { + __napi_schedule(n); + return false; + } + return true; } EXPORT_SYMBOL(napi_complete_done); @@ -4953,6 +5033,16 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) { int rc; + /* Busy polling means there is a high chance device driver hard irq + * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was + * set in napi_schedule_prep(). + * Since we are about to call napi->poll() once more, we can safely + * clear NAPI_STATE_MISSED. + * + * Note: x86 could use a single "lock and ..." instruction + * to perform these two clear_bit() + */ + clear_bit(NAPI_STATE_MISSED, &napi->state); clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state); local_bh_disable(); @@ -5088,8 +5178,13 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) struct napi_struct *napi; napi = container_of(timer, struct napi_struct, timer); - if (napi->gro_list) - napi_schedule_irqoff(napi); + + /* Note : we use a relaxed variant of napi_schedule_prep() not setting + * NAPI_STATE_MISSED, since we do not react to a device IRQ. + */ + if (napi->gro_list && !napi_disable_pending(napi) && + !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) + __napi_schedule_irqoff(napi); return HRTIMER_NORESTART; } diff --git a/net/core/sock.c b/net/core/sock.c index e7d74940e863..f6fd79f33097 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1539,11 +1539,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) is_charged = sk_filter_charge(newsk, filter); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { - /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - newsk->sk_destruct = NULL; - bh_unlock_sock(newsk); - sk_free(newsk); + sk_free_unlock_clone(newsk); newsk = NULL; goto out; } @@ -1592,6 +1588,16 @@ out: } EXPORT_SYMBOL_GPL(sk_clone_lock); +void sk_free_unlock_clone(struct sock *sk) +{ + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + sk->sk_destruct = NULL; + bh_unlock_sock(sk); + sk_free(sk); +} +EXPORT_SYMBOL_GPL(sk_free_unlock_clone); + void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; diff --git a/net/dccp/input.c b/net/dccp/input.c index 8fedc2d49770..4a05d7876850 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -577,6 +577,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; + bool acceptable; int queued = 0; /* @@ -603,8 +604,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { - if (inet_csk(sk)->icsk_af_ops->conn_request(sk, - skb) < 0) + /* It is possible that we process SYN packets from backlog, + * so we need to make sure to disable BH right there. + */ + local_bh_disable(); + acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); + if (!acceptable) return 1; consume_skb(skb); return 0; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 53eddf99e4f6..e267e6f4c9a5 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -119,10 +119,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, * Activate features: initialise CCIDs, sequence windows etc. */ if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { - /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - newsk->sk_destruct = NULL; - sk_free(newsk); + sk_free_unlock_clone(newsk); return NULL; } dccp_init_xmit_timers(newsk); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b39a791f6756..42bfd08109dd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -622,6 +622,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_UID] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index b3cc1335adbc..c0cc6aa8cfaa 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -23,7 +23,8 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + const struct sock *sk = skb_to_full_sk(skb); + __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0; struct net_device *dev = skb_dst(skb)->dev; unsigned int hh_len; @@ -40,7 +41,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t fl4.daddr = iph->daddr; fl4.saddr = saddr; fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0; if (!fl4.flowi4_oif) fl4.flowi4_oif = l3mdev_master_ifindex(dev); fl4.flowi4_mark = skb->mark; @@ -61,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); - dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); + dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); skb_dst_set(skb, dst); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index da385ae997a3..cf4555581282 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1110,9 +1110,14 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, msg->msg_namelen, flags, 1); - inet->defer_connect = 0; - *copied = tp->fastopen_req->copied; - tcp_free_fastopen_req(tp); + /* fastopen_req could already be freed in __inet_stream_connect + * if the connection times out or gets rst + */ + if (tp->fastopen_req) { + *copied = tp->fastopen_req->copied; + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; + } return err; } @@ -2318,6 +2323,10 @@ int tcp_disconnect(struct sock *sk, int flags) memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); + /* Clean up fastopen related fields */ + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2c0ff327b6df..39c393cc0fd3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5886,9 +5886,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (th->syn) { if (th->fin) goto discard; - if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) - return 1; + /* It is possible that we process SYN packets from backlog, + * so we need to make sure to disable BH right there. + */ + local_bh_disable(); + acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); + if (!acceptable) + return 1; consume_skb(skb); return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 77362b88a661..363172527e43 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5693,13 +5693,18 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; struct net *net = (struct net *)ctl->extra2; + if (!rtnl_trylock()) + return restart_syscall(); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) { new_val = *((int *)ctl->data); - if (check_addr_gen_mode(new_val) < 0) - return -EINVAL; + if (check_addr_gen_mode(new_val) < 0) { + ret = -EINVAL; + goto out; + } /* request for default */ if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) { @@ -5708,20 +5713,23 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, /* request for individual net device */ } else { if (!idev) - return ret; + goto out; - if (check_stable_privacy(idev, net, new_val) < 0) - return -EINVAL; + if (check_stable_privacy(idev, net, new_val) < 0) { + ret = -EINVAL; + goto out; + } if (idev->cnf.addr_gen_mode != new_val) { idev->cnf.addr_gen_mode = new_val; - rtnl_lock(); addrconf_dev_config(idev->dev); - rtnl_unlock(); } } } +out: + rtnl_unlock(); + return ret; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 9948b5ce52da..986d4ca38832 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -589,6 +589,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); + skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f54f4265b37f..229bfcc451ef 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2169,10 +2169,13 @@ int ip6_del_rt(struct rt6_info *rt) static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) { struct nl_info *info = &cfg->fc_nlinfo; + struct net *net = info->nl_net; struct sk_buff *skb = NULL; struct fib6_table *table; - int err; + int err = -ENOENT; + if (rt == net->ipv6.ip6_null_entry) + goto out_put; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); @@ -2184,7 +2187,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) if (skb) { u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; - if (rt6_fill_node(info->nl_net, skb, rt, + if (rt6_fill_node(net, skb, rt, NULL, NULL, 0, RTM_DELROUTE, info->portid, seq, 0) < 0) { kfree_skb(skb); @@ -2198,17 +2201,18 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) rt6i_siblings) { err = fib6_del(sibling, info); if (err) - goto out; + goto out_unlock; } } err = fib6_del(rt, info); -out: +out_unlock: write_unlock_bh(&table->tb6_lock); +out_put: ip6_rt_put(rt); if (skb) { - rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV6_ROUTE, + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); } return err; @@ -2891,6 +2895,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, [RTA_UID] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -3627,6 +3632,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } + if (rt == net->ipv6.ip6_null_entry) { + err = rt->dst.error; + ip6_rt_put(rt); + goto errout; + } + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 3b5fd4188f2a..4456559cb056 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -85,7 +85,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, ht_dbg(sta->sdata, "Rx BA session stop requested for %pM tid %u %s reason: %d\n", sta->sta.addr, tid, - initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", + initiator == WLAN_BACK_RECIPIENT ? "recipient" : "initiator", (int)reason); if (drv_ampdu_action(local, sta->sdata, ¶ms)) @@ -398,6 +398,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, tid_agg_rx->timeout = timeout; tid_agg_rx->stored_mpdu_num = 0; tid_agg_rx->auto_seq = auto_seq; + tid_agg_rx->started = false; tid_agg_rx->reorder_buf_filtered = 0; status = WLAN_STATUS_SUCCESS; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 159a1a733725..0e718437d080 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -428,7 +428,7 @@ struct ieee80211_sta_tx_tspec { bool downgraded; }; -DECLARE_EWMA(beacon_signal, 16, 4) +DECLARE_EWMA(beacon_signal, 4, 4) struct ieee80211_if_managed { struct timer_list timer; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 28a3a0957c9e..76a8bcd8ef11 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -168,6 +168,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) break; } + flush_delayed_work(&sdata->dec_tailroom_needed_wk); drv_remove_interface(local, sdata); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 50ca3828b124..e48724a6725e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright(c) 2015 - 2016 Intel Deutschland GmbH + * Copyright(c) 2015 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1034,6 +1034,18 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata buf_size = tid_agg_rx->buf_size; head_seq_num = tid_agg_rx->head_seq_num; + /* + * If the current MPDU's SN is smaller than the SSN, it shouldn't + * be reordered. + */ + if (unlikely(!tid_agg_rx->started)) { + if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { + ret = false; + goto out; + } + tid_agg_rx->started = true; + } + /* frame with out of date sequence number */ if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { dev_kfree_skb(skb); @@ -3880,6 +3892,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, stats->last_rate = sta_stats_encode_rate(status); stats->fragments++; + stats->packets++; if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { stats->last_signal = status->signal; @@ -4073,15 +4086,17 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, ieee80211_is_beacon(hdr->frame_control))) ieee80211_scan_rx(local, skb); - if (pubsta) { - rx.sta = container_of(pubsta, struct sta_info, sta); - rx.sdata = rx.sta->sdata; - if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) - return; - goto out; - } else if (ieee80211_is_data(fc)) { + if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; + if (pubsta) { + rx.sta = container_of(pubsta, struct sta_info, sta); + rx.sdata = rx.sta->sdata; + if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) + return; + goto out; + } + prev_sta = NULL; for_each_sta_info(local, hdr->addr2, sta, tmp) { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4774e663a411..3323a2fb289b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) + if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim) return; if (sta->dead) @@ -1264,7 +1264,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) sta_info_recalc_tim(sta); ps_dbg(sdata, - "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", + "STA %pM aid %d sending %d filtered/%d PS frames since STA woke up\n", sta->sta.addr, sta->sta.aid, filtered, buffered); ieee80211_check_fast_xmit(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index dd06ef0b8861..e65cda34d2bc 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -189,6 +189,7 @@ struct tid_ampdu_tx { * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and * and ssn. * @removed: this session is removed (but might have been found due to RCU) + * @started: this session has started (head ssn or higher was received) * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -212,8 +213,9 @@ struct tid_ampdu_rx { u16 ssn; u16 buf_size; u16 timeout; - bool auto_seq; - bool removed; + u8 auto_seq:1, + removed:1, + started:1; }; /** @@ -370,7 +372,7 @@ struct mesh_sta { unsigned int fail_avg; }; -DECLARE_EWMA(signal, 1024, 8) +DECLARE_EWMA(signal, 10, 8) struct ieee80211_sta_rx_stats { unsigned long packets; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 0dd7c351002d..83b8b11f24ea 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -51,7 +51,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct ieee80211_hdr *hdr = (void *)skb->data; int ac; - if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) { + if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_AMPDU)) { ieee80211_free_txskb(&local->hw, skb); return; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 24174c520239..0d17894798b5 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1628,8 +1628,6 @@ static int __init nf_conntrack_sip_init(void) ports[ports_c++] = SIP_PORT; for (i = 0; i < ports_c; i++) { - memset(&sip[i], 0, sizeof(sip[i])); - nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, SIP_EXPECT_MAX, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ff7304ae58ac..5e0ccfd5bb37 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -461,16 +461,15 @@ nla_put_failure: return -1; } -static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) +static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -482,14 +481,11 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; } - err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_tables(struct sk_buff *skb, @@ -1050,16 +1046,15 @@ nla_put_failure: return -1; } -static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) +static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -1072,14 +1067,11 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_chains(struct sk_buff *skb, @@ -1934,18 +1926,16 @@ nla_put_failure: return -1; } -static int nf_tables_rule_notify(const struct nft_ctx *ctx, - const struct nft_rule *rule, - int event) +static void nf_tables_rule_notify(const struct nft_ctx *ctx, + const struct nft_rule *rule, int event) { struct sk_buff *skb; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -1958,14 +1948,11 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } struct nft_rule_dump_ctx { @@ -2696,9 +2683,9 @@ nla_put_failure: return -1; } -static int nf_tables_set_notify(const struct nft_ctx *ctx, - const struct nft_set *set, - int event, gfp_t gfp_flags) +static void nf_tables_set_notify(const struct nft_ctx *ctx, + const struct nft_set *set, int event, + gfp_t gfp_flags) { struct sk_buff *skb; u32 portid = ctx->portid; @@ -2706,9 +2693,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags); if (skb == NULL) goto err; @@ -2719,12 +2705,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, - ctx->report, gfp_flags); + nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report, + gfp_flags); + return; err: - if (err < 0) - nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); - return err; + nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) @@ -3504,10 +3489,10 @@ nla_put_failure: return -1; } -static int nf_tables_setelem_notify(const struct nft_ctx *ctx, - const struct nft_set *set, - const struct nft_set_elem *elem, - int event, u16 flags) +static void nf_tables_setelem_notify(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_elem *elem, + int event, u16 flags) { struct net *net = ctx->net; u32 portid = ctx->portid; @@ -3515,9 +3500,8 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx, int err; if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -3529,12 +3513,11 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, - GFP_KERNEL); + nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, + GFP_KERNEL); + return; err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); - return err; + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, @@ -4476,18 +4459,17 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, return nft_delobj(&ctx, obj); } -int nft_obj_notify(struct net *net, struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, int event, - int family, int report, gfp_t gfp) +void nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, int event, + int family, int report, gfp_t gfp) { struct sk_buff *skb; int err; if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, gfp); if (skb == NULL) goto err; @@ -4499,21 +4481,18 @@ int nft_obj_notify(struct net *net, struct nft_table *table, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + return; err: - if (err < 0) { - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); - } - return err; + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } EXPORT_SYMBOL_GPL(nft_obj_notify); -static int nf_tables_obj_notify(const struct nft_ctx *ctx, - struct nft_object *obj, int event) +static void nf_tables_obj_notify(const struct nft_ctx *ctx, + struct nft_object *obj, int event) { - return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, - ctx->seq, event, ctx->afi->family, ctx->report, - GFP_KERNEL); + nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, + ctx->afi->family, ctx->report, GFP_KERNEL); } static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, @@ -4543,7 +4522,8 @@ nla_put_failure: return -EMSGSIZE; } -static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) +static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, + int event) { struct nlmsghdr *nlh = nlmsg_hdr(skb); struct sk_buff *skb2; @@ -4551,9 +4531,8 @@ static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) if (nlmsg_report(nlh) && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb2 == NULL) goto err; @@ -4565,14 +4544,12 @@ static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) goto err; } - err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, - NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL); + nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, + nlmsg_report(nlh), GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, + -ENOBUFS); } static int nf_tables_getgen(struct net *net, struct sock *nlsk, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 71e8fb886a73..78dfbf9588b3 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -60,11 +60,10 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, d = memcmp(this, key, set->klen); if (d < 0) { parent = parent->rb_left; - /* In case of adjacent ranges, we always see the high - * part of the range in first place, before the low one. - * So don't update interval if the keys are equal. - */ - if (interval && nft_rbtree_equal(set, this, interval)) + if (interval && + nft_rbtree_equal(set, this, interval) && + nft_rbtree_interval_end(this) && + !nft_rbtree_interval_end(interval)) continue; interval = rbe; } else if (d > 0) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b1beb2b94ec7..c82301ce3fff 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -796,9 +796,8 @@ static void ovs_fragment(struct net *net, struct vport *vport, unsigned long orig_dst; struct rt6_info ovs_rt; - if (!v6ops) { + if (!v6ops) goto err; - } prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 85cd59526670..e0a87776a010 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -485,7 +485,6 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - skb_orphan(skb); memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); err = nf_ct_frag6_gather(net, skb, user); if (err) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2bd0d1949312..a0dbe7ca8f72 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3103,7 +3103,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[15]; + char name[sizeof(uaddr->sa_data) + 1]; /* * Check legality @@ -3111,7 +3111,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, if (addr_len != sizeof(struct sockaddr)) return -EINVAL; - strlcpy(name, uaddr->sa_data, sizeof(name)); + /* uaddr->sa_data comes from the userspace, it's not guaranteed to be + * zero-terminated. + */ + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); + name[sizeof(uaddr->sa_data)] = 0; return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } diff --git a/net/rds/ib.c b/net/rds/ib.c index 91fe46f1e4cc..7a64c8db81ab 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -45,8 +45,8 @@ #include "ib.h" #include "ib_mr.h" -unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; -unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; +static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; +static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; module_param(rds_ib_mr_1m_pool_size, int, 0444); @@ -438,16 +438,12 @@ int rds_ib_init(void) if (ret) goto out_sysctl; - ret = rds_trans_register(&rds_ib_transport); - if (ret) - goto out_recv; + rds_trans_register(&rds_ib_transport); rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); goto out; -out_recv: - rds_ib_recv_exit(); out_sysctl: rds_ib_sysctl_exit(); out_ibreg: diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 24c086db4511..5d6e98a79a5e 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -107,8 +107,6 @@ struct rds_ib_mr_pool { }; extern struct workqueue_struct *rds_ib_mr_wq; -extern unsigned int rds_ib_mr_1m_pool_size; -extern unsigned int rds_ib_mr_8k_pool_size; extern bool prefer_frmr; struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, diff --git a/net/rds/rds.h b/net/rds/rds.h index 966d2ee1f107..39518ef7af4d 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -903,7 +903,7 @@ void rds_connect_path_complete(struct rds_conn_path *conn, int curr); void rds_connect_complete(struct rds_connection *conn); /* transport.c */ -int rds_trans_register(struct rds_transport *trans); +void rds_trans_register(struct rds_transport *trans); void rds_trans_unregister(struct rds_transport *trans); struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr); void rds_trans_put(struct rds_transport *trans); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5438f6725092..a973d3b4dff0 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -652,16 +652,12 @@ static int rds_tcp_init(void) if (ret) goto out_pernet; - ret = rds_trans_register(&rds_tcp_transport); - if (ret) - goto out_recv; + rds_trans_register(&rds_tcp_transport); rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); goto out; -out_recv: - rds_tcp_recv_exit(); out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); out_notifier: diff --git a/net/rds/transport.c b/net/rds/transport.c index 2ffd3e30c643..0b188dd0a344 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -40,7 +40,7 @@ static struct rds_transport *transports[RDS_TRANS_COUNT]; static DECLARE_RWSEM(rds_trans_sem); -int rds_trans_register(struct rds_transport *trans) +void rds_trans_register(struct rds_transport *trans) { BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ); @@ -55,8 +55,6 @@ int rds_trans_register(struct rds_transport *trans) } up_write(&rds_trans_sem); - - return 0; } EXPORT_SYMBOL_GPL(rds_trans_register); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 199b46e93e64..7fb59c3f1542 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -290,10 +290,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); + /* The socket has been unlocked. */ if (!IS_ERR(call)) call->notify_rx = notify_rx; - release_sock(&rx->sk); + mutex_unlock(&call->user_mutex); _leave(" = %p", call); return call; } @@ -310,7 +311,10 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call); void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); + + mutex_lock(&call->user_mutex); rxrpc_release_call(rxrpc_sk(sock->sk), call); + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_kernel); } EXPORT_SYMBOL(rxrpc_kernel_end_call); @@ -450,14 +454,16 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) case RXRPC_SERVER_BOUND: case RXRPC_SERVER_LISTENING: ret = rxrpc_do_sendmsg(rx, m, len); - break; + /* The socket has been unlocked */ + goto out; default: ret = -EINVAL; - break; + goto error_unlock; } error_unlock: release_sock(&rx->sk); +out: _leave(" = %d", ret); return ret; } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 12be432be9b2..26a7b1db1361 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -467,6 +467,7 @@ struct rxrpc_call { struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ + struct mutex user_mutex; /* User access mutex */ ktime_t ack_at; /* When deferred ACK needs to happen */ ktime_t resend_at; /* When next resend needs to happen */ ktime_t ping_at; /* When next to send a ping */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 7c4c64ab8da2..0ed181f53f32 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -323,6 +323,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * * If we want to report an error, we mark the skb with the packet type and * abort code and return NULL. + * + * The call is returned with the user access mutex held. */ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_connection *conn, @@ -371,6 +373,18 @@ found_service: trace_rxrpc_receive(call, rxrpc_receive_incoming, sp->hdr.serial, sp->hdr.seq); + /* Lock the call to prevent rxrpc_kernel_send/recv_data() and + * sendmsg()/recvmsg() inconveniently stealing the mutex once the + * notification is generated. + * + * The BUG should never happen because the kernel should be well + * behaved enough not to access the call before the first notification + * event and userspace is prevented from doing so until the state is + * appropriate. + */ + if (!mutex_trylock(&call->user_mutex)) + BUG(); + /* Make the call live. */ rxrpc_incoming_call(rx, call, skb); conn = call->conn; @@ -429,10 +443,12 @@ out: /* * handle acceptance of a call by userspace * - assign the user call ID to the call at the front of the queue + * - called with the socket locked. */ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, unsigned long user_call_ID, rxrpc_notify_rx_t notify_rx) + __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call; struct rb_node *parent, **pp; @@ -446,6 +462,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, if (list_empty(&rx->to_be_accepted)) { write_unlock(&rx->call_lock); + release_sock(&rx->sk); kleave(" = -ENODATA [empty]"); return ERR_PTR(-ENODATA); } @@ -470,10 +487,39 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, */ call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); + write_unlock(&rx->call_lock); + + /* We need to gain the mutex from the interrupt handler without + * upsetting lockdep, so we have to release it there and take it here. + * We are, however, still holding the socket lock, so other accepts + * must wait for us and no one can add the user ID behind our backs. + */ + if (mutex_lock_interruptible(&call->user_mutex) < 0) { + release_sock(&rx->sk); + kleave(" = -ERESTARTSYS"); + return ERR_PTR(-ERESTARTSYS); + } + + write_lock(&rx->call_lock); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); rxrpc_see_call(call); + /* Find the user ID insertion point. */ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + call = rb_entry(parent, struct rxrpc_call, sock_node); + + if (user_call_ID < call->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > call->user_call_ID) + pp = &(*pp)->rb_right; + else + BUG(); + } + write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: @@ -499,6 +545,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxrpc_notify_socket(call); rxrpc_service_prealloc(rx, GFP_KERNEL); + release_sock(&rx->sk); _leave(" = %p{%d}", call, call->debug_id); return call; @@ -515,6 +562,7 @@ id_in_use: write_unlock(&rx->call_lock); out: rxrpc_service_prealloc(rx, GFP_KERNEL); + release_sock(&rx->sk); _leave(" = %d", ret); return ERR_PTR(ret); } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8b94db3c9b2e..d79cd36987a9 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -115,6 +115,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) if (!call->rxtx_annotations) goto nomem_2; + mutex_init(&call->user_mutex); setup_timer(&call->timer, rxrpc_call_timer_expired, (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); @@ -194,14 +195,16 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) } /* - * set up a call for the given data - * - called in process context with IRQs enabled + * Set up a call for the given parameters. + * - Called with the socket lock held, which it must release. + * - If it returns a call, the call's lock will need releasing by the caller. */ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, unsigned long user_call_ID, gfp_t gfp) + __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call, *xcall; struct rb_node *parent, **pp; @@ -212,6 +215,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, call = rxrpc_alloc_client_call(srx, gfp); if (IS_ERR(call)) { + release_sock(&rx->sk); _leave(" = %ld", PTR_ERR(call)); return call; } @@ -219,6 +223,11 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), here, (const void *)user_call_ID); + /* We need to protect a partially set up call against the user as we + * will be acting outside the socket lock. + */ + mutex_lock(&call->user_mutex); + /* Publish the call, even though it is incompletely set up as yet */ write_lock(&rx->call_lock); @@ -250,6 +259,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, list_add_tail(&call->link, &rxrpc_calls); write_unlock(&rxrpc_call_lock); + /* From this point on, the call is protected by its own lock. */ + release_sock(&rx->sk); + /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. */ @@ -279,6 +291,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, */ error_dup_user_ID: write_unlock(&rx->call_lock); + release_sock(&rx->sk); ret = -EEXIST; error: @@ -287,6 +300,7 @@ error: trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), here, ERR_PTR(ret)); rxrpc_release_call(rx, call); + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ERR_PTR(ret); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 78ec33477adf..9f4cfa25af7c 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1194,6 +1194,7 @@ void rxrpc_data_ready(struct sock *udp_sk) goto reject_packet; } rxrpc_send_ping(call, skb, skew); + mutex_unlock(&call->user_mutex); } rxrpc_input_call_packet(call, skb, skew); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 28274a3c9831..6491ca46a03f 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -489,6 +489,20 @@ try_again: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); + /* We're going to drop the socket lock, so we need to lock the call + * against interference by sendmsg. + */ + if (!mutex_trylock(&call->user_mutex)) { + ret = -EWOULDBLOCK; + if (flags & MSG_DONTWAIT) + goto error_requeue_call; + ret = -ERESTARTSYS; + if (mutex_lock_interruptible(&call->user_mutex) < 0) + goto error_requeue_call; + } + + release_sock(&rx->sk); + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); @@ -504,7 +518,7 @@ try_again: &call->user_call_ID); } if (ret < 0) - goto error; + goto error_unlock_call; } if (msg->msg_name) { @@ -535,12 +549,12 @@ try_again: } if (ret < 0) - goto error; + goto error_unlock_call; if (call->state == RXRPC_CALL_COMPLETE) { ret = rxrpc_recvmsg_term(call, msg); if (ret < 0) - goto error; + goto error_unlock_call; if (!(flags & MSG_PEEK)) rxrpc_release_call(rx, call); msg->msg_flags |= MSG_EOR; @@ -553,8 +567,21 @@ try_again: msg->msg_flags &= ~MSG_MORE; ret = copied; -error: +error_unlock_call: + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); + return ret; + +error_requeue_call: + if (!(flags & MSG_PEEK)) { + write_lock_bh(&rx->recvmsg_lock); + list_add(&call->recvmsg_link, &rx->recvmsg_q); + write_unlock_bh(&rx->recvmsg_lock); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0, 0, 0, 0); + } else { + rxrpc_put_call(call, rxrpc_call_put); + } error_no_call: release_sock(&rx->sk); trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); @@ -611,7 +638,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, iov.iov_len = size - *_offset; iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset); - lock_sock(sock->sk); + mutex_lock(&call->user_mutex); switch (call->state) { case RXRPC_CALL_CLIENT_RECV_REPLY: @@ -650,7 +677,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, read_phase_complete: ret = 1; out: - release_sock(sock->sk); + mutex_unlock(&call->user_mutex); _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); return ret; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 19b36c60fb4c..bc2d3dcff9de 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -61,9 +61,12 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, } trace_rxrpc_transmit(call, rxrpc_transmit_wait); - release_sock(&rx->sk); + mutex_unlock(&call->user_mutex); *timeo = schedule_timeout(*timeo); - lock_sock(&rx->sk); + if (mutex_lock_interruptible(&call->user_mutex) < 0) { + ret = sock_intr_errno(*timeo); + break; + } } remove_wait_queue(&call->waitq, &myself); @@ -173,7 +176,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, /* * send data through a socket * - must be called in process context - * - caller holds the socket locked + * - The caller holds the call user access mutex, but not the socket lock. */ static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, @@ -439,10 +442,13 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, /* * Create a new client call for sendmsg(). + * - Called with the socket lock held, which it must release. + * - If it returns a call, the call's lock will need releasing by the caller. */ static struct rxrpc_call * rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, unsigned long user_call_ID, bool exclusive) + __releases(&rx->sk.sk_lock.slock) { struct rxrpc_conn_parameters cp; struct rxrpc_call *call; @@ -452,8 +458,10 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, _enter(""); - if (!msg->msg_name) + if (!msg->msg_name) { + release_sock(&rx->sk); return ERR_PTR(-EDESTADDRREQ); + } key = rx->key; if (key && !rx->key->payload.data[0]) @@ -466,6 +474,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, cp.exclusive = rx->exclusive | exclusive; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); + /* The socket is now unlocked */ _leave(" = %p\n", call); return call; @@ -477,6 +486,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, * - the socket may be either a client socket or a server socket */ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) + __releases(&rx->sk.sk_lock.slock) { enum rxrpc_command cmd; struct rxrpc_call *call; @@ -490,12 +500,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, &exclusive); if (ret < 0) - return ret; + goto error_release_sock; if (cmd == RXRPC_CMD_ACCEPT) { + ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) - return -EINVAL; + goto error_release_sock; call = rxrpc_accept_call(rx, user_call_ID, NULL); + /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); rxrpc_put_call(call, rxrpc_call_put); @@ -504,12 +516,30 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) call = rxrpc_find_call_by_user_ID(rx, user_call_ID); if (!call) { + ret = -EBADSLT; if (cmd != RXRPC_CMD_SEND_DATA) - return -EBADSLT; + goto error_release_sock; call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, exclusive); + /* The socket is now unlocked... */ if (IS_ERR(call)) return PTR_ERR(call); + /* ... and we have the call lock. */ + } else { + ret = -EBUSY; + if (call->state == RXRPC_CALL_UNINITIALISED || + call->state == RXRPC_CALL_CLIENT_AWAIT_CONN || + call->state == RXRPC_CALL_SERVER_PREALLOC || + call->state == RXRPC_CALL_SERVER_SECURING || + call->state == RXRPC_CALL_SERVER_ACCEPTING) + goto error_release_sock; + + ret = mutex_lock_interruptible(&call->user_mutex); + release_sock(&rx->sk); + if (ret < 0) { + ret = -ERESTARTSYS; + goto error_put; + } } _debug("CALL %d USR %lx ST %d on CONN %p", @@ -537,9 +567,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_data(rx, call, msg, len); } + mutex_unlock(&call->user_mutex); +error_put: rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ret; + +error_release_sock: + release_sock(&rx->sk); + return ret; } /** @@ -564,7 +600,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); - lock_sock(sock->sk); + mutex_lock(&call->user_mutex); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); @@ -579,7 +615,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); } - release_sock(sock->sk); + mutex_unlock(&call->user_mutex); _leave(" = %d", ret); return ret; } @@ -600,12 +636,12 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, { _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); - lock_sock(sock->sk); + mutex_lock(&call->user_mutex); if (rxrpc_abort_call(why, call, 0, abort_code, error)) rxrpc_send_abort_packet(call); - release_sock(sock->sk); + mutex_unlock(&call->user_mutex); _leave(""); } diff --git a/net/sctp/input.c b/net/sctp/input.c index fc458968fe4b..2a28ab20487f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -884,14 +884,17 @@ int sctp_hash_transport(struct sctp_transport *t) arg.paddr = &t->ipaddr; arg.lport = htons(t->asoc->base.bind_addr.port); + rcu_read_lock(); list = rhltable_lookup(&sctp_transport_hashtable, &arg, sctp_hash_params); rhl_for_each_entry_rcu(transport, tmp, list, node) if (transport->asoc->ep == t->asoc->ep) { + rcu_read_unlock(); err = -EEXIST; goto out; } + rcu_read_unlock(); err = rhltable_insert_key(&sctp_transport_hashtable, &arg, &t->node, sctp_hash_params); diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 41adf362936d..b5c279b22680 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -504,6 +504,7 @@ static int __init strp_mod_init(void) static void __exit strp_mod_exit(void) { + destroy_workqueue(strp_wq); } module_init(strp_mod_init); module_exit(strp_mod_exit); |