From 047fe3605235888f3ebcda0c728cb31937eadfe6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jun 2012 15:24:40 +0200 Subject: splice: fix racy pipe->buffers uses Dave Jones reported a kernel BUG at mm/slub.c:3474! triggered by splice_shrink_spd() called from vmsplice_to_pipe() commit 35f3d14dbbc5 (pipe: add support for shrinking and growing pipes) added capability to adjust pipe->buffers. Problem is some paths don't hold pipe mutex and assume pipe->buffers doesn't change for their duration. Fix this by adding nr_pages_max field in struct splice_pipe_desc, and use it in place of pipe->buffers where appropriate. splice_shrink_spd() loses its struct pipe_inode_info argument. Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Jens Axboe Cc: Alexander Viro Cc: Tom Herbert Cc: stable # 2.6.35 Tested-by: Dave Jones Signed-off-by: Jens Axboe --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 016694d62484..bac3c5756d63 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1755,6 +1755,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = MAX_SKB_FRAGS, .flags = flags, .ops = &sock_pipe_buf_ops, .spd_release = sock_spd_release, -- cgit v1.2.3 From 88a9e31c506c00c8b7a2f1611406d0e38dcb33b3 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Fri, 1 Jun 2012 11:14:03 +0300 Subject: mac80211: clear ifmgd->bssid only after building DELBA ieee80211_set_disassoc() clears ifmgd->bssid before building DELBA frames, resulting in frames with invalid bssid ("00:00:00:00:00:00"). Fix it by clearing ifmgd->bssid only after building all the needed frames. After this change, we no longer need to save the bssid (before clearing it), so remove the local array. Reported-by: Ido Yariv Cc: stable@vger.kernel.org Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 66e4fcdd1c6b..a4bb856de08f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1342,7 +1342,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; u32 changed = 0; - u8 bssid[ETH_ALEN]; ASSERT_MGD_MTX(ifmgd); @@ -1354,10 +1353,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_stop_poll(sdata); - memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - ifmgd->associated = NULL; - memset(ifmgd->bssid, 0, ETH_ALEN); /* * we need to commit the associated = NULL change because the @@ -1377,7 +1373,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_carrier_off(sdata->dev); mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, bssid); + sta = sta_info_get(sdata, ifmgd->bssid); if (sta) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, tx); @@ -1386,13 +1382,16 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* deauthenticate/disassociate now */ if (tx || frame_buf) - ieee80211_send_deauth_disassoc(sdata, bssid, stype, reason, - tx, frame_buf); + ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype, + reason, tx, frame_buf); /* flush out frame */ if (tx) drv_flush(local, false); + /* clear bssid only after building the needed mgmt frames */ + memset(ifmgd->bssid, 0, ETH_ALEN); + /* remove AP and TDLS peers */ sta_info_flush(local, sdata); -- cgit v1.2.3 From ef5b6e127761667f78d99b7510a3876077fe9abe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 17 Jun 2012 09:56:46 +0000 Subject: netfilter: ipset: fix interface comparision in hash-netiface sets ifname_compare() assumes that skb->dev is zero-padded, e.g 'eth1\0\0\0\0\0...'. This isn't always the case. e1000 driver does strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); in e1000_probe(), so once device is registered dev->name memory contains 'eth1\0:0:3\0\0\0' (or something like that), which makes eth1 compare fail. Use plain strcmp() instead. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_netiface.c | 32 ++++-------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index ee863943c826..d5d3607ae7bc 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -38,30 +38,6 @@ struct iface_node { #define iface_data(n) (rb_entry(n, struct iface_node, node)->iface) -static inline long -ifname_compare(const char *_a, const char *_b) -{ - const long *a = (const long *)_a; - const long *b = (const long *)_b; - - BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); - if (a[0] != b[0]) - return a[0] - b[0]; - if (IFNAMSIZ > sizeof(long)) { - if (a[1] != b[1]) - return a[1] - b[1]; - } - if (IFNAMSIZ > 2 * sizeof(long)) { - if (a[2] != b[2]) - return a[2] - b[2]; - } - if (IFNAMSIZ > 3 * sizeof(long)) { - if (a[3] != b[3]) - return a[3] - b[3]; - } - return 0; -} - static void rbtree_destroy(struct rb_root *root) { @@ -99,7 +75,7 @@ iface_test(struct rb_root *root, const char **iface) while (n) { const char *d = iface_data(n); - long res = ifname_compare(*iface, d); + int res = strcmp(*iface, d); if (res < 0) n = n->rb_left; @@ -121,7 +97,7 @@ iface_add(struct rb_root *root, const char **iface) while (*n) { char *ifname = iface_data(*n); - long res = ifname_compare(*iface, ifname); + int res = strcmp(*iface, ifname); p = *n; if (res < 0) @@ -366,7 +342,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface4_elem data = { .cidr = HOST_MASK }; u32 ip = 0, ip_to, last; u32 timeout = h->timeout; - char iface[IFNAMSIZ] = {}; + char iface[IFNAMSIZ]; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -663,7 +639,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem data = { .cidr = HOST_MASK }; u32 timeout = h->timeout; - char iface[IFNAMSIZ] = {}; + char iface[IFNAMSIZ]; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || -- cgit v1.2.3 From c24584c028a62900ea6b541b312030f0feac93b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 24 Jun 2012 21:58:23 +0000 Subject: netfilter: ipvs: fix dst leak in __ip_vs_addr_is_local_v6 After call to ip6_route_output() we must release dst or we leak it. Also should test dst->error, as ip6_route_output() never returns NULL. Use boolean while we are at it. Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index dd811b8dd97c..d43e3c122f7b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -76,19 +76,19 @@ static void __ip_vs_del_service(struct ip_vs_service *svc); #ifdef CONFIG_IP_VS_IPV6 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ -static int __ip_vs_addr_is_local_v6(struct net *net, - const struct in6_addr *addr) +static bool __ip_vs_addr_is_local_v6(struct net *net, + const struct in6_addr *addr) { - struct rt6_info *rt; struct flowi6 fl6 = { .daddr = *addr, }; + struct dst_entry *dst = ip6_route_output(net, NULL, &fl6); + bool is_local; - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); - if (rt && rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) - return 1; + is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK); - return 0; + dst_release(dst); + return is_local; } #endif -- cgit v1.2.3 From 67de956ff5dc1d4f321e16cfbd63f5be3b691b43 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Mon, 25 Jun 2012 16:05:27 +0200 Subject: NFC: Prevent multiple buffer overflows in NCI Fix multiple remotely-exploitable stack-based buffer overflows due to the NCI code pulling length fields directly from incoming frames and copying too much data into statically-sized arrays. Signed-off-by: Dan Rosenberg Cc: stable@kernel.org Cc: security@kernel.org Cc: Lauro Ramos Venancio Cc: Aloisio Almeida Jr Cc: Samuel Ortiz Cc: David S. Miller Acked-by: Ilan Elias Signed-off-by: Samuel Ortiz --- net/nfc/nci/ntf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index cb2646179e5f..2ab196a9f228 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -106,7 +106,7 @@ static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, nfca_poll->sens_res = __le16_to_cpu(*((__u16 *)data)); data += 2; - nfca_poll->nfcid1_len = *data++; + nfca_poll->nfcid1_len = min_t(__u8, *data++, NFC_NFCID1_MAXSIZE); pr_debug("sens_res 0x%x, nfcid1_len %d\n", nfca_poll->sens_res, nfca_poll->nfcid1_len); @@ -130,7 +130,7 @@ static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev, struct rf_tech_specific_params_nfcb_poll *nfcb_poll, __u8 *data) { - nfcb_poll->sensb_res_len = *data++; + nfcb_poll->sensb_res_len = min_t(__u8, *data++, NFC_SENSB_RES_MAXSIZE); pr_debug("sensb_res_len %d\n", nfcb_poll->sensb_res_len); @@ -145,7 +145,7 @@ static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev, __u8 *data) { nfcf_poll->bit_rate = *data++; - nfcf_poll->sensf_res_len = *data++; + nfcf_poll->sensf_res_len = min_t(__u8, *data++, NFC_SENSF_RES_MAXSIZE); pr_debug("bit_rate %d, sensf_res_len %d\n", nfcf_poll->bit_rate, nfcf_poll->sensf_res_len); @@ -331,7 +331,7 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev, switch (ntf->activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: nfca_poll = &ntf->activation_params.nfca_poll_iso_dep; - nfca_poll->rats_res_len = *data++; + nfca_poll->rats_res_len = min_t(__u8, *data++, 20); pr_debug("rats_res_len %d\n", nfca_poll->rats_res_len); if (nfca_poll->rats_res_len > 0) { memcpy(nfca_poll->rats_res, @@ -341,7 +341,7 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev, case NCI_NFC_B_PASSIVE_POLL_MODE: nfcb_poll = &ntf->activation_params.nfcb_poll_iso_dep; - nfcb_poll->attrib_res_len = *data++; + nfcb_poll->attrib_res_len = min_t(__u8, *data++, 50); pr_debug("attrib_res_len %d\n", nfcb_poll->attrib_res_len); if (nfcb_poll->attrib_res_len > 0) { memcpy(nfcb_poll->attrib_res, -- cgit v1.2.3 From 03e934f620101ca2cfc9383bd76172dd3e1f8567 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jun 2012 00:47:58 +0200 Subject: NFC: Return from rawsock_release when sk is NULL Sasha Levin reported following panic : [ 2136.383310] BUG: unable to handle kernel NULL pointer dereference at 00000000000003b0 [ 2136.384022] IP: [] __lock_acquire+0xc0/0x4b0 [ 2136.384022] PGD 131c4067 PUD 11c0c067 PMD 0 [ 2136.388106] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 2136.388106] CPU 1 [ 2136.388106] Pid: 24855, comm: trinity-child1 Tainted: G W 3.5.0-rc2-sasha-00015-g7b268f7 #374 [ 2136.388106] RIP: 0010:[] [] __lock_acquire+0xc0/0x4b0 [ 2136.388106] RSP: 0018:ffff8800130b3ca8 EFLAGS: 00010046 [ 2136.388106] RAX: 0000000000000086 RBX: ffff88001186b000 RCX: 0000000000000000 [ 2136.388106] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 2136.388106] RBP: ffff8800130b3d08 R08: 0000000000000001 R09: 0000000000000000 [ 2136.388106] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000002 [ 2136.388106] R13: 00000000000003b0 R14: 0000000000000000 R15: 0000000000000000 [ 2136.388106] FS: 00007fa5b1bd4700(0000) GS:ffff88001b800000(0000) knlGS:0000000000000000 [ 2136.388106] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2136.388106] CR2: 00000000000003b0 CR3: 0000000011d1f000 CR4: 00000000000406e0 [ 2136.388106] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 2136.388106] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 2136.388106] Process trinity-child1 (pid: 24855, threadinfo ffff8800130b2000, task ffff88001186b000) [ 2136.388106] Stack: [ 2136.388106] ffff8800130b3cd8 ffffffff81121785 ffffffff81236774 000080d000000001 [ 2136.388106] ffff88001b9d6c00 00000000001d6c00 ffffffff130b3d08 ffff88001186b000 [ 2136.388106] 0000000000000000 0000000000000002 0000000000000000 0000000000000000 [ 2136.388106] Call Trace: [ 2136.388106] [] ? sched_clock_local+0x25/0x90 [ 2136.388106] [] ? get_empty_filp+0x74/0x220 [ 2136.388106] [] lock_acquire+0x18a/0x1e0 [ 2136.388106] [] ? rawsock_release+0x4f/0xa0 [ 2136.388106] [] _raw_write_lock_bh+0x40/0x80 [ 2136.388106] [] ? rawsock_release+0x4f/0xa0 [ 2136.388106] [] rawsock_release+0x4f/0xa0 [ 2136.388106] [] sock_release+0x18/0x70 [ 2136.388106] [] sock_close+0x29/0x30 [ 2136.388106] [] __fput+0x11a/0x2c0 [ 2136.388106] [] fput+0x15/0x20 [ 2136.388106] [] sys_accept4+0x1b4/0x200 [ 2136.388106] [] ? _raw_spin_unlock_irq+0x4c/0x80 [ 2136.388106] [] ? _raw_spin_unlock_irq+0x59/0x80 [ 2136.388106] [] ? sysret_check+0x22/0x5d [ 2136.388106] [] sys_accept+0xb/0x10 [ 2136.388106] [] system_call_fastpath+0x16/0x1b [ 2136.388106] Code: ec 04 00 0f 85 ea 03 00 00 be d5 0b 00 00 48 c7 c7 8a c1 40 84 e8 b1 a5 f8 ff 31 c0 e9 d4 03 00 00 66 2e 0f 1f 84 00 00 00 00 00 <49> 81 7d 00 60 73 5e 85 b8 01 00 00 00 44 0f 44 e0 83 fe 01 77 [ 2136.388106] RIP [] __lock_acquire+0xc0/0x4b0 [ 2136.388106] RSP [ 2136.388106] CR2: 00000000000003b0 [ 2136.388106] ---[ end trace 6d450e935ee18982 ]--- [ 2136.388106] Kernel panic - not syncing: Fatal exception in interrupt rawsock_release() should test if sock->sk is NULL before calling sock_orphan()/sock_put() Reported-by: Sasha Levin Tested-by: Sasha Levin Cc: stable@kernel.org Signed-off-by: Eric Dumazet Signed-off-by: Samuel Ortiz --- net/nfc/rawsock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index ec1134c9e07f..8b8a6a2b2bad 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -54,7 +54,10 @@ static int rawsock_release(struct socket *sock) { struct sock *sk = sock->sk; - pr_debug("sock=%p\n", sock); + pr_debug("sock=%p sk=%p\n", sock, sk); + + if (!sk) + return 0; sock_orphan(sk); sock_put(sk); -- cgit v1.2.3 From 4b5ebccc40843104d980f0714bc86bfcd5568941 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 27 Jun 2012 15:38:56 +0200 Subject: mac80211: correct behaviour on unrecognised action frames When receiving an "individually addressed" action frame, the receiver is required to return it to the sender. mac80211 gets this wrong as it also returns group addressed (mcast) frames to the sender. Fix this and update the reference to the new 802.11 standards version since things were shuffled around significantly. Cc: stable@kernel.org Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7bcecf73aafb..965e6ec0adb6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2455,7 +2455,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) * frames that we didn't handle, including returning unknown * ones. For all other modes we will return them to the sender, * setting the 0x80 bit in the action category, as required by - * 802.11-2007 7.3.1.11. + * 802.11-2012 9.24.4. * Newer versions of hostapd shall also use the management frame * registration mechanisms, but older ones still use cooked * monitor interfaces so push all frames there. @@ -2465,6 +2465,9 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) return RX_DROP_MONITOR; + if (is_multicast_ether_addr(mgmt->da)) + return RX_DROP_MONITOR; + /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) return RX_DROP_UNUSABLE; -- cgit v1.2.3 From 7cecb523adedcaf8acba5e14d47559d8bc3f40d7 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 27 Jun 2012 14:32:07 +0000 Subject: net: Downgrade CAP_SYS_MODULE deprecated message from error to warning. Make logging level consistent with other deprecation messages in net subsystem. Signed-off-by: Vinson Lee Cc: David Mackey Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6df214041a5e..84f01ba81a34 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1136,8 +1136,8 @@ void dev_load(struct net *net, const char *name) no_module = request_module("netdev-%s", name); if (no_module && capable(CAP_SYS_MODULE)) { if (!request_module("%s", name)) - pr_err("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", - name); + pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", + name); } } EXPORT_SYMBOL(dev_load); -- cgit v1.2.3 From d31f4d448f7671dc3e6a7a1c92a4c085a36058bb Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Thu, 28 Jun 2012 02:57:48 +0000 Subject: netfilter: ipset: fix crash if IPSET_CMD_NONE command is sent This patch fixes a crash if that ipset command is sent over nfnetlink. Signed-off-by: Tomasz Bursztyka Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 819c342f5b30..9730882697aa 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -639,6 +639,14 @@ find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) return 0; } +static int +ip_set_none(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + return -EOPNOTSUPP; +} + static int ip_set_create(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1539,6 +1547,10 @@ nlmsg_failure: } static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { + [IPSET_CMD_NONE] = { + .call = ip_set_none, + .attr_count = IPSET_ATTR_CMD_MAX, + }, [IPSET_CMD_CREATE] = { .call = ip_set_create, .attr_count = IPSET_ATTR_CMD_MAX, -- cgit v1.2.3 From 4009e18851ea555959c6017d848983b3d60bf667 Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Thu, 28 Jun 2012 02:57:49 +0000 Subject: netfilter: nfnetlink: fix missing rcu_read_unlock in nfnetlink_rcv_msg Bug added in commit 6b75e3e8d664a9a (netfilter: nfnetlink: add RCU in nfnetlink_rcv_msg()) Signed-off-by: Tomasz Bursztyka Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 3e797d1fcb94..791d56bbd74a 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -169,8 +169,10 @@ replay: err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, ss->cb[cb_id].policy); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); return err; + } if (nc->call_rcu) { err = nc->call_rcu(net->nfnl, skb, nlh, -- cgit v1.2.3 From 4244854d22bf8f782698c5224b9191c8d2d42610 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sat, 30 Jun 2012 03:04:26 +0000 Subject: sctp: be more restrictive in transport selection on bundled sacks It was noticed recently that when we send data on a transport, its possible that we might bundle a sack that arrived on a different transport. While this isn't a major problem, it does go against the SHOULD requirement in section 6.4 of RFC 2960: An endpoint SHOULD transmit reply chunks (e.g., SACK, HEARTBEAT ACK, etc.) to the same destination transport address from which it received the DATA or control chunk to which it is replying. This rule should also be followed if the endpoint is bundling DATA chunks together with the reply chunk. This patch seeks to correct that. It restricts the bundling of sack operations to only those transports which have moved the ctsn of the association forward since the last sack. By doing this we guarantee that we only bundle outbound saks on a transport that has received a chunk since the last sack. This brings us into stricter compliance with the RFC. Vlad had initially suggested that we strictly allow only sack bundling on the transport that last moved the ctsn forward. While this makes sense, I was concerned that doing so prevented us from bundling in the case where we had received chunks that moved the ctsn on multiple transports. In those cases, the RFC allows us to select any of the transports having received chunks to bundle the sack on. so I've modified the approach to allow for that, by adding a state variable to each transport that tracks weather it has moved the ctsn since the last sack. This I think keeps our behavior (and performance), close enough to our current profile that I think we can do this without a sysctl knob to enable/disable it. Signed-off-by: Neil Horman CC: Vlad Yaseivch CC: David S. Miller CC: linux-sctp@vger.kernel.org Reported-by: Michele Baldessari Reported-by: sorin serban Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++++ include/net/sctp/tsnmap.h | 3 ++- net/sctp/associola.c | 1 + net/sctp/output.c | 5 +++++ net/sctp/sm_make_chunk.c | 16 ++++++++++++++++ net/sctp/sm_sideeffect.c | 2 +- net/sctp/transport.c | 2 ++ net/sctp/tsnmap.c | 6 +++++- net/sctp/ulpevent.c | 3 ++- net/sctp/ulpqueue.c | 2 +- 10 files changed, 39 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e4652fe58958..fecdf31816f2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -912,6 +912,9 @@ struct sctp_transport { /* Is this structure kfree()able? */ malloced:1; + /* Has this transport moved the ctsn since we last sacked */ + __u32 sack_generation; + struct flowi fl; /* This is the peer's IP address and port. */ @@ -1584,6 +1587,7 @@ struct sctp_association { */ __u8 sack_needed; /* Do we need to sack the peer? */ __u32 sack_cnt; + __u32 sack_generation; /* These are capabilities which our peer advertised. */ __u8 ecn_capable:1, /* Can peer do ECN? */ diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index e7728bc14ccf..2c5d2b4d5d1e 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -117,7 +117,8 @@ void sctp_tsnmap_free(struct sctp_tsnmap *map); int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn); /* Mark this TSN as seen. */ -int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn); +int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn, + struct sctp_transport *trans); /* Mark this TSN and all lower as seen. */ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5bc9ab161b37..b16517ee1aaf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -271,6 +271,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a */ asoc->peer.sack_needed = 1; asoc->peer.sack_cnt = 0; + asoc->peer.sack_generation = 1; /* Assume that the peer will tell us if he recognizes ASCONF * as part of INIT exchange. diff --git a/net/sctp/output.c b/net/sctp/output.c index f1b7d4bb591e..6ae47acaaec6 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -248,6 +248,11 @@ static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, /* If the SACK timer is running, we have a pending SACK */ if (timer_pending(timer)) { struct sctp_chunk *sack; + + if (pkt->transport->sack_generation != + pkt->transport->asoc->peer.sack_generation) + return retval; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (sack) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index a85eeeb55dd0..b6de71efb140 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -734,8 +734,10 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) int len; __u32 ctsn; __u16 num_gabs, num_dup_tsns; + struct sctp_association *aptr = (struct sctp_association *)asoc; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; + struct sctp_transport *trans; memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); @@ -805,6 +807,20 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns, sctp_tsnmap_get_dups(map)); + /* Once we have a sack generated, check to see what our sack + * generation is, if its 0, reset the transports to 0, and reset + * the association generation to 1 + * + * The idea is that zero is never used as a valid generation for the + * association so no transport will match after a wrap event like this, + * Until the next sack + */ + if (++aptr->peer.sack_generation == 0) { + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) + trans->sack_generation = 0; + aptr->peer.sack_generation = 1; + } nodata: return retval; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c96d1a81cf42..8716da1a8592 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1268,7 +1268,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_REPORT_TSN: /* Record the arrival of a TSN. */ error = sctp_tsnmap_mark(&asoc->peer.tsn_map, - cmd->obj.u32); + cmd->obj.u32, NULL); break; case SCTP_CMD_REPORT_FWDTSN: diff --git a/net/sctp/transport.c b/net/sctp/transport.c index b026ba0c6992..1dcceb6e0ce6 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -68,6 +68,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); memset(&peer->saddr, 0, sizeof(union sctp_addr)); + peer->sack_generation = 0; + /* From 6.3.1 RTO Calculation: * * C1) Until an RTT measurement has been made for a packet sent to the diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index f1e40cebc981..b5fb7c409023 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -114,7 +114,8 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn) /* Mark this TSN as seen. */ -int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) +int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, + struct sctp_transport *trans) { u16 gap; @@ -133,6 +134,9 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) */ map->max_tsn_seen++; map->cumulative_tsn_ack_point++; + if (trans) + trans->sack_generation = + trans->asoc->peer.sack_generation; map->base_tsn++; } else { /* Either we already have a gap, or about to record a gap, so diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8a84017834c2..33d894776192 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -715,7 +715,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, * can mark it as received so the tsn_map is updated correctly. */ if (sctp_tsnmap_mark(&asoc->peer.tsn_map, - ntohl(chunk->subh.data_hdr->tsn))) + ntohl(chunk->subh.data_hdr->tsn), + chunk->transport)) goto fail_mark; /* First calculate the padding, so we don't inadvertently diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index f2d1de7f2ffb..f5a6a4f4faf7 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1051,7 +1051,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, if (chunk && (freed >= needed)) { __u32 tsn; tsn = ntohl(chunk->subh.data_hdr->tsn); - sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn); + sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); sctp_ulpq_tail_data(ulpq, chunk, gfp); sctp_ulpq_partial_delivery(ulpq, chunk, gfp); -- cgit v1.2.3