diff options
author | Linus Torvalds | 2016-05-17 16:26:30 -0700 |
---|---|---|
committer | Linus Torvalds | 2016-05-17 16:26:30 -0700 |
commit | a7fd20d1c476af4563e66865213474a2f9f473a4 (patch) | |
tree | fb1399e2f82842450245fb058a8fb23c52865f43 /net | |
parent | b80fed9595513384424cd141923c9161c4b5021b (diff) | |
parent | 917fa5353da05e8a0045b8acacba8d50400d5b12 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Support SPI based w5100 devices, from Akinobu Mita.
2) Partial Segmentation Offload, from Alexander Duyck.
3) Add GMAC4 support to stmmac driver, from Alexandre TORGUE.
4) Allow cls_flower stats offload, from Amir Vadai.
5) Implement bpf blinding, from Daniel Borkmann.
6) Optimize _ASYNC_ bit twiddling on sockets, unless the socket is
actually using FASYNC these atomics are superfluous. From Eric
Dumazet.
7) Run TCP more preemptibly, also from Eric Dumazet.
8) Support LED blinking, EEPROM dumps, and rxvlan offloading in mlx5e
driver, from Gal Pressman.
9) Allow creating ppp devices via rtnetlink, from Guillaume Nault.
10) Improve BPF usage documentation, from Jesper Dangaard Brouer.
11) Support tunneling offloads in qed, from Manish Chopra.
12) aRFS offloading in mlx5e, from Maor Gottlieb.
13) Add RFS and RPS support to SCTP protocol, from Marcelo Ricardo
Leitner.
14) Add MSG_EOR support to TCP, this allows controlling packet
coalescing on application record boundaries for more accurate
socket timestamp sampling. From Martin KaFai Lau.
15) Fix alignment of 64-bit netlink attributes across the board, from
Nicolas Dichtel.
16) Per-vlan stats in bridging, from Nikolay Aleksandrov.
17) Several conversions of drivers to ethtool ksettings, from Philippe
Reynes.
18) Checksum neutral ILA in ipv6, from Tom Herbert.
19) Factorize all of the various marvell dsa drivers into one, from
Vivien Didelot
20) Add VF support to qed driver, from Yuval Mintz"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1649 commits)
Revert "phy dp83867: Fix compilation with CONFIG_OF_MDIO=m"
Revert "phy dp83867: Make rgmii parameters optional"
r8169: default to 64-bit DMA on recent PCIe chips
phy dp83867: Make rgmii parameters optional
phy dp83867: Fix compilation with CONFIG_OF_MDIO=m
bpf: arm64: remove callee-save registers use for tmp registers
asix: Fix offset calculation in asix_rx_fixup() causing slow transmissions
switchdev: pass pointer to fib_info instead of copy
net_sched: close another race condition in tcf_mirred_release()
tipc: fix nametable publication field in nl compat
drivers: net: Don't print unpopulated net_device name
qed: add support for dcbx.
ravb: Add missing free_irq() calls to ravb_close()
qed: Remove a stray tab
net: ethernet: fec-mpc52xx: use phy_ethtool_{get|set}_link_ksettings
net: ethernet: fec-mpc52xx: use phydev from struct net_device
bpf, doc: fix typo on bpf_asm descriptions
stmmac: hardware TX COE doesn't work when force_thresh_dma_mode is set
net: ethernet: fs-enet: use phy_ethtool_{get|set}_link_ksettings
net: ethernet: fs-enet: use phydev from struct net_device
...
Diffstat (limited to 'net')
365 files changed, 13259 insertions, 7855 deletions
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h index d16bb4b14aa1..97ecc27aeca6 100644 --- a/net/6lowpan/6lowpan_i.h +++ b/net/6lowpan/6lowpan_i.h @@ -3,6 +3,15 @@ #include <linux/netdevice.h> +#include <net/6lowpan.h> + +/* caller need to be sure it's dev->type is ARPHRD_6LOWPAN */ +static inline bool lowpan_is_ll(const struct net_device *dev, + enum lowpan_lltypes lltype) +{ + return lowpan_dev(dev)->lltype == lltype; +} + #ifdef CONFIG_6LOWPAN_DEBUGFS int lowpan_dev_debugfs_init(struct net_device *dev); void lowpan_dev_debugfs_exit(struct net_device *dev); diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index 34e44c0c0836..7a240b3eaed1 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -27,11 +27,11 @@ int lowpan_register_netdevice(struct net_device *dev, dev->mtu = IPV6_MIN_MTU; dev->priv_flags |= IFF_NO_QUEUE; - lowpan_priv(dev)->lltype = lltype; + lowpan_dev(dev)->lltype = lltype; - spin_lock_init(&lowpan_priv(dev)->ctx.lock); + spin_lock_init(&lowpan_dev(dev)->ctx.lock); for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) - lowpan_priv(dev)->ctx.table[i].id = i; + lowpan_dev(dev)->ctx.table[i].id = i; ret = register_netdevice(dev); if (ret < 0) @@ -85,7 +85,7 @@ static int lowpan_event(struct notifier_block *unused, case NETDEV_DOWN: for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE, - &lowpan_priv(dev)->ctx.table[i].flags); + &lowpan_dev(dev)->ctx.table[i].flags); break; default: return NOTIFY_DONE; diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c index 0793a8157472..acbaa3db493b 100644 --- a/net/6lowpan/debugfs.c +++ b/net/6lowpan/debugfs.c @@ -172,7 +172,7 @@ static const struct file_operations lowpan_ctx_pfx_fops = { static int lowpan_dev_debugfs_ctx_init(struct net_device *dev, struct dentry *ctx, u8 id) { - struct lowpan_priv *lpriv = lowpan_priv(dev); + struct lowpan_dev *ldev = lowpan_dev(dev); struct dentry *dentry, *root; char buf[32]; @@ -185,25 +185,25 @@ static int lowpan_dev_debugfs_ctx_init(struct net_device *dev, return -EINVAL; dentry = debugfs_create_file("active", 0644, root, - &lpriv->ctx.table[id], + &ldev->ctx.table[id], &lowpan_ctx_flag_active_fops); if (!dentry) return -EINVAL; dentry = debugfs_create_file("compression", 0644, root, - &lpriv->ctx.table[id], + &ldev->ctx.table[id], &lowpan_ctx_flag_c_fops); if (!dentry) return -EINVAL; dentry = debugfs_create_file("prefix", 0644, root, - &lpriv->ctx.table[id], + &ldev->ctx.table[id], &lowpan_ctx_pfx_fops); if (!dentry) return -EINVAL; dentry = debugfs_create_file("prefix_len", 0644, root, - &lpriv->ctx.table[id], + &ldev->ctx.table[id], &lowpan_ctx_plen_fops); if (!dentry) return -EINVAL; @@ -247,21 +247,21 @@ static const struct file_operations lowpan_context_fops = { int lowpan_dev_debugfs_init(struct net_device *dev) { - struct lowpan_priv *lpriv = lowpan_priv(dev); + struct lowpan_dev *ldev = lowpan_dev(dev); struct dentry *contexts, *dentry; int ret, i; /* creating the root */ - lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs); - if (!lpriv->iface_debugfs) + ldev->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs); + if (!ldev->iface_debugfs) goto fail; - contexts = debugfs_create_dir("contexts", lpriv->iface_debugfs); + contexts = debugfs_create_dir("contexts", ldev->iface_debugfs); if (!contexts) goto remove_root; dentry = debugfs_create_file("show", 0644, contexts, - &lowpan_priv(dev)->ctx, + &lowpan_dev(dev)->ctx, &lowpan_context_fops); if (!dentry) goto remove_root; @@ -282,7 +282,7 @@ fail: void lowpan_dev_debugfs_exit(struct net_device *dev) { - debugfs_remove_recursive(lowpan_priv(dev)->iface_debugfs); + debugfs_remove_recursive(lowpan_dev(dev)->iface_debugfs); } int __init lowpan_debugfs_init(void) diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 99bb22aea346..8501dd532fe1 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -53,9 +53,6 @@ #include <net/6lowpan.h> #include <net/ipv6.h> -/* special link-layer handling */ -#include <net/mac802154.h> - #include "6lowpan_i.h" #include "nhc.h" @@ -148,35 +145,25 @@ (((a)->s6_addr16[6]) == 0) && \ (((a)->s6_addr[14]) == 0)) +#define lowpan_is_linklocal_zero_padded(a) \ + (!(hdr->saddr.s6_addr[1] & 0x3f) && \ + !hdr->saddr.s6_addr16[1] && \ + !hdr->saddr.s6_addr32[1]) + #define LOWPAN_IPHC_CID_DCI(cid) (cid & 0x0f) #define LOWPAN_IPHC_CID_SCI(cid) ((cid & 0xf0) >> 4) -static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, - const void *lladdr) -{ - /* fe:80::XXXX:XXXX:XXXX:XXXX - * \_________________/ - * hwaddr - */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - memcpy(&ipaddr->s6_addr[8], lladdr, EUI64_ADDR_LEN); - /* second bit-flip (Universe/Local) - * is done according RFC2464 - */ - ipaddr->s6_addr[8] ^= 0x02; -} - -static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr, - const void *lladdr) +static inline void +lowpan_iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr, + const void *lladdr) { const struct ieee802154_addr *addr = lladdr; - u8 eui64[EUI64_ADDR_LEN] = { }; + u8 eui64[EUI64_ADDR_LEN]; switch (addr->mode) { case IEEE802154_ADDR_LONG: ieee802154_le64_to_be64(eui64, &addr->extended_addr); - iphc_uncompress_eui64_lladdr(ipaddr, eui64); + lowpan_iphc_uncompress_eui64_lladdr(ipaddr, eui64); break; case IEEE802154_ADDR_SHORT: /* fe:80::ff:fe00:XXXX @@ -202,7 +189,7 @@ static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr, static struct lowpan_iphc_ctx * lowpan_iphc_ctx_get_by_id(const struct net_device *dev, u8 id) { - struct lowpan_iphc_ctx *ret = &lowpan_priv(dev)->ctx.table[id]; + struct lowpan_iphc_ctx *ret = &lowpan_dev(dev)->ctx.table[id]; if (!lowpan_iphc_ctx_is_active(ret)) return NULL; @@ -214,7 +201,7 @@ static struct lowpan_iphc_ctx * lowpan_iphc_ctx_get_by_addr(const struct net_device *dev, const struct in6_addr *addr) { - struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table; + struct lowpan_iphc_ctx *table = lowpan_dev(dev)->ctx.table; struct lowpan_iphc_ctx *ret = NULL; struct in6_addr addr_pfx; u8 addr_plen; @@ -258,7 +245,7 @@ static struct lowpan_iphc_ctx * lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev, const struct in6_addr *addr) { - struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table; + struct lowpan_iphc_ctx *table = lowpan_dev(dev)->ctx.table; struct lowpan_iphc_ctx *ret = NULL; struct in6_addr addr_mcast, network_pfx = {}; int i; @@ -296,9 +283,10 @@ lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev, * * address_mode is the masked value for sam or dam value */ -static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev, - struct in6_addr *ipaddr, u8 address_mode, - const void *lladdr) +static int lowpan_iphc_uncompress_addr(struct sk_buff *skb, + const struct net_device *dev, + struct in6_addr *ipaddr, + u8 address_mode, const void *lladdr) { bool fail; @@ -327,12 +315,12 @@ static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev, case LOWPAN_IPHC_SAM_11: case LOWPAN_IPHC_DAM_11: fail = false; - switch (lowpan_priv(dev)->lltype) { + switch (lowpan_dev(dev)->lltype) { case LOWPAN_LLTYPE_IEEE802154: - iphc_uncompress_802154_lladdr(ipaddr, lladdr); + lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr); break; default: - iphc_uncompress_eui64_lladdr(ipaddr, lladdr); + lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr); break; } break; @@ -355,11 +343,11 @@ static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev, /* Uncompress address function for source context * based address(non-multicast). */ -static int uncompress_ctx_addr(struct sk_buff *skb, - const struct net_device *dev, - const struct lowpan_iphc_ctx *ctx, - struct in6_addr *ipaddr, u8 address_mode, - const void *lladdr) +static int lowpan_iphc_uncompress_ctx_addr(struct sk_buff *skb, + const struct net_device *dev, + const struct lowpan_iphc_ctx *ctx, + struct in6_addr *ipaddr, + u8 address_mode, const void *lladdr) { bool fail; @@ -388,12 +376,12 @@ static int uncompress_ctx_addr(struct sk_buff *skb, case LOWPAN_IPHC_SAM_11: case LOWPAN_IPHC_DAM_11: fail = false; - switch (lowpan_priv(dev)->lltype) { + switch (lowpan_dev(dev)->lltype) { case LOWPAN_LLTYPE_IEEE802154: - iphc_uncompress_802154_lladdr(ipaddr, lladdr); + lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr); break; default: - iphc_uncompress_eui64_lladdr(ipaddr, lladdr); + lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr); break; } ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen); @@ -652,22 +640,24 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, } if (iphc1 & LOWPAN_IPHC_SAC) { - spin_lock_bh(&lowpan_priv(dev)->ctx.lock); + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_SCI(cid)); if (!ci) { - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); return -EINVAL; } pr_debug("SAC bit is set. Handle context based source address.\n"); - err = uncompress_ctx_addr(skb, dev, ci, &hdr.saddr, - iphc1 & LOWPAN_IPHC_SAM_MASK, saddr); - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + err = lowpan_iphc_uncompress_ctx_addr(skb, dev, ci, &hdr.saddr, + iphc1 & LOWPAN_IPHC_SAM_MASK, + saddr); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); } else { /* Source address uncompression */ pr_debug("source address stateless compression\n"); - err = uncompress_addr(skb, dev, &hdr.saddr, - iphc1 & LOWPAN_IPHC_SAM_MASK, saddr); + err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.saddr, + iphc1 & LOWPAN_IPHC_SAM_MASK, + saddr); } /* Check on error of previous branch */ @@ -676,10 +666,10 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, switch (iphc1 & (LOWPAN_IPHC_M | LOWPAN_IPHC_DAC)) { case LOWPAN_IPHC_M | LOWPAN_IPHC_DAC: - spin_lock_bh(&lowpan_priv(dev)->ctx.lock); + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid)); if (!ci) { - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); return -EINVAL; } @@ -688,7 +678,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, err = lowpan_uncompress_multicast_ctx_daddr(skb, ci, &hdr.daddr, iphc1 & LOWPAN_IPHC_DAM_MASK); - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); break; case LOWPAN_IPHC_M: /* multicast */ @@ -696,22 +686,24 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, iphc1 & LOWPAN_IPHC_DAM_MASK); break; case LOWPAN_IPHC_DAC: - spin_lock_bh(&lowpan_priv(dev)->ctx.lock); + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid)); if (!ci) { - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); return -EINVAL; } /* Destination address context based uncompression */ pr_debug("DAC bit is set. Handle context based destination address.\n"); - err = uncompress_ctx_addr(skb, dev, ci, &hdr.daddr, - iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + err = lowpan_iphc_uncompress_ctx_addr(skb, dev, ci, &hdr.daddr, + iphc1 & LOWPAN_IPHC_DAM_MASK, + daddr); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); break; default: - err = uncompress_addr(skb, dev, &hdr.daddr, - iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); + err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.daddr, + iphc1 & LOWPAN_IPHC_DAM_MASK, + daddr); pr_debug("dest: stateless compression mode %d dest %pI6c\n", iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr); break; @@ -731,7 +723,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, return err; } - switch (lowpan_priv(dev)->lltype) { + switch (lowpan_dev(dev)->lltype) { case LOWPAN_LLTYPE_IEEE802154: if (lowpan_802154_cb(skb)->d_size) hdr.payload_len = htons(lowpan_802154_cb(skb)->d_size - @@ -1028,7 +1020,7 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, skb->data, skb->len); ipv6_daddr_type = ipv6_addr_type(&hdr->daddr); - spin_lock_bh(&lowpan_priv(dev)->ctx.lock); + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); if (ipv6_daddr_type & IPV6_ADDR_MULTICAST) dci = lowpan_iphc_ctx_get_by_mcast_addr(dev, &hdr->daddr); else @@ -1037,15 +1029,15 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, memcpy(&dci_entry, dci, sizeof(*dci)); cid |= dci->id; } - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); - spin_lock_bh(&lowpan_priv(dev)->ctx.lock); + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); sci = lowpan_iphc_ctx_get_by_addr(dev, &hdr->saddr); if (sci) { memcpy(&sci_entry, sci, sizeof(*sci)); cid |= (sci->id << 4); } - spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); + spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); /* if cid is zero it will be compressed */ if (cid) { @@ -1101,7 +1093,8 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, true); iphc1 |= LOWPAN_IPHC_SAC; } else { - if (ipv6_saddr_type & IPV6_ADDR_LINKLOCAL) { + if (ipv6_saddr_type & IPV6_ADDR_LINKLOCAL && + lowpan_is_linklocal_zero_padded(hdr->saddr)) { iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->saddr, saddr, true); @@ -1135,7 +1128,8 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, false); iphc1 |= LOWPAN_IPHC_DAC; } else { - if (ipv6_daddr_type & IPV6_ADDR_LINKLOCAL) { + if (ipv6_daddr_type & IPV6_ADDR_LINKLOCAL && + lowpan_is_linklocal_zero_padded(hdr->daddr)) { iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->daddr, daddr, false); diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c index 69537a2eaab1..225d91906dfa 100644 --- a/net/6lowpan/nhc_udp.c +++ b/net/6lowpan/nhc_udp.c @@ -91,7 +91,7 @@ static int udp_uncompress(struct sk_buff *skb, size_t needed) * here, we obtain the hint from the remaining size of the * frame */ - switch (lowpan_priv(skb->dev)->lltype) { + switch (lowpan_dev(skb->dev)->lltype) { case LOWPAN_LLTYPE_IEEE802154: if (lowpan_802154_cb(skb)->d_size) uh.len = htons(lowpan_802154_cb(skb)->d_size - diff --git a/net/Kconfig b/net/Kconfig index a8934d8c8fda..ff40562a782c 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -236,6 +236,7 @@ source "net/mpls/Kconfig" source "net/hsr/Kconfig" source "net/switchdev/Kconfig" source "net/l3mdev/Kconfig" +source "net/qrtr/Kconfig" config RPS bool @@ -288,14 +289,17 @@ config BQL config BPF_JIT bool "enable BPF Just In Time compiler" - depends on HAVE_BPF_JIT + depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT depends on MODULES ---help--- Berkeley Packet Filter filtering capabilities are normally handled by an interpreter. This option allows kernel to generate a native code when filter is loaded in memory. This should speedup - packet sniffing (libpcap/tcpdump). Note : Admin should enable - this feature changing /proc/sys/net/core/bpf_jit_enable + packet sniffing (libpcap/tcpdump). + + Note, admin should enable this feature changing: + /proc/sys/net/core/bpf_jit_enable + /proc/sys/net/core/bpf_jit_harden (optional) config NET_FLOW_LIMIT bool @@ -418,6 +422,14 @@ config MAY_USE_DEVLINK endif # if NET -# Used by archs to tell that they support BPF_JIT -config HAVE_BPF_JIT +# Used by archs to tell that they support BPF JIT compiler plus which flavour. +# Only one of the two can be selected for a specific arch since eBPF JIT supersedes +# the cBPF JIT. + +# Classic BPF JIT (cBPF) +config HAVE_CBPF_JIT + bool + +# Extended BPF JIT (eBPF) +config HAVE_EBPF_JIT bool diff --git a/net/Makefile b/net/Makefile index 81d14119eab5..bdd14553a774 100644 --- a/net/Makefile +++ b/net/Makefile @@ -78,3 +78,4 @@ endif ifneq ($(CONFIG_NET_L3_MASTER_DEV),) obj-y += l3mdev/ endif +obj-$(CONFIG_QRTR) += qrtr/ diff --git a/net/atm/lec.c b/net/atm/lec.c index cd3b37989057..e574a7e9db6f 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -194,7 +194,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb) static void lec_tx_timeout(struct net_device *dev) { pr_info("%s\n", dev->name); - dev->trans_start = jiffies; + netif_trans_update(dev); netif_wake_queue(dev); } @@ -324,7 +324,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, out: if (entry) lec_arp_put(entry); - dev->trans_start = jiffies; + netif_trans_update(dev); return NETDEV_TX_OK; } diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index cb2d1b9b0340..7f98a9d39883 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -32,6 +32,7 @@ #include <linux/jiffies.h> #include <linux/list.h> #include <linux/kref.h> +#include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/pkt_sched.h> #include <linux/printk.h> @@ -175,71 +176,107 @@ unlock: } /** - * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to - * exclude the removed interface + * batadv_iv_ogm_drop_bcast_own_entry - drop section of bcast_own * @orig_node: the orig_node that has to be changed * @max_if_num: the current amount of interfaces * @del_if_num: the index of the interface being removed - * - * Return: 0 on success, a negative error code otherwise. */ -static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) +static void +batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, + int max_if_num, int del_if_num) { - int ret = -ENOMEM; - size_t chunk_size, if_offset; - void *data_ptr = NULL; - - spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); + size_t chunk_size; + size_t if_offset; + void *data_ptr; - /* last interface was removed */ - if (max_if_num == 0) - goto free_bcast_own; + lockdep_assert_held(&orig_node->bat_iv.ogm_cnt_lock); chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS; data_ptr = kmalloc_array(max_if_num, chunk_size, GFP_ATOMIC); if (!data_ptr) - goto unlock; + /* use old buffer when new one could not be allocated */ + data_ptr = orig_node->bat_iv.bcast_own; /* copy first part */ - memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size); + memmove(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size); /* copy second part */ if_offset = (del_if_num + 1) * chunk_size; - memcpy((char *)data_ptr + del_if_num * chunk_size, - (uint8_t *)orig_node->bat_iv.bcast_own + if_offset, - (max_if_num - del_if_num) * chunk_size); + memmove((char *)data_ptr + del_if_num * chunk_size, + (uint8_t *)orig_node->bat_iv.bcast_own + if_offset, + (max_if_num - del_if_num) * chunk_size); -free_bcast_own: - kfree(orig_node->bat_iv.bcast_own); - orig_node->bat_iv.bcast_own = data_ptr; + /* bcast_own was shrunk down in new buffer; free old one */ + if (orig_node->bat_iv.bcast_own != data_ptr) { + kfree(orig_node->bat_iv.bcast_own); + orig_node->bat_iv.bcast_own = data_ptr; + } +} + +/** + * batadv_iv_ogm_drop_bcast_own_sum_entry - drop section of bcast_own_sum + * @orig_node: the orig_node that has to be changed + * @max_if_num: the current amount of interfaces + * @del_if_num: the index of the interface being removed + */ +static void +batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, + int max_if_num, int del_if_num) +{ + size_t if_offset; + void *data_ptr; - if (max_if_num == 0) - goto free_own_sum; + lockdep_assert_held(&orig_node->bat_iv.ogm_cnt_lock); data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC); - if (!data_ptr) { - kfree(orig_node->bat_iv.bcast_own); - goto unlock; - } + if (!data_ptr) + /* use old buffer when new one could not be allocated */ + data_ptr = orig_node->bat_iv.bcast_own_sum; - memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum, - del_if_num * sizeof(u8)); + memmove(data_ptr, orig_node->bat_iv.bcast_own_sum, + del_if_num * sizeof(u8)); if_offset = (del_if_num + 1) * sizeof(u8); - memcpy((char *)data_ptr + del_if_num * sizeof(u8), - orig_node->bat_iv.bcast_own_sum + if_offset, - (max_if_num - del_if_num) * sizeof(u8)); + memmove((char *)data_ptr + del_if_num * sizeof(u8), + orig_node->bat_iv.bcast_own_sum + if_offset, + (max_if_num - del_if_num) * sizeof(u8)); + + /* bcast_own_sum was shrunk down in new buffer; free old one */ + if (orig_node->bat_iv.bcast_own_sum != data_ptr) { + kfree(orig_node->bat_iv.bcast_own_sum); + orig_node->bat_iv.bcast_own_sum = data_ptr; + } +} -free_own_sum: - kfree(orig_node->bat_iv.bcast_own_sum); - orig_node->bat_iv.bcast_own_sum = data_ptr; +/** + * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to + * exclude the removed interface + * @orig_node: the orig_node that has to be changed + * @max_if_num: the current amount of interfaces + * @del_if_num: the index of the interface being removed + * + * Return: 0 on success, a negative error code otherwise. + */ +static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, + int max_if_num, int del_if_num) +{ + spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); + + if (max_if_num == 0) { + kfree(orig_node->bat_iv.bcast_own); + kfree(orig_node->bat_iv.bcast_own_sum); + orig_node->bat_iv.bcast_own = NULL; + orig_node->bat_iv.bcast_own_sum = NULL; + } else { + batadv_iv_ogm_drop_bcast_own_entry(orig_node, max_if_num, + del_if_num); + batadv_iv_ogm_drop_bcast_own_sum_entry(orig_node, max_if_num, + del_if_num); + } - ret = 0; -unlock: spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); - return ret; + return 0; } /** @@ -644,18 +681,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, unsigned char *skb_buff; unsigned int skb_size; - if (!kref_get_unless_zero(&if_incoming->refcount)) - return; - - if (!kref_get_unless_zero(&if_outgoing->refcount)) - goto out_free_incoming; - /* own packet should always be scheduled */ if (!own_packet) { if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "batman packet queue full\n"); - goto out_free_outgoing; + return; } } @@ -681,6 +712,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, forw_packet_aggr->packet_len = packet_len; memcpy(skb_buff, packet_buff, packet_len); + kref_get(&if_incoming->refcount); + kref_get(&if_outgoing->refcount); forw_packet_aggr->own = own_packet; forw_packet_aggr->if_incoming = if_incoming; forw_packet_aggr->if_outgoing = if_outgoing; @@ -710,10 +743,6 @@ out_free_forw_packet: out_nomem: if (!own_packet) atomic_inc(&bat_priv->batman_queue_left); -out_free_outgoing: - batadv_hardif_put(if_outgoing); -out_free_incoming: - batadv_hardif_put(if_incoming); } /* aggregate a new packet into the existing ogm packet */ @@ -950,9 +979,15 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) { if (tmp_hard_iface->soft_iface != hard_iface->soft_iface) continue; + + if (!kref_get_unless_zero(&tmp_hard_iface->refcount)) + continue; + batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, hard_iface, tmp_hard_iface, 1, send_time); + + batadv_hardif_put(tmp_hard_iface); } rcu_read_unlock(); @@ -1133,13 +1168,13 @@ out: * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * - * Return: 1 if the link can be considered bidirectional, 0 otherwise + * Return: true if the link can be considered bidirectional, false otherwise */ -static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh_node, - struct batadv_ogm_packet *batadv_ogm_packet, - struct batadv_hard_iface *if_incoming, - struct batadv_hard_iface *if_outgoing) +static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *batadv_ogm_packet, + struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; @@ -1147,9 +1182,10 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, u8 total_count; u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; - int tq_asym_penalty, inv_asym_penalty, if_num, ret = 0; + int tq_asym_penalty, inv_asym_penalty, if_num; unsigned int combined_tq; int tq_iface_penalty; + bool ret = false; /* find corresponding one hop neighbor */ rcu_read_lock(); @@ -1261,7 +1297,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, * consider it bidirectional */ if (batadv_ogm_packet->tq >= BATADV_TQ_TOTAL_BIDRECT_LIMIT) - ret = 1; + ret = true; out: if (neigh_node) @@ -1290,9 +1326,9 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, struct batadv_orig_ifinfo *orig_ifinfo = NULL; struct batadv_neigh_node *neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; - int is_dup; + bool is_dup; s32 seq_diff; - int need_update = 0; + bool need_update = false; int set_mark; enum batadv_dup_status ret = BATADV_NO_DUP; u32 seqno = ntohl(batadv_ogm_packet->seqno); @@ -1402,7 +1438,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, struct sk_buff *skb_priv; struct ethhdr *ethhdr; u8 *prev_sender; - int is_bidirect; + bool is_bidirect; /* create a private copy of the skb, as some functions change tq value * and/or flags. @@ -1730,8 +1766,13 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (hard_iface->soft_iface != bat_priv->soft_iface) continue; + if (!kref_get_unless_zero(&hard_iface->refcount)) + continue; + batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node, if_incoming, hard_iface); + + batadv_hardif_put(hard_iface); } rcu_read_unlock(); @@ -1829,9 +1870,8 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, int batman_count = 0; u32 i; - seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n", - "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE, - "Nexthop", "outgoingIF", "Potential nexthops"); + seq_puts(seq, + " Originator last-seen (#/255) Nexthop [outgoingIF]: Potential nexthops ...\n"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1911,8 +1951,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv, struct batadv_hard_iface *hard_iface; int batman_count = 0; - seq_printf(seq, " %10s %-13s %s\n", - "IF", "Neighbor", "last-seen"); + seq_puts(seq, " IF Neighbor last-seen\n"); rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 4026f198a734..3ff8bd1b7bdc 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -162,8 +162,8 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv, struct batadv_hard_iface *hard_iface; int batman_count = 0; - seq_printf(seq, " %-15s %s (%11s) [%10s]\n", "Neighbor", - "last-seen", "throughput", "IF"); + seq_puts(seq, + " Neighbor last-seen ( throughput) [ IF]\n"); rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { @@ -202,9 +202,8 @@ static void batadv_v_orig_print(struct batadv_priv *bat_priv, int batman_count = 0; u32 i; - seq_printf(seq, " %-15s %s (%11s) %17s [%10s]: %20s ...\n", - "Originator", "last-seen", "throughput", "Nexthop", - "outgoingIF", "Potential nexthops"); + seq_puts(seq, + " Originator last-seen ( throughput) Nexthop [outgoingIF]: Potential nexthops ...\n"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index d9bcbe6e7d65..473ebb9a0e73 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -26,6 +26,7 @@ #include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/random.h> @@ -176,6 +177,9 @@ static void batadv_v_ogm_send(struct work_struct *work) if (hard_iface->soft_iface != bat_priv->soft_iface) continue; + if (!kref_get_unless_zero(&hard_iface->refcount)) + continue; + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n", ogm_packet->orig, ntohl(ogm_packet->seqno), @@ -185,10 +189,13 @@ static void batadv_v_ogm_send(struct work_struct *work) /* this skb gets consumed by batadv_v_ogm_send_to_if() */ skb_tmp = skb_clone(skb, GFP_ATOMIC); - if (!skb_tmp) + if (!skb_tmp) { + batadv_hardif_put(hard_iface); break; + } batadv_v_ogm_send_to_if(skb_tmp, hard_iface); + batadv_hardif_put(hard_iface); } rcu_read_unlock(); @@ -234,73 +241,6 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) } /** - * batadv_v_ogm_orig_update - update the originator status based on the received - * OGM - * @bat_priv: the bat priv with all the soft interface information - * @orig_node: the originator to update - * @neigh_node: the neighbour the OGM has been received from (to update) - * @ogm2: the received OGM - * @if_outgoing: the interface where this OGM is going to be forwarded through - */ -static void -batadv_v_ogm_orig_update(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node, - const struct batadv_ogm2_packet *ogm2, - struct batadv_hard_iface *if_outgoing) -{ - struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL; - struct batadv_neigh_node *router = NULL; - s32 neigh_seq_diff; - u32 neigh_last_seqno; - u32 router_last_seqno; - u32 router_throughput, neigh_throughput; - - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Searching and updating originator entry of received packet\n"); - - /* if this neighbor already is our next hop there is nothing - * to change - */ - router = batadv_orig_router_get(orig_node, if_outgoing); - if (router == neigh_node) - goto out; - - /* don't consider neighbours with worse throughput. - * also switch route if this seqno is BATADV_V_MAX_ORIGDIFF newer than - * the last received seqno from our best next hop. - */ - if (router) { - router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); - neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); - - /* if these are not allocated, something is wrong. */ - if (!router_ifinfo || !neigh_ifinfo) - goto out; - - neigh_last_seqno = neigh_ifinfo->bat_v.last_seqno; - router_last_seqno = router_ifinfo->bat_v.last_seqno; - neigh_seq_diff = neigh_last_seqno - router_last_seqno; - router_throughput = router_ifinfo->bat_v.throughput; - neigh_throughput = neigh_ifinfo->bat_v.throughput; - - if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) && - (router_throughput >= neigh_throughput)) - goto out; - } - - batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node); - -out: - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (neigh_ifinfo) - batadv_neigh_ifinfo_put(neigh_ifinfo); - if (router) - batadv_neigh_node_put(router); -} - -/** * batadv_v_forward_penalty - apply a penalty to the throughput metric forwarded * with B.A.T.M.A.N. V OGMs * @bat_priv: the bat priv with all the soft interface information @@ -347,10 +287,12 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv, } /** - * batadv_v_ogm_forward - forward an OGM to the given outgoing interface + * batadv_v_ogm_forward - check conditions and forward an OGM to the given + * outgoing interface * @bat_priv: the bat priv with all the soft interface information * @ogm_received: previously received OGM to be forwarded - * @throughput: throughput to announce, may vary per outgoing interface + * @orig_node: the originator which has been updated + * @neigh_node: the neigh_node through with the OGM has been received * @if_incoming: the interface on which this OGM was received on * @if_outgoing: the interface to which the OGM has to be forwarded to * @@ -359,28 +301,57 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv, */ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv, const struct batadv_ogm2_packet *ogm_received, - u32 throughput, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { + struct batadv_neigh_ifinfo *neigh_ifinfo = NULL; + struct batadv_orig_ifinfo *orig_ifinfo = NULL; + struct batadv_neigh_node *router = NULL; struct batadv_ogm2_packet *ogm_forward; unsigned char *skb_buff; struct sk_buff *skb; size_t packet_len; u16 tvlv_len; + /* only forward for specific interfaces, not for the default one. */ + if (if_outgoing == BATADV_IF_DEFAULT) + goto out; + + orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); + if (!orig_ifinfo) + goto out; + + /* acquire possibly updated router */ + router = batadv_orig_router_get(orig_node, if_outgoing); + + /* strict rule: forward packets coming from the best next hop only */ + if (neigh_node != router) + goto out; + + /* don't forward the same seqno twice on one interface */ + if (orig_ifinfo->last_seqno_forwarded == ntohl(ogm_received->seqno)) + goto out; + + orig_ifinfo->last_seqno_forwarded = ntohl(ogm_received->seqno); + if (ogm_received->ttl <= 1) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n"); - return; + goto out; } + neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!neigh_ifinfo) + goto out; + tvlv_len = ntohs(ogm_received->tvlv_len); packet_len = BATADV_OGM2_HLEN + tvlv_len; skb = netdev_alloc_skb_ip_align(if_outgoing->net_dev, ETH_HLEN + packet_len); if (!skb) - return; + goto out; skb_reserve(skb, ETH_HLEN); skb_buff = skb_put(skb, packet_len); @@ -388,15 +359,23 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv, /* apply forward penalty */ ogm_forward = (struct batadv_ogm2_packet *)skb_buff; - ogm_forward->throughput = htonl(throughput); + ogm_forward->throughput = htonl(neigh_ifinfo->bat_v.throughput); ogm_forward->ttl--; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding OGM2 packet on %s: throughput %u, ttl %u, received via %s\n", - if_outgoing->net_dev->name, throughput, ogm_forward->ttl, - if_incoming->net_dev->name); + if_outgoing->net_dev->name, ntohl(ogm_forward->throughput), + ogm_forward->ttl, if_incoming->net_dev->name); batadv_v_ogm_send_to_if(skb, if_outgoing); + +out: + if (orig_ifinfo) + batadv_orig_ifinfo_put(orig_ifinfo); + if (router) + batadv_neigh_node_put(router); + if (neigh_ifinfo) + batadv_neigh_ifinfo_put(neigh_ifinfo); } /** @@ -493,8 +472,10 @@ out: * @neigh_node: the neigh_node through with the OGM has been received * @if_incoming: the interface where this packet was received * @if_outgoing: the interface for which the packet should be considered + * + * Return: true if the packet should be forwarded, false otherwise */ -static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv, +static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv, const struct ethhdr *ethhdr, const struct batadv_ogm2_packet *ogm2, struct batadv_orig_node *orig_node, @@ -503,14 +484,14 @@ static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing) { struct batadv_neigh_node *router = NULL; - struct batadv_neigh_ifinfo *neigh_ifinfo = NULL; struct batadv_orig_node *orig_neigh_node = NULL; - struct batadv_orig_ifinfo *orig_ifinfo = NULL; struct batadv_neigh_node *orig_neigh_router = NULL; - - neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); - if (!neigh_ifinfo) - goto out; + struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL; + u32 router_throughput, neigh_throughput; + u32 router_last_seqno; + u32 neigh_last_seqno; + s32 neigh_seq_diff; + bool forward = false; orig_neigh_node = batadv_v_ogm_orig_get(bat_priv, ethhdr->h_source); if (!orig_neigh_node) @@ -529,47 +510,57 @@ static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv, goto out; } - if (router) - batadv_neigh_node_put(router); + /* Mark the OGM to be considered for forwarding, and update routes + * if needed. + */ + forward = true; - /* Update routes, and check if the OGM is from the best next hop */ - batadv_v_ogm_orig_update(bat_priv, orig_node, neigh_node, ogm2, - if_outgoing); + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Searching and updating originator entry of received packet\n"); - orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); - if (!orig_ifinfo) + /* if this neighbor already is our next hop there is nothing + * to change + */ + if (router == neigh_node) goto out; - /* don't forward the same seqno twice on one interface */ - if (orig_ifinfo->last_seqno_forwarded == ntohl(ogm2->seqno)) - goto out; + /* don't consider neighbours with worse throughput. + * also switch route if this seqno is BATADV_V_MAX_ORIGDIFF newer than + * the last received seqno from our best next hop. + */ + if (router) { + router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); + neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); - /* acquire possibly updated router */ - router = batadv_orig_router_get(orig_node, if_outgoing); + /* if these are not allocated, something is wrong. */ + if (!router_ifinfo || !neigh_ifinfo) + goto out; - /* strict rule: forward packets coming from the best next hop only */ - if (neigh_node != router) - goto out; + neigh_last_seqno = neigh_ifinfo->bat_v.last_seqno; + router_last_seqno = router_ifinfo->bat_v.last_seqno; + neigh_seq_diff = neigh_last_seqno - router_last_seqno; + router_throughput = router_ifinfo->bat_v.throughput; + neigh_throughput = neigh_ifinfo->bat_v.throughput; - /* only forward for specific interface, not for the default one. */ - if (if_outgoing != BATADV_IF_DEFAULT) { - orig_ifinfo->last_seqno_forwarded = ntohl(ogm2->seqno); - batadv_v_ogm_forward(bat_priv, ogm2, - neigh_ifinfo->bat_v.throughput, - if_incoming, if_outgoing); + if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) && + (router_throughput >= neigh_throughput)) + goto out; } + batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node); out: - if (orig_ifinfo) - batadv_orig_ifinfo_put(orig_ifinfo); if (router) batadv_neigh_node_put(router); if (orig_neigh_router) batadv_neigh_node_put(orig_neigh_router); if (orig_neigh_node) batadv_orig_node_put(orig_neigh_node); + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); if (neigh_ifinfo) batadv_neigh_ifinfo_put(neigh_ifinfo); + + return forward; } /** @@ -592,6 +583,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing) { int seqno_age; + bool forward; /* first, update the metric with according sanity checks */ seqno_age = batadv_v_ogm_metric_update(bat_priv, ogm2, orig_node, @@ -610,8 +602,14 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv, ntohs(ogm2->tvlv_len)); /* if the metric update went through, update routes if needed */ - batadv_v_ogm_route_update(bat_priv, ethhdr, ogm2, orig_node, - neigh_node, if_incoming, if_outgoing); + forward = batadv_v_ogm_route_update(bat_priv, ethhdr, ogm2, orig_node, + neigh_node, if_incoming, + if_outgoing); + + /* if the routes have been processed correctly, check and forward */ + if (forward) + batadv_v_ogm_forward(bat_priv, ogm2, orig_node, neigh_node, + if_incoming, if_outgoing); } /** @@ -713,9 +711,14 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, if (hard_iface->soft_iface != bat_priv->soft_iface) continue; + if (!kref_get_unless_zero(&hard_iface->refcount)) + continue; + batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet, orig_node, neigh_node, if_incoming, hard_iface); + + batadv_hardif_put(hard_iface); } rcu_read_unlock(); out: diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index b56bb000a0ab..a0c7913837a5 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -38,11 +38,11 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n) * the last sequence number * @set_mark: whether this packet should be marked in seq_bits * - * Return: 1 if the window was moved (either new or very old), - * 0 if the window was not moved/shifted. + * Return: true if the window was moved (either new or very old), + * false if the window was not moved/shifted. */ -int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, - int set_mark) +bool batadv_bit_get_packet(void *priv, unsigned long *seq_bits, + s32 seq_num_diff, int set_mark) { struct batadv_priv *bat_priv = priv; @@ -52,7 +52,7 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, if (seq_num_diff <= 0 && seq_num_diff > -BATADV_TQ_LOCAL_WINDOW_SIZE) { if (set_mark) batadv_set_bit(seq_bits, -seq_num_diff); - return 0; + return false; } /* sequence number is slightly newer, so we shift the window and @@ -63,7 +63,7 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, if (set_mark) batadv_set_bit(seq_bits, 0); - return 1; + return true; } /* sequence number is much newer, probably missed a lot of packets */ @@ -75,7 +75,7 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); if (set_mark) batadv_set_bit(seq_bits, 0); - return 1; + return true; } /* received a much older packet. The other host either restarted @@ -94,5 +94,5 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, if (set_mark) batadv_set_bit(seq_bits, 0); - return 1; + return true; } diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index 3e41bb80eb81..0e6e9d09078c 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -22,6 +22,7 @@ #include <linux/bitops.h> #include <linux/compiler.h> +#include <linux/stddef.h> #include <linux/types.h> /** @@ -31,17 +32,17 @@ * @last_seqno: latest sequence number in seq_bits * @curr_seqno: sequence number to test for * - * Return: 1 if the corresponding bit in the given seq_bits indicates true - * and curr_seqno is within range of last_seqno. Otherwise returns 0. + * Return: true if the corresponding bit in the given seq_bits indicates true + * and curr_seqno is within range of last_seqno. Otherwise returns false. */ -static inline int batadv_test_bit(const unsigned long *seq_bits, - u32 last_seqno, u32 curr_seqno) +static inline bool batadv_test_bit(const unsigned long *seq_bits, + u32 last_seqno, u32 curr_seqno) { s32 diff; diff = last_seqno - curr_seqno; if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) - return 0; + return false; return test_bit(diff, seq_bits) != 0; } @@ -55,7 +56,7 @@ static inline void batadv_set_bit(unsigned long *seq_bits, s32 n) set_bit(n, seq_bits); /* turn the position on */ } -int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, - int set_mark); +bool batadv_bit_get_packet(void *priv, unsigned long *seq_bits, + s32 seq_num_diff, int set_mark); #endif /* _NET_BATMAN_ADV_BITARRAY_H_ */ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 0a6c8b824a00..748a9ead7ce5 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -50,6 +50,7 @@ #include "hash.h" #include "originator.h" #include "packet.h" +#include "sysfs.h" #include "translation-table.h" static const u8 batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05}; @@ -100,10 +101,10 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) * @node: list node of the first entry to compare * @data2: pointer to the second backbone gateway * - * Return: 1 if the backbones have the same data, 0 otherwise + * Return: true if the backbones have the same data, false otherwise */ -static int batadv_compare_backbone_gw(const struct hlist_node *node, - const void *data2) +static bool batadv_compare_backbone_gw(const struct hlist_node *node, + const void *data2) { const void *data1 = container_of(node, struct batadv_bla_backbone_gw, hash_entry); @@ -111,23 +112,23 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node, const struct batadv_bla_backbone_gw *gw2 = data2; if (!batadv_compare_eth(gw1->orig, gw2->orig)) - return 0; + return false; if (gw1->vid != gw2->vid) - return 0; + return false; - return 1; + return true; } /** - * batadv_compare_backbone_gw - compare address and vid of two claims + * batadv_compare_claim - compare address and vid of two claims * @node: list node of the first entry to compare * @data2: pointer to the second claims * - * Return: 1 if the claim have the same data, 0 otherwise + * Return: true if the claim have the same data, 0 otherwise */ -static int batadv_compare_claim(const struct hlist_node *node, - const void *data2) +static bool batadv_compare_claim(const struct hlist_node *node, + const void *data2) { const void *data1 = container_of(node, struct batadv_bla_claim, hash_entry); @@ -135,12 +136,12 @@ static int batadv_compare_claim(const struct hlist_node *node, const struct batadv_bla_claim *cl2 = data2; if (!batadv_compare_eth(cl1->addr, cl2->addr)) - return 0; + return false; if (cl1->vid != cl2->vid) - return 0; + return false; - return 1; + return true; } /** @@ -200,9 +201,9 @@ static void batadv_claim_put(struct batadv_bla_claim *claim) * * Return: claim if found or NULL otherwise. */ -static struct batadv_bla_claim -*batadv_claim_hash_find(struct batadv_priv *bat_priv, - struct batadv_bla_claim *data) +static struct batadv_bla_claim * +batadv_claim_hash_find(struct batadv_priv *bat_priv, + struct batadv_bla_claim *data) { struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct hlist_head *head; @@ -407,6 +408,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ethhdr->h_source, ethhdr->h_dest, BATADV_PRINT_VID(vid)); break; + case BATADV_CLAIM_TYPE_LOOPDETECT: + ether_addr_copy(ethhdr->h_source, mac); + batadv_dbg(BATADV_DBG_BLA, bat_priv, + "bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n", + ethhdr->h_source, ethhdr->h_dest, + BATADV_PRINT_VID(vid)); + + break; } if (vid & BATADV_VLAN_HAS_TAG) @@ -427,6 +436,36 @@ out: } /** + * batadv_bla_loopdetect_report - worker for reporting the loop + * @work: work queue item + * + * Throws an uevent, as the loopdetect check function can't do that itself + * since the kernel may sleep while throwing uevents. + */ +static void batadv_bla_loopdetect_report(struct work_struct *work) +{ + struct batadv_bla_backbone_gw *backbone_gw; + struct batadv_priv *bat_priv; + char vid_str[6] = { '\0' }; + + backbone_gw = container_of(work, struct batadv_bla_backbone_gw, + report_work); + bat_priv = backbone_gw->bat_priv; + + batadv_info(bat_priv->soft_iface, + "Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n", + BATADV_PRINT_VID(backbone_gw->vid)); + snprintf(vid_str, sizeof(vid_str), "%d", + BATADV_PRINT_VID(backbone_gw->vid)); + vid_str[sizeof(vid_str) - 1] = 0; + + batadv_throw_uevent(bat_priv, BATADV_UEV_BLA, BATADV_UEV_LOOPDETECT, + vid_str); + + batadv_backbone_gw_put(backbone_gw); +} + +/** * batadv_bla_get_backbone_gw - finds or creates a backbone gateway * @bat_priv: the bat priv with all the soft interface information * @orig: the mac address of the originator @@ -464,6 +503,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, atomic_set(&entry->request_sent, 0); atomic_set(&entry->wait_periods, 0); ether_addr_copy(entry->orig, orig); + INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report); /* one for the hash, one for returning */ kref_init(&entry->refcount); @@ -735,22 +775,22 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, * @backbone_addr: originator address of the sender (Ethernet source MAC) * @vid: the VLAN ID of the frame * - * Return: 1 if handled + * Return: true if handled */ -static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, - u8 *backbone_addr, unsigned short vid) +static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, + u8 *backbone_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; u16 backbone_crc, crc; if (memcmp(an_addr, batadv_announce_mac, 4) != 0) - return 0; + return false; backbone_gw = batadv_bla_get_backbone_gw(bat_priv, backbone_addr, vid, false); if (unlikely(!backbone_gw)) - return 1; + return true; /* handle as ANNOUNCE frame */ backbone_gw->lasttime = jiffies; @@ -783,7 +823,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, } batadv_backbone_gw_put(backbone_gw); - return 1; + return true; } /** @@ -794,29 +834,29 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, * @ethhdr: ethernet header of a packet * @vid: the VLAN ID of the frame * - * Return: 1 if handled + * Return: true if handled */ -static int batadv_handle_request(struct batadv_priv *bat_priv, - struct batadv_hard_iface *primary_if, - u8 *backbone_addr, struct ethhdr *ethhdr, - unsigned short vid) +static bool batadv_handle_request(struct batadv_priv *bat_priv, + struct batadv_hard_iface *primary_if, + u8 *backbone_addr, struct ethhdr *ethhdr, + unsigned short vid) { /* check for REQUEST frame */ if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest)) - return 0; + return false; /* sanity check, this should not happen on a normal switch, * we ignore it in this case. */ if (!batadv_compare_eth(ethhdr->h_dest, primary_if->net_dev->dev_addr)) - return 1; + return true; batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_request(): REQUEST vid %d (sent by %pM)...\n", BATADV_PRINT_VID(vid), ethhdr->h_source); batadv_bla_answer_request(bat_priv, primary_if, vid); - return 1; + return true; } /** @@ -827,12 +867,12 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, * @claim_addr: Client to be unclaimed (ARP sender HW MAC) * @vid: the VLAN ID of the frame * - * Return: 1 if handled + * Return: true if handled */ -static int batadv_handle_unclaim(struct batadv_priv *bat_priv, - struct batadv_hard_iface *primary_if, - u8 *backbone_addr, u8 *claim_addr, - unsigned short vid) +static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, + struct batadv_hard_iface *primary_if, + u8 *backbone_addr, u8 *claim_addr, + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -845,7 +885,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, backbone_gw = batadv_backbone_hash_find(bat_priv, backbone_addr, vid); if (!backbone_gw) - return 1; + return true; /* this must be an UNCLAIM frame */ batadv_dbg(BATADV_DBG_BLA, bat_priv, @@ -854,7 +894,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, batadv_bla_del_claim(bat_priv, claim_addr, vid); batadv_backbone_gw_put(backbone_gw); - return 1; + return true; } /** @@ -865,12 +905,12 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, * @claim_addr: client mac address to be claimed (ARP sender HW MAC) * @vid: the VLAN ID of the frame * - * Return: 1 if handled + * Return: true if handled */ -static int batadv_handle_claim(struct batadv_priv *bat_priv, - struct batadv_hard_iface *primary_if, - u8 *backbone_addr, u8 *claim_addr, - unsigned short vid) +static bool batadv_handle_claim(struct batadv_priv *bat_priv, + struct batadv_hard_iface *primary_if, + u8 *backbone_addr, u8 *claim_addr, + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -880,7 +920,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, false); if (unlikely(!backbone_gw)) - return 1; + return true; /* this must be a CLAIM frame */ batadv_bla_add_claim(bat_priv, claim_addr, vid, backbone_gw); @@ -891,7 +931,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, /* TODO: we could call something like tt_local_del() here. */ batadv_backbone_gw_put(backbone_gw); - return 1; + return true; } /** @@ -975,12 +1015,12 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, * @primary_if: the primary hard interface of this batman soft interface * @skb: the frame to be checked * - * Return: 1 if it was a claim frame, otherwise return 0 to + * Return: true if it was a claim frame, otherwise return false to * tell the callee that it can use the frame on its own. */ -static int batadv_bla_process_claim(struct batadv_priv *bat_priv, - struct batadv_hard_iface *primary_if, - struct sk_buff *skb) +static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, + struct batadv_hard_iface *primary_if, + struct sk_buff *skb) { struct batadv_bla_claim_dst *bla_dst, *bla_dst_own; u8 *hw_src, *hw_dst; @@ -1011,7 +1051,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, vhdr = skb_header_pointer(skb, headlen, VLAN_HLEN, &vhdr_buf); if (!vhdr) - return 0; + return false; proto = vhdr->h_vlan_encapsulated_proto; headlen += VLAN_HLEN; @@ -1020,12 +1060,12 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, } if (proto != htons(ETH_P_ARP)) - return 0; /* not a claim frame */ + return false; /* not a claim frame */ /* this must be a ARP frame. check if it is a claim. */ if (unlikely(!pskb_may_pull(skb, headlen + arp_hdr_len(skb->dev)))) - return 0; + return false; /* pskb_may_pull() may have modified the pointers, get ethhdr again */ ethhdr = eth_hdr(skb); @@ -1035,13 +1075,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, * IP information */ if (arphdr->ar_hrd != htons(ARPHRD_ETHER)) - return 0; + return false; if (arphdr->ar_pro != htons(ETH_P_IP)) - return 0; + return false; if (arphdr->ar_hln != ETH_ALEN) - return 0; + return false; if (arphdr->ar_pln != 4) - return 0; + return false; hw_src = (u8 *)arphdr + sizeof(struct arphdr); hw_dst = hw_src + ETH_ALEN + 4; @@ -1051,14 +1091,18 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, /* check if it is a claim frame in general */ if (memcmp(bla_dst->magic, bla_dst_own->magic, sizeof(bla_dst->magic)) != 0) - return 0; + return false; /* check if there is a claim frame encapsulated deeper in (QinQ) and * drop that, as this is not supported by BLA but should also not be * sent via the mesh. */ if (vlan_depth > 1) - return 1; + return true; + + /* Let the loopdetect frames on the mesh in any case. */ + if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT) + return 0; /* check if it is a claim frame. */ ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst, @@ -1070,7 +1114,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, hw_dst); if (ret < 2) - return ret; + return !!ret; /* become a backbone gw ourselves on this vlan if not happened yet */ batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid); @@ -1080,30 +1124,30 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, case BATADV_CLAIM_TYPE_CLAIM: if (batadv_handle_claim(bat_priv, primary_if, hw_src, ethhdr->h_source, vid)) - return 1; + return true; break; case BATADV_CLAIM_TYPE_UNCLAIM: if (batadv_handle_unclaim(bat_priv, primary_if, ethhdr->h_source, hw_src, vid)) - return 1; + return true; break; case BATADV_CLAIM_TYPE_ANNOUNCE: if (batadv_handle_announce(bat_priv, hw_src, ethhdr->h_source, vid)) - return 1; + return true; break; case BATADV_CLAIM_TYPE_REQUEST: if (batadv_handle_request(bat_priv, primary_if, hw_src, ethhdr, vid)) - return 1; + return true; break; } batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst); - return 1; + return true; } /** @@ -1265,6 +1309,26 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, } /** + * batadv_bla_send_loopdetect - send a loopdetect frame + * @bat_priv: the bat priv with all the soft interface information + * @backbone_gw: the backbone gateway for which a loop should be detected + * + * To detect loops that the bridge loop avoidance can't handle, send a loop + * detection packet on the backbone. Unlike other BLA frames, this frame will + * be allowed on the mesh by other nodes. If it is received on the mesh, this + * indicates that there is a loop. + */ +static void +batadv_bla_send_loopdetect(struct batadv_priv *bat_priv, + struct batadv_bla_backbone_gw *backbone_gw) +{ + batadv_dbg(BATADV_DBG_BLA, bat_priv, "Send loopdetect frame for vid %d\n", + backbone_gw->vid); + batadv_bla_send_claim(bat_priv, bat_priv->bla.loopdetect_addr, + backbone_gw->vid, BATADV_CLAIM_TYPE_LOOPDETECT); +} + +/** * batadv_bla_status_update - purge bla interfaces if necessary * @net_dev: the soft interface net device */ @@ -1301,9 +1365,10 @@ static void batadv_bla_periodic_work(struct work_struct *work) struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hashtable *hash; struct batadv_hard_iface *primary_if; + bool send_loopdetect = false; int i; - delayed_work = container_of(work, struct delayed_work, work); + delayed_work = to_delayed_work(work); priv_bla = container_of(delayed_work, struct batadv_priv_bla, work); bat_priv = container_of(priv_bla, struct batadv_priv, bla); primary_if = batadv_primary_if_get_selected(bat_priv); @@ -1316,6 +1381,22 @@ static void batadv_bla_periodic_work(struct work_struct *work) if (!atomic_read(&bat_priv->bridge_loop_avoidance)) goto out; + if (atomic_dec_and_test(&bat_priv->bla.loopdetect_next)) { + /* set a new random mac address for the next bridge loop + * detection frames. Set the locally administered bit to avoid + * collisions with users mac addresses. + */ + random_ether_addr(bat_priv->bla.loopdetect_addr); + bat_priv->bla.loopdetect_addr[0] = 0xba; + bat_priv->bla.loopdetect_addr[1] = 0xbe; + bat_priv->bla.loopdetect_lasttime = jiffies; + atomic_set(&bat_priv->bla.loopdetect_next, + BATADV_BLA_LOOPDETECT_PERIODS); + + /* mark for sending loop detect on all VLANs */ + send_loopdetect = true; + } + hash = bat_priv->bla.backbone_hash; if (!hash) goto out; @@ -1332,6 +1413,9 @@ static void batadv_bla_periodic_work(struct work_struct *work) backbone_gw->lasttime = jiffies; batadv_bla_send_announce(bat_priv, backbone_gw); + if (send_loopdetect) + batadv_bla_send_loopdetect(bat_priv, + backbone_gw); /* request_sent is only set after creation to avoid * problems when we are not yet known as backbone gw @@ -1405,6 +1489,9 @@ int batadv_bla_init(struct batadv_priv *bat_priv) bat_priv->bla.bcast_duplist[i].entrytime = entrytime; bat_priv->bla.bcast_duplist_curr = 0; + atomic_set(&bat_priv->bla.loopdetect_next, + BATADV_BLA_LOOPDETECT_PERIODS); + if (bat_priv->bla.claim_hash) return 0; @@ -1442,15 +1529,16 @@ int batadv_bla_init(struct batadv_priv *bat_priv) * sent by another host, drop it. We allow equal packets from * the same host however as this might be intended. * - * Return: 1 if a packet is in the duplicate list, 0 otherwise. + * Return: true if a packet is in the duplicate list, false otherwise. */ -int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, - struct sk_buff *skb) +bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb) { - int i, curr, ret = 0; + int i, curr; __be32 crc; struct batadv_bcast_packet *bcast_packet; struct batadv_bcast_duplist_entry *entry; + bool ret = false; bcast_packet = (struct batadv_bcast_packet *)skb->data; @@ -1478,9 +1566,9 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, continue; /* this entry seems to match: same crc, not too old, - * and from another gw. therefore return 1 to forbid it. + * and from another gw. therefore return true to forbid it. */ - ret = 1; + ret = true; goto out; } /* not found, add a new entry (overwrite the oldest entry) @@ -1546,21 +1634,21 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, * @orig_node: the orig_node of the frame * @hdr_size: maximum length of the frame * - * Return: 1 if the orig_node is also a gateway on the soft interface, otherwise - * it returns 0. + * Return: true if the orig_node is also a gateway on the soft interface, + * otherwise it returns false. */ -int batadv_bla_is_backbone_gw(struct sk_buff *skb, - struct batadv_orig_node *orig_node, int hdr_size) +bool batadv_bla_is_backbone_gw(struct sk_buff *skb, + struct batadv_orig_node *orig_node, int hdr_size) { struct batadv_bla_backbone_gw *backbone_gw; unsigned short vid; if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) - return 0; + return false; /* first, find out the vid. */ if (!pskb_may_pull(skb, hdr_size + ETH_HLEN)) - return 0; + return false; vid = batadv_get_vid(skb, hdr_size); @@ -1568,14 +1656,14 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, backbone_gw = batadv_backbone_hash_find(orig_node->bat_priv, orig_node->orig, vid); if (!backbone_gw) - return 0; + return false; batadv_backbone_gw_put(backbone_gw); - return 1; + return true; } /** - * batadv_bla_init - free all bla structures + * batadv_bla_free - free all bla structures * @bat_priv: the bat priv with all the soft interface information * * for softinterface free or module unload @@ -1602,6 +1690,55 @@ void batadv_bla_free(struct batadv_priv *bat_priv) } /** + * batadv_bla_loopdetect_check - check and handle a detected loop + * @bat_priv: the bat priv with all the soft interface information + * @skb: the packet to check + * @primary_if: interface where the request came on + * @vid: the VLAN ID of the frame + * + * Checks if this packet is a loop detect frame which has been sent by us, + * throw an uevent and log the event if that is the case. + * + * Return: true if it is a loop detect frame which is to be dropped, false + * otherwise. + */ +static bool +batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, + struct batadv_hard_iface *primary_if, + unsigned short vid) +{ + struct batadv_bla_backbone_gw *backbone_gw; + struct ethhdr *ethhdr; + + ethhdr = eth_hdr(skb); + + /* Only check for the MAC address and skip more checks here for + * performance reasons - this function is on the hotpath, after all. + */ + if (!batadv_compare_eth(ethhdr->h_source, + bat_priv->bla.loopdetect_addr)) + return false; + + /* If the packet came too late, don't forward it on the mesh + * but don't consider that as loop. It might be a coincidence. + */ + if (batadv_has_timed_out(bat_priv->bla.loopdetect_lasttime, + BATADV_BLA_LOOPDETECT_TIMEOUT)) + return true; + + backbone_gw = batadv_bla_get_backbone_gw(bat_priv, + primary_if->net_dev->dev_addr, + vid, true); + if (unlikely(!backbone_gw)) + return true; + + queue_work(batadv_event_workqueue, &backbone_gw->report_work); + /* backbone_gw is unreferenced in the report work function function */ + + return true; +} + +/** * batadv_bla_rx - check packets coming from the mesh. * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked @@ -1614,16 +1751,16 @@ void batadv_bla_free(struct batadv_priv *bat_priv) * * in these cases, the skb is further handled by this function * - * Return: 1 if handled, otherwise it returns 0 and the caller shall further - * process the skb. + * Return: true if handled, otherwise it returns false and the caller shall + * further process the skb. */ -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid, bool is_bcast) +bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; struct batadv_hard_iface *primary_if; - int ret; + bool ret; ethhdr = eth_hdr(skb); @@ -1634,6 +1771,9 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, if (!atomic_read(&bat_priv->bridge_loop_avoidance)) goto allow; + if (batadv_bla_loopdetect_check(bat_priv, skb, primary_if, vid)) + goto handled; + if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) @@ -1682,12 +1822,12 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, } allow: batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid); - ret = 0; + ret = false; goto out; handled: kfree_skb(skb); - ret = 1; + ret = true; out: if (primary_if) @@ -1711,16 +1851,16 @@ out: * * This call might reallocate skb data. * - * Return: 1 if handled, otherwise it returns 0 and the caller shall further - * process the skb. + * Return: true if handled, otherwise it returns false and the caller shall + * further process the skb. */ -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid) +bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; struct batadv_hard_iface *primary_if; - int ret = 0; + bool ret = false; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) @@ -1774,10 +1914,10 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, } allow: batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid); - ret = 0; + ret = false; goto out; handled: - ret = 1; + ret = true; out: if (primary_if) batadv_hardif_put(primary_if); @@ -1815,8 +1955,8 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) "Claims announced for the mesh %s (orig %pM, group id %#.4x)\n", net_dev->name, primary_addr, ntohs(bat_priv->bla.claim_dest.group)); - seq_printf(seq, " %-17s %-5s %-17s [o] (%-6s)\n", - "Client", "VID", "Originator", "CRC"); + seq_puts(seq, + " Client VID Originator [o] (CRC )\n"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1873,8 +2013,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) "Backbones announced for the mesh %s (orig %pM, group id %#.4x)\n", net_dev->name, primary_addr, ntohs(bat_priv->bla.claim_dest.group)); - seq_printf(seq, " %-17s %-5s %-9s (%-6s)\n", - "Originator", "VID", "last seen", "CRC"); + seq_puts(seq, " Originator VID last seen (CRC )\n"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 579f0fa6fe6a..0f01daeb359e 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -27,19 +27,20 @@ struct seq_file; struct sk_buff; #ifdef CONFIG_BATMAN_ADV_BLA -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid, bool is_bcast); -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid); -int batadv_bla_is_backbone_gw(struct sk_buff *skb, - struct batadv_orig_node *orig_node, int hdr_size); +bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast); +bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid); +bool batadv_bla_is_backbone_gw(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset); bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid); -int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, - struct sk_buff *skb); +bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb); void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, struct batadv_hard_iface *oldif); @@ -50,24 +51,24 @@ void batadv_bla_free(struct batadv_priv *bat_priv); #define BATADV_BLA_CRC_INIT 0 #else /* ifdef CONFIG_BATMAN_ADV_BLA */ -static inline int batadv_bla_rx(struct batadv_priv *bat_priv, - struct sk_buff *skb, unsigned short vid, - bool is_bcast) +static inline bool batadv_bla_rx(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid, + bool is_bcast) { - return 0; + return false; } -static inline int batadv_bla_tx(struct batadv_priv *bat_priv, - struct sk_buff *skb, unsigned short vid) +static inline bool batadv_bla_tx(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid) { - return 0; + return false; } -static inline int batadv_bla_is_backbone_gw(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - int hdr_size) +static inline bool batadv_bla_is_backbone_gw(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + int hdr_size) { - return 0; + return false; } static inline int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, @@ -88,11 +89,11 @@ static inline bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, return false; } -static inline int +static inline bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, struct sk_buff *skb) { - return 0; + return false; } static inline void diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 48253cf8341b..952900466d88 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -134,7 +134,7 @@ static int batadv_log_release(struct inode *inode, struct file *file) return 0; } -static int batadv_log_empty(struct batadv_priv_debug_log *debug_log) +static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log) { return !(debug_log->log_start - debug_log->log_end); } @@ -365,14 +365,17 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file) #define BATADV_DEBUGINFO(_name, _mode, _open) \ struct batadv_debuginfo batadv_debuginfo_##_name = { \ - .attr = { .name = __stringify(_name), \ - .mode = _mode, }, \ - .fops = { .owner = THIS_MODULE, \ - .open = _open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ - } \ + .attr = { \ + .name = __stringify(_name), \ + .mode = _mode, \ + }, \ + .fops = { \ + .owner = THIS_MODULE, \ + .open = _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ + }, \ } /* the following attributes are general and therefore they will be directly diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 3e6b2624f980..278800a99c69 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -152,7 +152,7 @@ static void batadv_dat_purge(struct work_struct *work) struct batadv_priv_dat *priv_dat; struct batadv_priv *bat_priv; - delayed_work = container_of(work, struct delayed_work, work); + delayed_work = to_delayed_work(work); priv_dat = container_of(delayed_work, struct batadv_priv_dat, work); bat_priv = container_of(priv_dat, struct batadv_priv, dat); @@ -165,14 +165,14 @@ static void batadv_dat_purge(struct work_struct *work) * @node: node in the local table * @data2: second object to compare the node to * - * Return: 1 if the two entries are the same, 0 otherwise. + * Return: true if the two entries are the same, false otherwise. */ -static int batadv_compare_dat(const struct hlist_node *node, const void *data2) +static bool batadv_compare_dat(const struct hlist_node *node, const void *data2) { const void *data1 = container_of(node, struct batadv_dat_entry, hash_entry); - return memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0; + return memcmp(data1, data2, sizeof(__be32)) == 0; } /** @@ -720,7 +720,7 @@ void batadv_dat_status_update(struct net_device *net_dev) } /** - * batadv_gw_tvlv_ogm_handler_v1 - process incoming dat tvlv container + * batadv_dat_tvlv_ogm_handler_v1 - process incoming dat tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) @@ -817,8 +817,8 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) goto out; seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name); - seq_printf(seq, " %-7s %-9s %4s %11s\n", "IPv4", - "MAC", "VID", "last-seen"); + seq_puts(seq, + " IPv4 MAC VID last-seen\n"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index e6956d0746a2..65536db1bff7 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -407,8 +407,8 @@ static struct sk_buff *batadv_frag_create(struct sk_buff *skb, unsigned int mtu) { struct sk_buff *skb_fragment; - unsigned header_size = sizeof(*frag_head); - unsigned fragment_size = mtu - header_size; + unsigned int header_size = sizeof(*frag_head); + unsigned int fragment_size = mtu - header_size; skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); if (!skb_fragment) @@ -444,15 +444,15 @@ bool batadv_frag_send_packet(struct sk_buff *skb, struct batadv_hard_iface *primary_if = NULL; struct batadv_frag_packet frag_header; struct sk_buff *skb_fragment; - unsigned mtu = neigh_node->if_incoming->net_dev->mtu; - unsigned header_size = sizeof(frag_header); - unsigned max_fragment_size, max_packet_size; + unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; + unsigned int header_size = sizeof(frag_header); + unsigned int max_fragment_size, max_packet_size; bool ret = false; /* To avoid merge and refragmentation at next-hops we never send * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE */ - mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE); + mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE); max_fragment_size = mtu - header_size; max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index c59aff5ccac8..5839c569f769 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -135,8 +135,8 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->gw.list_lock); - if (new_gw_node && !kref_get_unless_zero(&new_gw_node->refcount)) - new_gw_node = NULL; + if (new_gw_node) + kref_get(&new_gw_node->refcount); curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1); rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node); @@ -440,15 +440,11 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, if (gateway->bandwidth_down == 0) return; - if (!kref_get_unless_zero(&orig_node->refcount)) - return; - gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); - if (!gw_node) { - batadv_orig_node_put(orig_node); + if (!gw_node) return; - } + kref_get(&orig_node->refcount); INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 0a7deaf2670a..8c2f39962fa5 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -36,7 +36,6 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/workqueue.h> -#include <net/net_namespace.h> #include "bridge_loop_avoidance.h" #include "debugfs.h" @@ -121,6 +120,7 @@ static bool batadv_mutual_parents(const struct net_device *dev1, static bool batadv_is_on_batman_iface(const struct net_device *net_dev) { struct net_device *parent_dev; + struct net *net = dev_net(net_dev); bool ret; /* check if this is a batman-adv mesh interface */ @@ -133,7 +133,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) return false; /* recurse over the parent device */ - parent_dev = __dev_get_by_index(&init_net, dev_get_iflink(net_dev)); + parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ if (WARN(!parent_dev, "Cannot find parent device")) return false; @@ -146,22 +146,22 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) return ret; } -static int batadv_is_valid_iface(const struct net_device *net_dev) +static bool batadv_is_valid_iface(const struct net_device *net_dev) { if (net_dev->flags & IFF_LOOPBACK) - return 0; + return false; if (net_dev->type != ARPHRD_ETHER) - return 0; + return false; if (net_dev->addr_len != ETH_ALEN) - return 0; + return false; /* no batman over batman */ if (batadv_is_on_batman_iface(net_dev)) - return 0; + return false; - return 1; + return true; } /** @@ -236,8 +236,8 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv, ASSERT_RTNL(); - if (new_hard_iface && !kref_get_unless_zero(&new_hard_iface->refcount)) - new_hard_iface = NULL; + if (new_hard_iface) + kref_get(&new_hard_iface->refcount); curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1); rcu_assign_pointer(bat_priv->primary_if, new_hard_iface); @@ -456,7 +456,7 @@ static int batadv_master_del_slave(struct batadv_hard_iface *slave, } int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, - const char *iface_name) + struct net *net, const char *iface_name) { struct batadv_priv *bat_priv; struct net_device *soft_iface, *master; @@ -467,13 +467,12 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) goto out; - if (!kref_get_unless_zero(&hard_iface->refcount)) - goto out; + kref_get(&hard_iface->refcount); - soft_iface = dev_get_by_name(&init_net, iface_name); + soft_iface = dev_get_by_name(net, iface_name); if (!soft_iface) { - soft_iface = batadv_softif_create(iface_name); + soft_iface = batadv_softif_create(net, iface_name); if (!soft_iface) { ret = -ENOMEM; @@ -522,6 +521,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, goto err_upper; } + kref_get(&hard_iface->refcount); hard_iface->batman_adv_ptype.type = ethertype; hard_iface->batman_adv_ptype.func = batadv_batman_skb_recv; hard_iface->batman_adv_ptype.dev = hard_iface->net_dev; @@ -583,6 +583,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_info(hard_iface->soft_iface, "Removing interface: %s\n", hard_iface->net_dev->name); dev_remove_pack(&hard_iface->batman_adv_ptype); + batadv_hardif_put(hard_iface); bat_priv->num_ifaces--; batadv_orig_hash_del_if(hard_iface, bat_priv->num_ifaces); @@ -652,8 +653,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) ASSERT_RTNL(); - ret = batadv_is_valid_iface(net_dev); - if (ret != 1) + if (!batadv_is_valid_iface(net_dev)) goto out; dev_hold(net_dev); diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index d74f1983f33e..a76724d369bf 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -28,6 +28,7 @@ #include <linux/types.h> struct net_device; +struct net; enum batadv_hard_if_state { BATADV_IF_NOT_IN_USE, @@ -55,7 +56,7 @@ bool batadv_is_wifi_iface(int ifindex); struct batadv_hard_iface* batadv_hardif_get_by_netdev(const struct net_device *net_dev); int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, - const char *iface_name); + struct net *net, const char *iface_name); void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, enum batadv_hard_if_cleanup autodel); void batadv_hardif_remove_interfaces(void); diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 9bb57b87447c..cbbf87075f06 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -32,10 +32,10 @@ struct lock_class_key; /* callback to a compare function. should compare 2 element datas for their * keys * - * Return: 0 if same and not 0 if not same + * Return: true if same and false if not same */ -typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *, - const void *); +typedef bool (*batadv_hashdata_compare_cb)(const struct hlist_node *, + const void *); /* the hashfunction * diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 14d0013b387e..777aea10cd8f 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -104,25 +104,21 @@ static int batadv_socket_open(struct inode *inode, struct file *file) static int batadv_socket_release(struct inode *inode, struct file *file) { - struct batadv_socket_client *socket_client = file->private_data; - struct batadv_socket_packet *socket_packet; - struct list_head *list_pos, *list_pos_tmp; + struct batadv_socket_client *client = file->private_data; + struct batadv_socket_packet *packet, *tmp; - spin_lock_bh(&socket_client->lock); + spin_lock_bh(&client->lock); /* for all packets in the queue ... */ - list_for_each_safe(list_pos, list_pos_tmp, &socket_client->queue_list) { - socket_packet = list_entry(list_pos, - struct batadv_socket_packet, list); - - list_del(list_pos); - kfree(socket_packet); + list_for_each_entry_safe(packet, tmp, &client->queue_list, list) { + list_del(&packet->list); + kfree(packet); } - batadv_socket_client_hash[socket_client->index] = NULL; - spin_unlock_bh(&socket_client->lock); + batadv_socket_client_hash[client->index] = NULL; + spin_unlock_bh(&client->lock); - kfree(socket_client); + kfree(client); module_put(THIS_MODULE); return 0; @@ -337,7 +333,7 @@ err: } /** - * batadv_socket_receive_packet - schedule an icmp packet to be sent to + * batadv_socket_add_packet - schedule an icmp packet to be sent to * userspace on an icmp socket. * @socket_client: the socket this packet belongs to * @icmph: pointer to the header of the icmp packet diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d64ddb961979..5f2974bd1227 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -401,11 +401,19 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, hard_iface = container_of(ptype, struct batadv_hard_iface, batman_adv_ptype); + + /* Prevent processing a packet received on an interface which is getting + * shut down otherwise the packet may trigger de-reference errors + * further down in the receive path. + */ + if (!kref_get_unless_zero(&hard_iface->refcount)) + goto err_out; + skb = skb_share_check(skb, GFP_ATOMIC); /* skb was released by skb_share_check() */ if (!skb) - goto err_out; + goto err_put; /* packet should hold at least type and version */ if (unlikely(!pskb_may_pull(skb, 2))) @@ -448,6 +456,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, if (ret == NET_RX_DROP) kfree_skb(skb); + batadv_hardif_put(hard_iface); + /* return NET_RX_SUCCESS in any case as we * most probably dropped the packet for * routing-logical reasons. @@ -456,6 +466,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, err_free: kfree_skb(skb); +err_put: + batadv_hardif_put(hard_iface); err_out: return NET_RX_DROP; } @@ -663,8 +675,8 @@ static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler) * * Return: tvlv handler if found or NULL otherwise. */ -static struct batadv_tvlv_handler -*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) +static struct batadv_tvlv_handler * +batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) { struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL; @@ -722,8 +734,8 @@ static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv) * * Return: tvlv container if found or NULL otherwise. */ -static struct batadv_tvlv_container -*batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) +static struct batadv_tvlv_container * +batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) { struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL; @@ -736,9 +748,7 @@ static struct batadv_tvlv_container if (tvlv_tmp->tvlv_hdr.version != version) continue; - if (!kref_get_unless_zero(&tvlv_tmp->refcount)) - continue; - + kref_get(&tvlv_tmp->refcount); tvlv = tvlv_tmp; break; } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index db4533631834..76925266deed 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2016.1" +#define BATADV_SOURCE_VERSION "2016.2" #endif /* B.A.T.M.A.N. parameters */ @@ -120,6 +120,8 @@ #define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 6) #define BATADV_BLA_CLAIM_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 10) #define BATADV_BLA_WAIT_PERIODS 3 +#define BATADV_BLA_LOOPDETECT_PERIODS 6 +#define BATADV_BLA_LOOPDETECT_TIMEOUT 3000 /* 3 seconds */ #define BATADV_DUPLIST_SIZE 16 #define BATADV_DUPLIST_TIMEOUT 500 /* 500 ms */ @@ -142,10 +144,12 @@ enum batadv_uev_action { BATADV_UEV_ADD = 0, BATADV_UEV_DEL, BATADV_UEV_CHANGE, + BATADV_UEV_LOOPDETECT, }; enum batadv_uev_type { BATADV_UEV_GW = 0, + BATADV_UEV_BLA, }; #define BATADV_GW_THRESHOLD 50 @@ -288,7 +292,7 @@ static inline void _batadv_dbg(int type __always_unused, * * note: can't use ether_addr_equal() as it requires aligned memory * - * Return: 1 if they are the same ethernet addr + * Return: true if they are the same ethernet addr */ static inline bool batadv_compare_eth(const void *data1, const void *data2) { @@ -296,7 +300,8 @@ static inline bool batadv_compare_eth(const void *data1, const void *data2) } /** - * has_timed_out - compares current time (jiffies) and timestamp + timeout + * batadv_has_timed_out - compares current time (jiffies) and timestamp + + * timeout * @timestamp: base value to compare with (in jiffies) * @timeout: added to base value before comparing (in milliseconds) * diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 8caa2c72efa3..c32f24fafe67 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -394,7 +394,8 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, } /** - * batadv_mcast_want_all_ip_count - count nodes with unspecific mcast interest + * batadv_mcast_forw_want_all_ip_count - count nodes with unspecific mcast + * interest * @bat_priv: the bat priv with all the soft interface information * @ethhdr: ethernet header of a packet * @@ -433,7 +434,7 @@ batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv, } /** - * batadv_mcast_want_forw_ipv4_node_get - get a node with an ipv4 flag + * batadv_mcast_forw_ipv4_node_get - get a node with an ipv4 flag * @bat_priv: the bat priv with all the soft interface information * * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and @@ -460,7 +461,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv) } /** - * batadv_mcast_want_forw_ipv6_node_get - get a node with an ipv6 flag + * batadv_mcast_forw_ipv6_node_get - get a node with an ipv6 flag * @bat_priv: the bat priv with all the soft interface information * * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set @@ -487,7 +488,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv) } /** - * batadv_mcast_want_forw_ip_node_get - get a node with an ipv4/ipv6 flag + * batadv_mcast_forw_ip_node_get - get a node with an ipv4/ipv6 flag * @bat_priv: the bat priv with all the soft interface information * @ethhdr: an ethernet header to determine the protocol family from * @@ -511,7 +512,7 @@ batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv, } /** - * batadv_mcast_want_forw_unsnoop_node_get - get a node with an unsnoopable flag + * batadv_mcast_forw_unsnoop_node_get - get a node with an unsnoopable flag * @bat_priv: the bat priv with all the soft interface information * * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index b41719b6487a..678f06865312 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -510,10 +510,10 @@ static u32 batadv_nc_hash_choose(const void *data, u32 size) * @node: node in the local table * @data2: second object to compare the node to * - * Return: 1 if the two entry are the same, 0 otherwise + * Return: true if the two entry are the same, false otherwise */ -static int batadv_nc_hash_compare(const struct hlist_node *node, - const void *data2) +static bool batadv_nc_hash_compare(const struct hlist_node *node, + const void *data2) { const struct batadv_nc_path *nc_path1, *nc_path2; @@ -521,15 +521,13 @@ static int batadv_nc_hash_compare(const struct hlist_node *node, nc_path2 = data2; /* Return 1 if the two keys are identical */ - if (memcmp(nc_path1->prev_hop, nc_path2->prev_hop, - sizeof(nc_path1->prev_hop)) != 0) - return 0; + if (!batadv_compare_eth(nc_path1->prev_hop, nc_path2->prev_hop)) + return false; - if (memcmp(nc_path1->next_hop, nc_path2->next_hop, - sizeof(nc_path1->next_hop)) != 0) - return 0; + if (!batadv_compare_eth(nc_path1->next_hop, nc_path2->next_hop)) + return false; - return 1; + return true; } /** @@ -714,7 +712,7 @@ static void batadv_nc_worker(struct work_struct *work) struct batadv_priv *bat_priv; unsigned long timeout; - delayed_work = container_of(work, struct delayed_work, work); + delayed_work = to_delayed_work(work); priv_nc = container_of(delayed_work, struct batadv_priv_nc, work); bat_priv = container_of(priv_nc, struct batadv_priv, nc); @@ -793,10 +791,10 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, * * Return: the nc_node if found, NULL otherwise. */ -static struct batadv_nc_node -*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh_node, - bool in_coding) +static struct batadv_nc_node * +batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + bool in_coding) { struct batadv_nc_node *nc_node, *nc_node_out = NULL; struct list_head *list; @@ -835,11 +833,11 @@ static struct batadv_nc_node * * Return: the nc_node if found or created, NULL in case of an error. */ -static struct batadv_nc_node -*batadv_nc_get_nc_node(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh_node, - bool in_coding) +static struct batadv_nc_node * +batadv_nc_get_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + bool in_coding) { struct batadv_nc_node *nc_node; spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ @@ -856,8 +854,7 @@ static struct batadv_nc_node if (!nc_node) return NULL; - if (!kref_get_unless_zero(&orig_neigh_node->refcount)) - goto free; + kref_get(&orig_neigh_node->refcount); /* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); @@ -884,10 +881,6 @@ static struct batadv_nc_node spin_unlock_bh(lock); return nc_node; - -free: - kfree(nc_node); - return NULL; } /** diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index c355a824713c..1ff4ee473966 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -54,9 +54,9 @@ static void batadv_purge_orig(struct work_struct *work); * @node: node in the local table * @data2: second object to compare the node to * - * Return: 1 if they are the same originator + * Return: true if they are the same originator */ -int batadv_compare_orig(const struct hlist_node *node, const void *data2) +bool batadv_compare_orig(const struct hlist_node *node, const void *data2) { const void *data1 = container_of(node, struct batadv_orig_node, hash_entry); @@ -282,7 +282,7 @@ void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node) } /** - * batadv_orig_node_get_router - router to the originator depending on iface + * batadv_orig_router_get - router to the originator depending on iface * @orig_node: the orig node for the router * @if_outgoing: the interface where the payload packet has been received or * the OGM should be sent to @@ -374,12 +374,8 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, if (!orig_ifinfo) goto out; - if (if_outgoing != BATADV_IF_DEFAULT && - !kref_get_unless_zero(&if_outgoing->refcount)) { - kfree(orig_ifinfo); - orig_ifinfo = NULL; - goto out; - } + if (if_outgoing != BATADV_IF_DEFAULT) + kref_get(&if_outgoing->refcount); reset_time = jiffies - 1; reset_time -= msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); @@ -455,11 +451,8 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, if (!neigh_ifinfo) goto out; - if (if_outgoing && !kref_get_unless_zero(&if_outgoing->refcount)) { - kfree(neigh_ifinfo); - neigh_ifinfo = NULL; - goto out; - } + if (if_outgoing) + kref_get(&if_outgoing->refcount); INIT_HLIST_NODE(&neigh_ifinfo->list); kref_init(&neigh_ifinfo->refcount); @@ -532,15 +525,11 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, if (hardif_neigh) goto out; - if (!kref_get_unless_zero(&hard_iface->refcount)) - goto out; - hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC); - if (!hardif_neigh) { - batadv_hardif_put(hard_iface); + if (!hardif_neigh) goto out; - } + kref_get(&hard_iface->refcount); INIT_HLIST_NODE(&hardif_neigh->list); ether_addr_copy(hardif_neigh->addr, neigh_addr); hardif_neigh->if_incoming = hard_iface; @@ -643,16 +632,11 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, if (!neigh_node) goto out; - if (!kref_get_unless_zero(&hard_iface->refcount)) { - kfree(neigh_node); - neigh_node = NULL; - goto out; - } - INIT_HLIST_NODE(&neigh_node->list); INIT_HLIST_HEAD(&neigh_node->ifinfo_list); spin_lock_init(&neigh_node->ifinfo_lock); + kref_get(&hard_iface->refcount); ether_addr_copy(neigh_node->addr, neigh_addr); neigh_node->if_incoming = hard_iface; neigh_node->orig_node = orig_node; @@ -1160,6 +1144,9 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, if (hard_iface->soft_iface != bat_priv->soft_iface) continue; + if (!kref_get_unless_zero(&hard_iface->refcount)) + continue; + best_neigh_node = batadv_find_best_neighbor(bat_priv, orig_node, hard_iface); @@ -1167,6 +1154,8 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, best_neigh_node); if (best_neigh_node) batadv_neigh_node_put(best_neigh_node); + + batadv_hardif_put(hard_iface); } rcu_read_unlock(); @@ -1217,7 +1206,7 @@ static void batadv_purge_orig(struct work_struct *work) struct delayed_work *delayed_work; struct batadv_priv *bat_priv; - delayed_work = container_of(work, struct delayed_work, work); + delayed_work = to_delayed_work(work); bat_priv = container_of(delayed_work, struct batadv_priv, orig_work); _batadv_purge_orig(bat_priv); queue_delayed_work(batadv_event_workqueue, diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 4e8b67f11051..64a8951e5844 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -33,7 +33,7 @@ struct seq_file; -int batadv_compare_orig(const struct hlist_node *node, const void *data2); +bool batadv_compare_orig(const struct hlist_node *node, const void *data2); int batadv_originator_init(struct batadv_priv *bat_priv); void batadv_originator_free(struct batadv_priv *bat_priv); void batadv_purge_orig_ref(struct batadv_priv *bat_priv); diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 8a8d7ca1a5cf..372128ddb474 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -175,6 +175,7 @@ enum batadv_bla_claimframe { BATADV_CLAIM_TYPE_UNCLAIM = 0x01, BATADV_CLAIM_TYPE_ANNOUNCE = 0x02, BATADV_CLAIM_TYPE_REQUEST = 0x03, + BATADV_CLAIM_TYPE_LOOPDETECT = 0x04, }; /** @@ -501,7 +502,7 @@ struct batadv_coded_packet { #pragma pack() /** - * struct batadv_unicast_tvlv - generic unicast packet with tvlv payload + * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the genereal header * @ttl: time to live for this packet, part of the genereal header diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index b781bf753250..ae850f2d11cb 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -100,10 +100,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, if (curr_router) batadv_neigh_node_put(curr_router); - /* increase refcount of new best neighbor */ - if (neigh_node && !kref_get_unless_zero(&neigh_node->refcount)) - neigh_node = NULL; - spin_lock_bh(&orig_node->neigh_list_lock); /* curr_router used earlier may not be the current orig_ifinfo->router * anymore because it was dereferenced outside of the neigh_list_lock @@ -114,6 +110,10 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, */ curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + /* increase refcount of new best neighbor */ + if (neigh_node) + kref_get(&neigh_node->refcount); + rcu_assign_pointer(orig_ifinfo->router, neigh_node); spin_unlock_bh(&orig_node->neigh_list_lock); batadv_orig_ifinfo_put(orig_ifinfo); @@ -163,18 +163,18 @@ out: * doesn't change otherwise. * * Return: - * 0 if the packet is to be accepted. - * 1 if the packet is to be ignored. + * false if the packet is to be accepted. + * true if the packet is to be ignored. */ -int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, - s32 seq_old_max_diff, unsigned long *last_reset, - bool *protection_started) +bool batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, + s32 seq_old_max_diff, unsigned long *last_reset, + bool *protection_started) { if (seq_num_diff <= -seq_old_max_diff || seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) { if (!batadv_has_timed_out(*last_reset, BATADV_RESET_PROTECTION_MS)) - return 1; + return true; *last_reset = jiffies; if (protection_started) @@ -183,7 +183,7 @@ int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, "old packet received, start protection\n"); } - return 0; + return false; } bool batadv_check_management_packet(struct sk_buff *skb, @@ -718,8 +718,9 @@ out: return ret; } -static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, - struct sk_buff *skb, int hdr_len) { +static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_len) +{ struct batadv_unicast_packet *unicast_packet; struct batadv_hard_iface *primary_if; struct batadv_orig_node *orig_node; @@ -730,11 +731,11 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, /* check if there is enough data before accessing it */ if (!pskb_may_pull(skb, hdr_len + ETH_HLEN)) - return 0; + return false; /* create a copy of the skb (in case of for re-routing) to modify it. */ if (skb_cow(skb, sizeof(*unicast_packet)) < 0) - return 0; + return false; unicast_packet = (struct batadv_unicast_packet *)skb->data; vid = batadv_get_vid(skb, hdr_len); @@ -758,7 +759,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * table. If not, let the packet go untouched anyway because * there is nothing the node can do */ - return 1; + return true; } /* retrieve the TTVN known by this node for the packet destination. This @@ -774,7 +775,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * not be possible to deliver it */ if (!orig_node) - return 0; + return false; curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn); batadv_orig_node_put(orig_node); @@ -785,7 +786,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, */ is_old_ttvn = batadv_seq_before(unicast_packet->ttvn, curr_ttvn); if (!is_old_ttvn) - return 1; + return true; old_ttvn = unicast_packet->ttvn; /* the packet was forged based on outdated network information. Its @@ -798,7 +799,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n", unicast_packet->dest, ethhdr->h_dest, old_ttvn, curr_ttvn); - return 1; + return true; } /* the packet has not been re-routed: either the destination is @@ -806,14 +807,14 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * it is possible to drop the packet */ if (!batadv_is_my_client(bat_priv, ethhdr->h_dest, vid)) - return 0; + return false; /* update the header in order to let the packet be delivered to this * node's soft interface */ primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) - return 0; + return false; ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr); @@ -821,7 +822,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, unicast_packet->ttvn = curr_ttvn; - return 1; + return true; } /** @@ -912,7 +913,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, hdr_size)) goto rx_success; - batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, + batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, orig_node); rx_success: @@ -1122,8 +1123,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, goto rx_success; /* broadcast for me */ - batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, - orig_node); + batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, orig_node); rx_success: ret = NET_RX_SUCCESS; diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 02a5caa84127..05c3ff42e181 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -51,8 +51,8 @@ struct batadv_neigh_node * batadv_find_router(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if); -int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, - s32 seq_old_max_diff, unsigned long *last_reset, - bool *protection_started); +bool batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, + s32 seq_old_max_diff, unsigned long *last_reset, + bool *protection_started); #endif /* _NET_BATMAN_ADV_ROUTING_H_ */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 76417850d3fc..f2f125684ed9 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -26,6 +26,7 @@ #include <linux/if.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/printk.h> @@ -552,7 +553,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) struct net_device *soft_iface; struct batadv_priv *bat_priv; - delayed_work = container_of(work, struct delayed_work, work); + delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); soft_iface = forw_packet->if_incoming->soft_iface; @@ -577,10 +578,15 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (forw_packet->num_packets >= hard_iface->num_bcasts) continue; + if (!kref_get_unless_zero(&hard_iface->refcount)) + continue; + /* send a copy of the saved skb */ skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb1) batadv_send_broadcast_skb(skb1, hard_iface); + + batadv_hardif_put(hard_iface); } rcu_read_unlock(); @@ -604,7 +610,7 @@ void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work) struct batadv_forw_packet *forw_packet; struct batadv_priv *bat_priv; - delayed_work = container_of(work, struct delayed_work, work); + delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 8a136b6a1ff0..343d2c904399 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -186,7 +186,6 @@ static int batadv_interface_tx(struct sk_buff *skb, struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_hard_iface *primary_if = NULL; struct batadv_bcast_packet *bcast_packet; - __be16 ethertype = htons(ETH_P_BATMAN); static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x00}; static const u8 ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, @@ -208,7 +207,7 @@ static int batadv_interface_tx(struct sk_buff *skb, if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; - soft_iface->trans_start = jiffies; + netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); @@ -216,7 +215,8 @@ static int batadv_interface_tx(struct sk_buff *skb, case ETH_P_8021Q: vhdr = vlan_eth_hdr(skb); - if (vhdr->h_vlan_encapsulated_proto != ethertype) { + /* drop batman-in-batman packets to prevent loops */ + if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN)) { network_offset += VLAN_HLEN; break; } @@ -381,13 +381,29 @@ end: return NETDEV_TX_OK; } +/** + * batadv_interface_rx - receive ethernet frame on local batman-adv interface + * @soft_iface: local interface which will receive the ethernet frame + * @skb: ethernet frame for @soft_iface + * @hdr_size: size of already parsed batman-adv header + * @orig_node: originator from which the batman-adv packet was sent + * + * Sends a ethernet frame to the receive path of the local @soft_iface. + * skb->data has still point to the batman-adv header with the size @hdr_size. + * The caller has to have parsed this header already and made sure that at least + * @hdr_size bytes are still available for pull in @skb. + * + * The packet may still get dropped. This can happen when the encapsulated + * ethernet frame is invalid or contains again an batman-adv packet. Also + * unicast packets will be dropped directly when it was sent between two + * isolated clients. + */ void batadv_interface_rx(struct net_device *soft_iface, - struct sk_buff *skb, struct batadv_hard_iface *recv_if, - int hdr_size, struct batadv_orig_node *orig_node) + struct sk_buff *skb, int hdr_size, + struct batadv_orig_node *orig_node) { struct batadv_bcast_packet *batadv_bcast_packet; struct batadv_priv *bat_priv = netdev_priv(soft_iface); - __be16 ethertype = htons(ETH_P_BATMAN); struct vlan_ethhdr *vhdr; struct ethhdr *ethhdr; unsigned short vid; @@ -396,10 +412,6 @@ void batadv_interface_rx(struct net_device *soft_iface, batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST); - /* check if enough space is available for pulling, and pull */ - if (!pskb_may_pull(skb, hdr_size)) - goto dropped; - skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); @@ -421,7 +433,8 @@ void batadv_interface_rx(struct net_device *soft_iface, vhdr = (struct vlan_ethhdr *)skb->data; - if (vhdr->h_vlan_encapsulated_proto != ethertype) + /* drop batman-in-batman packets to prevent loops */ + if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN)) break; /* fall through */ @@ -543,7 +556,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, } /** - * batadv_create_vlan - allocate the needed resources for a new vlan + * batadv_softif_create_vlan - allocate the needed resources for a new vlan * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier * @@ -872,13 +885,14 @@ static int batadv_softif_slave_add(struct net_device *dev, struct net_device *slave_dev) { struct batadv_hard_iface *hard_iface; + struct net *net = dev_net(dev); int ret = -EINVAL; hard_iface = batadv_hardif_get_by_netdev(slave_dev); if (!hard_iface || hard_iface->soft_iface) goto out; - ret = batadv_hardif_enable_interface(hard_iface, dev->name); + ret = batadv_hardif_enable_interface(hard_iface, net, dev->name); out: if (hard_iface) @@ -959,7 +973,7 @@ static void batadv_softif_init_early(struct net_device *dev) dev->netdev_ops = &batadv_netdev_ops; dev->destructor = batadv_softif_free; - dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL; dev->priv_flags |= IFF_NO_QUEUE; /* can't call min_mtu, because the needed variables @@ -975,7 +989,7 @@ static void batadv_softif_init_early(struct net_device *dev) memset(priv, 0, sizeof(*priv)); } -struct net_device *batadv_softif_create(const char *name) +struct net_device *batadv_softif_create(struct net *net, const char *name) { struct net_device *soft_iface; int ret; @@ -985,6 +999,8 @@ struct net_device *batadv_softif_create(const char *name) if (!soft_iface) return NULL; + dev_net_set(soft_iface, net); + soft_iface->rtnl_link_ops = &batadv_link_ops; ret = register_netdevice(soft_iface); @@ -1029,12 +1045,12 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface, unregister_netdevice_queue(soft_iface, head); } -int batadv_softif_is_valid(const struct net_device *net_dev) +bool batadv_softif_is_valid(const struct net_device *net_dev) { if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) - return 1; + return true; - return 0; + return false; } struct rtnl_link_ops batadv_link_ops __read_mostly = { diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 9ae265703d23..ec303ddbf647 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -20,18 +20,20 @@ #include "main.h" +#include <linux/types.h> #include <net/rtnetlink.h> struct net_device; +struct net; struct sk_buff; int batadv_skb_head_push(struct sk_buff *skb, unsigned int len); void batadv_interface_rx(struct net_device *soft_iface, - struct sk_buff *skb, struct batadv_hard_iface *recv_if, - int hdr_size, struct batadv_orig_node *orig_node); -struct net_device *batadv_softif_create(const char *name); + struct sk_buff *skb, int hdr_size, + struct batadv_orig_node *orig_node); +struct net_device *batadv_softif_create(struct net *net, const char *name); void batadv_softif_destroy_sysfs(struct net_device *soft_iface); -int batadv_softif_is_valid(const struct net_device *net_dev); +bool batadv_softif_is_valid(const struct net_device *net_dev); extern struct rtnl_link_ops batadv_link_ops; int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan); diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index e7cf51333a36..414b2074165f 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -116,11 +116,13 @@ batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj) static char *batadv_uev_action_str[] = { "add", "del", - "change" + "change", + "loopdetect", }; static char *batadv_uev_type_str[] = { - "gw" + "gw", + "bla", }; /* Use this, if you have customized show and store functions for vlan attrs */ @@ -830,6 +832,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, size_t count) { struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + struct net *net = dev_net(net_dev); struct batadv_hard_iface *hard_iface; int status_tmp = -1; int ret = count; @@ -873,7 +876,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_AUTO); - ret = batadv_hardif_enable_interface(hard_iface, buff); + ret = batadv_hardif_enable_interface(hard_iface, net, buff); unlock: rtnl_unlock(); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 9b4551a86535..feaf492b01ca 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -43,7 +43,6 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> -#include <net/net_namespace.h> #include "bridge_loop_avoidance.h" #include "hard-interface.h" @@ -76,9 +75,9 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, * * Compare the MAC address and the VLAN ID of the two TT entries and check if * they are the same TT client. - * Return: 1 if the two TT clients are the same, 0 otherwise + * Return: true if the two TT clients are the same, false otherwise */ -static int batadv_compare_tt(const struct hlist_node *node, const void *data2) +static bool batadv_compare_tt(const struct hlist_node *node, const void *data2) { const void *data1 = container_of(node, struct batadv_tt_common_entry, hash_entry); @@ -585,6 +584,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global = NULL; + struct net *net = dev_net(soft_iface); struct batadv_softif_vlan *vlan; struct net_device *in_dev = NULL; struct hlist_head *head; @@ -596,7 +596,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, u32 match_mark; if (ifindex != BATADV_NULL_IFINDEX) - in_dev = dev_get_by_index(&init_net, ifindex); + in_dev = dev_get_by_index(net, ifindex); tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid); @@ -1010,8 +1010,8 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", net_dev->name, (u8)atomic_read(&bat_priv->tt.vn)); - seq_printf(seq, " %-13s %s %-8s %-9s (%-10s)\n", "Client", "VID", - "Flags", "Last seen", "CRC"); + seq_puts(seq, + " Client VID Flags Last seen (CRC )\n"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1680,9 +1680,8 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - seq_printf(seq, " %-13s %s %s %-15s %s (%-10s) %s\n", - "Client", "VID", "(TTVN)", "Originator", "(Curr TTVN)", - "CRC", "Flags"); + seq_puts(seq, + " Client VID (TTVN) Originator (Curr TTVN) (CRC ) Flags\n"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2362,19 +2361,19 @@ unlock: * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype * - * Return: 1 if the entry is a valid, 0 otherwise. + * Return: true if the entry is a valid, false otherwise. */ -static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) +static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW) - return 0; - return 1; + return false; + return true; } -static int batadv_tt_global_valid(const void *entry_ptr, - const void *data_ptr) +static bool batadv_tt_global_valid(const void *entry_ptr, + const void *data_ptr) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; const struct batadv_tt_global_entry *tt_global_entry; @@ -2382,7 +2381,7 @@ static int batadv_tt_global_valid(const void *entry_ptr, if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM || tt_common_entry->flags & BATADV_TT_CLIENT_TEMP) - return 0; + return false; tt_global_entry = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -2404,7 +2403,8 @@ static int batadv_tt_global_valid(const void *entry_ptr, static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, void *tvlv_buff, u16 tt_len, - int (*valid_cb)(const void *, const void *), + bool (*valid_cb)(const void *, + const void *), void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; @@ -2553,11 +2553,11 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, * * Return: true if the TT Request was sent, false otherwise */ -static int batadv_send_tt_request(struct batadv_priv *bat_priv, - struct batadv_orig_node *dst_orig_node, - u8 ttvn, - struct batadv_tvlv_tt_vlan_data *tt_vlan, - u16 num_vlan, bool full_table) +static bool batadv_send_tt_request(struct batadv_priv *bat_priv, + struct batadv_orig_node *dst_orig_node, + u8 ttvn, + struct batadv_tvlv_tt_vlan_data *tt_vlan, + u16 num_vlan, bool full_table) { struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_tt_req_node *tt_req_node = NULL; @@ -3201,7 +3201,7 @@ static void batadv_tt_purge(struct work_struct *work) struct batadv_priv_tt *priv_tt; struct batadv_priv *bat_priv; - delayed_work = container_of(work, struct delayed_work, work); + delayed_work = to_delayed_work(work); priv_tt = container_of(delayed_work, struct batadv_priv_tt, work); bat_priv = container_of(priv_tt, struct batadv_priv, tt); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 1e47fbe8bb7b..6a577f4f8ba7 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -657,6 +657,9 @@ struct batadv_priv_tt { * @num_requests: number of bla requests in flight * @claim_hash: hash table containing mesh nodes this host has claimed * @backbone_hash: hash table containing all detected backbone gateways + * @loopdetect_addr: MAC address used for own loopdetection frames + * @loopdetect_lasttime: time when the loopdetection frames were sent + * @loopdetect_next: how many periods to wait for the next loopdetect process * @bcast_duplist: recently received broadcast packets array (for broadcast * duplicate suppression) * @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist @@ -668,6 +671,9 @@ struct batadv_priv_bla { atomic_t num_requests; struct batadv_hashtable *claim_hash; struct batadv_hashtable *backbone_hash; + u8 loopdetect_addr[ETH_ALEN]; + unsigned long loopdetect_lasttime; + atomic_t loopdetect_next; struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; int bcast_duplist_curr; /* protects bcast_duplist & bcast_duplist_curr */ @@ -1012,6 +1018,7 @@ struct batadv_socket_packet { * resolved * @crc: crc16 checksum over all claims * @crc_lock: lock protecting crc + * @report_work: work struct for reporting detected loops * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ @@ -1025,6 +1032,7 @@ struct batadv_bla_backbone_gw { atomic_t request_sent; u16 crc; spinlock_t crc_lock; /* protects crc */ + struct work_struct report_work; struct kref refcount; struct rcu_head rcu; }; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 8a4cc2f7f0db..780089d75915 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -68,7 +68,7 @@ struct lowpan_peer { struct in6_addr peer_addr; }; -struct lowpan_dev { +struct lowpan_btle_dev { struct list_head list; struct hci_dev *hdev; @@ -80,18 +80,21 @@ struct lowpan_dev { struct delayed_work notify_peers; }; -static inline struct lowpan_dev *lowpan_dev(const struct net_device *netdev) +static inline struct lowpan_btle_dev * +lowpan_btle_dev(const struct net_device *netdev) { - return (struct lowpan_dev *)lowpan_priv(netdev)->priv; + return (struct lowpan_btle_dev *)lowpan_dev(netdev)->priv; } -static inline void peer_add(struct lowpan_dev *dev, struct lowpan_peer *peer) +static inline void peer_add(struct lowpan_btle_dev *dev, + struct lowpan_peer *peer) { list_add_rcu(&peer->list, &dev->peers); atomic_inc(&dev->peer_count); } -static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer) +static inline bool peer_del(struct lowpan_btle_dev *dev, + struct lowpan_peer *peer) { list_del_rcu(&peer->list); kfree_rcu(peer, rcu); @@ -106,7 +109,7 @@ static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer) return false; } -static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev, +static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_btle_dev *dev, bdaddr_t *ba, __u8 type) { struct lowpan_peer *peer; @@ -134,8 +137,8 @@ static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev, return NULL; } -static inline struct lowpan_peer *__peer_lookup_chan(struct lowpan_dev *dev, - struct l2cap_chan *chan) +static inline struct lowpan_peer * +__peer_lookup_chan(struct lowpan_btle_dev *dev, struct l2cap_chan *chan) { struct lowpan_peer *peer; @@ -147,8 +150,8 @@ static inline struct lowpan_peer *__peer_lookup_chan(struct lowpan_dev *dev, return NULL; } -static inline struct lowpan_peer *__peer_lookup_conn(struct lowpan_dev *dev, - struct l2cap_conn *conn) +static inline struct lowpan_peer * +__peer_lookup_conn(struct lowpan_btle_dev *dev, struct l2cap_conn *conn) { struct lowpan_peer *peer; @@ -160,7 +163,7 @@ static inline struct lowpan_peer *__peer_lookup_conn(struct lowpan_dev *dev, return NULL; } -static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev, +static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, struct in6_addr *daddr, struct sk_buff *skb) { @@ -220,7 +223,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev, static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn) { - struct lowpan_dev *entry; + struct lowpan_btle_dev *entry; struct lowpan_peer *peer = NULL; rcu_read_lock(); @@ -236,10 +239,10 @@ static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn) return peer; } -static struct lowpan_dev *lookup_dev(struct l2cap_conn *conn) +static struct lowpan_btle_dev *lookup_dev(struct l2cap_conn *conn) { - struct lowpan_dev *entry; - struct lowpan_dev *dev = NULL; + struct lowpan_btle_dev *entry; + struct lowpan_btle_dev *dev = NULL; rcu_read_lock(); @@ -270,10 +273,10 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, struct l2cap_chan *chan) { const u8 *saddr, *daddr; - struct lowpan_dev *dev; + struct lowpan_btle_dev *dev; struct lowpan_peer *peer; - dev = lowpan_dev(netdev); + dev = lowpan_btle_dev(netdev); rcu_read_lock(); peer = __peer_lookup_chan(dev, chan); @@ -375,7 +378,7 @@ drop: /* Packet from BT LE device */ static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { - struct lowpan_dev *dev; + struct lowpan_btle_dev *dev; struct lowpan_peer *peer; int err; @@ -431,15 +434,18 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev, bdaddr_t *peer_addr, u8 *peer_addr_type) { struct in6_addr ipv6_daddr; - struct lowpan_dev *dev; + struct ipv6hdr *hdr; + struct lowpan_btle_dev *dev; struct lowpan_peer *peer; bdaddr_t addr, *any = BDADDR_ANY; u8 *daddr = any->b; int err, status = 0; - dev = lowpan_dev(netdev); + hdr = ipv6_hdr(skb); + + dev = lowpan_btle_dev(netdev); - memcpy(&ipv6_daddr, &lowpan_cb(skb)->addr, sizeof(ipv6_daddr)); + memcpy(&ipv6_daddr, &hdr->daddr, sizeof(ipv6_daddr)); if (ipv6_addr_is_multicast(&ipv6_daddr)) { lowpan_cb(skb)->chan = NULL; @@ -489,15 +495,9 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev, unsigned short type, const void *_daddr, const void *_saddr, unsigned int len) { - struct ipv6hdr *hdr; - if (type != ETH_P_IPV6) return -EINVAL; - hdr = ipv6_hdr(skb); - - memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, sizeof(struct in6_addr)); - return 0; } @@ -543,19 +543,19 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb, static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) { struct sk_buff *local_skb; - struct lowpan_dev *entry; + struct lowpan_btle_dev *entry; int err = 0; rcu_read_lock(); list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { struct lowpan_peer *pentry; - struct lowpan_dev *dev; + struct lowpan_btle_dev *dev; if (entry->netdev != netdev) continue; - dev = lowpan_dev(entry->netdev); + dev = lowpan_btle_dev(entry->netdev); list_for_each_entry_rcu(pentry, &dev->peers, list) { int ret; @@ -723,8 +723,8 @@ static void ifdown(struct net_device *netdev) static void do_notify_peers(struct work_struct *work) { - struct lowpan_dev *dev = container_of(work, struct lowpan_dev, - notify_peers.work); + struct lowpan_btle_dev *dev = container_of(work, struct lowpan_btle_dev, + notify_peers.work); netdev_notify_peers(dev->netdev); /* send neighbour adv at startup */ } @@ -766,7 +766,7 @@ static void set_ip_addr_bits(u8 addr_type, u8 *addr) } static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, - struct lowpan_dev *dev) + struct lowpan_btle_dev *dev) { struct lowpan_peer *peer; @@ -803,12 +803,12 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, return peer->chan; } -static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) +static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev) { struct net_device *netdev; int err = 0; - netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev)), + netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_btle_dev)), IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN, netdev_setup); if (!netdev) @@ -820,7 +820,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev); SET_NETDEV_DEVTYPE(netdev, &bt_type); - *dev = lowpan_dev(netdev); + *dev = lowpan_btle_dev(netdev); (*dev)->netdev = netdev; (*dev)->hdev = chan->conn->hcon->hdev; INIT_LIST_HEAD(&(*dev)->peers); @@ -853,7 +853,7 @@ out: static inline void chan_ready_cb(struct l2cap_chan *chan) { - struct lowpan_dev *dev; + struct lowpan_btle_dev *dev; dev = lookup_dev(chan->conn); @@ -890,8 +890,9 @@ static inline struct l2cap_chan *chan_new_conn_cb(struct l2cap_chan *pchan) static void delete_netdev(struct work_struct *work) { - struct lowpan_dev *entry = container_of(work, struct lowpan_dev, - delete_netdev); + struct lowpan_btle_dev *entry = container_of(work, + struct lowpan_btle_dev, + delete_netdev); lowpan_unregister_netdev(entry->netdev); @@ -900,8 +901,8 @@ static void delete_netdev(struct work_struct *work) static void chan_close_cb(struct l2cap_chan *chan) { - struct lowpan_dev *entry; - struct lowpan_dev *dev = NULL; + struct lowpan_btle_dev *entry; + struct lowpan_btle_dev *dev = NULL; struct lowpan_peer *peer; int err = -ENOENT; bool last = false, remove = true; @@ -921,7 +922,7 @@ static void chan_close_cb(struct l2cap_chan *chan) spin_lock(&devices_lock); list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { - dev = lowpan_dev(entry->netdev); + dev = lowpan_btle_dev(entry->netdev); peer = __peer_lookup_chan(dev, chan); if (peer) { last = peer_del(dev, peer); @@ -1131,7 +1132,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, static void disconnect_all_peers(void) { - struct lowpan_dev *entry; + struct lowpan_btle_dev *entry; struct lowpan_peer *peer, *tmp_peer, *new_peer; struct list_head peers; @@ -1291,7 +1292,7 @@ static ssize_t lowpan_control_write(struct file *fp, static int lowpan_control_show(struct seq_file *f, void *ptr) { - struct lowpan_dev *entry; + struct lowpan_btle_dev *entry; struct lowpan_peer *peer; spin_lock(&devices_lock); @@ -1322,7 +1323,7 @@ static const struct file_operations lowpan_control_fops = { static void disconnect_devices(void) { - struct lowpan_dev *entry, *tmp, *new_dev; + struct lowpan_btle_dev *entry, *tmp, *new_dev; struct list_head devices; INIT_LIST_HEAD(&devices); @@ -1360,7 +1361,7 @@ static int device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); - struct lowpan_dev *entry; + struct lowpan_btle_dev *entry; if (netdev->type != ARPHRD_6LOWPAN) return NOTIFY_DONE; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 955eda93e66f..3df7aefb7663 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -65,7 +65,7 @@ static const char *const bt_slock_key_strings[BT_MAX_PROTO] = { void bt_sock_reclassify_lock(struct sock *sk, int proto) { BUG_ON(!sk); - BUG_ON(sock_owned_by_user(sk)); + BUG_ON(!sock_allow_reclassification(sk)); sock_lock_init_class_and_name(sk, bt_slock_key_strings[proto], &bt_slock_key[proto], diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 6ceb5d36a32b..f4fcb4a9d5c1 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -188,7 +188,7 @@ static netdev_tx_t bnep_net_xmit(struct sk_buff *skb, * So we have to queue them and wake up session thread which is sleeping * on the sk_sleep(sk). */ - dev->trans_start = jiffies; + netif_trans_update(dev); skb_queue_tail(&sk->sk_write_queue, skb); wake_up_interruptible(sk_sleep(sk)); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2713fc86e85a..45a9fc68c677 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3139,10 +3139,10 @@ void hci_unregister_dev(struct hci_dev *hdev) list_del(&hdev->list); write_unlock(&hci_dev_list_lock); - hci_dev_do_close(hdev); - cancel_work_sync(&hdev->power_on); + hci_dev_do_close(hdev); + if (!test_bit(HCI_INIT, &hdev->flags) && !hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG)) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index c162af5d16bf..d4b3dd5413be 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4727,6 +4727,19 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, u32 flags; u8 *ptr, real_len; + switch (type) { + case LE_ADV_IND: + case LE_ADV_DIRECT_IND: + case LE_ADV_SCAN_IND: + case LE_ADV_NONCONN_IND: + case LE_ADV_SCAN_RSP: + break; + default: + BT_ERR_RATELIMITED("Unknown advetising packet type: 0x%02x", + type); + return; + } + /* Find the end of the data in case the report contains padded zero * bytes at the end causing an invalid length value. * diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 6e125d76df0d..c045b3c54768 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1065,6 +1065,9 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) flags |= LE_AD_LIMITED; + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; + if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { /* If a discovery flag wasn't provided, simply use the global * settings. @@ -1072,9 +1075,6 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) if (!flags) flags |= mgmt_get_adv_discov_flags(hdev); - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - flags |= LE_AD_NO_BREDR; - /* If flags would still be empty, then there is no need to * include the "Flags" AD field". */ diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index e4cae72895a7..388ee8b59145 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -778,7 +778,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, } if (sec.level < BT_SECURITY_LOW || - sec.level > BT_SECURITY_HIGH) { + sec.level > BT_SECURITY_FIPS) { err = -EINVAL; break; } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 60a3dbfca8a1..d99b2009771a 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -113,7 +113,9 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p = NULL; unsigned long args[4]; + int ret = -EOPNOTSUPP; if (copy_from_user(args, rq->ifr_data, sizeof(args))) return -EFAULT; @@ -183,25 +185,29 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - return br_set_forward_delay(br, args[1]); + ret = br_set_forward_delay(br, args[1]); + break; case BRCTL_SET_BRIDGE_HELLO_TIME: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - return br_set_hello_time(br, args[1]); + ret = br_set_hello_time(br, args[1]); + break; case BRCTL_SET_BRIDGE_MAX_AGE: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - return br_set_max_age(br, args[1]); + ret = br_set_max_age(br, args[1]); + break; case BRCTL_SET_AGEING_TIME: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - return br_set_ageing_time(br, args[1]); + ret = br_set_ageing_time(br, args[1]); + break; case BRCTL_GET_PORT_INFO: { @@ -241,20 +247,19 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return -EPERM; br_stp_set_enabled(br, args[1]); - return 0; + ret = 0; + break; case BRCTL_SET_BRIDGE_PRIORITY: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; br_stp_set_bridge_priority(br, args[1]); - return 0; + ret = 0; + break; case BRCTL_SET_PORT_PRIORITY: { - struct net_bridge_port *p; - int ret; - if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -264,14 +269,11 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) else ret = br_stp_set_port_priority(p, args[2]); spin_unlock_bh(&br->lock); - return ret; + break; } case BRCTL_SET_PATH_COST: { - struct net_bridge_port *p; - int ret; - if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -281,8 +283,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) else ret = br_stp_set_path_cost(p, args[2]); spin_unlock_bh(&br->lock); - - return ret; + break; } case BRCTL_GET_FDB_ENTRIES: @@ -290,7 +291,14 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) args[2], args[3]); } - return -EOPNOTSUPP; + if (!ret) { + if (p) + br_ifinfo_notify(RTM_NEWLINK, p); + else + netdev_state_change(br->dev); + } + + return ret; } static int old_deviceless(struct net *net, void __user *uarg) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 44114a94c576..2d25979273a6 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -217,13 +217,13 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb) len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); + __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); + __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -236,7 +236,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb) return 0; inhdr_error: - IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: return -1; } diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index d61f56efc8dc..5e59a8457e7b 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -122,13 +122,13 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + ip6h_len > skb->len) { - IP6_INC_STATS_BH(net, idev, - IPSTATS_MIB_INTRUNCATEDPKTS); + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - IP6_INC_STATS_BH(net, idev, - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INDISCARDS); goto drop; } } @@ -142,7 +142,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) return 0; inhdr_error: - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); drop: return -1; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e9c635eae24d..a5343c7232bf 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -135,9 +135,9 @@ static inline size_t br_port_info_size(void) + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_NO */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_TOPOLOGY_CHANGE_ACK */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_CONFIG_PENDING */ - + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_MESSAGE_AGE_TIMER */ - + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_FORWARD_DELAY_TIMER */ - + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */ + + nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_MESSAGE_AGE_TIMER */ + + nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_FORWARD_DELAY_TIMER */ + + nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */ #endif @@ -190,13 +190,16 @@ static int br_port_fill_attrs(struct sk_buff *skb, return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); - if (nla_put_u64(skb, IFLA_BRPORT_MESSAGE_AGE_TIMER, timerval)) + if (nla_put_u64_64bit(skb, IFLA_BRPORT_MESSAGE_AGE_TIMER, timerval, + IFLA_BRPORT_PAD)) return -EMSGSIZE; timerval = br_timer_value(&p->forward_delay_timer); - if (nla_put_u64(skb, IFLA_BRPORT_FORWARD_DELAY_TIMER, timerval)) + if (nla_put_u64_64bit(skb, IFLA_BRPORT_FORWARD_DELAY_TIMER, timerval, + IFLA_BRPORT_PAD)) return -EMSGSIZE; timerval = br_timer_value(&p->hold_timer); - if (nla_put_u64(skb, IFLA_BRPORT_HOLD_TIMER, timerval)) + if (nla_put_u64_64bit(skb, IFLA_BRPORT_HOLD_TIMER, timerval, + IFLA_BRPORT_PAD)) return -EMSGSIZE; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING @@ -847,6 +850,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_NF_CALL_IP6TABLES] = { .type = NLA_U8 }, [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 }, [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 }, + [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -918,6 +922,14 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (err) return err; } + + if (data[IFLA_BR_VLAN_STATS_ENABLED]) { + __u8 vlan_stats = nla_get_u8(data[IFLA_BR_VLAN_STATS_ENABLED]); + + err = br_vlan_set_stats(br, vlan_stats); + if (err) + return err; + } #endif if (data[IFLA_BR_GROUP_FWD_MASK]) { @@ -1079,6 +1091,7 @@ static size_t br_get_size(const struct net_device *brdev) #ifdef CONFIG_BRIDGE_VLAN_FILTERING nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */ nla_total_size(sizeof(u16)) + /* IFLA_BR_VLAN_DEFAULT_PVID */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_STATS_ENABLED */ #endif nla_total_size(sizeof(u16)) + /* IFLA_BR_GROUP_FWD_MASK */ nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_ROOT_ID */ @@ -1087,10 +1100,10 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u32)) + /* IFLA_BR_ROOT_PATH_COST */ nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE */ nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE_DETECTED */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_HELLO_TIMER */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_TCN_TIMER */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_GC_TIMER */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_HELLO_TIMER */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TCN_TIMER */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_GC_TIMER */ nla_total_size(ETH_ALEN) + /* IFLA_BR_GROUP_ADDR */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_ROUTER */ @@ -1101,12 +1114,12 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_MAX */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_LAST_MEMBER_CNT */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_STARTUP_QUERY_CNT */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_LAST_MEMBER_INTVL */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_MEMBERSHIP_INTVL */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERIER_INTVL */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */ - nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_LAST_MEMBER_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_MEMBERSHIP_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERIER_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */ #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */ @@ -1129,16 +1142,17 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) u64 clockval; clockval = br_timer_value(&br->hello_timer); - if (nla_put_u64(skb, IFLA_BR_HELLO_TIMER, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_HELLO_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; clockval = br_timer_value(&br->tcn_timer); - if (nla_put_u64(skb, IFLA_BR_TCN_TIMER, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_TCN_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; clockval = br_timer_value(&br->topology_change_timer); - if (nla_put_u64(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = br_timer_value(&br->gc_timer); - if (nla_put_u64(skb, IFLA_BR_GC_TIMER, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) || @@ -1163,7 +1177,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) #ifdef CONFIG_BRIDGE_VLAN_FILTERING if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) || - nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid)) + nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid) || + nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br->vlan_stats_enabled)) return -EMSGSIZE; #endif #ifdef CONFIG_BRIDGE_IGMP_SNOOPING @@ -1182,22 +1197,28 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_last_member_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_membership_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_querier_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_query_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_query_response_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; clockval = jiffies_to_clock_t(br->multicast_startup_query_interval); - if (nla_put_u64(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval)) + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval, + IFLA_BR_PAD)) return -EMSGSIZE; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) @@ -1213,6 +1234,69 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return 0; } +static size_t br_get_linkxstats_size(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + int numvls = 0; + + vg = br_vlan_group(br); + if (!vg) + return 0; + + /* we need to count all, even placeholder entries */ + list_for_each_entry(v, &vg->vlan_list, vlist) + numvls++; + + /* account for the vlans and the link xstats type nest attribute */ + return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) + + nla_total_size(0); +} + +static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, + int *prividx) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct nlattr *nest; + int vl_idx = 0; + + vg = br_vlan_group(br); + if (!vg) + goto out; + nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); + if (!nest) + return -EMSGSIZE; + list_for_each_entry(v, &vg->vlan_list, vlist) { + struct bridge_vlan_xstats vxi; + struct br_vlan_stats stats; + + if (vl_idx++ < *prividx) + continue; + memset(&vxi, 0, sizeof(vxi)); + vxi.vid = v->vid; + br_vlan_get_stats(v, &stats); + vxi.rx_bytes = stats.rx_bytes; + vxi.rx_packets = stats.rx_packets; + vxi.tx_bytes = stats.tx_bytes; + vxi.tx_packets = stats.tx_packets; + + if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi)) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + *prividx = 0; +out: + return 0; + +nla_put_failure: + nla_nest_end(skb, nest); + *prividx = vl_idx; + + return -EMSGSIZE; +} static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, @@ -1231,6 +1315,8 @@ struct rtnl_link_ops br_link_ops __read_mostly = { .dellink = br_dev_delete, .get_size = br_get_size, .fill_info = br_fill_info, + .fill_linkxstats = br_fill_linkxstats, + .get_linkxstats_size = br_get_linkxstats_size, .slave_maxtype = IFLA_BRPORT_MAX, .slave_policy = br_port_policy, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d9da857182ef..c7fb5d7a7218 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -77,12 +77,21 @@ struct bridge_mcast_querier { }; #endif +struct br_vlan_stats { + u64 rx_bytes; + u64 rx_packets; + u64 tx_bytes; + u64 tx_packets; + struct u64_stats_sync syncp; +}; + /** * struct net_bridge_vlan - per-vlan entry * * @vnode: rhashtable member * @vid: VLAN id * @flags: bridge vlan flags + * @stats: per-cpu VLAN statistics * @br: if MASTER flag set, this points to a bridge struct * @port: if MASTER flag unset, this points to a port struct * @refcnt: if MASTER flag set, this is bumped for each port referencing it @@ -100,6 +109,7 @@ struct net_bridge_vlan { struct rhash_head vnode; u16 vid; u16 flags; + struct br_vlan_stats __percpu *stats; union { struct net_bridge *br; struct net_bridge_port *port; @@ -342,6 +352,7 @@ struct net_bridge #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; u8 vlan_enabled; + u8 vlan_stats_enabled; __be16 vlan_proto; u16 default_pvid; #endif @@ -691,6 +702,7 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int __br_vlan_set_proto(struct net_bridge *br, __be16 proto); int br_vlan_set_proto(struct net_bridge *br, unsigned long val); +int br_vlan_set_stats(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid); @@ -699,6 +711,8 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); int nbp_vlan_init(struct net_bridge_port *port); int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); +void br_vlan_get_stats(const struct net_bridge_vlan *v, + struct br_vlan_stats *stats); static inline struct net_bridge_vlan_group *br_vlan_group( const struct net_bridge *br) @@ -881,6 +895,10 @@ static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu( return NULL; } +static inline void br_vlan_get_stats(const struct net_bridge_vlan *v, + struct br_vlan_stats *stats) +{ +} #endif struct nf_br_ops { diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 6b8091407ca3..beb47071e38d 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -43,7 +43,14 @@ static ssize_t store_bridge_parm(struct device *d, if (endp == buf) return -EINVAL; + if (!rtnl_trylock()) + return restart_syscall(); + err = (*set)(br, val); + if (!err) + netdev_state_change(br->dev); + rtnl_unlock(); + return err ? err : len; } @@ -101,15 +108,7 @@ static ssize_t ageing_time_show(struct device *d, static int set_ageing_time(struct net_bridge *br, unsigned long val) { - int ret; - - if (!rtnl_trylock()) - return restart_syscall(); - - ret = br_set_ageing_time(br, val); - rtnl_unlock(); - - return ret; + return br_set_ageing_time(br, val); } static ssize_t ageing_time_store(struct device *d, @@ -128,27 +127,18 @@ static ssize_t stp_state_show(struct device *d, } +static int set_stp_state(struct net_bridge *br, unsigned long val) +{ + br_stp_set_enabled(br, val); + + return 0; +} + static ssize_t stp_state_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { - struct net_bridge *br = to_bridge(d); - char *endp; - unsigned long val; - - if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - val = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EINVAL; - - if (!rtnl_trylock()) - return restart_syscall(); - br_stp_set_enabled(br, val); - rtnl_unlock(); - - return len; + return store_bridge_parm(d, buf, len, set_stp_state); } static DEVICE_ATTR_RW(stp_state); @@ -160,29 +150,22 @@ static ssize_t group_fwd_mask_show(struct device *d, return sprintf(buf, "%#x\n", br->group_fwd_mask); } - -static ssize_t group_fwd_mask_store(struct device *d, - struct device_attribute *attr, - const char *buf, - size_t len) +static int set_group_fwd_mask(struct net_bridge *br, unsigned long val) { - struct net_bridge *br = to_bridge(d); - char *endp; - unsigned long val; - - if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - val = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EINVAL; - if (val & BR_GROUPFWD_RESTRICTED) return -EINVAL; br->group_fwd_mask = val; - return len; + return 0; +} + +static ssize_t group_fwd_mask_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_group_fwd_mask); } static DEVICE_ATTR_RW(group_fwd_mask); @@ -328,6 +311,7 @@ static ssize_t group_addr_store(struct device *d, br->group_addr_set = true; br_recalculate_fwd_mask(br); + netdev_state_change(br->dev); rtnl_unlock(); @@ -336,17 +320,17 @@ static ssize_t group_addr_store(struct device *d, static DEVICE_ATTR_RW(group_addr); +static int set_flush(struct net_bridge *br, unsigned long val) +{ + br_fdb_flush(br); + return 0; +} + static ssize_t flush_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { - struct net_bridge *br = to_bridge(d); - - if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - br_fdb_flush(br); - return len; + return store_bridge_parm(d, buf, len, set_flush); } static DEVICE_ATTR_WO(flush); @@ -747,6 +731,22 @@ static ssize_t default_pvid_store(struct device *d, return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid); } static DEVICE_ATTR_RW(default_pvid); + +static ssize_t vlan_stats_enabled_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->vlan_stats_enabled); +} + +static ssize_t vlan_stats_enabled_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_stats); +} +static DEVICE_ATTR_RW(vlan_stats_enabled); #endif static struct attribute *bridge_attrs[] = { @@ -794,6 +794,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_vlan_filtering.attr, &dev_attr_vlan_protocol.attr, &dev_attr_default_pvid.attr, + &dev_attr_vlan_stats_enabled.attr, #endif NULL }; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index efe415ad842a..1e04d4d44273 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -61,7 +61,6 @@ static int store_flag(struct net_bridge_port *p, unsigned long v, if (flags != p->flags) { p->flags = flags; br_port_flags_change(p, mask); - br_ifinfo_notify(RTM_NEWLINK, p); } return 0; } @@ -253,8 +252,10 @@ static ssize_t brport_store(struct kobject *kobj, spin_lock_bh(&p->br->lock); ret = brport_attr->store(p, val); spin_unlock_bh(&p->br->lock); - if (ret == 0) + if (!ret) { + br_ifinfo_notify(RTM_NEWLINK, p); ret = count; + } } rtnl_unlock(); } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 9309bb4f2a5b..b6de4f457161 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -162,6 +162,17 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid return masterv; } +static void br_master_vlan_rcu_free(struct rcu_head *rcu) +{ + struct net_bridge_vlan *v; + + v = container_of(rcu, struct net_bridge_vlan, rcu); + WARN_ON(!br_vlan_is_master(v)); + free_percpu(v->stats); + v->stats = NULL; + kfree(v); +} + static void br_vlan_put_master(struct net_bridge_vlan *masterv) { struct net_bridge_vlan_group *vg; @@ -174,7 +185,7 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv) rhashtable_remove_fast(&vg->vlan_hash, &masterv->vnode, br_vlan_rht_params); __vlan_del_list(masterv); - kfree_rcu(masterv, rcu); + call_rcu(&masterv->rcu, br_master_vlan_rcu_free); } } @@ -230,6 +241,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags) if (!masterv) goto out_filt; v->brvlan = masterv; + v->stats = masterv->stats; } /* Add the dev mac and count the vlan only if it's usable */ @@ -329,6 +341,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb) { + struct br_vlan_stats *stats; struct net_bridge_vlan *v; u16 vid; @@ -355,18 +368,27 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, return NULL; } } + if (br->vlan_stats_enabled) { + stats = this_cpu_ptr(v->stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_bytes += skb->len; + stats->tx_packets++; + u64_stats_update_end(&stats->syncp); + } + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) skb->vlan_tci = 0; - out: return skb; } /* Called under RCU */ -static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, +static bool __allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, struct sk_buff *skb, u16 *vid) { - const struct net_bridge_vlan *v; + struct br_vlan_stats *stats; + struct net_bridge_vlan *v; bool tagged; BR_INPUT_SKB_CB(skb)->vlan_filtered = true; @@ -375,7 +397,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, * HW accelerated vlan tag. */ if (unlikely(!skb_vlan_tag_present(skb) && - skb->protocol == proto)) { + skb->protocol == br->vlan_proto)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) return false; @@ -383,7 +405,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, if (!br_vlan_get_tag(skb, vid)) { /* Tagged frame */ - if (skb->vlan_proto != proto) { + if (skb->vlan_proto != br->vlan_proto) { /* Protocol-mismatch, empty out vlan_tci for new tag */ skb_push(skb, ETH_HLEN); skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, @@ -419,7 +441,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, *vid = pvid; if (likely(!tagged)) /* Untagged Frame. */ - __vlan_hwaccel_put_tag(skb, proto, pvid); + __vlan_hwaccel_put_tag(skb, br->vlan_proto, pvid); else /* Priority-tagged Frame. * At this point, We know that skb->vlan_tci had @@ -428,13 +450,24 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, */ skb->vlan_tci |= pvid; - return true; + /* if stats are disabled we can avoid the lookup */ + if (!br->vlan_stats_enabled) + return true; } - - /* Frame had a valid vlan tag. See if vlan is allowed */ v = br_vlan_find(vg, *vid); - if (v && br_vlan_should_use(v)) - return true; + if (!v || !br_vlan_should_use(v)) + goto drop; + + if (br->vlan_stats_enabled) { + stats = this_cpu_ptr(v->stats); + u64_stats_update_begin(&stats->syncp); + stats->rx_bytes += skb->len; + stats->rx_packets++; + u64_stats_update_end(&stats->syncp); + } + + return true; + drop: kfree_skb(skb); return false; @@ -452,7 +485,7 @@ bool br_allowed_ingress(const struct net_bridge *br, return true; } - return __allowed_ingress(vg, br->vlan_proto, skb, vid); + return __allowed_ingress(br, vg, skb, vid); } /* Called under RCU. */ @@ -542,6 +575,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) if (!vlan) return -ENOMEM; + vlan->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats); + if (!vlan->stats) { + kfree(vlan); + return -ENOMEM; + } vlan->vid = vid; vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER; vlan->flags &= ~BRIDGE_VLAN_INFO_PVID; @@ -549,8 +587,10 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) if (flags & BRIDGE_VLAN_INFO_BRENTRY) atomic_set(&vlan->refcnt, 1); ret = __vlan_add(vlan, flags); - if (ret) + if (ret) { + free_percpu(vlan->stats); kfree(vlan); + } return ret; } @@ -651,15 +691,7 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { - int err; - - if (!rtnl_trylock()) - return restart_syscall(); - - err = __br_vlan_filter_toggle(br, val); - rtnl_unlock(); - - return err; + return __br_vlan_filter_toggle(br, val); } int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) @@ -713,18 +745,24 @@ err_filt: int br_vlan_set_proto(struct net_bridge *br, unsigned long val) { - int err; - if (val != ETH_P_8021Q && val != ETH_P_8021AD) return -EPROTONOSUPPORT; - if (!rtnl_trylock()) - return restart_syscall(); + return __br_vlan_set_proto(br, htons(val)); +} - err = __br_vlan_set_proto(br, htons(val)); - rtnl_unlock(); +int br_vlan_set_stats(struct net_bridge *br, unsigned long val) +{ + switch (val) { + case 0: + case 1: + br->vlan_stats_enabled = val; + break; + default: + return -EINVAL; + } - return err; + return 0; } static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid) @@ -855,21 +893,17 @@ int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val) if (val >= VLAN_VID_MASK) return -EINVAL; - if (!rtnl_trylock()) - return restart_syscall(); - if (pvid == br->default_pvid) - goto unlock; + goto out; /* Only allow default pvid change when filtering is disabled */ if (br->vlan_enabled) { pr_info_once("Please disable vlan filtering to change default_pvid\n"); err = -EPERM; - goto unlock; + goto out; } err = __br_vlan_set_default_pvid(br, pvid); -unlock: - rtnl_unlock(); +out: return err; } @@ -1020,3 +1054,30 @@ void nbp_vlan_flush(struct net_bridge_port *port) synchronize_rcu(); __vlan_group_free(vg); } + +void br_vlan_get_stats(const struct net_bridge_vlan *v, + struct br_vlan_stats *stats) +{ + int i; + + memset(stats, 0, sizeof(*stats)); + for_each_possible_cpu(i) { + u64 rxpackets, rxbytes, txpackets, txbytes; + struct br_vlan_stats *cpu_stats; + unsigned int start; + + cpu_stats = per_cpu_ptr(v->stats, i); + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + rxpackets = cpu_stats->rx_packets; + rxbytes = cpu_stats->rx_bytes; + txbytes = cpu_stats->tx_bytes; + txpackets = cpu_stats->tx_packets; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; + stats->tx_bytes += txbytes; + stats->tx_packets += txpackets; + } +} diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 7fcdd7261d88..a78c4e2826e5 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -162,15 +162,57 @@ static const struct nf_chain_type filter_bridge = { (1 << NF_BR_POST_ROUTING), }; +static void nf_br_saveroute(const struct sk_buff *skb, + struct nf_queue_entry *entry) +{ +} + +static int nf_br_reroute(struct net *net, struct sk_buff *skb, + const struct nf_queue_entry *entry) +{ + return 0; +} + +static __sum16 nf_br_checksum(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, u_int8_t protocol) +{ + return 0; +} + +static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, unsigned int len, + u_int8_t protocol) +{ + return 0; +} + +static int nf_br_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict __always_unused) +{ + return 0; +} + +static const struct nf_afinfo nf_br_afinfo = { + .family = AF_BRIDGE, + .checksum = nf_br_checksum, + .checksum_partial = nf_br_checksum_partial, + .route = nf_br_route, + .saveroute = nf_br_saveroute, + .reroute = nf_br_reroute, + .route_key_size = 0, +}; + static int __init nf_tables_bridge_init(void) { int ret; + nf_register_afinfo(&nf_br_afinfo); nft_register_chain_type(&filter_bridge); ret = register_pernet_subsys(&nf_tables_bridge_net_ops); - if (ret < 0) + if (ret < 0) { nft_unregister_chain_type(&filter_bridge); - + nf_unregister_afinfo(&nf_br_afinfo); + } return ret; } @@ -178,6 +220,7 @@ static void __exit nf_tables_bridge_exit(void) { unregister_pernet_subsys(&nf_tables_bridge_net_ops); nft_unregister_chain_type(&filter_bridge); + nf_unregister_afinfo(&nf_br_afinfo); } module_init(nf_tables_bridge_init); diff --git a/net/can/raw.c b/net/can/raw.c index 2e67b1423cd3..972c187d40ab 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -755,7 +755,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (err < 0) goto free_skb; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sk->sk_tsflags, &skb_shinfo(skb)->tx_flags); skb->dev = dev; skb->sk = sk; diff --git a/net/core/datagram.c b/net/core/datagram.c index fa9dc6450b08..b7de71f8d5d3 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -301,16 +301,19 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(skb_free_datagram); -void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) +void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) { bool slow; if (likely(atomic_read(&skb->users) == 1)) smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + else if (likely(!atomic_dec_and_test(&skb->users))) { + sk_peek_offset_bwd(sk, len); return; + } slow = lock_sock_fast(sk); + sk_peek_offset_bwd(sk, len); skb_orphan(skb); sk_mem_reclaim_partial(sk); unlock_sock_fast(sk, slow); @@ -318,7 +321,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) /* skb is now orphaned, can be freed outside of locked section */ __kfree_skb(skb); } -EXPORT_SYMBOL(skb_free_datagram_locked); +EXPORT_SYMBOL(__skb_free_datagram_locked); /** * skb_kill_datagram - Free a datagram skbuff forcibly diff --git a/net/core/dev.c b/net/core/dev.c index 5c925ac50b95..904ff431d570 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1741,7 +1741,7 @@ static inline void net_timestamp_set(struct sk_buff *skb) __net_timestamp(SKB); \ } \ -bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) +bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb) { unsigned int len; @@ -1850,7 +1850,7 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) * taps currently in use. */ -static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) +void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; struct sk_buff *skb2 = NULL; @@ -1907,6 +1907,7 @@ out_unlock: pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); rcu_read_unlock(); } +EXPORT_SYMBOL_GPL(dev_queue_xmit_nit); /** * netif_setup_tc - Handle tc mappings on real_num_tx_queues change @@ -2711,6 +2712,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return ERR_PTR(err); } + /* Only report GSO partial support if it will enable us to + * support segmentation on this frame without needing additional + * work. + */ + if (features & NETIF_F_GSO_PARTIAL) { + netdev_features_t partial_features = NETIF_F_GSO_ROBUST; + struct net_device *dev = skb->dev; + + partial_features |= dev->features & dev->gso_partial_features; + if (!skb_gso_ok(skb, features | partial_features)) + features &= ~NETIF_F_GSO_PARTIAL; + } + BUILD_BUG_ON(SKB_SGO_CB_OFFSET + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); @@ -2825,14 +2839,45 @@ static netdev_features_t dflt_features_check(const struct sk_buff *skb, return vlan_features_check(skb, features); } +static netdev_features_t gso_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + u16 gso_segs = skb_shinfo(skb)->gso_segs; + + if (gso_segs > dev->gso_max_segs) + return features & ~NETIF_F_GSO_MASK; + + /* Support for GSO partial features requires software + * intervention before we can actually process the packets + * so we need to strip support for any partial features now + * and we can pull them back in after we have partially + * segmented the frame. + */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL)) + features &= ~dev->gso_partial_features; + + /* Make sure to clear the IPv4 ID mangling feature if the + * IPv4 header has the potential to be fragmented. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + struct iphdr *iph = skb->encapsulation ? + inner_ip_hdr(skb) : ip_hdr(skb); + + if (!(iph->frag_off & htons(IP_DF))) + features &= ~NETIF_F_TSO_MANGLEID; + } + + return features; +} + netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; netdev_features_t features = dev->features; - u16 gso_segs = skb_shinfo(skb)->gso_segs; - if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) - features &= ~NETIF_F_GSO_MASK; + if (skb_is_gso(skb)) + features = gso_features_check(skb, dev, features); /* If encapsulation offload request, verify we are testing * hardware encapsulation features instead of standard @@ -2915,9 +2960,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device { netdev_features_t features; - if (skb->next) - return skb; - features = netif_skb_features(skb); skb = validate_xmit_vlan(skb, features); if (unlikely(!skb)) @@ -2960,6 +3002,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device out_kfree_skb: kfree_skb(skb); out_null: + atomic_long_inc(&dev->tx_dropped); return NULL; } @@ -3143,12 +3186,12 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) case TC_ACT_SHOT: qdisc_qstats_cpu_drop(cl->q); *ret = NET_XMIT_DROP; - goto drop; + kfree_skb(skb); + return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: *ret = NET_XMIT_SUCCESS; -drop: - kfree_skb(skb); + consume_skb(skb); return NULL; case TC_ACT_REDIRECT: /* No need to push/pop skb's mac_header here on egress! */ @@ -3349,7 +3392,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) skb = validate_xmit_skb(skb, dev); if (!skb) - goto drop; + goto out; HARD_TX_LOCK(dev, txq, cpu); @@ -3376,7 +3419,6 @@ recursion_alert: } rc = -ENETDOWN; -drop: rcu_read_unlock_bh(); atomic_long_inc(&dev->tx_dropped); @@ -3428,6 +3470,7 @@ u32 rps_cpu_mask __read_mostly; EXPORT_SYMBOL(rps_cpu_mask); struct static_key rps_needed __read_mostly; +EXPORT_SYMBOL(rps_needed); static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, @@ -3914,9 +3957,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, break; case TC_ACT_SHOT: qdisc_qstats_cpu_drop(cl->q); + kfree_skb(skb); + return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: - kfree_skb(skb); + consume_skb(skb); return NULL; case TC_ACT_REDIRECT: /* skb_mac_header check was done by cls/act_bpf, so @@ -4440,6 +4485,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->is_fou = 0; + NAPI_GRO_CB(skb)->is_atomic = 1; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ @@ -4664,6 +4710,8 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) if (unlikely(skb_gro_header_hard(skb, hlen))) { eth = skb_gro_header_slow(skb, hlen, 0); if (unlikely(!eth)) { + net_warn_ratelimited("%s: dropping impossible skb from %s\n", + __func__, napi->dev->name); napi_reuse_skb(napi, skb); return NULL; } @@ -4938,8 +4986,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock) netpoll_poll_unlock(have); } if (rc > 0) - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); + __NET_ADD_STATS(sock_net(sk), + LINUX_MIB_BUSYPOLLRXPACKETS, rc); local_bh_enable(); if (rc == LL_FLUSH_FAILED) @@ -6676,6 +6724,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_TSO6; } + /* TSO with IPv4 ID mangling requires IPv4 TSO be enabled */ + if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO)) + features &= ~NETIF_F_TSO_MANGLEID; + /* TSO ECN requires that TSO is present as well. */ if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) features &= ~NETIF_F_TSO_ECN; @@ -6704,6 +6756,14 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } + /* GSO partial features require GSO partial be set */ + if ((features & dev->gso_partial_features) && + !(features & NETIF_F_GSO_PARTIAL)) { + netdev_dbg(dev, + "Dropping partially supported GSO features since no GSO partial.\n"); + features &= ~dev->gso_partial_features; + } + #ifdef CONFIG_NET_RX_BUSY_POLL if (dev->netdev_ops->ndo_busy_poll) features |= NETIF_F_BUSY_POLL; @@ -6974,9 +7034,22 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - if (!(dev->flags & IFF_LOOPBACK)) { + if (!(dev->flags & IFF_LOOPBACK)) dev->hw_features |= NETIF_F_NOCACHE_COPY; - } + + /* If IPv4 TCP segmentation offload is supported we should also + * allow the device to enable segmenting the frame with the option + * of ignoring a static IP ID value. This doesn't enable the + * feature itself but allows the user to enable it later. + */ + if (dev->hw_features & NETIF_F_TSO) + dev->hw_features |= NETIF_F_TSO_MANGLEID; + if (dev->vlan_features & NETIF_F_TSO) + dev->vlan_features |= NETIF_F_TSO_MANGLEID; + if (dev->mpls_features & NETIF_F_TSO) + dev->mpls_features |= NETIF_F_TSO_MANGLEID; + if (dev->hw_enc_features & NETIF_F_TSO) + dev->hw_enc_features |= NETIF_F_TSO_MANGLEID; /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. */ @@ -6984,7 +7057,7 @@ int register_netdevice(struct net_device *dev) /* Make NETIF_F_SG inheritable to tunnel devices. */ - dev->hw_enc_features |= NETIF_F_SG; + dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL; /* Make NETIF_F_SG inheritable to MPLS. */ @@ -7427,7 +7500,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; - dev->gso_min_segs = 0; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); diff --git a/net/core/devlink.c b/net/core/devlink.c index 590fa561cb7f..933e8d4d3968 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -119,7 +119,171 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink, return devlink_port_get_from_attrs(devlink, info->attrs); } -#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) +struct devlink_sb { + struct list_head list; + unsigned int index; + u32 size; + u16 ingress_pools_count; + u16 egress_pools_count; + u16 ingress_tc_count; + u16 egress_tc_count; +}; + +static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb) +{ + return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count; +} + +static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink, + unsigned int sb_index) +{ + struct devlink_sb *devlink_sb; + + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + if (devlink_sb->index == sb_index) + return devlink_sb; + } + return NULL; +} + +static bool devlink_sb_index_exists(struct devlink *devlink, + unsigned int sb_index) +{ + return devlink_sb_get_by_index(devlink, sb_index); +} + +static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink, + struct nlattr **attrs) +{ + if (attrs[DEVLINK_ATTR_SB_INDEX]) { + u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]); + struct devlink_sb *devlink_sb; + + devlink_sb = devlink_sb_get_by_index(devlink, sb_index); + if (!devlink_sb) + return ERR_PTR(-ENODEV); + return devlink_sb; + } + return ERR_PTR(-EINVAL); +} + +static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + return devlink_sb_get_from_attrs(devlink, info->attrs); +} + +static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb, + struct nlattr **attrs, + u16 *p_pool_index) +{ + u16 val; + + if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX]) + return -EINVAL; + + val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]); + if (val >= devlink_sb_pool_count(devlink_sb)) + return -EINVAL; + *p_pool_index = val; + return 0; +} + +static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb, + struct genl_info *info, + u16 *p_pool_index) +{ + return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs, + p_pool_index); +} + +static int +devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs, + enum devlink_sb_pool_type *p_pool_type) +{ + u8 val; + + if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE]) + return -EINVAL; + + val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]); + if (val != DEVLINK_SB_POOL_TYPE_INGRESS && + val != DEVLINK_SB_POOL_TYPE_EGRESS) + return -EINVAL; + *p_pool_type = val; + return 0; +} + +static int +devlink_sb_pool_type_get_from_info(struct genl_info *info, + enum devlink_sb_pool_type *p_pool_type) +{ + return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type); +} + +static int +devlink_sb_th_type_get_from_attrs(struct nlattr **attrs, + enum devlink_sb_threshold_type *p_th_type) +{ + u8 val; + + if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]) + return -EINVAL; + + val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]); + if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC && + val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC) + return -EINVAL; + *p_th_type = val; + return 0; +} + +static int +devlink_sb_th_type_get_from_info(struct genl_info *info, + enum devlink_sb_threshold_type *p_th_type) +{ + return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type); +} + +static int +devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb, + struct nlattr **attrs, + enum devlink_sb_pool_type pool_type, + u16 *p_tc_index) +{ + u16 val; + + if (!attrs[DEVLINK_ATTR_SB_TC_INDEX]) + return -EINVAL; + + val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]); + if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS && + val >= devlink_sb->ingress_tc_count) + return -EINVAL; + if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS && + val >= devlink_sb->egress_tc_count) + return -EINVAL; + *p_tc_index = val; + return 0; +} + +static int +devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb, + struct genl_info *info, + enum devlink_sb_pool_type pool_type, + u16 *p_tc_index) +{ + return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs, + pool_type, p_tc_index); +} + +#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) +#define DEVLINK_NL_FLAG_NEED_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_SB BIT(2) +#define DEVLINK_NL_FLAG_LOCK_PORTS BIT(3) + /* port is not needed but we need to ensure they don't + * change in the middle of command + */ static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -132,8 +296,9 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, mutex_unlock(&devlink_mutex); return PTR_ERR(devlink); } - info->user_ptr[0] = devlink; - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { + info->user_ptr[0] = devlink; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { struct devlink_port *devlink_port; mutex_lock(&devlink_port_mutex); @@ -143,7 +308,22 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, mutex_unlock(&devlink_mutex); return PTR_ERR(devlink_port); } - info->user_ptr[1] = devlink_port; + info->user_ptr[0] = devlink_port; + } + if (ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) { + mutex_lock(&devlink_port_mutex); + } + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) { + struct devlink_sb *devlink_sb; + + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) { + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) + mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink_mutex); + return PTR_ERR(devlink_sb); + } + info->user_ptr[1] = devlink_sb; } return 0; } @@ -151,7 +331,8 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, static void devlink_nl_post_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT || + ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) mutex_unlock(&devlink_port_mutex); mutex_unlock(&devlink_mutex); } @@ -356,8 +537,8 @@ out: static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink *devlink = info->user_ptr[0]; - struct devlink_port *devlink_port = info->user_ptr[1]; + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; int err; @@ -436,8 +617,8 @@ static int devlink_port_type_set(struct devlink *devlink, static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink *devlink = info->user_ptr[0]; - struct devlink_port *devlink_port = info->user_ptr[1]; + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink *devlink = devlink_port->devlink; int err; if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) { @@ -497,12 +678,735 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, return devlink_port_unsplit(devlink, port_index); } +static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_sb *devlink_sb, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT, + devlink_sb->ingress_pools_count)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT, + devlink_sb->egress_pools_count)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT, + devlink_sb->ingress_tc_count)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT, + devlink_sb->egress_tc_count)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_fill(msg, devlink, devlink_sb, + DEVLINK_CMD_SB_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_sb_fill(msg, devlink, devlink_sb, + DEVLINK_CMD_SB_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) + goto out; + idx++; + } + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_sb *devlink_sb, + u16 pool_index, enum devlink_command cmd, + u32 portid, u32 seq, int flags) +{ + struct devlink_sb_pool_info pool_info; + void *hdr; + int err; + + err = devlink->ops->sb_pool_get(devlink, devlink_sb->index, + pool_index, &pool_info); + if (err) + return err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) + goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size)) + goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, + pool_info.threshold_type)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + u16 pool_index; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!devlink->ops || !devlink->ops->sb_pool_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index, + DEVLINK_CMD_SB_POOL_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, + struct devlink *devlink, + struct devlink_sb *devlink_sb, + u32 portid, u32 seq) +{ + u16 pool_count = devlink_sb_pool_count(devlink_sb); + u16 pool_index; + int err; + + for (pool_index = 0; pool_index < pool_count; pool_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_pool_fill(msg, devlink, + devlink_sb, + pool_index, + DEVLINK_CMD_SB_POOL_NEW, + portid, seq, NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + return 0; +} + +static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || + !devlink->ops || !devlink->ops->sb_pool_get) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_pool_get_dumpit(msg, start, &idx, devlink, + devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err && err != -EOPNOTSUPP) + goto out; + } + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) + +{ + const struct devlink_ops *ops = devlink->ops; + + if (ops && ops->sb_pool_set) + return ops->sb_pool_set(devlink, sb_index, pool_index, + size, threshold_type); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + enum devlink_sb_threshold_type threshold_type; + u16 pool_index; + u32 size; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + err = devlink_sb_th_type_get_from_info(info, &threshold_type); + if (err) + return err; + + if (!info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]) + return -EINVAL; + + size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]); + return devlink_sb_pool_set(devlink, devlink_sb->index, + pool_index, size, threshold_type); +} + +static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_port *devlink_port, + struct devlink_sb *devlink_sb, + u16 pool_index, + enum devlink_command cmd, + u32 portid, u32 seq, int flags) +{ + const struct devlink_ops *ops = devlink->ops; + u32 threshold; + void *hdr; + int err; + + err = ops->sb_port_pool_get(devlink_port, devlink_sb->index, + pool_index, &threshold); + if (err) + return err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold)) + goto nla_put_failure; + + if (ops->sb_occ_port_pool_get) { + u32 cur; + u32 max; + + err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index, + pool_index, &cur, &max); + if (err && err != -EOPNOTSUPP) + return err; + if (!err) { + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max)) + goto nla_put_failure; + } + } + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink *devlink = devlink_port->devlink; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + u16 pool_index; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!devlink->ops || !devlink->ops->sb_port_pool_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port, + devlink_sb, pool_index, + DEVLINK_CMD_SB_PORT_POOL_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, + struct devlink *devlink, + struct devlink_sb *devlink_sb, + u32 portid, u32 seq) +{ + struct devlink_port *devlink_port; + u16 pool_count = devlink_sb_pool_count(devlink_sb); + u16 pool_index; + int err; + + list_for_each_entry(devlink_port, &devlink->port_list, list) { + for (pool_index = 0; pool_index < pool_count; pool_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_port_pool_fill(msg, devlink, + devlink_port, + devlink_sb, + pool_index, + DEVLINK_CMD_SB_PORT_POOL_NEW, + portid, seq, + NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + } + return 0; +} + +static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + mutex_lock(&devlink_port_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || + !devlink->ops || !devlink->ops->sb_port_pool_get) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_port_pool_get_dumpit(msg, start, &idx, + devlink, devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err && err != -EOPNOTSUPP) + goto out; + } + } +out: + mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 threshold) + +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + + if (ops && ops->sb_port_pool_set) + return ops->sb_port_pool_set(devlink_port, sb_index, + pool_index, threshold); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + u16 pool_index; + u32 threshold; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD]) + return -EINVAL; + + threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); + return devlink_sb_port_pool_set(devlink_port, devlink_sb->index, + pool_index, threshold); +} + +static int +devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_port *devlink_port, + struct devlink_sb *devlink_sb, u16 tc_index, + enum devlink_sb_pool_type pool_type, + enum devlink_command cmd, + u32 portid, u32 seq, int flags) +{ + const struct devlink_ops *ops = devlink->ops; + u16 pool_index; + u32 threshold; + void *hdr; + int err; + + err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index, + tc_index, pool_type, + &pool_index, &threshold); + if (err) + return err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index)) + goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold)) + goto nla_put_failure; + + if (ops->sb_occ_tc_port_bind_get) { + u32 cur; + u32 max; + + err = ops->sb_occ_tc_port_bind_get(devlink_port, + devlink_sb->index, + tc_index, pool_type, + &cur, &max); + if (err && err != -EOPNOTSUPP) + return err; + if (!err) { + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max)) + goto nla_put_failure; + } + } + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink *devlink = devlink_port->devlink; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + enum devlink_sb_pool_type pool_type; + u16 tc_index; + int err; + + err = devlink_sb_pool_type_get_from_info(info, &pool_type); + if (err) + return err; + + err = devlink_sb_tc_index_get_from_info(devlink_sb, info, + pool_type, &tc_index); + if (err) + return err; + + if (!devlink->ops || !devlink->ops->sb_tc_pool_bind_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port, + devlink_sb, tc_index, pool_type, + DEVLINK_CMD_SB_TC_POOL_BIND_NEW, + info->snd_portid, + info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, + int start, int *p_idx, + struct devlink *devlink, + struct devlink_sb *devlink_sb, + u32 portid, u32 seq) +{ + struct devlink_port *devlink_port; + u16 tc_index; + int err; + + list_for_each_entry(devlink_port, &devlink->port_list, list) { + for (tc_index = 0; + tc_index < devlink_sb->ingress_tc_count; tc_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, + devlink_port, + devlink_sb, + tc_index, + DEVLINK_SB_POOL_TYPE_INGRESS, + DEVLINK_CMD_SB_TC_POOL_BIND_NEW, + portid, seq, + NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + for (tc_index = 0; + tc_index < devlink_sb->egress_tc_count; tc_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, + devlink_port, + devlink_sb, + tc_index, + DEVLINK_SB_POOL_TYPE_EGRESS, + DEVLINK_CMD_SB_TC_POOL_BIND_NEW, + portid, seq, + NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + } + return 0; +} + +static int +devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + mutex_lock(&devlink_port_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || + !devlink->ops || !devlink->ops->sb_tc_pool_bind_get) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx, + devlink, + devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err && err != -EOPNOTSUPP) + goto out; + } + } +out: + mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold) + +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + + if (ops && ops->sb_tc_pool_bind_set) + return ops->sb_tc_pool_bind_set(devlink_port, sb_index, + tc_index, pool_type, + pool_index, threshold); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + enum devlink_sb_pool_type pool_type; + u16 tc_index; + u16 pool_index; + u32 threshold; + int err; + + err = devlink_sb_pool_type_get_from_info(info, &pool_type); + if (err) + return err; + + err = devlink_sb_tc_index_get_from_info(devlink_sb, info, + pool_type, &tc_index); + if (err) + return err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD]) + return -EINVAL; + + threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); + return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index, + tc_index, pool_type, + pool_index, threshold); +} + +static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + const struct devlink_ops *ops = devlink->ops; + + if (ops && ops->sb_occ_snapshot) + return ops->sb_occ_snapshot(devlink, devlink_sb->index); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + const struct devlink_ops *ops = devlink->ops; + + if (ops && ops->sb_occ_max_clear) + return ops->sb_occ_max_clear(devlink, devlink_sb->index); + return -EOPNOTSUPP; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 }, [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 }, [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 }, + [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -511,6 +1415,7 @@ static const struct genl_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_get_doit, .dumpit = devlink_nl_cmd_get_dumpit, .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -533,12 +1438,92 @@ static const struct genl_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_port_split_doit, .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_PORT_UNSPLIT, .doit = devlink_nl_cmd_port_unsplit_doit, .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_SB_GET, + .doit = devlink_nl_cmd_sb_get_doit, + .dumpit = devlink_nl_cmd_sb_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_POOL_GET, + .doit = devlink_nl_cmd_sb_pool_get_doit, + .dumpit = devlink_nl_cmd_sb_pool_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_POOL_SET, + .doit = devlink_nl_cmd_sb_pool_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB, + }, + { + .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, + .doit = devlink_nl_cmd_sb_port_pool_get_doit, + .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, + .doit = devlink_nl_cmd_sb_port_pool_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + }, + { + .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, + .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit, + .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, + .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + }, + { + .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, + .doit = devlink_nl_cmd_sb_occ_snapshot_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB | + DEVLINK_NL_FLAG_LOCK_PORTS, + }, + { + .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, + .doit = devlink_nl_cmd_sb_occ_max_clear_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB | + DEVLINK_NL_FLAG_LOCK_PORTS, }, }; @@ -561,6 +1546,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) devlink->ops = ops; devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); + INIT_LIST_HEAD(&devlink->sb_list); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -630,7 +1616,6 @@ int devlink_port_register(struct devlink *devlink, } devlink_port->devlink = devlink; devlink_port->index = port_index; - devlink_port->type = DEVLINK_PORT_TYPE_NOTSET; devlink_port->registered = true; list_add_tail(&devlink_port->list, &devlink->port_list); mutex_unlock(&devlink_port_mutex); @@ -717,6 +1702,51 @@ void devlink_port_split_set(struct devlink_port *devlink_port, } EXPORT_SYMBOL_GPL(devlink_port_split_set); +int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count) +{ + struct devlink_sb *devlink_sb; + int err = 0; + + mutex_lock(&devlink_mutex); + if (devlink_sb_index_exists(devlink, sb_index)) { + err = -EEXIST; + goto unlock; + } + + devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL); + if (!devlink_sb) { + err = -ENOMEM; + goto unlock; + } + devlink_sb->index = sb_index; + devlink_sb->size = size; + devlink_sb->ingress_pools_count = ingress_pools_count; + devlink_sb->egress_pools_count = egress_pools_count; + devlink_sb->ingress_tc_count = ingress_tc_count; + devlink_sb->egress_tc_count = egress_tc_count; + list_add_tail(&devlink_sb->list, &devlink->sb_list); +unlock: + mutex_unlock(&devlink_mutex); + return err; +} +EXPORT_SYMBOL_GPL(devlink_sb_register); + +void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) +{ + struct devlink_sb *devlink_sb; + + mutex_lock(&devlink_mutex); + devlink_sb = devlink_sb_get_by_index(devlink, sb_index); + WARN_ON(!devlink_sb); + list_del(&devlink_sb->list); + mutex_unlock(&devlink_mutex); + kfree(devlink_sb); +} +EXPORT_SYMBOL_GPL(devlink_sb_unregister); + static int __init devlink_module_init(void) { return genl_register_family_with_ops_groups(&devlink_nl_family, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f426c5ad6149..bdb4013581b1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -79,12 +79,16 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_UFO_BIT] = "tx-udp-fragmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", + [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", + [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", @@ -387,15 +391,17 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } -static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32) +void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, + u32 legacy_u32) { bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); dst[0] = legacy_u32; } +EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode); /* return false if src had higher bits set. lower bits always updated. */ -static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32, - const unsigned long *src) +bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, + const unsigned long *src) { bool retval = true; @@ -415,6 +421,7 @@ static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32, *legacy_u32 = src[0]; return retval; } +EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); /* return false if legacy contained non-0 deprecated fields * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated @@ -437,13 +444,13 @@ convert_legacy_settings_to_link_ksettings( legacy_settings->maxrxpkt) retval = false; - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.supported, legacy_settings->supported); - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.advertising, legacy_settings->advertising); - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.lp_advertising, legacy_settings->lp_advertising); link_ksettings->base.speed @@ -482,13 +489,13 @@ convert_link_ksettings_to_legacy_settings( * __u32 maxrxpkt; */ - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->supported, link_ksettings->link_modes.supported); - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->advertising, link_ksettings->link_modes.advertising); - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->lp_advertising, link_ksettings->link_modes.lp_advertising); ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 365de66436ac..840acebbb80c 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -549,7 +549,7 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ - + nla_total_size(8); /* FRA_TUN_ID */ + + nla_total_size_64bit(8); /* FRA_TUN_ID */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -607,7 +607,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->target && nla_put_u32(skb, FRA_GOTO, rule->target)) || (rule->tun_id && - nla_put_be64(skb, FRA_TUN_ID, rule->tun_id))) + nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/core/filter.c b/net/core/filter.c index ca7f832b2980..68adb5f52110 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -994,7 +994,11 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) */ goto out_err_free; - bpf_prog_select_runtime(fp); + /* We are guaranteed to never error here with cBPF to eBPF + * transitions, since there's no issue with type compatibility + * checks on program arrays. + */ + fp = bpf_prog_select_runtime(fp, &err); kfree(old_prog); return fp; @@ -1149,8 +1153,7 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); -static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, - bool locked) +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) { struct sk_filter *fp, *old_fp; @@ -1166,8 +1169,10 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, return -ENOMEM; } - old_fp = rcu_dereference_protected(sk->sk_filter, locked); + old_fp = rcu_dereference_protected(sk->sk_filter, + lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_filter, fp); + if (old_fp) sk_filter_uncharge(sk, old_fp); @@ -1246,8 +1251,7 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) * occurs or there is insufficient memory for the filter a negative * errno code is returned. On success the return is zero. */ -int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, - bool locked) +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct bpf_prog *prog = __get_filter(fprog, sk); int err; @@ -1255,7 +1259,7 @@ int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk, locked); + err = __sk_attach_prog(prog, sk); if (err < 0) { __bpf_prog_release(prog); return err; @@ -1263,12 +1267,7 @@ int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, return 0; } -EXPORT_SYMBOL_GPL(__sk_attach_filter); - -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) -{ - return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); -} +EXPORT_SYMBOL_GPL(sk_attach_filter); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { @@ -1314,7 +1313,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); + err = __sk_attach_prog(prog, sk); if (err < 0) { bpf_prog_put(prog); return err; @@ -1349,6 +1348,21 @@ struct bpf_scratchpad { static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); +static inline int bpf_try_make_writable(struct sk_buff *skb, + unsigned int write_len) +{ + int err; + + if (!skb_cloned(skb)) + return 0; + if (skb_clone_writable(skb, write_len)) + return 0; + err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (!err) + bpf_compute_data_end(skb); + return err; +} + static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); @@ -1371,7 +1385,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) */ if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff))) return -EFAULT; - if (unlikely(skb_try_make_writable(skb, offset + len))) + if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; ptr = skb_header_pointer(skb, offset, len, sp->buff); @@ -1414,16 +1428,19 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) unsigned int len = (unsigned int) r4; void *ptr; - if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK)) - return -EFAULT; + if (unlikely((u32) offset > 0xffff)) + goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); if (unlikely(!ptr)) - return -EFAULT; + goto err_clear; if (ptr != to) memcpy(to, ptr, len); return 0; +err_clear: + memset(to, 0, len); + return -EFAULT; } static const struct bpf_func_proto bpf_skb_load_bytes_proto = { @@ -1432,7 +1449,7 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_STACK, + .arg3_type = ARG_PTR_TO_RAW_STACK, .arg4_type = ARG_CONST_STACK_SIZE, }; @@ -1446,7 +1463,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; - if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) + if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1501,7 +1518,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; - if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) + if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1701,12 +1718,15 @@ static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; __be16 vlan_proto = (__force __be16) r2; + int ret; if (unlikely(vlan_proto != htons(ETH_P_8021Q) && vlan_proto != htons(ETH_P_8021AD))) vlan_proto = htons(ETH_P_8021Q); - return skb_vlan_push(skb, vlan_proto, vlan_tci); + ret = skb_vlan_push(skb, vlan_proto, vlan_tci); + bpf_compute_data_end(skb); + return ret; } const struct bpf_func_proto bpf_skb_vlan_push_proto = { @@ -1722,8 +1742,11 @@ EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto); static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; + int ret; - return skb_vlan_pop(skb); + ret = skb_vlan_pop(skb); + bpf_compute_data_end(skb); + return ret; } const struct bpf_func_proto bpf_skb_vlan_pop_proto = { @@ -1761,12 +1784,19 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); u8 compat[sizeof(struct bpf_tunnel_key)]; + void *to_orig = to; + int err; - if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) - return -EINVAL; - if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) - return -EPROTO; + if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) { + err = -EINVAL; + goto err_clear; + } + if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) { + err = -EPROTO; + goto err_clear; + } if (unlikely(size != sizeof(struct bpf_tunnel_key))) { + err = -EINVAL; switch (size) { case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): @@ -1776,12 +1806,12 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) * a common path later on. */ if (ip_tunnel_info_af(info) != AF_INET) - return -EINVAL; + goto err_clear; set_compat: to = (struct bpf_tunnel_key *)compat; break; default: - return -EINVAL; + goto err_clear; } } @@ -1798,9 +1828,12 @@ set_compat: } if (unlikely(size != sizeof(struct bpf_tunnel_key))) - memcpy((void *)(long) r2, to, size); + memcpy(to_orig, to, size); return 0; +err_clear: + memset(to_orig, 0, size); + return err; } static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { @@ -1808,7 +1841,7 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_PTR_TO_RAW_STACK, .arg3_type = ARG_CONST_STACK_SIZE, .arg4_type = ARG_ANYTHING, }; @@ -1818,16 +1851,26 @@ static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) struct sk_buff *skb = (struct sk_buff *) (long) r1; u8 *to = (u8 *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); + int err; if (unlikely(!info || - !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) - return -ENOENT; - if (unlikely(size < info->options_len)) - return -ENOMEM; + !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) { + err = -ENOENT; + goto err_clear; + } + if (unlikely(size < info->options_len)) { + err = -ENOMEM; + goto err_clear; + } ip_tunnel_info_opts_get(to, info); + if (size > info->options_len) + memset(to + info->options_len, 0, size - info->options_len); return info->options_len; +err_clear: + memset(to, 0, size); + return err; } static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { @@ -1835,7 +1878,7 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_PTR_TO_RAW_STACK, .arg3_type = ARG_CONST_STACK_SIZE, }; @@ -2021,6 +2064,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_redirect_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; + case BPF_FUNC_perf_event_output: + return bpf_get_event_output_proto(); default: return sk_filter_func_proto(func_id); } @@ -2028,16 +2073,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) static bool __is_valid_access(int off, int size, enum bpf_access_type type) { - /* check bounds */ if (off < 0 || off >= sizeof(struct __sk_buff)) return false; - - /* disallow misaligned access */ + /* The verifier guarantees that size > 0. */ if (off % size != 0) return false; - - /* all __sk_buff fields are __u32 */ - if (size != 4) + if (size != sizeof(__u32)) return false; return true; @@ -2046,13 +2087,17 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { - if (off == offsetof(struct __sk_buff, tc_classid)) + switch (off) { + case offsetof(struct __sk_buff, tc_classid): + case offsetof(struct __sk_buff, data): + case offsetof(struct __sk_buff, data_end): return false; + } if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]): + offsetof(struct __sk_buff, cb[4]): break; default: return false; @@ -2195,6 +2240,20 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off); break; + case offsetof(struct __sk_buff, data): + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, data)), + dst_reg, src_reg, + offsetof(struct sk_buff, data)); + break; + + case offsetof(struct __sk_buff, data_end): + ctx_off -= offsetof(struct __sk_buff, data_end); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct bpf_skb_data_end, data_end); + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)), + dst_reg, src_reg, ctx_off); + break; + case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); @@ -2219,30 +2278,30 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, } static const struct bpf_verifier_ops sk_filter_ops = { - .get_func_proto = sk_filter_func_proto, - .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = bpf_net_convert_ctx_access, + .get_func_proto = sk_filter_func_proto, + .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = bpf_net_convert_ctx_access, }; static const struct bpf_verifier_ops tc_cls_act_ops = { - .get_func_proto = tc_cls_act_func_proto, - .is_valid_access = tc_cls_act_is_valid_access, - .convert_ctx_access = bpf_net_convert_ctx_access, + .get_func_proto = tc_cls_act_func_proto, + .is_valid_access = tc_cls_act_is_valid_access, + .convert_ctx_access = bpf_net_convert_ctx_access, }; static struct bpf_prog_type_list sk_filter_type __read_mostly = { - .ops = &sk_filter_ops, - .type = BPF_PROG_TYPE_SOCKET_FILTER, + .ops = &sk_filter_ops, + .type = BPF_PROG_TYPE_SOCKET_FILTER, }; static struct bpf_prog_type_list sched_cls_type __read_mostly = { - .ops = &tc_cls_act_ops, - .type = BPF_PROG_TYPE_SCHED_CLS, + .ops = &tc_cls_act_ops, + .type = BPF_PROG_TYPE_SCHED_CLS, }; static struct bpf_prog_type_list sched_act_type __read_mostly = { - .ops = &tc_cls_act_ops, - .type = BPF_PROG_TYPE_SCHED_ACT, + .ops = &tc_cls_act_ops, + .type = BPF_PROG_TYPE_SCHED_ACT, }; static int __init register_sk_filter_ops(void) @@ -2255,7 +2314,7 @@ static int __init register_sk_filter_ops(void) } late_initcall(register_sk_filter_ops); -int __sk_detach_filter(struct sock *sk, bool locked) +int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; struct sk_filter *filter; @@ -2263,7 +2322,8 @@ int __sk_detach_filter(struct sock *sk, bool locked) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; - filter = rcu_dereference_protected(sk->sk_filter, locked); + filter = rcu_dereference_protected(sk->sk_filter, + lockdep_sock_is_held(sk)); if (filter) { RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); @@ -2272,12 +2332,7 @@ int __sk_detach_filter(struct sock *sk, bool locked) return ret; } -EXPORT_SYMBOL_GPL(__sk_detach_filter); - -int sk_detach_filter(struct sock *sk) -{ - return __sk_detach_filter(sk, sock_owned_by_user(sk)); -} +EXPORT_SYMBOL_GPL(sk_detach_filter); int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) @@ -2288,7 +2343,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, lock_sock(sk); filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (!filter) goto out; diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index e640462ea8bf..f96ee8b9478d 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -25,9 +25,9 @@ static inline int -gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) +gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) { - if (nla_put(d->skb, type, size, buf)) + if (nla_put_64bit(d->skb, type, size, buf, padattr)) goto nla_put_failure; return 0; @@ -59,7 +59,8 @@ nla_put_failure: */ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, - int xstats_type, spinlock_t *lock, struct gnet_dump *d) + int xstats_type, spinlock_t *lock, + struct gnet_dump *d, int padattr) __acquires(lock) { memset(d, 0, sizeof(*d)); @@ -71,16 +72,17 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, d->skb = skb; d->compat_tc_stats = tc_stats_type; d->compat_xstats = xstats_type; + d->padattr = padattr; if (d->tail) - return gnet_stats_copy(d, type, NULL, 0); + return gnet_stats_copy(d, type, NULL, 0, padattr); return 0; } EXPORT_SYMBOL(gnet_stats_start_copy_compat); /** - * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode + * gnet_stats_start_copy - start dumping procedure in compatibility mode * @skb: socket buffer to put statistics TLVs into * @type: TLV type for top level statistic TLV * @lock: statistics lock @@ -94,9 +96,9 @@ EXPORT_SYMBOL(gnet_stats_start_copy_compat); */ int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, - struct gnet_dump *d) + struct gnet_dump *d, int padattr) { - return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d); + return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d, padattr); } EXPORT_SYMBOL(gnet_stats_start_copy); @@ -169,7 +171,8 @@ gnet_stats_copy_basic(struct gnet_dump *d, memset(&sb, 0, sizeof(sb)); sb.bytes = bstats.bytes; sb.packets = bstats.packets; - return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb)); + return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb), + TCA_STATS_PAD); } return 0; } @@ -208,11 +211,13 @@ gnet_stats_copy_rate_est(struct gnet_dump *d, } if (d->tail) { - res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est)); + res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est), + TCA_STATS_PAD); if (res < 0 || est.bps == r->bps) return res; /* emit 64bit stats only if needed */ - return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r)); + return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r), + TCA_STATS_PAD); } return 0; @@ -286,7 +291,8 @@ gnet_stats_copy_queue(struct gnet_dump *d, if (d->tail) return gnet_stats_copy(d, TCA_STATS_QUEUE, - &qstats, sizeof(qstats)); + &qstats, sizeof(qstats), + TCA_STATS_PAD); return 0; } @@ -316,7 +322,8 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) } if (d->tail) - return gnet_stats_copy(d, TCA_STATS_APP, st, len); + return gnet_stats_copy(d, TCA_STATS_APP, st, len, + TCA_STATS_PAD); return 0; @@ -347,12 +354,12 @@ gnet_stats_finish_copy(struct gnet_dump *d) if (d->compat_tc_stats) if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats, - sizeof(d->tc_stats)) < 0) + sizeof(d->tc_stats), d->padattr) < 0) return -1; if (d->compat_xstats && d->xstats) { if (gnet_stats_copy(d, d->compat_xstats, d->xstats, - d->xstats_len) < 0) + d->xstats_len, d->padattr) < 0) return -1; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f18ae91b652e..29dd8cc22bbf 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1763,21 +1763,22 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) NEIGH_VAR(parms, MCAST_PROBES)) || nla_put_u32(skb, NDTPA_MCAST_REPROBES, NEIGH_VAR(parms, MCAST_REPROBES)) || - nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || + nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time, + NDTPA_PAD) || nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, - NEIGH_VAR(parms, BASE_REACHABLE_TIME)) || + NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_GC_STALETIME, - NEIGH_VAR(parms, GC_STALETIME)) || + NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, - NEIGH_VAR(parms, DELAY_PROBE_TIME)) || + NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_RETRANS_TIME, - NEIGH_VAR(parms, RETRANS_TIME)) || + NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, - NEIGH_VAR(parms, ANYCAST_DELAY)) || + NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_PROXY_DELAY, - NEIGH_VAR(parms, PROXY_DELAY)) || + NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_LOCKTIME, - NEIGH_VAR(parms, LOCKTIME))) + NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); @@ -1804,7 +1805,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndtmsg->ndtm_pad2 = 0; if (nla_put_string(skb, NDTA_NAME, tbl->id) || - nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) || + nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) || nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) || nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) || nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3)) @@ -1856,7 +1857,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndst.ndts_table_fulls += st->table_fulls; } - if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst)) + if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst, + NDTA_PAD)) goto nla_put_failure; } diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 2bf83299600a..14d09345f00d 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -162,7 +162,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v) "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps, flow_limit_count); + 0, /* was cpu_collision */ + sd->received_rps, flow_limit_count); return 0; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 20999aa596dd..8604ae245960 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3472,7 +3472,6 @@ xmit_more: pkt_dev->odevname, ret); pkt_dev->errors++; /* fallthru */ - case NETDEV_TX_LOCKED: case NETDEV_TX_BUSY: /* Retry it next time */ atomic_dec(&(pkt_dev->skb->users)); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 65763c29f845..d69c4644f8f2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -808,11 +808,6 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->rx_nohandler = b->rx_nohandler; } -static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) -{ - memcpy(v, b, sizeof(*b)); -} - /* All VF info */ static inline int rtnl_vfinfo_size(const struct net_device *dev, u32 ext_filter_mask) @@ -830,17 +825,17 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_link_state)) + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + /* IFLA_VF_STATS_RX_PACKETS */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_PACKETS */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_RX_BYTES */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_BYTES */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_BROADCAST */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_MULTICAST */ - nla_total_size(sizeof(__u64)) + + nla_total_size_64bit(sizeof(__u64)) + nla_total_size(sizeof(struct ifla_vf_trust))); return size; } else @@ -881,9 +876,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ - + nla_total_size(sizeof(struct rtnl_link_ifmap)) + + nla_total_size_64bit(sizeof(struct rtnl_link_ifmap)) + nla_total_size(sizeof(struct rtnl_link_stats)) - + nla_total_size(sizeof(struct rtnl_link_stats64)) + + nla_total_size_64bit(sizeof(struct rtnl_link_stats64)) + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ + nla_total_size(4) /* IFLA_TXQLEN */ @@ -1054,25 +1049,23 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb, struct net_device *dev) { - const struct rtnl_link_stats64 *stats; - struct rtnl_link_stats64 temp; + struct rtnl_link_stats64 *sp; struct nlattr *attr; - stats = dev_get_stats(dev, &temp); - - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); + attr = nla_reserve_64bit(skb, IFLA_STATS64, + sizeof(struct rtnl_link_stats64), IFLA_PAD); if (!attr) return -EMSGSIZE; - copy_rtnl_link_stats(nla_data(attr), stats); + sp = nla_data(attr); + dev_get_stats(dev, sp); - attr = nla_reserve(skb, IFLA_STATS64, - sizeof(struct rtnl_link_stats64)); + attr = nla_reserve(skb, IFLA_STATS, + sizeof(struct rtnl_link_stats)); if (!attr) return -EMSGSIZE; - copy_rtnl_link_stats64(nla_data(attr), stats); + copy_rtnl_link_stats(nla_data(attr), sp); return 0; } @@ -1160,18 +1153,18 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_nest_cancel(skb, vfinfo); return -EMSGSIZE; } - if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS, - vf_stats.rx_packets) || - nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS, - vf_stats.tx_packets) || - nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES, - vf_stats.rx_bytes) || - nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES, - vf_stats.tx_bytes) || - nla_put_u64(skb, IFLA_VF_STATS_BROADCAST, - vf_stats.broadcast) || - nla_put_u64(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast)) + if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, + vf_stats.rx_packets, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, + vf_stats.tx_packets, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES, + vf_stats.rx_bytes, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES, + vf_stats.tx_bytes, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, + vf_stats.broadcast, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, + vf_stats.multicast, IFLA_VF_STATS_PAD)) return -EMSGSIZE; nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); @@ -1190,7 +1183,7 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) map.dma = dev->dma; map.port = dev->if_port; - if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) + if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD)) return -EMSGSIZE; return 0; @@ -3453,6 +3446,202 @@ out: return err; } +static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr) +{ + return (mask & IFLA_STATS_FILTER_BIT(attrid)) && + (!idxattr || idxattr == attrid); +} + +static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, + int type, u32 pid, u32 seq, u32 change, + unsigned int flags, unsigned int filter_mask, + int *idxattr, int *prividx) +{ + struct if_stats_msg *ifsm; + struct nlmsghdr *nlh; + struct nlattr *attr; + int s_prividx = *prividx; + + ASSERT_RTNL(); + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags); + if (!nlh) + return -EMSGSIZE; + + ifsm = nlmsg_data(nlh); + ifsm->ifindex = dev->ifindex; + ifsm->filter_mask = filter_mask; + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, *idxattr)) { + struct rtnl_link_stats64 *sp; + + attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64, + sizeof(struct rtnl_link_stats64), + IFLA_STATS_UNSPEC); + if (!attr) + goto nla_put_failure; + + sp = nla_data(attr); + dev_get_stats(dev, sp); + } + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, *idxattr)) { + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + + if (ops && ops->fill_linkxstats) { + int err; + + *idxattr = IFLA_STATS_LINK_XSTATS; + attr = nla_nest_start(skb, + IFLA_STATS_LINK_XSTATS); + if (!attr) + goto nla_put_failure; + + err = ops->fill_linkxstats(skb, dev, prividx); + nla_nest_end(skb, attr); + if (err) + goto nla_put_failure; + *idxattr = 0; + } + } + + nlmsg_end(skb, nlh); + + return 0; + +nla_put_failure: + /* not a multi message or no progress mean a real error */ + if (!(flags & NLM_F_MULTI) || s_prividx == *prividx) + nlmsg_cancel(skb, nlh); + else + nlmsg_end(skb, nlh); + + return -EMSGSIZE; +} + +static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = { + [IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) }, +}; + +static size_t if_nlmsg_stats_size(const struct net_device *dev, + u32 filter_mask) +{ + size_t size = 0; + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0)) + size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) { + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + + if (ops && ops->get_linkxstats_size) { + size += nla_total_size(ops->get_linkxstats_size(dev)); + /* for IFLA_STATS_LINK_XSTATS */ + size += nla_total_size(0); + } + } + + return size; +} + +static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev = NULL; + int idxattr = 0, prividx = 0; + struct if_stats_msg *ifsm; + struct sk_buff *nskb; + u32 filter_mask; + int err; + + ifsm = nlmsg_data(nlh); + if (ifsm->ifindex > 0) + dev = __dev_get_by_index(net, ifsm->ifindex); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + filter_mask = ifsm->filter_mask; + if (!filter_mask) + return -EINVAL; + + nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL); + if (!nskb) + return -ENOBUFS; + + err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS, + NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + 0, filter_mask, &idxattr, &prividx); + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(nskb); + } else { + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); + } + + return err; +} + +static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int h, s_h, err, s_idx, s_idxattr, s_prividx; + struct net *net = sock_net(skb->sk); + unsigned int flags = NLM_F_MULTI; + struct if_stats_msg *ifsm; + struct hlist_head *head; + struct net_device *dev; + u32 filter_mask = 0; + int idx = 0; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + s_idxattr = cb->args[2]; + s_prividx = cb->args[3]; + + cb->seq = net->dev_base_seq; + + ifsm = nlmsg_data(cb->nlh); + filter_mask = ifsm->filter_mask; + if (!filter_mask) + return -EINVAL; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + flags, filter_mask, + &s_idxattr, &s_prividx); + /* If we ran out of room on the first message, + * we're in trouble + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + + if (err < 0) + goto out; + s_prividx = 0; + s_idxattr = 0; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + } +out: + cb->args[3] = s_prividx; + cb->args[2] = s_idxattr; + cb->args[1] = idx; + cb->args[0] = h; + + return skb->len; +} + /* Process one rtnetlink message. */ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -3602,4 +3791,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); + + rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, + NULL); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e561f9f07d6d..f2b77e549c03 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3076,11 +3076,11 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); + unsigned int partial_segs = 0; unsigned int headroom; - unsigned int len; + unsigned int len = head_skb->len; __be16 proto; - bool csum; - int sg = !!(features & NETIF_F_SG); + bool csum, sg; int nfrags = skb_shinfo(head_skb)->nr_frags; int err = -ENOMEM; int i = 0; @@ -3092,8 +3092,21 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (unlikely(!proto)) return ERR_PTR(-EINVAL); + sg = !!(features & NETIF_F_SG); csum = !!can_checksum_protocol(features, proto); + /* GSO partial only requires that we trim off any excess that + * doesn't fit into an MSS sized block, so take care of that + * now. + */ + if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) { + partial_segs = len / mss; + if (partial_segs > 1) + mss *= partial_segs; + else + partial_segs = 0; + } + headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -3281,6 +3294,23 @@ perform_csum_check: */ segs->prev = tail; + /* Update GSO info on first skb in partial sequence. */ + if (partial_segs) { + int type = skb_shinfo(head_skb)->gso_type; + + /* Update type to add partial and then remove dodgy if set */ + type |= SKB_GSO_PARTIAL; + type &= ~SKB_GSO_DODGY; + + /* Update GSO info and prepare to start updating headers on + * our way back down the stack of protocols. + */ + skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size; + skb_shinfo(segs)->gso_segs = partial_segs; + skb_shinfo(segs)->gso_type = type; + SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset; + } + /* Following permits correct backpressure, for protocols * using skb_set_owner_w(). * Idea is to tranfert ownership from head_skb to last segment. @@ -4595,3 +4625,239 @@ failure: return NULL; } EXPORT_SYMBOL(alloc_skb_with_frags); + +/* carve out the first off bytes from skb when off < headlen */ +static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, + const int headlen, gfp_t gfp_mask) +{ + int i; + int size = skb_end_offset(skb); + int new_hlen = headlen - off; + u8 *data; + + size = SKB_DATA_ALIGN(size); + + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + data = kmalloc_reserve(size + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask, NUMA_NO_NODE, NULL); + if (!data) + return -ENOMEM; + + size = SKB_WITH_OVERHEAD(ksize(data)); + + /* Copy real data, and all frags */ + skb_copy_from_linear_data_offset(skb, off, data, new_hlen); + skb->len -= off; + + memcpy((struct skb_shared_info *)(data + size), + skb_shinfo(skb), + offsetof(struct skb_shared_info, + frags[skb_shinfo(skb)->nr_frags])); + if (skb_cloned(skb)) { + /* drop the old head gracefully */ + if (skb_orphan_frags(skb, gfp_mask)) { + kfree(data); + return -ENOMEM; + } + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + skb_frag_ref(skb, i); + if (skb_has_frag_list(skb)) + skb_clone_fraglist(skb); + skb_release_data(skb); + } else { + /* we can reuse existing recount- all we did was + * relocate values + */ + skb_free_head(skb); + } + + skb->head = data; + skb->data = data; + skb->head_frag = 0; +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->end = size; +#else + skb->end = skb->head + size; +#endif + skb_set_tail_pointer(skb, skb_headlen(skb)); + skb_headers_offset_update(skb, 0); + skb->cloned = 0; + skb->hdr_len = 0; + skb->nohdr = 0; + atomic_set(&skb_shinfo(skb)->dataref, 1); + + return 0; +} + +static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp); + +/* carve out the first eat bytes from skb's frag_list. May recurse into + * pskb_carve() + */ +static int pskb_carve_frag_list(struct sk_buff *skb, + struct skb_shared_info *shinfo, int eat, + gfp_t gfp_mask) +{ + struct sk_buff *list = shinfo->frag_list; + struct sk_buff *clone = NULL; + struct sk_buff *insp = NULL; + + do { + if (!list) { + pr_err("Not enough bytes to eat. Want %d\n", eat); + return -EFAULT; + } + if (list->len <= eat) { + /* Eaten as whole. */ + eat -= list->len; + list = list->next; + insp = list; + } else { + /* Eaten partially. */ + if (skb_shared(list)) { + clone = skb_clone(list, gfp_mask); + if (!clone) + return -ENOMEM; + insp = list->next; + list = clone; + } else { + /* This may be pulled without problems. */ + insp = list; + } + if (pskb_carve(list, eat, gfp_mask) < 0) { + kfree_skb(clone); + return -ENOMEM; + } + break; + } + } while (eat); + + /* Free pulled out fragments. */ + while ((list = shinfo->frag_list) != insp) { + shinfo->frag_list = list->next; + kfree_skb(list); + } + /* And insert new clone at head. */ + if (clone) { + clone->next = list; + shinfo->frag_list = clone; + } + return 0; +} + +/* carve off first len bytes from skb. Split line (off) is in the + * non-linear part of skb + */ +static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, + int pos, gfp_t gfp_mask) +{ + int i, k = 0; + int size = skb_end_offset(skb); + u8 *data; + const int nfrags = skb_shinfo(skb)->nr_frags; + struct skb_shared_info *shinfo; + + size = SKB_DATA_ALIGN(size); + + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + data = kmalloc_reserve(size + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask, NUMA_NO_NODE, NULL); + if (!data) + return -ENOMEM; + + size = SKB_WITH_OVERHEAD(ksize(data)); + + memcpy((struct skb_shared_info *)(data + size), + skb_shinfo(skb), offsetof(struct skb_shared_info, + frags[skb_shinfo(skb)->nr_frags])); + if (skb_orphan_frags(skb, gfp_mask)) { + kfree(data); + return -ENOMEM; + } + shinfo = (struct skb_shared_info *)(data + size); + for (i = 0; i < nfrags; i++) { + int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (pos + fsize > off) { + shinfo->frags[k] = skb_shinfo(skb)->frags[i]; + + if (pos < off) { + /* Split frag. + * We have two variants in this case: + * 1. Move all the frag to the second + * part, if it is possible. F.e. + * this approach is mandatory for TUX, + * where splitting is expensive. + * 2. Split is accurately. We make this. + */ + shinfo->frags[0].page_offset += off - pos; + skb_frag_size_sub(&shinfo->frags[0], off - pos); + } + skb_frag_ref(skb, i); + k++; + } + pos += fsize; + } + shinfo->nr_frags = k; + if (skb_has_frag_list(skb)) + skb_clone_fraglist(skb); + + if (k == 0) { + /* split line is in frag list */ + pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask); + } + skb_release_data(skb); + + skb->head = data; + skb->head_frag = 0; + skb->data = data; +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->end = size; +#else + skb->end = skb->head + size; +#endif + skb_reset_tail_pointer(skb); + skb_headers_offset_update(skb, 0); + skb->cloned = 0; + skb->hdr_len = 0; + skb->nohdr = 0; + skb->len -= off; + skb->data_len = skb->len; + atomic_set(&skb_shinfo(skb)->dataref, 1); + return 0; +} + +/* remove len bytes from the beginning of the skb */ +static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp) +{ + int headlen = skb_headlen(skb); + + if (len < headlen) + return pskb_carve_inside_header(skb, len, headlen, gfp); + else + return pskb_carve_inside_nonlinear(skb, len, headlen, gfp); +} + +/* Extract to_copy bytes starting at off from skb, and return this in + * a new skb + */ +struct sk_buff *pskb_extract(struct sk_buff *skb, int off, + int to_copy, gfp_t gfp) +{ + struct sk_buff *clone = skb_clone(skb, gfp); + + if (!clone) + return NULL; + + if (pskb_carve(clone, off, gfp) < 0 || + pskb_trim(clone, to_copy)) { + kfree_skb(clone); + return NULL; + } + return clone; +} +EXPORT_SYMBOL(pskb_extract); diff --git a/net/core/sock.c b/net/core/sock.c index 7e73c26b6bb4..08bf97eceeb3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -405,9 +405,8 @@ static void sock_disable_timestamp(struct sock *sk, unsigned long flags) } -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { - int err; unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; @@ -417,10 +416,6 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return -ENOMEM; } - err = sk_filter(sk, skb); - if (err) - return err; - if (!sk_rmem_schedule(sk, skb, skb->truesize)) { atomic_inc(&sk->sk_drops); return -ENOBUFS; @@ -443,6 +438,18 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk->sk_data_ready(sk); return 0; } +EXPORT_SYMBOL(__sock_queue_rcv_skb); + +int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sk_filter(sk, skb); + if (err) + return err; + + return __sock_queue_rcv_skb(sk, skb); +} EXPORT_SYMBOL(sock_queue_rcv_skb); int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) @@ -835,7 +842,8 @@ set_rcvbuf: !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { if (sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) { - if (sk->sk_state != TCP_ESTABLISHED) { + if ((1 << sk->sk_state) & + (TCPF_CLOSE | TCPF_LISTEN)) { ret = -EINVAL; break; } @@ -1421,8 +1429,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, } EXPORT_SYMBOL(sk_alloc); -void sk_destruct(struct sock *sk) +/* Sockets having SOCK_RCU_FREE will call this function after one RCU + * grace period. This is the case for UDP sockets and TCP listeners. + */ +static void __sk_destruct(struct rcu_head *head) { + struct sock *sk = container_of(head, struct sock, sk_rcu); struct sk_filter *filter; if (sk->sk_destruct) @@ -1451,6 +1463,14 @@ void sk_destruct(struct sock *sk) sk_prot_free(sk->sk_prot_creator, sk); } +void sk_destruct(struct sock *sk) +{ + if (sock_flag(sk, SOCK_RCU_FREE)) + call_rcu(&sk->sk_rcu, __sk_destruct); + else + __sk_destruct(&sk->sk_rcu); +} + static void __sk_free(struct sock *sk) { if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) @@ -1515,6 +1535,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_dst_cache = NULL; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; + atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; @@ -1634,6 +1655,17 @@ void sock_wfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_wfree); +/* This variant of sock_wfree() is used by TCP, + * since it sets SOCK_USE_WRITE_QUEUE. + */ +void __sock_wfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) + __sk_free(sk); +} + void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) { skb_orphan(skb); @@ -1656,8 +1688,21 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) } EXPORT_SYMBOL(skb_set_owner_w); +/* This helper is used by netem, as it can hold packets in its + * delay queue. We want to allow the owner socket to send more + * packets, as if they were already TX completed by a typical driver. + * But we also want to keep skb->sk set because some packet schedulers + * rely on it (sch_fq for example). So we set skb->truesize to a small + * amount (1) and decrease sk_wmem_alloc accordingly. + */ void skb_orphan_partial(struct sk_buff *skb) { + /* If this skb is a TCP pure ACK or already went here, + * we have nothing to do. 2 is already a very small truesize. + */ + if (skb->truesize <= 2) + return; + /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, * so we do not completely orphan skb, but transfert all * accounted bytes but one, to avoid unexpected reorders. @@ -1869,27 +1914,51 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, } EXPORT_SYMBOL(sock_alloc_send_skb); +int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, + struct sockcm_cookie *sockc) +{ + u32 tsflags; + + switch (cmsg->cmsg_type) { + case SO_MARK: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->mark = *(u32 *)CMSG_DATA(cmsg); + break; + case SO_TIMESTAMPING: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + + tsflags = *(u32 *)CMSG_DATA(cmsg); + if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK) + return -EINVAL; + + sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK; + sockc->tsflags |= tsflags; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(__sock_cmsg_send); + int sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct sockcm_cookie *sockc) { struct cmsghdr *cmsg; + int ret; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_SOCKET) continue; - switch (cmsg->cmsg_type) { - case SO_MARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) - return -EINVAL; - sockc->mark = *(u32 *)CMSG_DATA(cmsg); - break; - default: - return -EINVAL; - } + ret = __sock_cmsg_send(sk, msg, cmsg, sockc); + if (ret) + return ret; } return 0; } @@ -1974,33 +2043,27 @@ static void __release_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { - struct sk_buff *skb = sk->sk_backlog.head; + struct sk_buff *skb, *next; - do { + while ((skb = sk->sk_backlog.head) != NULL) { sk->sk_backlog.head = sk->sk_backlog.tail = NULL; - bh_unlock_sock(sk); - do { - struct sk_buff *next = skb->next; + spin_unlock_bh(&sk->sk_lock.slock); + do { + next = skb->next; prefetch(next); WARN_ON_ONCE(skb_dst_is_noref(skb)); skb->next = NULL; sk_backlog_rcv(sk, skb); - /* - * We are in process context here with softirqs - * disabled, use cond_resched_softirq() to preempt. - * This is safe to do because we've taken the backlog - * queue private: - */ - cond_resched_softirq(); + cond_resched(); skb = next; } while (skb != NULL); - bh_lock_sock(sk); - } while ((skb = sk->sk_backlog.head) != NULL); + spin_lock_bh(&sk->sk_lock.slock); + } /* * Doing the zeroing here guarantee we can not loop forever @@ -2009,6 +2072,13 @@ static void __release_sock(struct sock *sk) sk->sk_backlog.len = 0; } +void __sk_flush_backlog(struct sock *sk) +{ + spin_lock_bh(&sk->sk_lock.slock); + __release_sock(sk); + spin_unlock_bh(&sk->sk_lock.slock); +} + /** * sk_wait_data - wait for data to arrive at sk_receive_queue * @sk: sock to wait on @@ -2145,6 +2215,15 @@ void __sk_mem_reclaim(struct sock *sk, int amount) } EXPORT_SYMBOL(__sk_mem_reclaim); +int sk_set_peek_off(struct sock *sk, int val) +{ + if (val < 0) + return -EINVAL; + + sk->sk_peek_off = val; + return 0; +} +EXPORT_SYMBOL_GPL(sk_set_peek_off); /* * Set of default routines for initialising struct proto_ops when @@ -2432,11 +2511,6 @@ EXPORT_SYMBOL(lock_sock_nested); void release_sock(struct sock *sk) { - /* - * The sk_lock has mutex_unlock() semantics: - */ - mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a996ce8c8fb2..6b10573cc9fa 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -67,6 +67,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); return nla_put(skb, attrtype, sizeof(mem), &mem); } @@ -119,7 +120,7 @@ static size_t sock_diag_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct inet_diag_msg) + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */ - + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */ + + nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */ } static void sock_diag_broadcast_destroy_work(struct work_struct *work) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a6beb7b6ae55..0df2aa652530 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -294,6 +294,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +# ifdef CONFIG_HAVE_EBPF_JIT + { + .procname = "bpf_jit_harden", + .data = &bpf_jit_harden, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec, + }, +# endif #endif { .procname = "netdev_tstamp_prequeue", diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index b0e28d24e1a7..0c55ffb859bf 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -198,9 +198,9 @@ struct dccp_mib { }; DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); -#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) -#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) -#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) +#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) +#define __DCCP_INC_STATS(field) __SNMP_INC_STATS(dccp_statistics, field) +#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) /* * Checksumming routines diff --git a/net/dccp/input.c b/net/dccp/input.c index 3bd14e885396..ba347184bda9 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -359,7 +359,7 @@ send_sync: goto discard; } - DCCP_INC_STATS_BH(DCCP_MIB_INERRS); + DCCP_INC_STATS(DCCP_MIB_INERRS); discard: __kfree_skb(skb); return 0; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9c67a961ba53..5c7e413a3ae4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -62,7 +62,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = daddr = usin->sin_addr.s_addr; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet_opt != NULL && inet_opt->opt.srr) { if (daddr == 0) return -EINVAL; @@ -205,7 +205,7 @@ void dccp_req_err(struct sock *sk, u64 seq) * socket here. */ if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); } else { /* * Still in RESPOND, just remove it silently. @@ -247,7 +247,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } @@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) iph->saddr, ntohs(dh->dccph_sport), inet_iif(skb)); if (!sk) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } @@ -273,7 +273,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; @@ -281,7 +281,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) dp = dccp_sk(sk); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -318,7 +318,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) case DCCP_REQUESTING: case DCCP_RESPOND: if (!sock_owned_by_user(sk)) { - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); @@ -431,11 +431,11 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, return newsk; exit_overflow: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); exit_nonewsk: dst_release(dst); exit: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: inet_csk_prepare_forced_close(newsk); @@ -462,7 +462,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } @@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) bh_unlock_sock(ctl_sk); if (net_xmit_eval(err) == 0) { - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS(DCCP_MIB_OUTRSTS); } out: dst_release(dst); @@ -637,7 +637,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); return -1; } EXPORT_SYMBOL_GPL(dccp_v4_conn_request); @@ -764,6 +764,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; const struct iphdr *iph; + bool refcounted; struct sock *sk; int min_cov; @@ -801,7 +802,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) lookup: sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), - dh->dccph_sport, dh->dccph_dport); + dh->dccph_sport, dh->dccph_dport, &refcounted); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); @@ -830,6 +831,7 @@ lookup: goto lookup; } sock_hold(sk); + refcounted = true; nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); @@ -886,7 +888,8 @@ discard_it: return 0; discard_and_relse: - sock_put(sk); + if (refcounted) + sock_put(sk); goto discard_it; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4663a01d5039..d176f4e66369 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -80,8 +80,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -91,8 +91,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, inet6_iif(skb)); if (!sk) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -106,7 +106,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, bh_lock_sock(sk); if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; @@ -114,7 +114,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, dp = dccp_sk(sk); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -156,7 +156,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error @@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) if (!IS_ERR(dst)) { skb_dst_set(skb, dst); ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS(DCCP_MIB_OUTRSTS); return; } @@ -378,7 +378,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); return -1; } @@ -527,11 +527,11 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, return newsk; out_overflow: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: dst_release(dst); out: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; } @@ -642,6 +642,7 @@ discard: static int dccp_v6_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; + bool refcounted; struct sock *sk; int min_cov; @@ -670,7 +671,7 @@ static int dccp_v6_rcv(struct sk_buff *skb) lookup: sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), dh->dccph_sport, dh->dccph_dport, - inet6_iif(skb)); + inet6_iif(skb), &refcounted); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); @@ -699,6 +700,7 @@ lookup: goto lookup; } sock_hold(sk); + refcounted = true; nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); @@ -752,7 +754,8 @@ discard_it: return 0; discard_and_relse: - sock_put(sk); + if (refcounted) + sock_put(sk); goto discard_it; } @@ -865,7 +868,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 1994f8af646b..53eddf99e4f6 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -127,7 +127,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, } dccp_init_xmit_timers(newsk); - DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); + __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS); } return newsk; } diff --git a/net/dccp/options.c b/net/dccp/options.c index 9bce31886bda..74d29c56c367 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -253,7 +253,7 @@ out_nonsensical_length: return 0; out_invalid_option: - DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); + DCCP_INC_STATS(DCCP_MIB_INVALIDOPT); rc = DCCP_RESET_CODE_OPTION_ERROR; out_featneg_failed: DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 3ef7acef3ce8..3a2c34027758 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -28,7 +28,7 @@ static void dccp_write_err(struct sock *sk) dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); dccp_done(sk); - DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT); + __DCCP_INC_STATS(DCCP_MIB_ABORTONTIMEOUT); } /* A write timeout has occurred. Process the after effects. */ @@ -100,7 +100,7 @@ static void dccp_retransmit_timer(struct sock *sk) * total number of retransmissions of clones of original packets. */ if (icsk->icsk_retransmits == 0) - DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); + __DCCP_INC_STATS(DCCP_MIB_TIMEOUTS); if (dccp_retransmit_skb(sk) != 0) { /* @@ -179,7 +179,7 @@ static void dccp_delack_timer(unsigned long data) if (sock_owned_by_user(sk)) { /* Try again later. */ icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); goto out; @@ -209,7 +209,7 @@ static void dccp_delack_timer(unsigned long data) icsk->icsk_ack.ato = TCP_ATO_MIN; } dccp_send_ack(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); } out: bh_unlock_sock(sk); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index c28c47463b7e..eff5dfc2e33f 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -51,11 +51,12 @@ void unregister_switch_driver(struct dsa_switch_driver *drv) EXPORT_SYMBOL_GPL(unregister_switch_driver); static struct dsa_switch_driver * -dsa_switch_probe(struct device *host_dev, int sw_addr, char **_name) +dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, + const char **_name, void **priv) { struct dsa_switch_driver *ret; struct list_head *list; - char *name; + const char *name; ret = NULL; name = NULL; @@ -66,7 +67,7 @@ dsa_switch_probe(struct device *host_dev, int sw_addr, char **_name) drv = list_entry(list, struct dsa_switch_driver, list); - name = drv->probe(host_dev, sw_addr); + name = drv->probe(parent, host_dev, sw_addr, priv); if (name != NULL) { ret = drv; break; @@ -181,7 +182,7 @@ __ATTRIBUTE_GROUPS(dsa_hwmon); /* basic switch operations **************************************************/ static int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct net_device *master) { - struct dsa_chip_data *cd = ds->pd; + struct dsa_chip_data *cd = ds->cd; struct device_node *port_dn; struct phy_device *phydev; int ret, port, mode; @@ -218,7 +219,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) { struct dsa_switch_driver *drv = ds->drv; struct dsa_switch_tree *dst = ds->dst; - struct dsa_chip_data *pd = ds->pd; + struct dsa_chip_data *cd = ds->cd; bool valid_name_found = false; int index = ds->index; int i, ret; @@ -229,7 +230,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) for (i = 0; i < DSA_MAX_PORTS; i++) { char *name; - name = pd->port_names[i]; + name = cd->port_names[i]; if (name == NULL) continue; @@ -245,7 +246,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) } else if (!strcmp(name, "dsa")) { ds->dsa_port_mask |= 1 << i; } else { - ds->phys_port_mask |= 1 << i; + ds->enabled_port_mask |= 1 << i; } valid_name_found = true; } @@ -258,7 +259,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) /* Make the built-in MII bus mask match the number of ports, * switch drivers can override this later */ - ds->phys_mii_mask = ds->phys_port_mask; + ds->phys_mii_mask = ds->enabled_port_mask; /* * If the CPU connects to this switch, set the switch tree @@ -266,7 +267,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * switch. */ if (dst->cpu_switch == index) { - switch (ds->tag_protocol) { + switch (drv->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case DSA_TAG_PROTO_DSA: dst->rcv = dsa_netdev_ops.rcv; @@ -294,7 +295,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) goto out; } - dst->tag_protocol = ds->tag_protocol; + dst->tag_protocol = drv->tag_protocol; } /* @@ -324,13 +325,13 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * Create network devices for physical switch ports. */ for (i = 0; i < DSA_MAX_PORTS; i++) { - if (!(ds->phys_port_mask & (1 << i))) + if (!(ds->enabled_port_mask & (1 << i))) continue; - ret = dsa_slave_create(ds, parent, i, pd->port_names[i]); + ret = dsa_slave_create(ds, parent, i, cd->port_names[i]); if (ret < 0) { netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n", - index, i, pd->port_names[i], ret); + index, i, cd->port_names[i], ret); ret = 0; } } @@ -378,16 +379,17 @@ static struct dsa_switch * dsa_switch_setup(struct dsa_switch_tree *dst, int index, struct device *parent, struct device *host_dev) { - struct dsa_chip_data *pd = dst->pd->chip + index; + struct dsa_chip_data *cd = dst->pd->chip + index; struct dsa_switch_driver *drv; struct dsa_switch *ds; int ret; - char *name; + const char *name; + void *priv; /* * Probe for switch model. */ - drv = dsa_switch_probe(host_dev, pd->sw_addr, &name); + drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); if (drv == NULL) { netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", index); @@ -400,16 +402,16 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Allocate and initialise switch state. */ - ds = devm_kzalloc(parent, sizeof(*ds) + drv->priv_size, GFP_KERNEL); + ds = devm_kzalloc(parent, sizeof(*ds), GFP_KERNEL); if (ds == NULL) return ERR_PTR(-ENOMEM); ds->dst = dst; ds->index = index; - ds->pd = pd; + ds->cd = cd; ds->drv = drv; - ds->tag_protocol = drv->tag_protocol; - ds->master_dev = host_dev; + ds->priv = priv; + ds->dev = parent; ret = dsa_switch_setup_one(ds, parent); if (ret) @@ -422,7 +424,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) { struct device_node *port_dn; struct phy_device *phydev; - struct dsa_chip_data *cd = ds->pd; + struct dsa_chip_data *cd = ds->cd; int port; #ifdef CONFIG_NET_DSA_HWMON @@ -432,7 +434,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) /* Destroy network devices for physical switch ports. */ for (port = 0; port < DSA_MAX_PORTS; port++) { - if (!(ds->phys_port_mask & (1 << port))) + if (!(ds->enabled_port_mask & (1 << port))) continue; if (!ds->ports[port]) @@ -657,9 +659,6 @@ static int dsa_of_probe(struct device *dev) const char *port_name; int chip_index, port_index; const unsigned int *sw_addr, *port_reg; - int gpio; - enum of_gpio_flags of_flags; - unsigned long flags; u32 eeprom_len; int ret; @@ -738,19 +737,6 @@ static int dsa_of_probe(struct device *dev) put_device(cd->host_dev); cd->host_dev = &mdio_bus_switch->dev; } - gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, - &of_flags); - if (gpio_is_valid(gpio)) { - flags = (of_flags == OF_GPIO_ACTIVE_LOW ? - GPIOF_ACTIVE_LOW : 0); - ret = devm_gpio_request_one(dev, gpio, flags, - "switch_reset"); - if (ret) - goto out_free_chip; - - cd->reset = gpio_to_desc(gpio); - gpiod_direction_output(cd->reset, 0); - } for_each_available_child_of_node(child, port) { port_reg = of_get_property(port, "reg", NULL); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 1d1a54687e4a..dfa33779d49c 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -22,11 +22,6 @@ struct dsa_device_ops { }; struct dsa_slave_priv { - /* - * The linux network interface corresponding to this - * switch port. - */ - struct net_device *dev; struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a575f0350d5a..152436cdab30 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -50,8 +50,8 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) ds->slave_mii_bus->read = dsa_slave_phy_read; ds->slave_mii_bus->write = dsa_slave_phy_write; snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x", - ds->index, ds->pd->sw_addr); - ds->slave_mii_bus->parent = ds->master_dev; + ds->index, ds->cd->sw_addr); + ds->slave_mii_bus->parent = ds->dev; ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; } @@ -104,8 +104,8 @@ static int dsa_slave_open(struct net_device *dev) goto clear_promisc; } - if (ds->drv->port_stp_update) - ds->drv->port_stp_update(ds, p->port, stp_state); + if (ds->drv->port_stp_state_set) + ds->drv->port_stp_state_set(ds, p->port, stp_state); if (p->phy) phy_start(p->phy); @@ -147,8 +147,8 @@ static int dsa_slave_close(struct net_device *dev) if (ds->drv->port_disable) ds->drv->port_disable(ds, p->port, p->phy); - if (ds->drv->port_stp_update) - ds->drv->port_stp_update(ds, p->port, BR_STATE_DISABLED); + if (ds->drv->port_stp_state_set) + ds->drv->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); return 0; } @@ -207,21 +207,16 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - int err; if (switchdev_trans_ph_prepare(trans)) { if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) return -EOPNOTSUPP; - err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); - if (err) - return err; - } else { - err = ds->drv->port_vlan_add(ds, p->port, vlan, trans); - if (err) - return err; + return ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); } + ds->drv->port_vlan_add(ds, p->port, vlan, trans); + return 0; } @@ -256,17 +251,17 @@ static int dsa_slave_port_fdb_add(struct net_device *dev, { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - int ret; - if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add) - return -EOPNOTSUPP; + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add) + return -EOPNOTSUPP; - if (switchdev_trans_ph_prepare(trans)) - ret = ds->drv->port_fdb_prepare(ds, p->port, fdb, trans); - else - ret = ds->drv->port_fdb_add(ds, p->port, fdb, trans); + return ds->drv->port_fdb_prepare(ds, p->port, fdb, trans); + } - return ret; + ds->drv->port_fdb_add(ds, p->port, fdb, trans); + + return 0; } static int dsa_slave_port_fdb_del(struct net_device *dev, @@ -305,16 +300,19 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } -static int dsa_slave_stp_update(struct net_device *dev, u8 state) +static int dsa_slave_stp_state_set(struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - int ret = -EOPNOTSUPP; - if (ds->drv->port_stp_update) - ret = ds->drv->port_stp_update(ds, p->port, state); + if (switchdev_trans_ph_prepare(trans)) + return ds->drv->port_stp_state_set ? 0 : -EOPNOTSUPP; - return ret; + ds->drv->port_stp_state_set(ds, p->port, attr->u.stp_state); + + return 0; } static int dsa_slave_vlan_filtering(struct net_device *dev, @@ -339,17 +337,11 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; int ret; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - if (switchdev_trans_ph_prepare(trans)) - ret = ds->drv->port_stp_update ? 0 : -EOPNOTSUPP; - else - ret = ds->drv->port_stp_update(ds, p->port, - attr->u.stp_state); + ret = dsa_slave_stp_state_set(dev, attr, trans); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: ret = dsa_slave_vlan_filtering(dev, attr, trans); @@ -468,7 +460,8 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - dsa_slave_stp_update(dev, BR_STATE_FORWARDING); + if (ds->drv->port_stp_state_set) + ds->drv->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, @@ -622,8 +615,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->pd->eeprom_len) - return ds->pd->eeprom_len; + if (ds->cd->eeprom_len) + return ds->cd->eeprom_len; if (ds->drv->get_eeprom_len) return ds->drv->get_eeprom_len(ds); @@ -673,6 +666,78 @@ static void dsa_slave_get_strings(struct net_device *dev, } } +static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds = dst->ds[0]; + s8 cpu_port = dst->cpu_port; + int count = 0; + + if (dst->master_ethtool_ops.get_sset_count) { + count = dst->master_ethtool_ops.get_sset_count(dev, + ETH_SS_STATS); + dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data); + } + + if (ds->drv->get_ethtool_stats) + ds->drv->get_ethtool_stats(ds, cpu_port, data + count); +} + +static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds = dst->ds[0]; + int count = 0; + + if (dst->master_ethtool_ops.get_sset_count) + count += dst->master_ethtool_ops.get_sset_count(dev, sset); + + if (sset == ETH_SS_STATS && ds->drv->get_sset_count) + count += ds->drv->get_sset_count(ds); + + return count; +} + +static void dsa_cpu_port_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds = dst->ds[0]; + s8 cpu_port = dst->cpu_port; + int len = ETH_GSTRING_LEN; + int mcount = 0, count; + unsigned int i; + uint8_t pfx[4]; + uint8_t *ndata; + + snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port); + /* We do not want to be NULL-terminated, since this is a prefix */ + pfx[sizeof(pfx) - 1] = '_'; + + if (dst->master_ethtool_ops.get_sset_count) { + mcount = dst->master_ethtool_ops.get_sset_count(dev, + ETH_SS_STATS); + dst->master_ethtool_ops.get_strings(dev, stringset, data); + } + + if (stringset == ETH_SS_STATS && ds->drv->get_strings) { + ndata = data + mcount * len; + /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle + * the output after to prepend our CPU port prefix we + * constructed earlier + */ + ds->drv->get_strings(ds, cpu_port, ndata); + count = ds->drv->get_sset_count(ds); + for (i = 0; i < count; i++) { + memmove(ndata + (i * len + sizeof(pfx)), + ndata + i * len, len - sizeof(pfx)); + memcpy(ndata + i * len, pfx, sizeof(pfx)); + } + } +} + static void dsa_slave_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, uint64_t *data) @@ -680,10 +745,10 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - data[0] = p->dev->stats.tx_packets; - data[1] = p->dev->stats.tx_bytes; - data[2] = p->dev->stats.rx_packets; - data[3] = p->dev->stats.rx_bytes; + data[0] = dev->stats.tx_packets; + data[1] = dev->stats.tx_bytes; + data[2] = dev->stats.rx_packets; + data[3] = dev->stats.rx_bytes; if (ds->drv->get_ethtool_stats != NULL) ds->drv->get_ethtool_stats(ds, p->port, data + 4); } @@ -828,6 +893,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_eee = dsa_slave_get_eee, }; +static struct ethtool_ops dsa_cpu_port_ethtool_ops; + static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, @@ -932,7 +999,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, struct net_device *slave_dev) { struct dsa_switch *ds = p->parent; - struct dsa_chip_data *cd = ds->pd; + struct dsa_chip_data *cd = ds->cd; struct device_node *phy_dn, *port_dn; bool phy_is_fixed = false; u32 phy_flags = 0; @@ -1045,6 +1112,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, int port, char *name) { struct net_device *master = ds->dst->master_netdev; + struct dsa_switch_tree *dst = ds->dst; struct net_device *slave_dev; struct dsa_slave_priv *p; int ret; @@ -1056,6 +1124,19 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->features = master->vlan_features; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; + if (master->ethtool_ops != &dsa_cpu_port_ethtool_ops) { + memcpy(&dst->master_ethtool_ops, master->ethtool_ops, + sizeof(struct ethtool_ops)); + memcpy(&dsa_cpu_port_ethtool_ops, &dst->master_ethtool_ops, + sizeof(struct ethtool_ops)); + dsa_cpu_port_ethtool_ops.get_sset_count = + dsa_cpu_port_get_sset_count; + dsa_cpu_port_ethtool_ops.get_ethtool_stats = + dsa_cpu_port_get_ethtool_stats; + dsa_cpu_port_ethtool_ops.get_strings = + dsa_cpu_port_get_strings; + master->ethtool_ops = &dsa_cpu_port_ethtool_ops; + } eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; @@ -1066,11 +1147,10 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, NULL); SET_NETDEV_DEV(slave_dev, parent); - slave_dev->dev.of_node = ds->pd->port_dn[port]; + slave_dev->dev.of_node = ds->cd->port_dn[port]; slave_dev->vlan_features = master->vlan_features; p = netdev_priv(slave_dev); - p->dev = slave_dev; p->parent = ds; p->port = port; diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig index 0d3d709052ca..4b683fd0abf1 100644 --- a/net/hsr/Kconfig +++ b/net/hsr/Kconfig @@ -18,8 +18,9 @@ config HSR earlier. This code is a "best effort" to comply with the HSR standard as - described in IEC 62439-3:2010 (HSRv0), but no compliancy tests have - been made. + described in IEC 62439-3:2010 (HSRv0) and IEC 62439-3:2012 (HSRv1), + but no compliancy tests have been made. Use iproute2 to select + the version you desire. You need to perform any and all necessary tests yourself before relying on this code in a safety critical system! diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c7d1adca30d8..16737cd8dae8 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -90,7 +90,8 @@ static void hsr_check_announce(struct net_device *hsr_dev, hsr = netdev_priv(hsr_dev); - if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) { + if ((hsr_dev->operstate == IF_OPER_UP) + && (old_operstate != IF_OPER_UP)) { /* Went up */ hsr->announce_count = 0; hsr->announce_timer.expires = jiffies + @@ -250,31 +251,22 @@ static const struct header_ops hsr_header_ops = { .parse = eth_header_parse, }; - -/* HSR:2010 supervision frames should be padded so that the whole frame, - * including headers and FCS, is 64 bytes (without VLAN). - */ -static int hsr_pad(int size) -{ - const int min_size = ETH_ZLEN - HSR_HLEN - ETH_HLEN; - - if (size >= min_size) - return size; - return min_size; -} - -static void send_hsr_supervision_frame(struct hsr_port *master, u8 type) +static void send_hsr_supervision_frame(struct hsr_port *master, + u8 type, u8 hsrVer) { struct sk_buff *skb; int hlen, tlen; + struct hsr_tag *hsr_tag; struct hsr_sup_tag *hsr_stag; struct hsr_sup_payload *hsr_sp; unsigned long irqflags; hlen = LL_RESERVED_SPACE(master->dev); tlen = master->dev->needed_tailroom; - skb = alloc_skb(hsr_pad(sizeof(struct hsr_sup_payload)) + hlen + tlen, - GFP_ATOMIC); + skb = dev_alloc_skb( + sizeof(struct hsr_tag) + + sizeof(struct hsr_sup_tag) + + sizeof(struct hsr_sup_payload) + hlen + tlen); if (skb == NULL) return; @@ -282,32 +274,48 @@ static void send_hsr_supervision_frame(struct hsr_port *master, u8 type) skb_reserve(skb, hlen); skb->dev = master->dev; - skb->protocol = htons(ETH_P_PRP); + skb->protocol = htons(hsrVer ? ETH_P_HSR : ETH_P_PRP); skb->priority = TC_PRIO_CONTROL; - if (dev_hard_header(skb, skb->dev, ETH_P_PRP, + if (dev_hard_header(skb, skb->dev, (hsrVer ? ETH_P_HSR : ETH_P_PRP), master->hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) goto out; skb_reset_mac_header(skb); - hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(*hsr_stag)); + if (hsrVer > 0) { + hsr_tag = (typeof(hsr_tag)) skb_put(skb, sizeof(struct hsr_tag)); + hsr_tag->encap_proto = htons(ETH_P_PRP); + set_hsr_tag_LSDU_size(hsr_tag, HSR_V1_SUP_LSDUSIZE); + } - set_hsr_stag_path(hsr_stag, 0xf); - set_hsr_stag_HSR_Ver(hsr_stag, 0); + hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(struct hsr_sup_tag)); + set_hsr_stag_path(hsr_stag, (hsrVer ? 0x0 : 0xf)); + set_hsr_stag_HSR_Ver(hsr_stag, hsrVer); + /* From HSRv1 on we have separate supervision sequence numbers. */ spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); - hsr_stag->sequence_nr = htons(master->hsr->sequence_nr); - master->hsr->sequence_nr++; + if (hsrVer > 0) { + hsr_stag->sequence_nr = htons(master->hsr->sup_sequence_nr); + hsr_tag->sequence_nr = htons(master->hsr->sequence_nr); + master->hsr->sup_sequence_nr++; + master->hsr->sequence_nr++; + } else { + hsr_stag->sequence_nr = htons(master->hsr->sequence_nr); + master->hsr->sequence_nr++; + } spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); hsr_stag->HSR_TLV_Type = type; - hsr_stag->HSR_TLV_Length = 12; + /* TODO: Why 12 in HSRv0? */ + hsr_stag->HSR_TLV_Length = hsrVer ? sizeof(struct hsr_sup_payload) : 12; /* Payload: MacAddressA */ - hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp)); + hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr); + skb_put_padto(skb, ETH_ZLEN + HSR_HLEN); + hsr_forward_skb(skb, master); return; @@ -329,19 +337,20 @@ static void hsr_announce(unsigned long data) rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); - if (hsr->announce_count < 3) { - send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE); + if (hsr->announce_count < 3 && hsr->protVersion == 0) { + send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE, + hsr->protVersion); hsr->announce_count++; - } else { - send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK); - } - if (hsr->announce_count < 3) hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - else + } else { + send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK, + hsr->protVersion); + hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + } if (is_admin_up(master->dev)) add_timer(&hsr->announce_timer); @@ -428,7 +437,7 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { }; int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec) + unsigned char multicast_spec, u8 protocol_version) { struct hsr_priv *hsr; struct hsr_port *port; @@ -450,18 +459,17 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], spin_lock_init(&hsr->seqnr_lock); /* Overflow soon to find bugs easier: */ hsr->sequence_nr = HSR_SEQNR_START; + hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; - init_timer(&hsr->announce_timer); - hsr->announce_timer.function = hsr_announce; - hsr->announce_timer.data = (unsigned long) hsr; + setup_timer(&hsr->announce_timer, hsr_announce, (unsigned long)hsr); - init_timer(&hsr->prune_timer); - hsr->prune_timer.function = hsr_prune_nodes; - hsr->prune_timer.data = (unsigned long) hsr; + setup_timer(&hsr->prune_timer, hsr_prune_nodes, (unsigned long)hsr); ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; + hsr->protVersion = protocol_version; + /* FIXME: should I modify the value of these? * * - hsr_dev->flags - i.e. @@ -490,8 +498,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (res) goto fail; - hsr->prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); - add_timer(&hsr->prune_timer); + mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); return 0; diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index 108a5d59d2a6..9975e31bbb82 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -17,7 +17,7 @@ void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec); + unsigned char multicast_spec, u8 protocol_version); void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); bool is_hsr_master(struct net_device *dev); int hsr_get_max_mtu(struct hsr_priv *hsr); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 7871ed6d3825..5ee1d43f1310 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -50,21 +50,40 @@ struct hsr_frame_info { */ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) { - struct hsr_ethhdr_sp *hdr; + struct ethhdr *ethHdr; + struct hsr_sup_tag *hsrSupTag; + struct hsrv1_ethhdr_sp *hsrV1Hdr; WARN_ON_ONCE(!skb_mac_header_was_set(skb)); - hdr = (struct hsr_ethhdr_sp *) skb_mac_header(skb); + ethHdr = (struct ethhdr *) skb_mac_header(skb); - if (!ether_addr_equal(hdr->ethhdr.h_dest, + /* Correct addr? */ + if (!ether_addr_equal(ethHdr->h_dest, hsr->sup_multicast_addr)) return false; - if (get_hsr_stag_path(&hdr->hsr_sup) != 0x0f) + /* Correct ether type?. */ + if (!(ethHdr->h_proto == htons(ETH_P_PRP) + || ethHdr->h_proto == htons(ETH_P_HSR))) return false; - if ((hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_ANNOUNCE) && - (hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) + + /* Get the supervision header from correct location. */ + if (ethHdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */ + hsrV1Hdr = (struct hsrv1_ethhdr_sp *) skb_mac_header(skb); + if (hsrV1Hdr->hsr.encap_proto != htons(ETH_P_PRP)) + return false; + + hsrSupTag = &hsrV1Hdr->hsr_sup; + } else { + hsrSupTag = &((struct hsrv0_ethhdr_sp *) skb_mac_header(skb))->hsr_sup; + } + + if ((hsrSupTag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) && + (hsrSupTag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) return false; - if (hdr->hsr_sup.HSR_TLV_Length != 12) + if ((hsrSupTag->HSR_TLV_Length != 12) && + (hsrSupTag->HSR_TLV_Length != + sizeof(struct hsr_sup_payload))) return false; return true; @@ -110,7 +129,7 @@ static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame, static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, - struct hsr_port *port) + struct hsr_port *port, u8 protoVersion) { struct hsr_ethhdr *hsr_ethhdr; int lane_id; @@ -131,7 +150,8 @@ static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; - hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP); + hsr_ethhdr->ethhdr.h_proto = htons(protoVersion ? + ETH_P_HSR : ETH_P_PRP); } static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, @@ -160,7 +180,7 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, memmove(dst, src, movelen); skb_reset_mac_header(skb); - hsr_fill_tag(skb, frame, port); + hsr_fill_tag(skb, frame, port, port->hsr->protVersion); return skb; } @@ -320,7 +340,8 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame, /* FIXME: */ WARN_ONCE(1, "HSR: VLAN not yet supported"); } - if (ethhdr->h_proto == htons(ETH_P_PRP)) { + if (ethhdr->h_proto == htons(ETH_P_PRP) + || ethhdr->h_proto == htons(ETH_P_HSR)) { frame->skb_std = NULL; frame->skb_hsr = skb; frame->sequence_nr = hsr_get_skb_sequence_nr(skb); diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index bace124d14ef..7ea925816f79 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -177,17 +177,17 @@ struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, return node; } - if (!is_sup) - return NULL; /* Only supervision frame may create node entry */ + /* Everyone may create a node entry, connected node to a HSR device. */ - if (ethhdr->h_proto == htons(ETH_P_PRP)) { + if (ethhdr->h_proto == htons(ETH_P_PRP) + || ethhdr->h_proto == htons(ETH_P_HSR)) { /* Use the existing sequence_nr from the tag as starting point * for filtering duplicate frames. */ seq_out = hsr_get_skb_sequence_nr(skb) - 1; } else { WARN_ONCE(1, "%s: Non-HSR frame\n", __func__); - seq_out = 0; + seq_out = HSR_SEQNR_START; } return hsr_add_node(node_db, ethhdr->h_source, seq_out); @@ -200,17 +200,25 @@ struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, struct hsr_port *port_rcv) { + struct ethhdr *ethhdr; struct hsr_node *node_real; struct hsr_sup_payload *hsr_sp; struct list_head *node_db; int i; - skb_pull(skb, sizeof(struct hsr_ethhdr_sp)); - hsr_sp = (struct hsr_sup_payload *) skb->data; + ethhdr = (struct ethhdr *) skb_mac_header(skb); - if (ether_addr_equal(eth_hdr(skb)->h_source, hsr_sp->MacAddressA)) - /* Not sent from MacAddressB of a PICS_SUBS capable node */ - goto done; + /* Leave the ethernet header. */ + skb_pull(skb, sizeof(struct ethhdr)); + + /* And leave the HSR tag. */ + if (ethhdr->h_proto == htons(ETH_P_HSR)) + skb_pull(skb, sizeof(struct hsr_tag)); + + /* And leave the HSR sup tag. */ + skb_pull(skb, sizeof(struct hsr_sup_tag)); + + hsr_sp = (struct hsr_sup_payload *) skb->data; /* Merge node_curr (registered on MacAddressB) into node_real */ node_db = &port_rcv->hsr->node_db; @@ -225,7 +233,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, /* Node has already been merged */ goto done; - ether_addr_copy(node_real->MacAddressB, eth_hdr(skb)->h_source); + ether_addr_copy(node_real->MacAddressB, ethhdr->h_source); for (i = 0; i < HSR_PT_PORTS; i++) { if (!node_curr->time_in_stale[i] && time_after(node_curr->time_in[i], node_real->time_in[i])) { @@ -241,7 +249,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, kfree_rcu(node_curr, rcu_head); done: - skb_push(skb, sizeof(struct hsr_ethhdr_sp)); + skb_push(skb, sizeof(struct hsrv1_ethhdr_sp)); } diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 5a9c69962ded..9b9909e89e9e 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -30,6 +30,7 @@ */ #define MAX_SLAVE_DIFF 3000 /* ms */ #define HSR_SEQNR_START (USHRT_MAX - 1024) +#define HSR_SUP_SEQNR_START (HSR_SEQNR_START / 2) /* How often shall we check for broken ring and remove node entries older than @@ -58,6 +59,8 @@ struct hsr_tag { #define HSR_HLEN 6 +#define HSR_V1_SUP_LSDUSIZE 52 + /* The helper functions below assumes that 'path' occupies the 4 most * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or * equivalently, the 4 most significant bits of HSR tag byte 14). @@ -131,8 +134,14 @@ static inline void set_hsr_stag_HSR_Ver(struct hsr_sup_tag *hst, u16 HSR_Ver) set_hsr_tag_LSDU_size((struct hsr_tag *) hst, HSR_Ver); } -struct hsr_ethhdr_sp { +struct hsrv0_ethhdr_sp { + struct ethhdr ethhdr; + struct hsr_sup_tag hsr_sup; +} __packed; + +struct hsrv1_ethhdr_sp { struct ethhdr ethhdr; + struct hsr_tag hsr; struct hsr_sup_tag hsr_sup; } __packed; @@ -162,6 +171,8 @@ struct hsr_priv { struct timer_list prune_timer; int announce_count; u16 sequence_nr; + u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ + u8 protVersion; /* Indicate if HSRv0 or HSRv1. */ spinlock_t seqnr_lock; /* locking for sequence_nr */ unsigned char sup_multicast_addr[ETH_ALEN]; }; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index a2c7e4c0ac1e..d4d1617f43a8 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -23,7 +23,8 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SLAVE1] = { .type = NLA_U32 }, [IFLA_HSR_SLAVE2] = { .type = NLA_U32 }, [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 }, - [IFLA_HSR_SUPERVISION_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IFLA_HSR_VERSION] = { .type = NLA_U8 }, + [IFLA_HSR_SUPERVISION_ADDR] = { .len = ETH_ALEN }, [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, }; @@ -35,7 +36,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_device *link[2]; - unsigned char multicast_spec; + unsigned char multicast_spec, hsr_version; if (!data) { netdev_info(dev, "HSR: No slave devices specified\n"); @@ -62,7 +63,12 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, else multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]); - return hsr_dev_finalize(dev, link, multicast_spec); + if (!data[IFLA_HSR_VERSION]) + hsr_version = 0; + else + hsr_version = nla_get_u8(data[IFLA_HSR_VERSION]); + + return hsr_dev_finalize(dev, link, multicast_spec, hsr_version); } static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) @@ -115,10 +121,9 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { /* attribute policy */ -/* NLA_BINARY missing in libnl; use NLA_UNSPEC in userspace instead. */ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { - [HSR_A_NODE_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, - [HSR_A_NODE_ADDR_B] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [HSR_A_NODE_ADDR] = { .len = ETH_ALEN }, + [HSR_A_NODE_ADDR_B] = { .len = ETH_ALEN }, [HSR_A_IFINDEX] = { .type = NLA_U32 }, [HSR_A_IF1_AGE] = { .type = NLA_U32 }, [HSR_A_IF2_AGE] = { .type = NLA_U32 }, diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 7d37366cc695..f5b60388d02f 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -22,6 +22,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct hsr_port *port; + u16 protocol; if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: skb invalid", __func__); @@ -37,7 +38,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) goto finish_consume; } - if (eth_hdr(skb)->h_proto != htons(ETH_P_PRP)) + protocol = eth_hdr(skb)->h_proto; + if (protocol != htons(ETH_P_PRP) && protocol != htons(ETH_P_HSR)) goto finish_pass; skb_push(skb, ETH_HLEN); diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index b4e17a7c0df0..5ac778962e4e 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -41,24 +41,12 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) return (((__force u64)a->extended_addr) >> 32) ^ (((__force u64)a->extended_addr) & 0xffffffff); case IEEE802154_ADDR_SHORT: - return (__force u32)(a->short_addr); + return (__force u32)(a->short_addr + (a->pan_id << 16)); default: return 0; } } -/* private device info */ -struct lowpan_dev_info { - struct net_device *wdev; /* wpan device ptr */ - u16 fragment_tag; -}; - -static inline struct -lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) -{ - return (struct lowpan_dev_info *)lowpan_priv(dev)->priv; -} - int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); void lowpan_net_frag_exit(void); int lowpan_net_frag_init(void); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 0023c9048812..dd085db8580e 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -148,7 +148,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, return -EBUSY; } - lowpan_dev_info(ldev)->wdev = wdev; + lowpan_802154_dev(ldev)->wdev = wdev; /* Set the lowpan hardware address to the wpan hardware address. */ memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN); /* We need headroom for possible wpan_dev_hard_header call and tailroom @@ -173,7 +173,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, static void lowpan_dellink(struct net_device *ldev, struct list_head *head) { - struct net_device *wdev = lowpan_dev_info(ldev)->wdev; + struct net_device *wdev = lowpan_802154_dev(ldev)->wdev; ASSERT_RTNL(); @@ -184,7 +184,7 @@ static void lowpan_dellink(struct net_device *ldev, struct list_head *head) static struct rtnl_link_ops lowpan_link_ops __read_mostly = { .kind = "lowpan", - .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev_info)), + .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_802154_dev)), .setup = lowpan_setup, .newlink = lowpan_newlink, .dellink = lowpan_dellink, diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index d4353faced35..e459afd16bb3 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -84,7 +84,7 @@ static struct sk_buff* lowpan_alloc_frag(struct sk_buff *skb, int size, const struct ieee802154_hdr *master_hdr, bool frag1) { - struct net_device *wdev = lowpan_dev_info(skb->dev)->wdev; + struct net_device *wdev = lowpan_802154_dev(skb->dev)->wdev; struct sk_buff *frag; int rc; @@ -148,8 +148,8 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *ldev, int frag_cap, frag_len, payload_cap, rc; int skb_unprocessed, skb_offset; - frag_tag = htons(lowpan_dev_info(ldev)->fragment_tag); - lowpan_dev_info(ldev)->fragment_tag++; + frag_tag = htons(lowpan_802154_dev(ldev)->fragment_tag); + lowpan_802154_dev(ldev)->fragment_tag++; frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); frag_hdr[1] = dgram_size & 0xff; @@ -208,7 +208,7 @@ err: static int lowpan_header(struct sk_buff *skb, struct net_device *ldev, u16 *dgram_size, u16 *dgram_offset) { - struct wpan_dev *wpan_dev = lowpan_dev_info(ldev)->wdev->ieee802154_ptr; + struct wpan_dev *wpan_dev = lowpan_802154_dev(ldev)->wdev->ieee802154_ptr; struct ieee802154_addr sa, da; struct ieee802154_mac_cb *cb = mac_cb_init(skb); struct lowpan_addr_info info; @@ -248,8 +248,8 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *ldev, cb->ackreq = wpan_dev->ackreq; } - return wpan_dev_hard_header(skb, lowpan_dev_info(ldev)->wdev, &da, &sa, - 0); + return wpan_dev_hard_header(skb, lowpan_802154_dev(ldev)->wdev, &da, + &sa, 0); } netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) @@ -283,7 +283,7 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) max_single = ieee802154_max_payload(&wpan_hdr); if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { - skb->dev = lowpan_dev_info(ldev)->wdev; + skb->dev = lowpan_802154_dev(ldev)->wdev; ldev->stats.tx_packets++; ldev->stats.tx_bytes += dgram_size; return dev_queue_xmit(skb); diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 3503c38954f9..d3cbb3258718 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -34,9 +34,11 @@ #include "ieee802154.h" -static int nla_put_hwaddr(struct sk_buff *msg, int type, __le64 hwaddr) +static int nla_put_hwaddr(struct sk_buff *msg, int type, __le64 hwaddr, + int padattr) { - return nla_put_u64(msg, type, swab64((__force u64)hwaddr)); + return nla_put_u64_64bit(msg, type, swab64((__force u64)hwaddr), + padattr); } static __le64 nla_get_hwaddr(const struct nlattr *nla) @@ -623,7 +625,8 @@ ieee802154_llsec_fill_key_id(struct sk_buff *msg, if (desc->device_addr.mode == IEEE802154_ADDR_LONG && nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, - desc->device_addr.extended_addr)) + desc->device_addr.extended_addr, + IEEE802154_ATTR_PAD)) return -EMSGSIZE; } @@ -638,7 +641,7 @@ ieee802154_llsec_fill_key_id(struct sk_buff *msg, if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED, - desc->extended_source)) + desc->extended_source, IEEE802154_ATTR_PAD)) return -EMSGSIZE; return 0; @@ -1063,7 +1066,8 @@ ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq, nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) || nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, desc->short_addr) || - nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr, + IEEE802154_ATTR_PAD) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, desc->frame_counter) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, @@ -1167,7 +1171,8 @@ ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq, if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || - nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr, + IEEE802154_ATTR_PAD) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, devkey->frame_counter) || ieee802154_llsec_fill_key_id(msg, &devkey->key_id)) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 16ef0d9f566e..ca207dbf673b 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -722,7 +722,8 @@ ieee802154_llsec_send_key_id(struct sk_buff *msg, break; case NL802154_DEV_ADDR_EXTENDED: if (nla_put_le64(msg, NL802154_DEV_ADDR_ATTR_EXTENDED, - desc->device_addr.extended_addr)) + desc->device_addr.extended_addr, + NL802154_DEV_ADDR_ATTR_PAD)) return -ENOBUFS; break; default: @@ -742,7 +743,8 @@ ieee802154_llsec_send_key_id(struct sk_buff *msg, break; case NL802154_KEY_ID_MODE_INDEX_EXTENDED: if (nla_put_le64(msg, NL802154_KEY_ID_ATTR_SOURCE_EXTENDED, - desc->extended_source)) + desc->extended_source, + NL802154_KEY_ID_ATTR_PAD)) return -ENOBUFS; break; default: @@ -811,7 +813,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, if (nla_put_u32(msg, NL802154_ATTR_WPAN_PHY, rdev->wpan_phy_idx) || nla_put_u32(msg, NL802154_ATTR_IFTYPE, wpan_dev->iftype) || - nla_put_u64(msg, NL802154_ATTR_WPAN_DEV, wpan_dev_id(wpan_dev)) || + nla_put_u64_64bit(msg, NL802154_ATTR_WPAN_DEV, + wpan_dev_id(wpan_dev), NL802154_ATTR_PAD) || nla_put_u32(msg, NL802154_ATTR_GENERATION, rdev->devlist_generation ^ (cfg802154_rdev_list_generation << 2))) @@ -819,7 +822,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, /* address settings */ if (nla_put_le64(msg, NL802154_ATTR_EXTENDED_ADDR, - wpan_dev->extended_addr) || + wpan_dev->extended_addr, + NL802154_ATTR_PAD) || nla_put_le16(msg, NL802154_ATTR_SHORT_ADDR, wpan_dev->short_addr) || nla_put_le16(msg, NL802154_ATTR_PAN_ID, wpan_dev->pan_id)) @@ -1074,6 +1078,11 @@ static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) if (netif_running(dev)) return -EBUSY; + if (wpan_dev->lowpan_dev) { + if (netif_running(wpan_dev->lowpan_dev)) + return -EBUSY; + } + /* don't change address fields on monitor */ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR || !info->attrs[NL802154_ATTR_PAN_ID]) @@ -1105,6 +1114,11 @@ static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info) if (netif_running(dev)) return -EBUSY; + if (wpan_dev->lowpan_dev) { + if (netif_running(wpan_dev->lowpan_dev)) + return -EBUSY; + } + /* don't change address fields on monitor */ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR || !info->attrs[NL802154_ATTR_SHORT_ADDR]) @@ -1614,7 +1628,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, nla_put_le16(msg, NL802154_DEV_ATTR_SHORT_ADDR, dev_desc->short_addr) || nla_put_le64(msg, NL802154_DEV_ATTR_EXTENDED_ADDR, - dev_desc->hwaddr) || + dev_desc->hwaddr, NL802154_DEV_ATTR_PAD) || nla_put_u8(msg, NL802154_DEV_ATTR_SECLEVEL_EXEMPT, dev_desc->seclevel_exempt) || nla_put_u32(msg, NL802154_DEV_ATTR_KEY_MODE, dev_desc->key_mode)) @@ -1778,7 +1792,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, goto nla_put_failure; if (nla_put_le64(msg, NL802154_DEVKEY_ATTR_EXTENDED_ADDR, - extended_addr) || + extended_addr, NL802154_DEVKEY_ATTR_PAD) || nla_put_u32(msg, NL802154_DEVKEY_ATTR_FRAME_COUNTER, devkey->frame_counter)) goto nla_put_failure; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9e481992dbae..2e6e65fc4d20 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -948,6 +948,7 @@ const struct proto_ops inet_dgram_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, + .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, @@ -1106,7 +1107,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct ip_options_rcu *inet_opt; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; @@ -1194,12 +1195,12 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { + bool udpfrag = false, fixedid = false, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; - bool udpfrag, encap; struct iphdr *iph; - int proto; + int proto, tot_len; int nhoff; int ihl; int id; @@ -1216,7 +1217,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TUNNEL_REMCSUM | + SKB_GSO_PARTIAL | 0))) goto out; @@ -1247,11 +1250,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); - if (skb->encapsulation && - skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) - udpfrag = proto == IPPROTO_UDP && encap; - else - udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + if (!skb->encapsulation || encap) { + udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); + fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); + + /* fixed ID is invalid if DF bit is not set */ + if (fixedid && !(iph->frag_off & htons(IP_DF))) + goto out; + } ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) @@ -1264,15 +1270,25 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); if (udpfrag) { - iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next) iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; + tot_len = skb->len - nhoff; + } else if (skb_is_gso(skb)) { + if (!fixedid) { + iph->id = htons(id); + id += skb_shinfo(skb)->gso_segs; + } + tot_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)iph; } else { - iph->id = htons(id++); + if (!fixedid) + iph->id = htons(id++); + tot_len = skb->len - nhoff; } - iph->tot_len = htons(skb->len - nhoff); + iph->tot_len = htons(tot_len); ip_send_check(iph); if (encap) skb_reset_inner_headers(skb); @@ -1324,6 +1340,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, for (p = *head; p; p = p->next) { struct iphdr *iph2; + u16 flush_id; if (!NAPI_GRO_CB(p)->same_flow) continue; @@ -1347,16 +1364,36 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, (iph->tos ^ iph2->tos) | ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); - /* Save the IP ID check to be included later when we get to - * the transport layer so only the inner most IP ID is checked. - * This is because some GSO/TSO implementations do not - * correctly increment the IP ID for the outer hdrs. - */ - NAPI_GRO_CB(p)->flush_id = - ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; + + /* We need to store of the IP ID check to be included later + * when we can verify that this packet does in fact belong + * to a given flow. + */ + flush_id = (u16)(id - ntohs(iph2->id)); + + /* This bit of code makes it much easier for us to identify + * the cases where we are doing atomic vs non-atomic IP ID + * checks. Specifically an atomic check can return IP ID + * values 0 - 0xFFFF, while a non-atomic check can only + * return 0 or 0xFFFF. + */ + if (!NAPI_GRO_CB(p)->is_atomic || + !(iph->frag_off & htons(IP_DF))) { + flush_id ^= NAPI_GRO_CB(p)->count; + flush_id = flush_id ? 0xFFFF : 0; + } + + /* If the previous IP ID value was based on an atomic + * datagram we can overwrite the value and ignore it. + */ + if (NAPI_GRO_CB(skb)->is_atomic) + NAPI_GRO_CB(p)->flush_id = flush_id; + else + NAPI_GRO_CB(p)->flush_id |= flush_id; } + NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF)); NAPI_GRO_CB(skb)->flush |= flush; skb_set_network_header(skb, off); /* The above will be needed by the transport layer if there is one diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c34c7544d1db..89a8cac4726a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -436,7 +436,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { - NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); + __NET_INC_STATS(net, LINUX_MIB_ARPFILTER); flag = 1; } ip_rt_put(rt); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index bdb2a07ec363..40d6b87713a1 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1933,7 +1933,8 @@ int cipso_v4_sock_setattr(struct sock *sk, sk_inet = inet_sk(sk); - old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk)); + old = rcu_dereference_protected(sk_inet->inet_opt, + lockdep_sock_is_held(sk)); if (sk_inet->is_icsk) { sk_conn = inet_csk(sk); if (old) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 63566ec54794..ef2ebeb89d0f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -110,6 +110,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); return tb; } +EXPORT_SYMBOL_GPL(fib_new_table); /* caller must hold either rtnl or rcu read lock */ struct fib_table *fib_get_table(struct net *net, u32 id) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2b68418c7198..d09173bf9500 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1561,21 +1561,45 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) } #ifdef CONFIG_IP_ROUTE_MULTIPATH +static bool fib_good_nh(const struct fib_nh *nh) +{ + int state = NUD_REACHABLE; + + if (nh->nh_scope == RT_SCOPE_LINK) { + struct neighbour *n; + + rcu_read_lock_bh(); + + n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw); + if (n) + state = n->nud_state; + + rcu_read_unlock_bh(); + } + + return !!(state & NUD_VALID); +} void fib_select_multipath(struct fib_result *res, int hash) { struct fib_info *fi = res->fi; + struct net *net = fi->fib_net; + bool first = false; for_nexthops(fi) { if (hash > atomic_read(&nh->nh_upper_bound)) continue; - res->nh_sel = nhsel; - return; + if (!net->ipv4.sysctl_fib_multipath_use_neigh || + fib_good_nh(nh)) { + res->nh_sel = nhsel; + return; + } + if (!first) { + res->nh_sel = nhsel; + first = true; + } } endfor_nexthops(fi); - - /* Race condition: route has just become dead. */ - res->nh_sel = 0; } #endif diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index a6962ccad98a..eeec7d60e5fd 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -22,7 +22,6 @@ struct fou { u8 flags; __be16 port; u16 type; - struct udp_offload udp_offloads; struct list_head list; struct rcu_head rcu; }; @@ -186,13 +185,13 @@ drop: return 0; } -static struct sk_buff **fou_gro_receive(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff) +static struct sk_buff **fou_gro_receive(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff **pp = NULL; - u8 proto = NAPI_GRO_CB(skb)->proto; + u8 proto = fou_from_sock(sk)->protocol; const struct net_offload **offloads; /* We can clear the encap_mark for FOU as we are essentially doing @@ -220,11 +219,11 @@ out_unlock: return pp; } -static int fou_gro_complete(struct sk_buff *skb, int nhoff, - struct udp_offload *uoff) +static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, + int nhoff) { const struct net_offload *ops; - u8 proto = NAPI_GRO_CB(skb)->proto; + u8 proto = fou_from_sock(sk)->protocol; int err = -ENOSYS; const struct net_offload **offloads; @@ -267,9 +266,9 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return guehdr; } -static struct sk_buff **gue_gro_receive(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff) +static struct sk_buff **gue_gro_receive(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb) { const struct net_offload **offloads; const struct net_offload *ops; @@ -280,7 +279,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, void *data; u16 doffset = 0; int flush = 1; - struct fou *fou = container_of(uoff, struct fou, udp_offloads); + struct fou *fou = fou_from_sock(sk); struct gro_remcsum grc; skb_gro_remcsum_init(&grc); @@ -392,8 +391,7 @@ out: return pp; } -static int gue_gro_complete(struct sk_buff *skb, int nhoff, - struct udp_offload *uoff) +static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { const struct net_offload **offloads; struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); @@ -443,10 +441,7 @@ static int fou_add_to_port_list(struct net *net, struct fou *fou) static void fou_release(struct fou *fou) { struct socket *sock = fou->sock; - struct sock *sk = sock->sk; - if (sk->sk_family == AF_INET) - udp_del_offload(&fou->udp_offloads); list_del(&fou->list); udp_tunnel_sock_release(sock); @@ -456,11 +451,9 @@ static void fou_release(struct fou *fou) static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) { udp_sk(sk)->encap_rcv = fou_udp_recv; - fou->protocol = cfg->protocol; - fou->udp_offloads.callbacks.gro_receive = fou_gro_receive; - fou->udp_offloads.callbacks.gro_complete = fou_gro_complete; - fou->udp_offloads.port = cfg->udp_config.local_udp_port; - fou->udp_offloads.ipproto = cfg->protocol; + udp_sk(sk)->gro_receive = fou_gro_receive; + udp_sk(sk)->gro_complete = fou_gro_complete; + fou_from_sock(sk)->protocol = cfg->protocol; return 0; } @@ -468,9 +461,8 @@ static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) { udp_sk(sk)->encap_rcv = gue_udp_recv; - fou->udp_offloads.callbacks.gro_receive = gue_gro_receive; - fou->udp_offloads.callbacks.gro_complete = gue_gro_complete; - fou->udp_offloads.port = cfg->udp_config.local_udp_port; + udp_sk(sk)->gro_receive = gue_gro_receive; + udp_sk(sk)->gro_complete = gue_gro_complete; return 0; } @@ -529,12 +521,6 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_allocation = GFP_ATOMIC; - if (cfg->udp_config.family == AF_INET) { - err = udp_add_offload(net, &fou->udp_offloads); - if (err) - goto error; - } - err = fou_add_to_port_list(net, fou); if (err) goto error; @@ -818,11 +804,11 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; __be16 sport; + int err; - skb = iptunnel_handle_offloads(skb, type); - - if (IS_ERR(skb)) - return PTR_ERR(skb); + err = iptunnel_handle_offloads(skb, type); + if (err) + return err; sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), skb, 0, 0, false); @@ -842,6 +828,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, __be16 sport; void *data; bool need_priv = false; + int err; if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && skb->ip_summed == CHECKSUM_PARTIAL) { @@ -852,10 +839,9 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, optlen += need_priv ? GUE_LEN_PRIV : 0; - skb = iptunnel_handle_offloads(skb, type); - - if (IS_ERR(skb)) - return PTR_ERR(skb); + err = iptunnel_handle_offloads(skb, type); + if (err) + return err; /* Get source port (based on flow hash) before skb_push */ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index d9c552a721fc..4c39f4fd332a 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -60,6 +60,67 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); +/* Fills in tpi and returns header length to be pulled. */ +int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err, __be16 proto) +{ + const struct gre_base_hdr *greh; + __be32 *options; + int hdr_len; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; + + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + hdr_len = gre_calc_hlen(tpi->flags); + + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + tpi->proto = greh->protocol; + + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (skb_checksum_simple_validate(skb)) { + *csum_err = true; + return -EINVAL; + } + + skb_checksum_try_convert(skb, IPPROTO_GRE, 0, + null_compute_pseudo); + options++; + } + + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else { + tpi->key = 0; + } + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else { + tpi->seq = 0; + } + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IPv4/IPv6 + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = proto; + if ((*(u8 *)options & 0xF0) != 0x40) + hdr_len += 4; + } + return hdr_len; +} +EXPORT_SYMBOL(gre_parse_header); + static int gre_rcv(struct sk_buff *skb) { const struct gre_protocol *proto; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6a5bd4317866..e88190a8699a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -32,10 +32,12 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | - SKB_GSO_SIT))) + SKB_GSO_SIT | + SKB_GSO_PARTIAL))) goto out; if (!skb->encapsulation) @@ -86,7 +88,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb = segs; do { struct gre_base_hdr *greh; - __be32 *pcsum; + __sum16 *pcsum; /* Set up inner headers if we are offloading inner checksum */ if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -106,10 +108,25 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, continue; greh = (struct gre_base_hdr *)skb_transport_header(skb); - pcsum = (__be32 *)(greh + 1); + pcsum = (__sum16 *)(greh + 1); + + if (skb_is_gso(skb)) { + unsigned int partial_adj; + + /* Adjust checksum to account for the fact that + * the partial checksum is based on actual size + * whereas headers should be based on MSS size. + */ + partial_adj = skb->len + skb_headroom(skb) - + SKB_GSO_CB(skb)->data_offset - + skb_shinfo(skb)->gso_size; + *pcsum = ~csum_fold((__force __wsum)htonl(partial_adj)); + } else { + *pcsum = 0; + } - *pcsum = 0; - *(__sum16 *)pcsum = gso_make_checksum(skb, 0); + *(pcsum + 1) = 0; + *pcsum = gso_make_checksum(skb, 0); } while ((skb = skb->next)); out: return segs; @@ -275,6 +292,18 @@ static const struct net_offload gre_offload = { static int __init gre_offload_init(void) { - return inet_add_offload(&gre_offload, IPPROTO_GRE); + int err; + + err = inet_add_offload(&gre_offload, IPPROTO_GRE); +#if IS_ENABLED(CONFIG_IPV6) + if (err) + return err; + + err = inet6_add_offload(&gre_offload, IPPROTO_GRE); + if (err) + inet_del_offload(&gre_offload, IPPROTO_GRE); +#endif + + return err; } device_initcall(gre_offload_init); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 6333489771ed..38abe70e595f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -363,7 +363,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, ipc, rt, MSG_DONTWAIT) < 0) { - ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS); + __ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS); ip_flush_pending_frames(sk); } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = icmp_hdr(skb); @@ -744,7 +744,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) * avoid additional coding at protocol handlers. */ if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) { - ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); return; } @@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb) out: return true; out_err: - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return false; } @@ -877,7 +877,7 @@ out_err: static bool icmp_redirect(struct sk_buff *skb) { if (skb->len < sizeof(struct iphdr)) { - ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); return false; } @@ -956,7 +956,7 @@ static bool icmp_timestamp(struct sk_buff *skb) return true; out_err: - ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); return false; } @@ -996,7 +996,7 @@ int icmp_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); + __ICMP_INC_STATS(net, ICMP_MIB_INMSGS); if (skb_checksum_simple_validate(skb)) goto csum_error; @@ -1006,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); - ICMPMSGIN_INC_STATS_BH(net, icmph->type); + ICMPMSGIN_INC_STATS(net, icmph->type); /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * @@ -1052,9 +1052,9 @@ drop: kfree_skb(skb); return 0; csum_error: - ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS); error: - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); goto drop; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bc5196ea1bdf..fa8c39804bdb 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -427,7 +427,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, route_err: ip_rt_put(rt); no_route: - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } EXPORT_SYMBOL_GPL(inet_csk_route_req); @@ -466,7 +466,7 @@ route_err: ip_rt_put(rt); no_route: rcu_read_unlock(); - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); @@ -661,6 +661,9 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); newsk->sk_write_space = sk_stream_write_space; + /* listeners have SOCK_RCU_FREE, not the children */ + sock_reset_flag(newsk, SOCK_RCU_FREE); + newsk->sk_mark = inet_rsk(req)->ir_mark; atomic64_set(&newsk->sk_cookie, atomic64_read(&inet_rsk(req)->ir_cookie)); @@ -703,7 +706,9 @@ void inet_csk_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); + local_bh_disable(); percpu_counter_dec(sk->sk_prot->orphan_count); + local_bh_enable(); sock_put(sk); } EXPORT_SYMBOL(inet_csk_destroy_sock); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5fdb02f5598e..25af1243649b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -66,7 +66,7 @@ static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) mutex_unlock(&inet_diag_table_mutex); } -static void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) +void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) { r->idiag_family = sk->sk_family; @@ -89,6 +89,7 @@ static void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) r->id.idiag_dst[0] = sk->sk_daddr; } } +EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill); static size_t inet_sk_attr_size(void) { @@ -104,13 +105,50 @@ static size_t inet_sk_attr_size(void) + 64; } +int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns) +{ + const struct inet_sock *inet = inet_sk(sk); + + if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto errout; + + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, + * hence this needs to be included regardless of socket family. + */ + if (ext & (1 << (INET_DIAG_TOS - 1))) + if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) + goto errout; + +#if IS_ENABLED(CONFIG_IPV6) + if (r->idiag_family == AF_INET6) { + if (ext & (1 << (INET_DIAG_TCLASS - 1))) + if (nla_put_u8(skb, INET_DIAG_TCLASS, + inet6_sk(sk)->tclass) < 0) + goto errout; + + if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) + goto errout; + } +#endif + + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); + r->idiag_inode = sock_i_ino(sk); + + return 0; +errout: + return 1; +} +EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { - const struct inet_sock *inet = inet_sk(sk); const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; int ext = req->idiag_ext; @@ -135,32 +173,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 0; r->idiag_retrans = 0; - if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) goto errout; - /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, - * hence this needs to be included regardless of socket family. - */ - if (ext & (1 << (INET_DIAG_TOS - 1))) - if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) - goto errout; - -#if IS_ENABLED(CONFIG_IPV6) - if (r->idiag_family == AF_INET6) { - if (ext & (1 << (INET_DIAG_TCLASS - 1))) - if (nla_put_u8(skb, INET_DIAG_TCLASS, - inet6_sk(sk)->tclass) < 0) - goto errout; - - if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && - nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) - goto errout; - } -#endif - - r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); - r->idiag_inode = sock_i_ino(sk); - if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), @@ -182,31 +197,32 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto out; } -#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) - if (icsk->icsk_pending == ICSK_TIME_RETRANS || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; - r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + r->idiag_expires = + jiffies_to_msecs(icsk->icsk_timeout - jiffies); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; - r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + r->idiag_expires = + jiffies_to_msecs(icsk->icsk_timeout - jiffies); } else if (timer_pending(&sk->sk_timer)) { r->idiag_timer = 2; r->idiag_retrans = icsk->icsk_probes_out; - r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); + r->idiag_expires = + jiffies_to_msecs(sk->sk_timer.expires - jiffies); } else { r->idiag_timer = 0; r->idiag_expires = 0; } -#undef EXPIRES_IN_MS if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { - attr = nla_reserve(skb, INET_DIAG_INFO, - handler->idiag_info_size); + attr = nla_reserve_64bit(skb, INET_DIAG_INFO, + handler->idiag_info_size, + INET_DIAG_PAD); if (!attr) goto errout; @@ -356,6 +372,7 @@ struct sock *inet_diag_find_one_icsk(struct net *net, { struct sock *sk; + rcu_read_lock(); if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], @@ -376,9 +393,11 @@ struct sock *inet_diag_find_one_icsk(struct net *net, req->id.idiag_if); } #endif - else + else { + rcu_read_unlock(); return ERR_PTR(-EINVAL); - + } + rcu_read_unlock(); if (!sk) return ERR_PTR(-ENOENT); @@ -772,13 +791,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; - struct hlist_nulls_node *node; struct sock *sk; num = 0; ilb = &hashinfo->listening_hash[i]; spin_lock_bh(&ilb->lock); - sk_nulls_for_each(sk, node, &ilb->head) { + sk_for_each(sk, &ilb->head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) @@ -1061,7 +1079,9 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) } attr = handler->idiag_info_size - ? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size) + ? nla_reserve_64bit(skb, INET_DIAG_INFO, + handler->idiag_info_size, + INET_DIAG_PAD) : NULL; if (attr) info = nla_data(attr); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 0d9e9d7bb029..77c20a489218 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -198,13 +198,13 @@ static inline int compute_score(struct sock *sk, struct net *net, } /* - * Don't inline this cruft. Here are some nice properties to exploit here. The - * BSD API does not allow a listening sock to specify the remote port nor the + * Here are some nice properties to exploit here. The BSD API + * does not allow a listening sock to specify the remote port nor the * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ - +/* called with rcu_read_lock() : No refcount taken on the socket */ struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -212,38 +212,27 @@ struct sock *__inet_lookup_listener(struct net *net, const __be32 daddr, const unsigned short hnum, const int dif) { - struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - int score, hiscore, matches = 0, reuseport = 0; - bool select_ok = true; + int score, hiscore = 0, matches = 0, reuseport = 0; + struct sock *sk, *result = NULL; u32 phash = 0; - rcu_read_lock(); -begin: - result = NULL; - hiscore = 0; - sk_nulls_for_each_rcu(sk, node, &ilb->head) { + sk_for_each_rcu(sk, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { - result = sk; - hiscore = score; reuseport = sk->sk_reuseport; if (reuseport) { phash = inet_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - sk2 = reuseport_select_sock(sk, phash, - skb, doff); - if (sk2) { - result = sk2; - goto found; - } - } + result = reuseport_select_sock(sk, phash, + skb, doff); + if (result) + return result; matches = 1; } + result = sk; + hiscore = score; } else if (score == hiscore && reuseport) { matches++; if (reciprocal_scale(phash, matches) == 0) @@ -251,25 +240,6 @@ begin: phash = next_pseudo_random32(phash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) - goto begin; - if (result) { -found: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) - result = NULL; - else if (unlikely(compute_score(result, net, hnum, daddr, - dif) < hiscore)) { - sock_put(result); - select_ok = false; - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -312,7 +282,6 @@ struct sock *__inet_lookup_established(struct net *net, unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - rcu_read_lock(); begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) @@ -339,7 +308,6 @@ begin: out: sk = NULL; found: - rcu_read_unlock(); return sk; } EXPORT_SYMBOL_GPL(__inet_lookup_established); @@ -392,7 +360,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { sk_nulls_del_node_init_rcu((struct sock *)tw); - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); + __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -472,10 +440,9 @@ static int inet_reuseport_add_sock(struct sock *sk, { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; struct sock *sk2; - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); - sk_nulls_for_each_rcu(sk2, node, &ilb->head) { + sk_for_each_rcu(sk2, &ilb->head) { if (sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && @@ -514,7 +481,12 @@ int __inet_hash(struct sock *sk, struct sock *osk, if (err) goto unlock; } - __sk_nulls_add_node_rcu(sk, &ilb->head); + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) + hlist_add_tail_rcu(&sk->sk_node, &ilb->head); + else + hlist_add_head_rcu(&sk->sk_node, &ilb->head); + sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: spin_unlock(&ilb->lock); @@ -541,20 +513,25 @@ void inet_unhash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; spinlock_t *lock; + bool listener = false; int done; if (sk_unhashed(sk)) return; - if (sk->sk_state == TCP_LISTEN) + if (sk->sk_state == TCP_LISTEN) { lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; - else + listener = true; + } else { lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - + } spin_lock_bh(lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - done = __sk_nulls_del_node_init_rcu(sk); + if (listener) + done = __sk_del_node_init(sk); + else + done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); @@ -690,9 +667,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h) for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); - INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, - i + LISTENING_NULLS_BASE); - } + INIT_HLIST_HEAD(&h->listening_hash[i].head); + } } EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c67f9bd7699c..206581674806 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -94,7 +94,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, } /* - * Enter the time wait state. This is called with locally disabled BH. + * Enter the time wait state. * Essentially we whip up a timewait bucket, copy the relevant info into it * from the SK, and mess with hash chains and list linkage. */ @@ -112,7 +112,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; - spin_lock(&bhead->lock); + spin_lock_bh(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); @@ -138,7 +138,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock(lock); + spin_unlock_bh(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); @@ -147,9 +147,9 @@ static void tw_timer_handler(unsigned long data) struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; if (tw->tw_kill) - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); + __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); else - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); + __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED); inet_twsk_kill(tw); } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index af18f1e4889e..cbfb1808fcc4 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -65,8 +65,8 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len); + __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); + __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -157,7 +157,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... */ - IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index efbd47d1a531..bbe7f72db9c1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -204,14 +204,14 @@ static void ip_expire(unsigned long arg) goto out; ipq_kill(qp); - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); if (!inet_frag_evicting(&qp->q)) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) goto out; @@ -291,7 +291,7 @@ static int ip_frag_too_far(struct ipq *qp) struct net *net; net = container_of(qp->q.net, struct net, ipv4.frags); - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); } return rc; @@ -635,7 +635,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, ip_send_check(iph); - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; qp->q.fragments_tail = NULL; return 0; @@ -647,7 +647,7 @@ out_nomem: out_oversize: net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr); out_fail: - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); return err; } @@ -658,7 +658,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) int vif = l3mdev_master_ifindex_rcu(dev); struct ipq *qp; - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); skb_orphan(skb); /* Lookup (or create) queue header */ @@ -675,7 +675,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) return ret; } - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -ENOMEM; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4cc84212cce1..4d2025f7ec57 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -122,126 +122,6 @@ static int ipgre_tunnel_init(struct net_device *dev); static int ipgre_net_id __read_mostly; static int gre_tap_net_id __read_mostly; -static int ip_gre_calc_hlen(__be16 o_flags) -{ - int addend = 4; - - if (o_flags & TUNNEL_CSUM) - addend += 4; - if (o_flags & TUNNEL_KEY) - addend += 4; - if (o_flags & TUNNEL_SEQ) - addend += 4; - return addend; -} - -static __be16 gre_flags_to_tnl_flags(__be16 flags) -{ - __be16 tflags = 0; - - if (flags & GRE_CSUM) - tflags |= TUNNEL_CSUM; - if (flags & GRE_ROUTING) - tflags |= TUNNEL_ROUTING; - if (flags & GRE_KEY) - tflags |= TUNNEL_KEY; - if (flags & GRE_SEQ) - tflags |= TUNNEL_SEQ; - if (flags & GRE_STRICT) - tflags |= TUNNEL_STRICT; - if (flags & GRE_REC) - tflags |= TUNNEL_REC; - if (flags & GRE_VERSION) - tflags |= TUNNEL_VERSION; - - return tflags; -} - -static __be16 tnl_flags_to_gre_flags(__be16 tflags) -{ - __be16 flags = 0; - - if (tflags & TUNNEL_CSUM) - flags |= GRE_CSUM; - if (tflags & TUNNEL_ROUTING) - flags |= GRE_ROUTING; - if (tflags & TUNNEL_KEY) - flags |= GRE_KEY; - if (tflags & TUNNEL_SEQ) - flags |= GRE_SEQ; - if (tflags & TUNNEL_STRICT) - flags |= GRE_STRICT; - if (tflags & TUNNEL_REC) - flags |= GRE_REC; - if (tflags & TUNNEL_VERSION) - flags |= GRE_VERSION; - - return flags; -} - -/* Fills in tpi and returns header length to be pulled. */ -static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err) -{ - const struct gre_base_hdr *greh; - __be32 *options; - int hdr_len; - - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) - return -EINVAL; - - greh = (struct gre_base_hdr *)skb_transport_header(skb); - if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) - return -EINVAL; - - tpi->flags = gre_flags_to_tnl_flags(greh->flags); - hdr_len = ip_gre_calc_hlen(tpi->flags); - - if (!pskb_may_pull(skb, hdr_len)) - return -EINVAL; - - greh = (struct gre_base_hdr *)skb_transport_header(skb); - tpi->proto = greh->protocol; - - options = (__be32 *)(greh + 1); - if (greh->flags & GRE_CSUM) { - if (skb_checksum_simple_validate(skb)) { - *csum_err = true; - return -EINVAL; - } - - skb_checksum_try_convert(skb, IPPROTO_GRE, 0, - null_compute_pseudo); - options++; - } - - if (greh->flags & GRE_KEY) { - tpi->key = *options; - options++; - } else { - tpi->key = 0; - } - if (unlikely(greh->flags & GRE_SEQ)) { - tpi->seq = *options; - options++; - } else { - tpi->seq = 0; - } - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { - tpi->proto = htons(ETH_P_IP); - if ((*(u8 *)options & 0xF0) != 0x40) { - hdr_len += 4; - if (!pskb_may_pull(skb, hdr_len)) - return -EINVAL; - } - } - return hdr_len; -} - static void ipgre_err(struct sk_buff *skb, u32 info, const struct tnl_ptk_info *tpi) { @@ -342,7 +222,7 @@ static void gre_err(struct sk_buff *skb, u32 info) struct tnl_ptk_info tpi; bool csum_err = false; - if (parse_gre_header(skb, &tpi, &csum_err) < 0) { + if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -380,24 +260,22 @@ static __be32 tunnel_id_to_key(__be64 x) #endif } -static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) +static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, + struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) { - struct net *net = dev_net(skb->dev); struct metadata_dst *tun_dst = NULL; - struct ip_tunnel_net *itn; const struct iphdr *iph; struct ip_tunnel *tunnel; - if (tpi->proto == htons(ETH_P_TEB)) - itn = net_generic(net, gre_tap_net_id); - else - itn = net_generic(net, ipgre_net_id); - iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, iph->saddr, iph->daddr, tpi->key); if (tunnel) { + if (__iptunnel_pull_header(skb, hdr_len, tpi->proto, + raw_proto, false) < 0) + goto drop; + if (tunnel->dev->type != ARPHRD_NONE) skb_pop_mac_header(skb); else @@ -416,7 +294,34 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); return PACKET_RCVD; } - return PACKET_REJECT; + return PACKET_NEXT; + +drop: + kfree_skb(skb); + return PACKET_RCVD; +} + +static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, + int hdr_len) +{ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; + int res; + + if (tpi->proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); + + res = __ipgre_rcv(skb, tpi, itn, hdr_len, false); + if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) { + /* ipgre tunnels in collect metadata mode should receive + * also ETH_P_TEB traffic. + */ + itn = net_generic(net, ipgre_net_id); + res = __ipgre_rcv(skb, tpi, itn, hdr_len, true); + } + return res; } static int gre_rcv(struct sk_buff *skb) @@ -433,13 +338,11 @@ static int gre_rcv(struct sk_buff *skb) } #endif - hdr_len = parse_gre_header(skb, &tpi, &csum_err); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)); if (hdr_len < 0) goto drop; - if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false) < 0) - goto drop; - if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) + if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -448,49 +351,6 @@ drop: return 0; } -static __sum16 gre_checksum(struct sk_buff *skb) -{ - __wsum csum; - - if (skb->ip_summed == CHECKSUM_PARTIAL) - csum = lco_csum(skb); - else - csum = skb_checksum(skb, 0, skb->len, 0); - return csum_fold(csum); -} - -static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, - __be16 proto, __be32 key, __be32 seq) -{ - struct gre_base_hdr *greh; - - skb_push(skb, hdr_len); - - skb_reset_transport_header(skb); - greh = (struct gre_base_hdr *)skb->data; - greh->flags = tnl_flags_to_gre_flags(flags); - greh->protocol = proto; - - if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - if (flags & TUNNEL_SEQ) { - *ptr = seq; - ptr--; - } - if (flags & TUNNEL_KEY) { - *ptr = key; - ptr--; - } - if (flags & TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & - (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { - *ptr = 0; - *(__sum16 *)ptr = gre_checksum(skb); - } - } -} - static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, __be16 proto) @@ -501,15 +361,15 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->o_seqno++; /* Push GRE header. */ - build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, - proto, tunnel->parms.o_key, htonl(tunnel->o_seqno)); + gre_build_header(skb, tunnel->tun_hlen, + tunnel->parms.o_flags, proto, tunnel->parms.o_key, + htonl(tunnel->o_seqno)); skb_set_inner_protocol(skb, proto); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } -static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, - bool csum) +static int gre_handle_offloads(struct sk_buff *skb, bool csum) { return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } @@ -562,7 +422,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, fl.saddr); } - tunnel_hlen = ip_gre_calc_hlen(key->tun_flags); + tunnel_hlen = gre_calc_hlen(key->tun_flags); min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + tunnel_hlen + sizeof(struct iphdr); @@ -577,15 +437,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, } /* Push Tunnel header. */ - skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)); - if (IS_ERR(skb)) { - skb = NULL; + if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) goto err_free_rt; - } flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); - build_header(skb, tunnel_hlen, flags, proto, - tunnel_id_to_key(tun_info->key.tun_id), 0); + gre_build_header(skb, tunnel_hlen, flags, proto, + tunnel_id_to_key(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -649,16 +506,14 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, tnl_params = &tunnel->parms.iph; } - skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); - if (IS_ERR(skb)) - goto out; + if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) + goto free_skb; __gre_xmit(skb, dev, tnl_params, skb->protocol); return NETDEV_TX_OK; free_skb: kfree_skb(skb); -out: dev->stats.tx_dropped++; return NETDEV_TX_OK; } @@ -673,9 +528,8 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); - if (IS_ERR(skb)) - goto out; + if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) + goto free_skb; if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; @@ -685,7 +539,6 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, free_skb: kfree_skb(skb); -out: dev->stats.tx_dropped++; return NETDEV_TX_OK; } @@ -711,8 +564,8 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (err) return err; - p.i_flags = tnl_flags_to_gre_flags(p.i_flags); - p.o_flags = tnl_flags_to_gre_flags(p.o_flags); + p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); + p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) return -EFAULT; @@ -756,7 +609,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); greh = (struct gre_base_hdr *)(iph+1); - greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags); + greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags); greh->protocol = htons(type); memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); @@ -857,7 +710,7 @@ static void __gre_tunnel_init(struct net_device *dev) int t_hlen; tunnel = netdev_priv(dev); - tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags); + tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); tunnel->parms.iph.protocol = IPPROTO_GRE; tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; @@ -1180,8 +1033,10 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || - nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) || - nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, + gre_tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, + gre_tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index e3d782746d9d..4b351af3e67b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -218,17 +218,17 @@ static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_b protocol = -ret; goto resubmit; } - IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); + __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS); + __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } kfree_skb(skb); } else { - IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); + __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); consume_skb(skb); } } @@ -273,7 +273,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb) --ANK (980813) */ if (skb_cow(skb, skb_headroom(skb))) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); + __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto drop; } @@ -282,7 +282,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb) opt->optlen = iph->ihl*4 - sizeof(struct iphdr); if (ip_options_compile(dev_net(dev), opt, skb)) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); + __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS); goto drop; } @@ -313,6 +313,13 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; + /* if ingress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip_rcv(skb); + if (!skb) + return NET_RX_SUCCESS; + if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk && @@ -337,7 +344,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) - NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER); + __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); goto drop; } } @@ -358,9 +365,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len); + __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) { - IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len); + __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len); } else if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { struct in_device *in_dev = __in_dev_get_rcu(skb->dev); @@ -409,11 +416,11 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, net = dev_net(dev); - IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len); + __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); + __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto out; } @@ -439,9 +446,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); - IP_ADD_STATS_BH(net, - IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), - max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); + __IP_ADD_STATS(net, + IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), + max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); if (!pskb_may_pull(skb, iph->ihl*4)) goto inhdr_error; @@ -453,7 +460,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); + __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; @@ -463,7 +470,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, * Note this now means skb->len holds ntohs(iph->tot_len). */ if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); + __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -471,6 +478,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + IPCB(skb)->iif = skb->skb_iif; /* Must drop socket now because of tproxy. */ skb_orphan(skb); @@ -480,9 +488,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, ip_rcv_finish); csum_error: - IP_INC_STATS_BH(net, IPSTATS_MIB_CSUMERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: - IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: kfree_skb(skb); out: diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 035ad645a8d9..71a52f4d4cff 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -106,7 +106,8 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, return; if (offset != 0) - csum = csum_sub(csum, csum_partial(skb->data, offset, 0)); + csum = csum_sub(csum, csum_partial(skb_transport_header(skb), + offset, 0)); put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); } @@ -219,11 +220,12 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, } EXPORT_SYMBOL(ip_cmsg_recv_offset); -int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, +int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6) { int err, val; struct cmsghdr *cmsg; + struct net *net = sock_net(sk); for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) @@ -244,6 +246,13 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, continue; } #endif + if (cmsg->cmsg_level == SOL_SOCKET) { + err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc); + if (err) + return err; + continue; + } + if (cmsg->cmsg_level != SOL_IP) continue; switch (cmsg->cmsg_type) { @@ -502,9 +511,10 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) copied = len; } err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto out_free_skb; - + if (unlikely(err)) { + kfree_skb(skb); + return err; + } sock_recv_timestamp(msg, sk, skb); serr = SKB_EXT_ERR(skb); @@ -536,8 +546,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_ERRQUEUE; err = copied; -out_free_skb: - kfree_skb(skb); + consume_skb(skb); out: return err; } @@ -635,7 +644,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (err) break; old = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet->is_icsk) { struct inet_connection_sock *icsk = inet_csk(sk); #if IS_ENABLED(CONFIG_IPV6) @@ -1185,7 +1194,12 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) ipv6_sk_rxinfo(sk); if (prepare && skb_rtable(skb)) { - pktinfo->ipi_ifindex = inet_iif(skb); + /* skb->cb is overloaded: prior to this point it is IP{6}CB + * which has interface index (iif) as the first member of the + * underlying inet{6}_skb_parm struct. This code then overlays + * PKTINFO_SKB_CB and in_pktinfo also has iif as the first + * element so the iif is picked up from the prior IPCB + */ pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; @@ -1295,7 +1309,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, struct ip_options_rcu *inet_opt; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); opt->optlen = 0; if (inet_opt) memcpy(optbuf, &inet_opt->opt, diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6165f30c4d72..9118b0e640ba 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -86,15 +86,15 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(iptunnel_xmit); -int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, - bool xnet) +int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, + __be16 inner_proto, bool raw_proto, bool xnet) { if (unlikely(!pskb_may_pull(skb, hdr_len))) return -ENOMEM; skb_pull_rcsum(skb, hdr_len); - if (inner_proto == htons(ETH_P_TEB)) { + if (!raw_proto && inner_proto == htons(ETH_P_TEB)) { struct ethhdr *eh; if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) @@ -117,7 +117,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, return iptunnel_pull_offloads(skb); } -EXPORT_SYMBOL_GPL(iptunnel_pull_header); +EXPORT_SYMBOL_GPL(__iptunnel_pull_header); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags) @@ -146,8 +146,8 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, } EXPORT_SYMBOL_GPL(iptunnel_metadata_reply); -struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, - int gso_type_mask) +int iptunnel_handle_offloads(struct sk_buff *skb, + int gso_type_mask) { int err; @@ -157,11 +157,11 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, } if (skb_is_gso(skb)) { - err = skb_unclone(skb, GFP_ATOMIC); + err = skb_header_unclone(skb, GFP_ATOMIC); if (unlikely(err)) - goto error; + return err; skb_shinfo(skb)->gso_type |= gso_type_mask; - return skb; + return 0; } if (skb->ip_summed != CHECKSUM_PARTIAL) { @@ -174,10 +174,7 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, skb->encapsulation = 0; } - return skb; -error: - kfree_skb(skb); - return ERR_PTR(err); + return 0; } EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); @@ -247,10 +244,10 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]); if (tb[LWTUNNEL_IP_DST]) - tun_info->key.u.ipv4.dst = nla_get_be32(tb[LWTUNNEL_IP_DST]); + tun_info->key.u.ipv4.dst = nla_get_in_addr(tb[LWTUNNEL_IP_DST]); if (tb[LWTUNNEL_IP_SRC]) - tun_info->key.u.ipv4.src = nla_get_be32(tb[LWTUNNEL_IP_SRC]); + tun_info->key.u.ipv4.src = nla_get_in_addr(tb[LWTUNNEL_IP_SRC]); if (tb[LWTUNNEL_IP_TTL]) tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]); @@ -274,9 +271,10 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); - if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) || - nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || - nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || + if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id, + LWTUNNEL_IP_PAD) || + nla_put_in_addr(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || + nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) @@ -287,7 +285,7 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate) { - return nla_total_size(8) /* LWTUNNEL_IP_ID */ + return nla_total_size_64bit(8) /* LWTUNNEL_IP_ID */ + nla_total_size(4) /* LWTUNNEL_IP_DST */ + nla_total_size(4) /* LWTUNNEL_IP_SRC */ + nla_total_size(1) /* LWTUNNEL_IP_TOS */ @@ -369,7 +367,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); - if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) || + if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id, + LWTUNNEL_IP6_PAD) || nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) || nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) || @@ -382,7 +381,7 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate) { - return nla_total_size(8) /* LWTUNNEL_IP6_ID */ + return nla_total_size_64bit(8) /* LWTUNNEL_IP6_ID */ + nla_total_size(16) /* LWTUNNEL_IP6_DST */ + nla_total_size(16) /* LWTUNNEL_IP6_SRC */ + nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ec51d02166de..92827483ee3d 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -219,9 +219,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP); - if (IS_ERR(skb)) - goto out; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); @@ -230,7 +229,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) tx_error: kfree_skb(skb); -out: + dev->stats.tx_errors++; return NETDEV_TX_OK; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 395e2814a46d..21a38e296fe2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2104,7 +2104,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0) return -EMSGSIZE; rtm->rtm_type = RTN_MULTICAST; @@ -2237,7 +2237,7 @@ static size_t mroute_msgsize(bool unresolved, int maxvif) + nla_total_size(0) /* RTA_MULTIPATH */ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ - + nla_total_size(sizeof(struct rta_mfc_stats)) + + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; return len; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4133b0f513af..2033f929aa66 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -34,27 +34,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); MODULE_DESCRIPTION("arptables core"); -/*#define DEBUG_ARP_TABLES*/ -/*#define DEBUG_ARP_TABLES_USER*/ - -#ifdef DEBUG_ARP_TABLES -#define dprintf(format, args...) pr_debug(format, ## args) -#else -#define dprintf(format, args...) -#endif - -#ifdef DEBUG_ARP_TABLES_USER -#define duprintf(format, args...) pr_debug(format, ## args) -#else -#define duprintf(format, args...) -#endif - -#ifdef CONFIG_NETFILTER_DEBUG -#define ARP_NF_ASSERT(x) WARN_ON(!(x)) -#else -#define ARP_NF_ASSERT(x) -#endif - void *arpt_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(arpt, ARPT); @@ -113,36 +92,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr, #define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop, - ARPT_INV_ARPOP)) { - dprintf("ARP operation field mismatch.\n"); - dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n", - arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask); + ARPT_INV_ARPOP)) return 0; - } if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd, - ARPT_INV_ARPHRD)) { - dprintf("ARP hardware address format mismatch.\n"); - dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n", - arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask); + ARPT_INV_ARPHRD)) return 0; - } if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro, - ARPT_INV_ARPPRO)) { - dprintf("ARP protocol address format mismatch.\n"); - dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n", - arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask); + ARPT_INV_ARPPRO)) return 0; - } if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln, - ARPT_INV_ARPHLN)) { - dprintf("ARP hardware address length mismatch.\n"); - dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n", - arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask); + ARPT_INV_ARPHLN)) return 0; - } src_devaddr = arpptr; arpptr += dev->addr_len; @@ -155,49 +118,25 @@ static inline int arp_packet_match(const struct arphdr *arphdr, if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len), ARPT_INV_SRCDEVADDR) || FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len), - ARPT_INV_TGTDEVADDR)) { - dprintf("Source or target device address mismatch.\n"); - + ARPT_INV_TGTDEVADDR)) return 0; - } if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr, ARPT_INV_SRCIP) || FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr), - ARPT_INV_TGTIP)) { - dprintf("Source or target IP address mismatch.\n"); - - dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", - &src_ipaddr, - &arpinfo->smsk.s_addr, - &arpinfo->src.s_addr, - arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : ""); - dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n", - &tgt_ipaddr, - &arpinfo->tmsk.s_addr, - &arpinfo->tgt.s_addr, - arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : ""); + ARPT_INV_TGTIP)) return 0; - } /* Look for ifname matches. */ ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); - if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { - dprintf("VIA in mismatch (%s vs %s).%s\n", - indev, arpinfo->iniface, - arpinfo->invflags & ARPT_INV_VIA_IN ? " (INV)" : ""); + if (FWINV(ret != 0, ARPT_INV_VIA_IN)) return 0; - } ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); - if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { - dprintf("VIA out mismatch (%s vs %s).%s\n", - outdev, arpinfo->outiface, - arpinfo->invflags & ARPT_INV_VIA_OUT ? " (INV)" : ""); + if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) return 0; - } return 1; #undef FWINV @@ -205,16 +144,10 @@ static inline int arp_packet_match(const struct arphdr *arphdr, static inline int arp_checkentry(const struct arpt_arp *arp) { - if (arp->flags & ~ARPT_F_MASK) { - duprintf("Unknown flag bits set: %08X\n", - arp->flags & ~ARPT_F_MASK); + if (arp->flags & ~ARPT_F_MASK) return 0; - } - if (arp->invflags & ~ARPT_INV_MASK) { - duprintf("Unknown invflag bits set: %08X\n", - arp->invflags & ~ARPT_INV_MASK); + if (arp->invflags & ~ARPT_INV_MASK) return 0; - } return 1; } @@ -367,6 +300,18 @@ static inline bool unconditional(const struct arpt_entry *e) memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } +static bool find_jump_target(const struct xt_table_info *t, + const struct arpt_entry *target) +{ + struct arpt_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ @@ -394,11 +339,9 @@ static int mark_source_chains(const struct xt_table_info *newinfo, = (void *)arpt_get_target_c(e); int visited = e->comefrom & (1 << hook); - if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { - pr_notice("arptables: loop hook %u pos %u %08X.\n", - hook, pos, e->comefrom); + if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) return 0; - } + e->comefrom |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); @@ -411,12 +354,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if ((strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < -NF_MAX_VERDICT - 1) { - duprintf("mark_source_chains: bad " - "negative verdict (%i)\n", - t->verdict); + t->verdict < -NF_MAX_VERDICT - 1) return 0; - } /* Return: backtrack through the last * big jump. @@ -439,6 +378,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct arpt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -447,20 +388,16 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { - if (newpos > newinfo->size - - sizeof(struct arpt_entry)) { - duprintf("mark_source_chains: " - "bad verdict (%i)\n", - newpos); - return 0; - } - /* This a jump; chase it. */ - duprintf("Jump rule %u -> %u\n", - pos, newpos); + e = (struct arpt_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct arpt_entry *) (entry0 + newpos); @@ -468,33 +405,14 @@ static int mark_source_chains(const struct xt_table_info *newinfo, pos = newpos; } } -next: - duprintf("Finished chain %u\n", hook); +next: ; } return 1; } -static inline int check_entry(const struct arpt_entry *e) -{ - const struct xt_entry_target *t; - - if (!arp_checkentry(&e->arp)) - return -EINVAL; - - if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) - return -EINVAL; - - t = arpt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - static inline int check_target(struct arpt_entry *e, const char *name) { struct xt_entry_target *t = arpt_get_target(e); - int ret; struct xt_tgchk_param par = { .table = name, .entryinfo = e, @@ -504,13 +422,7 @@ static inline int check_target(struct arpt_entry *e, const char *name) .family = NFPROTO_ARP, }; - ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); - if (ret < 0) { - duprintf("arp_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - return ret; - } - return 0; + return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); } static inline int @@ -518,17 +430,18 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) { struct xt_entry_target *t; struct xt_target *target; + unsigned long pcnt; int ret; - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) + pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(pcnt)) return -ENOMEM; + e->counters.pcnt = pcnt; t = arpt_get_target(e); target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = PTR_ERR(target); goto out; } @@ -574,19 +487,18 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || (unsigned char *)e + sizeof(struct arpt_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p\n", e); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset - < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) return -EINVAL; - } - err = check_entry(e); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -597,12 +509,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { - if (!check_underflow(e)) { - pr_debug("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + if (!check_underflow(e)) return -EINVAL; - } + newinfo->underflow[h] = underflows[h]; } } @@ -647,7 +556,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } - duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ @@ -664,37 +572,25 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } - duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) return ret; - if (i != repl->num_entries) { - duprintf("translate_table: %u not %u entries\n", - i, repl->num_entries); + if (i != repl->num_entries) return -EINVAL; - } /* Check hooks all assigned */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { /* Only hooks which are valid */ if (!(repl->valid_hooks & (1 << i))) continue; - if (newinfo->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, repl->hook_entry[i]); + if (newinfo->hook_entry[i] == 0xFFFFFFFF) return -EINVAL; - } - if (newinfo->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, repl->underflow[i]); + if (newinfo->underflow[i] == 0xFFFFFFFF) return -EINVAL; - } } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { - duprintf("Looping hook\n"); + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; - } /* Finally, each sanity check must pass */ i = 0; @@ -898,11 +794,8 @@ static int get_info(struct net *net, void __user *user, struct xt_table *t; int ret; - if (*len != sizeof(struct arpt_getinfo)) { - duprintf("length %u != %Zu\n", *len, - sizeof(struct arpt_getinfo)); + if (*len != sizeof(struct arpt_getinfo)) return -EINVAL; - } if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; @@ -958,33 +851,25 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, struct arpt_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct arpt_get_entries) + get.size) { - duprintf("get_entries: %u != %Zu\n", *len, - sizeof(struct arpt_get_entries) + get.size); + if (*len != sizeof(struct arpt_get_entries) + get.size) return -EINVAL; - } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; - duprintf("t->private->number = %u\n", - private->number); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); - else { - duprintf("get_entries: I've got %u not %u!\n", - private->size, get.size); + else ret = -EAGAIN; - } + module_put(t->me); xt_table_unlock(t); } else @@ -1022,8 +907,6 @@ static int __do_replace(struct net *net, const char *name, /* You lied! */ if (valid_hooks != t->valid_hooks) { - duprintf("Valid hook crap: %08X vs %08X\n", - valid_hooks, t->valid_hooks); ret = -EINVAL; goto put_module; } @@ -1033,8 +916,6 @@ static int __do_replace(struct net *net, const char *name, goto put_module; /* Update module usage count based on number of rules */ - duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", - oldinfo->number, oldinfo->initial_entries, newinfo->number); if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); @@ -1104,8 +985,6 @@ static int do_replace(struct net *net, const void __user *user, if (ret != 0) goto free_newinfo; - duprintf("arp_tables: Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) @@ -1126,55 +1005,17 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct arpt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - t = xt_find_table_lock(net, NFPROTO_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1182,7 +1023,7 @@ static int do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1209,6 +1050,18 @@ static int do_add_counters(struct net *net, const void __user *user, } #ifdef CONFIG_COMPAT +struct compat_arpt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_ARP_NUMHOOKS]; + u32 underflow[NF_ARP_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; + struct compat_arpt_entry entries[0]; +}; + static inline void compat_release_entry(struct compat_arpt_entry *e) { struct xt_entry_target *t; @@ -1217,38 +1070,32 @@ static inline void compat_release_entry(struct compat_arpt_entry *e) module_put(t->u.kernel.target->me); } -static inline int +static int check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; - int ret, off, h; + int ret, off; - duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p, limit = %p\n", e, limit); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset < sizeof(struct compat_arpt_entry) + - sizeof(struct compat_xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + sizeof(struct compat_xt_entry_target)) return -EINVAL; - } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (ret) return ret; @@ -1259,8 +1106,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", - t->u.user.name); ret = PTR_ERR(target); goto out; } @@ -1272,17 +1117,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (ret) goto release_target; - /* Check hooks & underflows */ - for (h = 0; h < NF_ARP_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; release_target: @@ -1291,18 +1125,17 @@ out: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct xt_target *target; struct arpt_entry *de; unsigned int origsize; - int ret, h; + int h; - ret = 0; origsize = *size; de = (struct arpt_entry *)*dstptr; memcpy(de, e, sizeof(struct arpt_entry)); @@ -1323,148 +1156,78 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } -static int translate_compat_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info **pinfo, +static int translate_compat_table(struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_arpt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_arpt_entry *iter0; - struct arpt_entry *iter1; + struct arpt_replace repl; unsigned int size; int ret = 0; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; + size = compatr->size; + info->number = compatr->num_entries; - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } - - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); - xt_compat_init_offsets(NFPROTO_ARP, number); + xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { - duprintf("translate_compat_table: %u not %u entries\n", - j, number); + if (j != compatr->num_entries) goto out_unlock; - } - - /* Check hooks all assigned */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone */ + xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - iter1->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(iter1->counters.pcnt)) { - ret = -ENOMEM; - break; - } - - ret = check_target(iter1, name); - if (ret != 0) { - xt_percpu_counter_free(iter1->counters.pcnt); - break; - } - ++i; - if (strcmp(arpt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1472,31 +1235,18 @@ static int translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(NFPROTO_ARP); - xt_compat_unlock(NFPROTO_ARP); - goto out; } -struct compat_arpt_replace { - char name[XT_TABLE_MAXNAMELEN]; - u32 valid_hooks; - u32 num_entries; - u32 size; - u32 hook_entry[NF_ARP_NUMHOOKS]; - u32 underflow[NF_ARP_NUMHOOKS]; - u32 num_counters; - compat_uptr_t counters; - struct compat_arpt_entry entries[0]; -}; - static int compat_do_replace(struct net *net, void __user *user, unsigned int len) { @@ -1510,8 +1260,6 @@ static int compat_do_replace(struct net *net, void __user *user, return -EFAULT; /* overflow check */ - if (tmp.size >= INT_MAX / num_possible_cpus()) - return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) @@ -1529,15 +1277,10 @@ static int compat_do_replace(struct net *net, void __user *user, goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; - duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) @@ -1570,7 +1313,6 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, break; default: - duprintf("do_arpt_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1653,17 +1395,13 @@ static int compat_get_entries(struct net *net, struct compat_arpt_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct compat_arpt_get_entries) + get.size) { - duprintf("compat_get_entries: %u != %zu\n", - *len, sizeof(get) + get.size); + if (*len != sizeof(struct compat_arpt_get_entries) + get.size) return -EINVAL; - } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(NFPROTO_ARP); @@ -1672,16 +1410,13 @@ static int compat_get_entries(struct net *net, const struct xt_table_info *private = t->private; struct xt_table_info info; - duprintf("t->private->number = %u\n", private->number); ret = compat_table_info(private, &info); if (!ret && get.size == info.size) { ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); - } else if (!ret) { - duprintf("compat_get_entries: I've got %u not %u!\n", - private->size, get.size); + } else if (!ret) ret = -EAGAIN; - } + xt_compat_flush_offsets(NFPROTO_ARP); module_put(t->me); xt_table_unlock(t); @@ -1733,7 +1468,6 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned break; default: - duprintf("do_arpt_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1776,7 +1510,6 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len } default: - duprintf("do_arpt_get_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1821,7 +1554,6 @@ int arpt_register_table(struct net *net, memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(newinfo, loc_cpu_entry, repl); - duprintf("arpt_register_table: translate table gives %d\n", ret); if (ret != 0) goto out_free; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 631c100a1338..54906e0e8e0c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -35,34 +35,12 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv4 packet filter"); -/*#define DEBUG_IP_FIREWALL*/ -/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ -/*#define DEBUG_IP_FIREWALL_USER*/ - -#ifdef DEBUG_IP_FIREWALL -#define dprintf(format, args...) pr_info(format , ## args) -#else -#define dprintf(format, args...) -#endif - -#ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) pr_info(format , ## args) -#else -#define duprintf(format, args...) -#endif - #ifdef CONFIG_NETFILTER_DEBUG #define IP_NF_ASSERT(x) WARN_ON(!(x)) #else #define IP_NF_ASSERT(x) #endif -#if 0 -/* All the better to debug you with... */ -#define static -#define inline -#endif - void *ipt_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(ipt, IPT); @@ -85,52 +63,28 @@ ip_packet_match(const struct iphdr *ip, if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, IPT_INV_SRCIP) || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, - IPT_INV_DSTIP)) { - dprintf("Source or dest mismatch.\n"); - - dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", - &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr, - ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); - dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n", - &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr, - ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); + IPT_INV_DSTIP)) return false; - } ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); - if (FWINV(ret != 0, IPT_INV_VIA_IN)) { - dprintf("VIA in mismatch (%s vs %s).%s\n", - indev, ipinfo->iniface, - ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : ""); + if (FWINV(ret != 0, IPT_INV_VIA_IN)) return false; - } ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); - if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { - dprintf("VIA out mismatch (%s vs %s).%s\n", - outdev, ipinfo->outiface, - ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : ""); + if (FWINV(ret != 0, IPT_INV_VIA_OUT)) return false; - } /* Check specific protocol */ if (ipinfo->proto && - FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { - dprintf("Packet protocol %hi does not match %hi.%s\n", - ip->protocol, ipinfo->proto, - ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : ""); + FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) return false; - } /* If we have a fragment rule but the packet is not a fragment * then we return zero */ - if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) { - dprintf("Fragment rule but not fragment.%s\n", - ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : ""); + if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) return false; - } return true; } @@ -138,16 +92,10 @@ ip_packet_match(const struct iphdr *ip, static bool ip_checkentry(const struct ipt_ip *ip) { - if (ip->flags & ~IPT_F_MASK) { - duprintf("Unknown flag bits set: %08X\n", - ip->flags & ~IPT_F_MASK); + if (ip->flags & ~IPT_F_MASK) return false; - } - if (ip->invflags & ~IPT_INV_MASK) { - duprintf("Unknown invflag bits set: %08X\n", - ip->invflags & ~IPT_INV_MASK); + if (ip->invflags & ~IPT_INV_MASK) return false; - } return true; } @@ -346,10 +294,6 @@ ipt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); - pr_debug("Entering %s(hook %u), UF %p\n", - table->name, hook, - get_entry(table_base, private->underflow[hook])); - do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; @@ -396,22 +340,15 @@ ipt_do_table(struct sk_buff *skb, if (stackidx == 0) { e = get_entry(table_base, private->underflow[hook]); - pr_debug("Underflow (this is normal) " - "to %p\n", e); } else { e = jumpstack[--stackidx]; - pr_debug("Pulled %p out from pos %u\n", - e, stackidx); e = ipt_next_entry(e); } continue; } if (table_base + v != ipt_next_entry(e) && - !(e->ip.flags & IPT_F_GOTO)) { + !(e->ip.flags & IPT_F_GOTO)) jumpstack[stackidx++] = e; - pr_debug("Pushed %p into pos %u\n", - e, stackidx - 1); - } e = get_entry(table_base, v); continue; @@ -429,18 +366,25 @@ ipt_do_table(struct sk_buff *skb, /* Verdict */ break; } while (!acpar.hotdrop); - pr_debug("Exiting %s; sp at %u\n", __func__, stackidx); xt_write_recseq_end(addend); local_bh_enable(); -#ifdef DEBUG_ALLOW_ALL - return NF_ACCEPT; -#else if (acpar.hotdrop) return NF_DROP; else return verdict; -#endif +} + +static bool find_jump_target(const struct xt_table_info *t, + const struct ipt_entry *target) +{ + struct ipt_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; } /* Figures out from what hook each rule can be called: returns 0 if @@ -468,11 +412,9 @@ mark_source_chains(const struct xt_table_info *newinfo, = (void *)ipt_get_target_c(e); int visited = e->comefrom & (1 << hook); - if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { - pr_err("iptables: loop hook %u pos %u %08X.\n", - hook, pos, e->comefrom); + if (e->comefrom & (1 << NF_INET_NUMHOOKS)) return 0; - } + e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ @@ -484,26 +426,13 @@ mark_source_chains(const struct xt_table_info *newinfo, if ((strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < -NF_MAX_VERDICT - 1) { - duprintf("mark_source_chains: bad " - "negative verdict (%i)\n", - t->verdict); + t->verdict < -NF_MAX_VERDICT - 1) return 0; - } /* Return: backtrack through the last big jump. */ do { e->comefrom ^= (1<<NF_INET_NUMHOOKS); -#ifdef DEBUG_IP_FIREWALL_USER - if (e->comefrom - & (1 << NF_INET_NUMHOOKS)) { - duprintf("Back unset " - "on hook %u " - "rule %u\n", - hook, pos); - } -#endif oldpos = pos; pos = e->counters.pcnt; e->counters.pcnt = 0; @@ -520,6 +449,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ipt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -528,19 +459,16 @@ mark_source_chains(const struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { - if (newpos > newinfo->size - - sizeof(struct ipt_entry)) { - duprintf("mark_source_chains: " - "bad verdict (%i)\n", - newpos); - return 0; - } /* This a jump; chase it. */ - duprintf("Jump rule %u -> %u\n", - pos, newpos); + e = (struct ipt_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ipt_entry *) (entry0 + newpos); @@ -548,8 +476,7 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = newpos; } } -next: - duprintf("Finished chain %u\n", hook); +next: ; } return 1; } @@ -568,40 +495,15 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ipt_entry *e) -{ - const struct xt_entry_target *t; - - if (!ip_checkentry(&e->ip)) - return -EINVAL; - - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ipt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - -static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; - int ret; par->match = m->u.kernel.match; par->matchinfo = m->data; - ret = xt_check_match(par, m->u.match_size - sizeof(*m), - ip->proto, ip->invflags & IPT_INV_PROTO); - if (ret < 0) { - duprintf("check failed for `%s'.\n", par->match->name); - return ret; - } - return 0; + return xt_check_match(par, m->u.match_size - sizeof(*m), + ip->proto, ip->invflags & IPT_INV_PROTO); } static int @@ -612,10 +514,8 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, m->u.user.revision); - if (IS_ERR(match)) { - duprintf("find_check_match: `%s' not found\n", m->u.user.name); + if (IS_ERR(match)) return PTR_ERR(match); - } m->u.kernel.match = match; ret = check_match(m, par); @@ -640,16 +540,9 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name) .hook_mask = e->comefrom, .family = NFPROTO_IPV4, }; - int ret; - ret = xt_check_target(&par, t->u.target_size - sizeof(*t), - e->ip.proto, e->ip.invflags & IPT_INV_PROTO); - if (ret < 0) { - duprintf("check failed for `%s'.\n", - t->u.kernel.target->name); - return ret; - } - return 0; + return xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ip.proto, e->ip.invflags & IPT_INV_PROTO); } static int @@ -662,10 +555,12 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; + unsigned long pcnt; - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) + pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(pcnt)) return -ENOMEM; + e->counters.pcnt = pcnt; j = 0; mtpar.net = net; @@ -684,7 +579,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = PTR_ERR(target); goto cleanup_matches; } @@ -738,19 +632,18 @@ check_entry_size_and_hooks(struct ipt_entry *e, if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p\n", e); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset - < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) + return -EINVAL; + + if (!ip_checkentry(&e->ip)) return -EINVAL; - } - err = check_entry(e); + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -761,12 +654,9 @@ check_entry_size_and_hooks(struct ipt_entry *e, if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { - if (!check_underflow(e)) { - pr_debug("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + if (!check_underflow(e)) return -EINVAL; - } + newinfo->underflow[h] = underflows[h]; } } @@ -818,7 +708,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } - duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { @@ -835,27 +724,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, ++newinfo->stacksize; } - if (i != repl->num_entries) { - duprintf("translate_table: %u not %u entries\n", - i, repl->num_entries); + if (i != repl->num_entries) return -EINVAL; - } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ if (!(repl->valid_hooks & (1 << i))) continue; - if (newinfo->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, repl->hook_entry[i]); + if (newinfo->hook_entry[i] == 0xFFFFFFFF) return -EINVAL; - } - if (newinfo->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, repl->underflow[i]); + if (newinfo->underflow[i] == 0xFFFFFFFF) return -EINVAL; - } } if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) @@ -1083,11 +963,8 @@ static int get_info(struct net *net, void __user *user, struct xt_table *t; int ret; - if (*len != sizeof(struct ipt_getinfo)) { - duprintf("length %u != %zu\n", *len, - sizeof(struct ipt_getinfo)); + if (*len != sizeof(struct ipt_getinfo)) return -EINVAL; - } if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; @@ -1145,31 +1022,23 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, struct ipt_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("get_entries: %u < %zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct ipt_get_entries) + get.size) { - duprintf("get_entries: %u != %zu\n", - *len, sizeof(get) + get.size); + if (*len != sizeof(struct ipt_get_entries) + get.size) return -EINVAL; - } get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; - duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); - else { - duprintf("get_entries: I've got %u not %u!\n", - private->size, get.size); + else ret = -EAGAIN; - } + module_put(t->me); xt_table_unlock(t); } else @@ -1205,8 +1074,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* You lied! */ if (valid_hooks != t->valid_hooks) { - duprintf("Valid hook crap: %08X vs %08X\n", - valid_hooks, t->valid_hooks); ret = -EINVAL; goto put_module; } @@ -1216,8 +1083,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, goto put_module; /* Update module usage count based on number of rules */ - duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", - oldinfo->number, oldinfo->initial_entries, newinfo->number); if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); @@ -1286,8 +1151,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (ret != 0) goto free_newinfo; - duprintf("Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) @@ -1309,55 +1172,17 @@ do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ipt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, AF_INET, name); + t = xt_find_table_lock(net, AF_INET, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1365,7 +1190,7 @@ do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1444,7 +1269,6 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ipt_ip *ip, int *size) { @@ -1452,11 +1276,9 @@ compat_find_calc_match(struct xt_entry_match *m, match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, m->u.user.revision); - if (IS_ERR(match)) { - duprintf("compat_check_calc_match: `%s' not found\n", - m->u.user.name); + if (IS_ERR(match)) return PTR_ERR(match); - } + m->u.kernel.match = match; *size += xt_compat_match_offset(match); return 0; @@ -1479,35 +1301,29 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; - duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p, limit = %p\n", e, limit); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset < sizeof(struct compat_ipt_entry) + - sizeof(struct compat_xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + sizeof(struct compat_xt_entry_target)) return -EINVAL; - } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e); + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, + e->target_offset, e->next_offset); if (ret) return ret; @@ -1515,7 +1331,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, &e->ip, &off); + ret = compat_find_calc_match(ematch, &e->ip, &off); if (ret != 0) goto release_matches; ++j; @@ -1525,8 +1341,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", - t->u.user.name); ret = PTR_ERR(target); goto release_matches; } @@ -1538,17 +1352,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1562,19 +1365,18 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct xt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); @@ -1583,201 +1385,101 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); target = t->u.kernel.target; xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; -} - -static int -compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) -{ - struct xt_entry_match *ematch; - struct xt_mtchk_param mtpar; - unsigned int j; - int ret = 0; - - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) - return -ENOMEM; - - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ip; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV4; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - - xt_percpu_counter_free(e->counters.pcnt); - - return ret; } static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ipt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ipt_entry *iter0; - struct ipt_entry *iter1; + struct ipt_replace repl; unsigned int size; int ret; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); - xt_compat_init_offsets(AF_INET, number); + xt_compat_init_offsets(AF_INET, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { - duprintf("translate_compat_table: %u not %u entries\n", - j, number); + if (j != compatr->num_entries) goto out_unlock; - } - - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone. + * entry1/newinfo contains a 64bit ruleset that looks exactly as + * generated by 64bit userspace. + * + * Call standard translate_table() to validate all hook_entrys, + * underflows, check for loops, etc. + */ xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); - if (ret != 0) - break; - ++i; - if (strcmp(ipt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1785,17 +1487,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET); - xt_compat_unlock(AF_INET); - goto out; } static int @@ -1811,8 +1512,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -EFAULT; /* overflow check */ - if (tmp.size >= INT_MAX / num_possible_cpus()) - return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) @@ -1831,15 +1530,10 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; - duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) @@ -1873,7 +1567,6 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, break; default: - duprintf("do_ipt_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1923,19 +1616,15 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, struct compat_ipt_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct compat_ipt_get_entries) + get.size) { - duprintf("compat_get_entries: %u != %zu\n", - *len, sizeof(get) + get.size); + if (*len != sizeof(struct compat_ipt_get_entries) + get.size) return -EINVAL; - } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET); @@ -1943,16 +1632,13 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; - duprintf("t->private->number = %u\n", private->number); ret = compat_table_info(private, &info); - if (!ret && get.size == info.size) { + if (!ret && get.size == info.size) ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); - } else if (!ret) { - duprintf("compat_get_entries: I've got %u not %u!\n", - private->size, get.size); + else if (!ret) ret = -EAGAIN; - } + xt_compat_flush_offsets(AF_INET); module_put(t->me); xt_table_unlock(t); @@ -2005,7 +1691,6 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) break; default: - duprintf("do_ipt_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -2057,7 +1742,6 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } default: - duprintf("do_ipt_get_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -2159,7 +1843,6 @@ icmp_match(const struct sk_buff *skb, struct xt_action_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ - duprintf("Dropping evil ICMP tinygram.\n"); par->hotdrop = true; return false; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index e3c46e8e2762..ae1a71a97132 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -360,7 +360,7 @@ static int ipv4_init_net(struct net *net) in->ctl_table[0].data = &nf_conntrack_max; in->ctl_table[1].data = &net->ct.count; - in->ctl_table[2].data = &net->ct.htable_size; + in->ctl_table[2].data = &nf_conntrack_htable_size; in->ctl_table[3].data = &net->ct.sysctl_checksum; in->ctl_table[4].data = &net->ct.sysctl_log_invalid; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index f0dfe92a00d6..c6f3c406f707 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -31,15 +31,14 @@ struct ct_iter_state { static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) { - struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < net->ct.htable_size; + st->bucket < nf_conntrack_htable_size; st->bucket++) { n = rcu_dereference( - hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); + hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); if (!is_a_nulls(n)) return n; } @@ -49,17 +48,16 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, struct hlist_nulls_node *head) { - struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(hlist_nulls_next_rcu(head)); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= net->ct.htable_size) + if (++st->bucket >= nf_conntrack_htable_size) return NULL; } head = rcu_dereference( - hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); + hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); } return head; } @@ -114,6 +112,23 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) } #endif +static bool ct_seq_should_skip(const struct nf_conn *ct, + const struct net *net, + const struct nf_conntrack_tuple_hash *hash) +{ + /* we only want to print DIR_ORIGINAL */ + if (NF_CT_DIRECTION(hash)) + return true; + + if (nf_ct_l3num(ct) != AF_INET) + return true; + + if (!net_eq(nf_ct_net(ct), net)) + return true; + + return false; +} + static int ct_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_tuple_hash *hash = v; @@ -123,14 +138,15 @@ static int ct_seq_show(struct seq_file *s, void *v) int ret = 0; NF_CT_ASSERT(ct); - if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + if (ct_seq_should_skip(ct, seq_file_net(s), hash)) return 0; + if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + return 0; - /* we only want to print DIR_ORIGINAL */ - if (NF_CT_DIRECTION(hash)) - goto release; - if (nf_ct_l3num(ct) != AF_INET) + /* check if we raced w. object reuse */ + if (!nf_ct_is_confirmed(ct) || + ct_seq_should_skip(ct, seq_file_net(s), hash)) goto release; l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); @@ -220,13 +236,12 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { - struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { n = rcu_dereference( - hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); if (n) return n; } @@ -236,7 +251,6 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { - struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(hlist_next_rcu(head)); @@ -244,7 +258,7 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, if (++st->bucket >= nf_ct_expect_hsize) return NULL; head = rcu_dereference( - hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); } return head; } @@ -285,6 +299,9 @@ static int exp_seq_show(struct seq_file *s, void *v) exp = hlist_entry(n, struct nf_conntrack_expect, hnode); + if (!net_eq(nf_ct_net(exp->master), seq_file_net(s))) + return 0; + if (exp->tuple.src.l3num != AF_INET) return 0; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index cf9700b1a106..66ddcb60519a 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -737,6 +737,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* no remote port */ } + ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; @@ -744,10 +745,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.ttl = 0; ipc.tos = -1; - sock_tx_timestamp(sk, &ipc.tx_flags); - if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); + err = ip_cmsg_send(sk, msg, &ipc, false); if (unlikely(err)) { kfree(ipc.opt); return err; @@ -768,6 +767,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rcu_read_unlock(); } + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); + saddr = ipc.addr; ipc.addr = faddr = daddr; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 8d22de74080c..438f50c1a676 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -339,8 +339,8 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, struct msghdr *msg, size_t length, - struct rtable **rtp, - unsigned int flags) + struct rtable **rtp, unsigned int flags, + const struct sockcm_cookie *sockc) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); @@ -379,7 +379,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->ip_summed = CHECKSUM_NONE; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); skb->transport_header = skb->network_header; err = -EFAULT; @@ -540,6 +540,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) daddr = inet->inet_daddr; } + ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; @@ -548,7 +549,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(net, msg, &ipc, false); + err = ip_cmsg_send(sk, msg, &ipc, false); if (unlikely(err)) { kfree(ipc.opt); goto out; @@ -638,10 +639,10 @@ back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, - &rt, msg->msg_flags); + &rt, msg->msg_flags, &ipc.sockc); else { - sock_tx_timestamp(sk, &ipc.tx_flags); + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); if (!ipc.addr) ipc.addr = fl4.daddr; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 60398a9370e7..a1f2830d8110 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -915,11 +915,11 @@ static int ip_error(struct sk_buff *skb) if (!IN_DEV_FORWARD(in_dev)) { switch (rt->dst.error) { case EHOSTUNREACH: - IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS); + __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); break; case ENETUNREACH: - IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; } goto out; @@ -934,7 +934,7 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES); + __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; case EACCES: code = ICMP_PKT_FILTERED; @@ -2146,6 +2146,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, unsigned int flags = 0; struct fib_result res; struct rtable *rth; + int master_idx; int orig_oif; int err = -ENETUNREACH; @@ -2155,6 +2156,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, orig_oif = fl4->flowi4_oif; + master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif); + if (master_idx) + fl4->flowi4_oif = master_idx; fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4c04f09338e3..e3c4043c27de 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -312,11 +312,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) mss = __cookie_v4_check(ip_hdr(skb), th, cookie); if (mss == 0) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1e1fe6086dd9..bb0419582b8d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -960,6 +960,17 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_IP_ROUTE_MULTIPATH + { + .procname = "fib_multipath_use_neigh", + .data = &init_net.ipv4.sysctl_fib_multipath_use_neigh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 08b8b960a8ed..5c7ed147449c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -428,13 +428,16 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); -static void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb) +static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) { - if (sk->sk_tsflags) { + if (tsflags) { struct skb_shared_info *shinfo = skb_shinfo(skb); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - sock_tx_timestamp(sk, &shinfo->tx_flags); - if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) + sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags); + if (tsflags & SOF_TIMESTAMPING_TX_ACK) + tcb->txstamp_ack = 1; + if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; } } @@ -906,7 +909,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, int copy, i; bool can_coalesce; - if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { + if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 || + !tcp_skb_can_collapse_to(skb)) { new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; @@ -957,7 +961,7 @@ new_segment: offset += copy; size -= copy; if (!size) { - tcp_tx_timestamp(sk, skb); + tcp_tx_timestamp(sk, sk->sk_tsflags, skb); goto out; } @@ -1077,8 +1081,10 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; + struct sockcm_cookie sockc; int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; + bool process_backlog = false; bool sg; long timeo; @@ -1119,14 +1125,24 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) /* 'common' sending to sendq */ } + sockc.tsflags = sk->sk_tsflags; + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) { + err = -EINVAL; + goto out_err; + } + } + /* This should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - mss_now = tcp_send_mss(sk, &size_goal, flags); - /* Ok commence sending. */ copied = 0; +restart: + mss_now = tcp_send_mss(sk, &size_goal, flags); + err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; @@ -1144,7 +1160,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) copy = max - skb->len; } - if (copy <= 0) { + if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. @@ -1152,6 +1168,10 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; + if (process_backlog && sk_flush_backlog(sk)) { + process_backlog = false; + goto restart; + } skb = sk_stream_alloc_skb(sk, select_size(sk, sg), sk->sk_allocation, @@ -1159,6 +1179,7 @@ new_segment: if (!skb) goto wait_for_memory; + process_backlog = true; /* * Check whether we can use HW checksum. */ @@ -1237,7 +1258,9 @@ new_segment: copied += copy; if (!msg_data_left(msg)) { - tcp_tx_timestamp(sk, skb); + tcp_tx_timestamp(sk, sockc.tsflags, skb); + if (unlikely(flags & MSG_EOR)) + TCP_SKB_CB(skb)->eor = 1; goto out; } @@ -1431,14 +1454,10 @@ static void tcp_prequeue_process(struct sock *sk) struct sk_buff *skb; struct tcp_sock *tp = tcp_sk(sk); - NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED); - /* RX process wants to run with disabled BHs, though it is not - * necessary */ - local_bh_disable(); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) sk_backlog_rcv(sk, skb); - local_bh_enable(); /* Clear memory counter. */ tp->ucopy.memory = 0; @@ -1765,7 +1784,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, chunk = len - tp->ucopy.len; if (chunk != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); len -= chunk; copied += chunk; } @@ -1777,7 +1796,7 @@ do_prequeue: chunk = len - tp->ucopy.len; if (chunk != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } @@ -1863,7 +1882,7 @@ skip_copy: tcp_prequeue_process(sk); if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } @@ -2053,13 +2072,13 @@ void tcp_close(struct sock *sk, long timeout) sk->sk_prot->disconnect(sk, 0); } else if (data_was_unread) { /* Unread data was tossed, zap the connection. */ - NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, sk->sk_allocation); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); - NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); } else if (tcp_close_state(sk)) { /* We FIN if the application ate all the data before * zapping the connection. @@ -2136,7 +2155,7 @@ adjudge_to_death: if (tp->linger2 < 0) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); - NET_INC_STATS_BH(sock_net(sk), + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONLINGER); } else { const int tmo = tcp_fin_time(sk); @@ -2155,7 +2174,7 @@ adjudge_to_death: if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); - NET_INC_STATS_BH(sock_net(sk), + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); } } @@ -3079,7 +3098,7 @@ void tcp_done(struct sock *sk) struct request_sock *req = tcp_sk(sk)->fastopen_rsk; if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index fd1405d37c14..36087bca9f48 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -197,15 +197,15 @@ static void bictcp_state(struct sock *sk, u8 new_state) /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) +static void bictcp_acked(struct sock *sk, const struct ack_sample *sample) { const struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ca_state == TCP_CA_Open) { struct bictcp *ca = inet_csk_ca(sk); - cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; - ca->delayed_ack += cnt; + ca->delayed_ack += sample->pkts_acked - + (ca->delayed_ack >> ACK_RATIO_SHIFT); } } diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 167b6a3e1b98..03725b294286 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -155,11 +155,11 @@ static void tcp_cdg_hystart_update(struct sock *sk) ca->last_ack = now_us; if (after(now_us, ca->round_start + base_owd)) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINDETECT); - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; return; } @@ -174,11 +174,11 @@ static void tcp_cdg_hystart_update(struct sock *sk) 125U); if (ca->rtt.min > thresh) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTDELAYDETECT); - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTDELAYCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; } } @@ -294,12 +294,12 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked) ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr); } -static void tcp_cdg_acked(struct sock *sk, u32 num_acked, s32 rtt_us) +static void tcp_cdg_acked(struct sock *sk, const struct ack_sample *sample) { struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - if (rtt_us <= 0) + if (sample->rtt_us <= 0) return; /* A heuristic for filtering delayed ACKs, adapted from: @@ -307,20 +307,20 @@ static void tcp_cdg_acked(struct sock *sk, u32 num_acked, s32 rtt_us) * delay and rate based TCP mechanisms." TR 100219A. CAIA, 2010. */ if (tp->sacked_out == 0) { - if (num_acked == 1 && ca->delack) { + if (sample->pkts_acked == 1 && ca->delack) { /* A delayed ACK is only used for the minimum if it is * provenly lower than an existing non-zero minimum. */ - ca->rtt.min = min(ca->rtt.min, rtt_us); + ca->rtt.min = min(ca->rtt.min, sample->rtt_us); ca->delack--; return; - } else if (num_acked > 1 && ca->delack < 5) { + } else if (sample->pkts_acked > 1 && ca->delack < 5) { ca->delack++; } } - ca->rtt.min = min_not_zero(ca->rtt.min, rtt_us); - ca->rtt.max = max(ca->rtt.max, rtt_us); + ca->rtt.min = min_not_zero(ca->rtt.min, sample->rtt_us); + ca->rtt.max = max(ca->rtt.max, sample->rtt_us); } static u32 tcp_cdg_ssthresh(struct sock *sk) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 448c2615fece..c99230efcd52 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -402,11 +402,11 @@ static void hystart_update(struct sock *sk, u32 delay) ca->last_ack = now; if ((s32)(now - ca->round_start) > ca->delay_min >> 4) { ca->found |= HYSTART_ACK_TRAIN; - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINDETECT); - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTTRAINCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; } } @@ -423,11 +423,11 @@ static void hystart_update(struct sock *sk, u32 delay) if (ca->curr_rtt > ca->delay_min + HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { ca->found |= HYSTART_DELAY; - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTDELAYDETECT); - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_TCPHYSTARTDELAYCWND, - tp->snd_cwnd); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYDETECT); + NET_ADD_STATS(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYCWND, + tp->snd_cwnd); tp->snd_ssthresh = tp->snd_cwnd; } } @@ -437,21 +437,21 @@ static void hystart_update(struct sock *sk, u32 delay) /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) +static void bictcp_acked(struct sock *sk, const struct ack_sample *sample) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 delay; /* Some calls are for duplicates without timetamps */ - if (rtt_us < 0) + if (sample->rtt_us < 0) return; /* Discard delay samples right after fast recovery */ if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; - delay = (rtt_us << 3) / USEC_PER_MSEC; + delay = (sample->rtt_us << 3) / USEC_PER_MSEC; if (delay == 0) delay = 1; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index cffd8f9ed1a9..54d9f9b0120f 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -255,9 +255,9 @@ static bool tcp_fastopen_queue_check(struct sock *sk) spin_lock(&fastopenq->lock); req1 = fastopenq->rskq_rst_head; if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { + __NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); spin_unlock(&fastopenq->lock); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); return false; } fastopenq->rskq_rst_head = req1->dl_next; @@ -282,7 +282,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct sock *child; if (foc->len == 0) /* Client requests a cookie */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && @@ -311,13 +311,13 @@ fastopen: child = tcp_fastopen_create_child(sk, skb, dst, req); if (child) { foc->len = -1; - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVE); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVE); return child; } - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); } else if (foc->len > 0) /* Client presents an invalid cookie */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); valid_foc.exp = foc->exp; *foc = valid_foc; diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 82f0d9ed60f5..4a4d8e76738f 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -99,7 +99,7 @@ static inline void measure_rtt(struct sock *sk, u32 srtt) } static void measure_achieved_throughput(struct sock *sk, - u32 pkts_acked, s32 rtt) + const struct ack_sample *sample) { const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); @@ -107,10 +107,10 @@ static void measure_achieved_throughput(struct sock *sk, u32 now = tcp_time_stamp; if (icsk->icsk_ca_state == TCP_CA_Open) - ca->pkts_acked = pkts_acked; + ca->pkts_acked = sample->pkts_acked; - if (rtt > 0) - measure_rtt(sk, usecs_to_jiffies(rtt)); + if (sample->rtt_us > 0) + measure_rtt(sk, usecs_to_jiffies(sample->rtt_us)); if (!use_bandwidth_switch) return; @@ -122,7 +122,7 @@ static void measure_achieved_throughput(struct sock *sk, return; } - ca->packetcount += pkts_acked; + ca->packetcount += sample->pkts_acked; if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) && now - ca->lasttime >= ca->minRTT && diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 2ab9bbb6faff..c8e6d86be114 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -82,30 +82,31 @@ static void tcp_illinois_init(struct sock *sk) } /* Measure RTT for each ack. */ -static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt) +static void tcp_illinois_acked(struct sock *sk, const struct ack_sample *sample) { struct illinois *ca = inet_csk_ca(sk); + s32 rtt_us = sample->rtt_us; - ca->acked = pkts_acked; + ca->acked = sample->pkts_acked; /* dup ack, no rtt sample */ - if (rtt < 0) + if (rtt_us < 0) return; /* ignore bogus values, this prevents wraparound in alpha math */ - if (rtt > RTT_MAX) - rtt = RTT_MAX; + if (rtt_us > RTT_MAX) + rtt_us = RTT_MAX; /* keep track of minimum RTT seen so far */ - if (ca->base_rtt > rtt) - ca->base_rtt = rtt; + if (ca->base_rtt > rtt_us) + ca->base_rtt = rtt_us; /* and max */ - if (ca->max_rtt < rtt) - ca->max_rtt = rtt; + if (ca->max_rtt < rtt_us) + ca->max_rtt = rtt_us; ++ca->cnt_rtt; - ca->sum_rtt += rtt; + ca->sum_rtt += rtt_us; } /* Maximum queuing delay */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c124c3c12f7c..d6c8f4cd0800 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -869,7 +869,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, else mib_idx = LINUX_MIB_TCPSACKREORDER; - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -1062,7 +1062,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { dup_sack = true; tcp_dsack_seen(tp); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1) { u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq); u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq); @@ -1071,7 +1071,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, !before(start_seq_0, start_seq_1)) { dup_sack = true; tcp_dsack_seen(tp); - NET_INC_STATS_BH(sock_net(sk), + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV); } } @@ -1289,7 +1289,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, if (skb->len > 0) { BUG_ON(!tcp_skb_pcount(skb)); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED); return false; } @@ -1303,6 +1303,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, } TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; + TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) TCP_SKB_CB(prev)->end_seq++; @@ -1313,7 +1314,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED); return true; } @@ -1368,6 +1369,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) goto fallback; + if (!tcp_skb_can_collapse_to(prev)) + goto fallback; + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); @@ -1469,7 +1473,7 @@ noop: return skb; fallback: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); + NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); return NULL; } @@ -1657,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, mib_idx = LINUX_MIB_TCPSACKDISCARD; } - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); if (i == 0) first_sack_index = -1; continue; @@ -1909,7 +1913,7 @@ void tcp_enter_loss(struct sock *sk) skb = tcp_write_queue_head(sk); is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; tp->fackets_out = 0; } @@ -2253,16 +2257,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) } } -/* CWND moderation, preventing bursts due to too big ACKs - * in dubious situations. - */ -static inline void tcp_moderate_cwnd(struct tcp_sock *tp) -{ - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp) + tcp_max_burst(tp)); - tp->snd_cwnd_stamp = tcp_time_stamp; -} - static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when) { return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && @@ -2405,13 +2399,12 @@ static bool tcp_try_undo_recovery(struct sock *sk) else mib_idx = LINUX_MIB_TCPFULLUNDO; - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. */ - tcp_moderate_cwnd(tp); if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; return true; @@ -2428,7 +2421,7 @@ static bool tcp_try_undo_dsack(struct sock *sk) if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); tcp_undo_cwnd_reduction(sk, false); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); return true; } return false; @@ -2443,10 +2436,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) tcp_undo_cwnd_reduction(sk, true); DBGUNDO(sk, "partial loss"); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); if (frto_undo) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPSPURIOUSRTOS); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); @@ -2570,7 +2563,7 @@ static void tcp_mtup_probe_failed(struct sock *sk) icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1; icsk->icsk_mtup.probe_size = 0; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPFAIL); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL); } static void tcp_mtup_probe_success(struct sock *sk) @@ -2590,7 +2583,7 @@ static void tcp_mtup_probe_success(struct sock *sk) icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size; icsk->icsk_mtup.probe_size = 0; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); } /* Do a simple retransmit without using the backoff mechanisms in @@ -2654,7 +2647,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) else mib_idx = LINUX_MIB_TCPSACKRECOVERY; - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); tp->prior_ssthresh = 0; tcp_init_undo(tp); @@ -2747,7 +2740,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked) DBGUNDO(sk, "partial recovery"); tcp_undo_cwnd_reduction(sk, true); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); tcp_try_keep_open(sk); return true; } @@ -3094,12 +3087,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, const struct skb_shared_info *shinfo; /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */ - if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))) + if (likely(!TCP_SKB_CB(skb)->txstamp_ack)) return; shinfo = skb_shinfo(skb); - if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && - !before(shinfo->tskey, prior_snd_una) && + if (!before(shinfo->tskey, prior_snd_una) && before(shinfo->tskey, tcp_sk(sk)->snd_una)) __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); } @@ -3256,8 +3248,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tcp_rearm_rto(sk); } - if (icsk->icsk_ca_ops->pkts_acked) - icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us); + if (icsk->icsk_ca_ops->pkts_acked) { + struct ack_sample sample = { .pkts_acked = pkts_acked, + .rtt_us = ca_rtt_us }; + + icsk->icsk_ca_ops->pkts_acked(sk, &sample); + } #if FASTRETRANS_DEBUG > 0 WARN_ON((int)tp->sacked_out < 0); @@ -3363,9 +3359,10 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) { u32 delta = ack - tp->snd_una; - u64_stats_update_begin(&tp->syncp); + sock_owned_by_me((struct sock *)tp); + u64_stats_update_begin_raw(&tp->syncp); tp->bytes_acked += delta; - u64_stats_update_end(&tp->syncp); + u64_stats_update_end_raw(&tp->syncp); tp->snd_una = ack; } @@ -3374,9 +3371,10 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) { u32 delta = seq - tp->rcv_nxt; - u64_stats_update_begin(&tp->syncp); + sock_owned_by_me((struct sock *)tp); + u64_stats_update_begin_raw(&tp->syncp); tp->bytes_received += delta; - u64_stats_update_end(&tp->syncp); + u64_stats_update_end_raw(&tp->syncp); tp->rcv_nxt = seq; } @@ -3442,7 +3440,7 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { - NET_INC_STATS_BH(net, mib_idx); + NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! */ } } @@ -3475,7 +3473,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) challenge_count = 0; } if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } } @@ -3524,8 +3522,8 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); tcp_try_keep_open(sk); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPLOSSPROBERECOVERY); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); } else if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP | FLAG_DATA_SACKED))) { /* Pure dupack: original and TLP probe arrived; no loss */ @@ -3629,14 +3627,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); } else { u32 ack_ev_flags = CA_ACK_SLOWPATH; if (ack_seq != TCP_SKB_CB(skb)->end_seq) flag |= FLAG_DATA; else - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); @@ -4139,7 +4137,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) else mib_idx = LINUX_MIB_TCPDSACKOFOSENT; - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; @@ -4163,7 +4161,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); + NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); if (tcp_is_sack(tp) && sysctl_tcp_dsack) { @@ -4313,13 +4311,19 @@ static bool tcp_try_coalesce(struct sock *sk, atomic_add(delta, &sk->sk_rmem_alloc); sk_mem_charge(sk, delta); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; return true; } +static void tcp_drop(struct sock *sk, struct sk_buff *skb) +{ + sk_drops_add(sk, skb); + __kfree_skb(skb); +} + /* This one checks to see if we can put data from the * out_of_order queue into the receive_queue. */ @@ -4344,7 +4348,7 @@ static void tcp_ofo_queue(struct sock *sk) __skb_unlink(skb, &tp->out_of_order_queue); if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { SOCK_DEBUG(sk, "ofo packet was already received\n"); - __kfree_skb(skb); + tcp_drop(sk, skb); continue; } SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n", @@ -4395,8 +4399,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tcp_ecn_check_ce(tp, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); - __kfree_skb(skb); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); + tcp_drop(sk, skb); return; } @@ -4404,7 +4408,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tp->pred_flags = 0; inet_csk_schedule_ack(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); @@ -4459,8 +4463,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); + tcp_drop(sk, skb); skb = NULL; tcp_dsack_set(sk, seq, end_seq); goto add_sack; @@ -4498,8 +4502,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) __skb_unlink(skb1, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb1); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); + tcp_drop(sk, skb1); } add_sack: @@ -4582,12 +4586,13 @@ err: static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - int eaten = -1; bool fragstolen = false; + int eaten = -1; - if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) - goto drop; - + if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { + __kfree_skb(skb); + return; + } skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); @@ -4612,14 +4617,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) __set_current_state(TASK_RUNNING); - local_bh_enable(); if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) { tp->ucopy.len -= chunk; tp->copied_seq += chunk; eaten = (chunk == skb->len); tcp_rcv_space_adjust(sk); } - local_bh_disable(); } if (eaten <= 0) { @@ -4662,14 +4665,14 @@ queue_and_out: if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { /* A retransmit, 2nd most common case. Force an immediate ack. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); + NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); out_of_window: tcp_enter_quickack_mode(sk); inet_csk_schedule_ack(sk); drop: - __kfree_skb(skb); + tcp_drop(sk, skb); return; } @@ -4708,7 +4711,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, __skb_unlink(skb, list); __kfree_skb(skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); return next; } @@ -4867,7 +4870,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk) bool res = false; if (!skb_queue_empty(&tp->out_of_order_queue)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); __skb_queue_purge(&tp->out_of_order_queue); /* Reset SACK state. A conforming SACK implementation will @@ -4896,7 +4899,7 @@ static int tcp_prune_queue(struct sock *sk) SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); @@ -4926,7 +4929,7 @@ static int tcp_prune_queue(struct sock *sk) * drop receive data on the floor. It will get retransmitted * and hopefully then we'll have sufficient space. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED); /* Massive buffer overcommit. */ tp->pred_flags = 0; @@ -5135,7 +5138,6 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) int chunk = skb->len - hlen; int err; - local_bh_enable(); if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk); else @@ -5147,32 +5149,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) tcp_rcv_space_adjust(sk); } - local_bh_disable(); return err; } -static __sum16 __tcp_checksum_complete_user(struct sock *sk, - struct sk_buff *skb) -{ - __sum16 result; - - if (sock_owned_by_user(sk)) { - local_bh_enable(); - result = __tcp_checksum_complete(skb); - local_bh_disable(); - } else { - result = __tcp_checksum_complete(skb); - } - return result; -} - -static inline bool tcp_checksum_complete_user(struct sock *sk, - struct sk_buff *skb) -{ - return !skb_csum_unnecessary(skb) && - __tcp_checksum_complete_user(sk, skb); -} - /* Does PAWS and seqno based validation of an incoming segment, flags will * play significant role here. */ @@ -5185,7 +5164,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDPAWS, &tp->last_oow_ack_time)) @@ -5237,8 +5216,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (th->syn) { syn_challenge: if (syn_inerr) - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); tcp_send_challenge_ack(sk, skb); goto discard; } @@ -5246,7 +5225,7 @@ syn_challenge: return true; discard: - __kfree_skb(skb); + tcp_drop(sk, skb); return false; } @@ -5353,7 +5332,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_data_snd_check(sk); return; } else { /* Header too small */ - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; } } else { @@ -5381,12 +5360,13 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, __skb_pull(skb, tcp_header_len); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPHPHITSTOUSER); eaten = 1; } } if (!eaten) { - if (tcp_checksum_complete_user(sk, skb)) + if (tcp_checksum_complete(skb)) goto csum_error; if ((int)skb->truesize > sk->sk_forward_alloc) @@ -5403,7 +5383,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_rcv_rtt_measure_ts(sk, skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ eaten = tcp_queue_rcv(sk, skb, tcp_header_len, @@ -5430,7 +5410,7 @@ no_ack: } slow_path: - if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) + if (len < (th->doff << 2) || tcp_checksum_complete(skb)) goto csum_error; if (!th->ack && !th->rst && !th->syn) @@ -5460,11 +5440,11 @@ step5: return; csum_error: - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); discard: - __kfree_skb(skb); + tcp_drop(sk, skb); } EXPORT_SYMBOL(tcp_rcv_established); @@ -5549,16 +5529,18 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, if (data) { /* Retransmit unacked data in SYN */ tcp_for_write_queue_from(data, sk) { if (data == tcp_send_head(sk) || - __tcp_retransmit_skb(sk, data)) + __tcp_retransmit_skb(sk, data, 1)) break; } tcp_rearm_rto(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); return true; } tp->syn_data_acked = tp->syn_data; if (tp->syn_data_acked) - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVE); tcp_fastopen_add_skb(sk, synack); @@ -5593,7 +5575,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_PAWSACTIVEREJECTED); goto reset_and_undo; } @@ -5695,7 +5678,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, TCP_DELACK_MAX, TCP_RTO_MAX); discard: - __kfree_skb(skb); + tcp_drop(sk, skb); return 0; } else { tcp_send_ack(sk); @@ -5802,8 +5785,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) int queued = 0; bool acceptable; - tp->rx_opt.saw_tstamp = 0; - switch (sk->sk_state) { case TCP_CLOSE: goto discard; @@ -5821,29 +5802,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; - /* Now we have several options: In theory there is - * nothing else in the frame. KA9Q has an option to - * send data with the syn, BSD accepts data with the - * syn up to the [to be] advertised window and - * Solaris 2.1 gives you a protocol error. For now - * we just ignore it, that fits the spec precisely - * and avoids incompatibilities. It would be nice in - * future to drop through and process the data. - * - * Now that TTCP is starting to be used we ought to - * queue this data. - * But, this leaves one open to an easy denial of - * service attack, and SYN cookies can't defend - * against this problem. So, we drop the data - * in the interest of security over speed unless - * it's still in use. - */ - kfree_skb(skb); + consume_skb(skb); return 0; } goto discard; case TCP_SYN_SENT: + tp->rx_opt.saw_tstamp = 0; queued = tcp_rcv_synsent_state_process(sk, skb, th); if (queued >= 0) return queued; @@ -5855,6 +5820,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 0; } + tp->rx_opt.saw_tstamp = 0; req = tp->fastopen_rsk; if (req) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && @@ -5979,7 +5945,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); return 1; } @@ -6036,7 +6002,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (sk->sk_shutdown & RCV_SHUTDOWN) { if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); tcp_reset(sk); return 1; } @@ -6056,7 +6022,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (!queued) { discard: - __kfree_skb(skb); + tcp_drop(sk, skb); } return 0; } @@ -6174,10 +6140,10 @@ static bool tcp_syn_flood_action(const struct sock *sk, if (net->ipv4.sysctl_tcp_syncookies) { msg = "Sending cookies"; want_cookie = true; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); } else #endif - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); if (!queue->synflood_warned && net->ipv4.sysctl_tcp_syncookies != 2 && @@ -6238,7 +6204,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, * timeout. */ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; } @@ -6285,7 +6251,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (dst && strict && !tcp_peer_is_proven(req, dst, true, tmp_opt.saw_tstamp)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } } @@ -6333,7 +6299,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } if (fastopen_sk) { af_ops->send_synack(fastopen_sk, dst, &fl, req, - &foc, false); + &foc, TCP_SYNACK_FASTOPEN); /* Add the child socket directly into the accept queue */ inet_csk_reqsk_queue_add(sk, req, fastopen_sk); sk->sk_data_ready(sk); @@ -6343,10 +6309,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; if (!want_cookie) inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - af_ops->send_synack(sk, dst, &fl, req, - &foc, !want_cookie); - if (want_cookie) - goto drop_and_free; + af_ops->send_synack(sk, dst, &fl, req, &foc, + !want_cookie ? TCP_SYNACK_NORMAL : + TCP_SYNACK_COOKIE); + if (want_cookie) { + reqsk_free(req); + return 0; + } } reqsk_put(req); return 0; @@ -6356,7 +6325,7 @@ drop_and_release: drop_and_free: reqsk_free(req); drop: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + tcp_listendrop(sk); return 0; } EXPORT_SYMBOL(tcp_conn_request); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad450509029b..3708de2a6683 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -157,7 +157,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = daddr = usin->sin_addr.s_addr; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet_opt && inet_opt->opt.srr) { if (!daddr) return -EINVAL; @@ -320,7 +320,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) * an established socket here. */ if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); } else if (abort) { /* * Still in SYN_RECV, just remove it silently. @@ -329,7 +329,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) * errors returned from accept(). */ inet_csk_reqsk_queue_drop(req->rsk_listener, req); - NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); + tcp_listendrop(req->rsk_listener); } reqsk_put(req); } @@ -372,7 +372,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) th->dest, iph->saddr, ntohs(th->source), inet_iif(icmp_skb)); if (!sk) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } if (sk->sk_state == TCP_TIME_WAIT) { @@ -396,13 +396,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) */ if (sock_owned_by_user(sk)) { if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); } if (sk->sk_state == TCP_CLOSE) goto out; if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { - NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto out; } @@ -413,7 +413,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -628,6 +628,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG + rcu_read_lock(); hash_location = tcp_parse_md5sig_option(th); if (sk && sk_fullsock(sk)) { key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) @@ -646,16 +647,18 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ntohs(th->source), inet_iif(skb)); /* don't send rst if it can't find key */ if (!sk1) - return; - rcu_read_lock(); + goto out; + key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) &ip_hdr(skb)->saddr, AF_INET); if (!key) - goto release_sk1; + goto out; + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) - goto release_sk1; + goto out; + } if (key) { @@ -689,20 +692,19 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) offsetof(struct inet_timewait_sock, tw_bound_dev_if)); arg.tos = ip_hdr(skb)->tos; + local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); - TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); + __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); + local_bh_enable(); #ifdef CONFIG_TCP_MD5SIG -release_sk1: - if (sk1) { - rcu_read_unlock(); - sock_put(sk1); - } +out: + rcu_read_unlock(); #endif } @@ -774,12 +776,14 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + local_bh_enable(); } static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -830,7 +834,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req) + enum tcp_synack_type synack_type) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -841,7 +845,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, foc, attach_req); + skb = tcp_make_synack(sk, dst, req, foc, synack_type); if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); @@ -882,8 +886,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, /* caller either holds rcu_read_lock() or socket lock */ md5sig = rcu_dereference_check(tp->md5sig_info, - sock_owned_by_user(sk) || - lockdep_is_held((spinlock_t *)&sk->sk_lock.slock)); + lockdep_sock_is_held(sk)); if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) @@ -928,8 +931,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk) || - lockdep_is_held(&sk->sk_lock.slock)); + lockdep_sock_is_held(sk)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) @@ -1153,12 +1155,12 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk, return false; if (hash_expected && !hash_location) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return true; } if (!hash_expected && hash_location) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return true; } @@ -1246,7 +1248,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) &tcp_request_sock_ipv4_ops, sk, skb); drop: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + tcp_listendrop(sk); return 0; } EXPORT_SYMBOL(tcp_v4_conn_request); @@ -1344,11 +1346,11 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, return newsk; exit_overflow: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); exit_nonewsk: dst_release(dst); exit: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + tcp_listendrop(sk); return NULL; put_and_exit: inet_csk_prepare_forced_close(newsk); @@ -1434,8 +1436,8 @@ discard: return 0; csum_err: - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; } EXPORT_SYMBOL(tcp_v4_do_rcv); @@ -1508,16 +1510,16 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) __skb_queue_tail(&tp->ucopy.prequeue, skb); tp->ucopy.memory += skb->truesize; - if (tp->ucopy.memory > sk->sk_rcvbuf) { + if (skb_queue_len(&tp->ucopy.prequeue) >= 32 || + tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { struct sk_buff *skb1; BUG_ON(sock_owned_by_user(sk)); + __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED, + skb_queue_len(&tp->ucopy.prequeue)); - while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { + while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) sk_backlog_rcv(sk, skb1); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPPREQUEUEDROPPED); - } tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { @@ -1538,24 +1540,25 @@ EXPORT_SYMBOL(tcp_prequeue); int tcp_v4_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct tcphdr *th; + bool refcounted; struct sock *sk; int ret; - struct net *net = dev_net(skb->dev); if (skb->pkt_type != PACKET_HOST) goto discard_it; /* Count it even if it's bad */ - TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); + __TCP_INC_STATS(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; - th = tcp_hdr(skb); + th = (const struct tcphdr *)skb->data; - if (th->doff < sizeof(struct tcphdr) / 4) + if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) goto bad_packet; if (!pskb_may_pull(skb, th->doff * 4)) goto discard_it; @@ -1568,7 +1571,7 @@ int tcp_v4_rcv(struct sk_buff *skb) if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) goto csum_error; - th = tcp_hdr(skb); + th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() * barrier() makes sure compiler wont play fool^Waliasing games. @@ -1588,7 +1591,7 @@ int tcp_v4_rcv(struct sk_buff *skb) lookup: sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, - th->dest); + th->dest, &refcounted); if (!sk) goto no_tcp_socket; @@ -1609,7 +1612,11 @@ process: inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + /* We own a reference on the listener, increase it again + * as we might lose it too soon. + */ sock_hold(sk); + refcounted = true; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1626,7 +1633,7 @@ process: } } if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { - NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; } @@ -1659,13 +1666,14 @@ process: } else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf + sk->sk_sndbuf))) { bh_unlock_sock(sk); - NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); put_and_return: - sock_put(sk); + if (refcounted) + sock_put(sk); return ret; @@ -1675,9 +1683,9 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: - TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); + __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: - TCP_INC_STATS_BH(net, TCP_MIB_INERRS); + __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { tcp_v4_send_reset(NULL, skb); } @@ -1688,7 +1696,9 @@ discard_it: return 0; discard_and_relse: - sock_put(sk); + sk_drops_add(sk, skb); + if (refcounted) + sock_put(sk); goto discard_it; do_time_wait: @@ -1712,6 +1722,7 @@ do_time_wait: if (sk2) { inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; + refcounted = false; goto process; } /* Fall through to ACK */ @@ -1828,7 +1839,9 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_free_fastopen_req(tp); tcp_saved_syn_free(tp); + local_bh_disable(); sk_sockets_allocated_dec(sk); + local_bh_enable(); if (mem_cgroup_sockets_enabled && sk->sk_memcg) sock_release_memcg(sk); @@ -1845,17 +1858,17 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); */ static void *listening_get_next(struct seq_file *seq, void *cur) { - struct inet_connection_sock *icsk; - struct hlist_nulls_node *node; - struct sock *sk = cur; - struct inet_listen_hashbucket *ilb; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); + struct inet_listen_hashbucket *ilb; + struct inet_connection_sock *icsk; + struct sock *sk = cur; if (!sk) { +get_head: ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); - sk = sk_nulls_head(&ilb->head); + sk = sk_head(&ilb->head); st->offset = 0; goto get_sk; } @@ -1863,28 +1876,20 @@ static void *listening_get_next(struct seq_file *seq, void *cur) ++st->num; ++st->offset; - sk = sk_nulls_next(sk); + sk = sk_next(sk); get_sk: - sk_nulls_for_each_from(sk, node) { + sk_for_each_from(sk) { if (!net_eq(sock_net(sk), net)) continue; - if (sk->sk_family == st->family) { - cur = sk; - goto out; - } + if (sk->sk_family == st->family) + return sk; icsk = inet_csk(sk); } spin_unlock_bh(&ilb->lock); st->offset = 0; - if (++st->bucket < INET_LHTABLE_SIZE) { - ilb = &tcp_hashinfo.listening_hash[st->bucket]; - spin_lock_bh(&ilb->lock); - sk = sk_nulls_head(&ilb->head); - goto get_sk; - } - cur = NULL; -out: - return cur; + if (++st->bucket < INET_LHTABLE_SIZE) + goto get_head; + return NULL; } static void *listening_get_idx(struct seq_file *seq, loff_t *pos) @@ -2383,6 +2388,7 @@ static int __net_init tcp_sk_init(struct net *net) IPPROTO_TCP, net); if (res) goto fail; + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 1e70fa8fa793..c67ece1390c2 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -260,13 +260,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) * newReno in increase case. * We work it out by following the idea from TCP-LP's paper directly */ -static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) +static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); - if (rtt_us > 0) - tcp_lp_rtt_sample(sk, rtt_us); + if (sample->rtt_us > 0) + tcp_lp_rtt_sample(sk, sample->rtt_us); /* calc inference */ if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 7b7eec439906..b617826e2477 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -800,7 +800,8 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, } if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, - jiffies - tm->tcpm_stamp) < 0) + jiffies - tm->tcpm_stamp, + TCP_METRICS_ATTR_PAD) < 0) goto nla_put_failure; if (tm->tcpm_ts_stamp) { if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP, @@ -864,7 +865,8 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, (nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROPS, tfom->syn_loss) < 0 || nla_put_msecs(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS, - jiffies - tfom->last_syn_loss) < 0)) + jiffies - tfom->last_syn_loss, + TCP_METRICS_ATTR_PAD) < 0)) goto nla_put_failure; if (tfom->cookie.len > 0 && nla_put(msg, TCP_METRICS_ATTR_FOPEN_COOKIE, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index acb366dd61e6..4b95ec4ed2c8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -235,7 +235,7 @@ kill: } if (paws_reject) - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); + __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); if (!th->rst) { /* In this case we must reset the TIMEWAIT timer. @@ -337,7 +337,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); } tcp_update_metrics(sk); @@ -545,7 +545,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rack.mstamp.v64 = 0; newtp->rack.advanced = 0; - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; } @@ -704,10 +704,13 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) { /* Out of window: send ACK and drop. */ - if (!(flg & TCP_FLAG_RST)) + if (!(flg & TCP_FLAG_RST) && + !tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSYNRECV, + &tcp_rsk(req)->last_oow_ack_time)) req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); return NULL; } @@ -726,7 +729,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * "fourth, check the SYN bit" */ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; } @@ -749,7 +752,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } @@ -788,7 +791,7 @@ embryonic_reset: } if (!fastopen) { inet_csk_reqsk_queue_drop(sk, req); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); } return NULL; } diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 773083b7f1e9..02737b607aa7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -89,6 +89,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, ~(SKB_GSO_TCPV4 | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TCPV6 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | @@ -98,7 +99,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | 0) || - !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + !(type & (SKB_GSO_TCPV4 | + SKB_GSO_TCPV6)))) goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); @@ -107,6 +109,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, goto out; } + /* GSO partial only requires splitting the frame into an MSS + * multiple and possibly a remainder. So update the mss now. + */ + if (features & NETIF_F_GSO_PARTIAL) + mss = skb->len - (skb->len % mss); + copy_destructor = gso_skb->destructor == tcp_wfree; ooo_okay = gso_skb->ooo_okay; /* All segments but the first should have ooo_okay cleared */ @@ -131,7 +139,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); - do { + while (skb->next) { th->fin = th->psh = 0; th->check = newcheck; @@ -151,7 +159,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->seq = htonl(seq); th->cwr = 0; - } while (skb->next); + } /* Following permits TCP Small Queues to work well with GSO : * The callback to TCP stack will be called at the time last frag @@ -237,7 +245,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) found: /* Include the IP ID check below from the inner most IP hdr */ - flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; + flush = NAPI_GRO_CB(p)->flush; flush |= (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); @@ -246,6 +254,17 @@ found: flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); + /* When we receive our second frame we can made a decision on if we + * continue this flow as an atomic flow with a fixed ID or if we use + * an incrementing ID. + */ + if (NAPI_GRO_CB(p)->flush_id != 1 || + NAPI_GRO_CB(p)->count != 1 || + !NAPI_GRO_CB(p)->is_atomic) + flush |= NAPI_GRO_CB(p)->flush_id; + else + NAPI_GRO_CB(p)->is_atomic = false; + mss = skb_shinfo(p)->gso_size; flush |= (len - 1) >= mss; @@ -314,6 +333,9 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff) iph->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + if (NAPI_GRO_CB(skb)->is_atomic) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + return tcp_gro_complete(skb); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 79a03b87a771..8bd9911fdd16 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -364,7 +364,7 @@ tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) * be sent. */ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, - int tcp_header_len) + struct tcphdr *th, int tcp_header_len) { struct tcp_sock *tp = tcp_sk(sk); @@ -375,7 +375,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, INET_ECN_xmit(sk); if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) { tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; - tcp_hdr(skb)->cwr = 1; + th->cwr = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } } else if (!tcp_ca_needs_ecn(sk)) { @@ -383,7 +383,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, INET_ECN_dontxmit(sk); } if (tp->ecn_flags & TCP_ECN_DEMAND_CWR) - tcp_hdr(skb)->ece = 1; + th->ece = 1; } } @@ -949,12 +949,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree; + skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ - th = tcp_hdr(skb); + th = (struct tcphdr *)skb->data; th->source = inet->inet_sport; th->dest = inet->inet_dport; th->seq = htonl(tcb->seq); @@ -962,14 +962,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->tcp_flags); - if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { - /* RFC1323: The window in SYN & SYN/ACK segments - * is never scaled. - */ - th->window = htons(min(tp->rcv_wnd, 65535U)); - } else { - th->window = htons(tcp_select_window(sk)); - } th->check = 0; th->urg_ptr = 0; @@ -986,9 +978,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_options_write((__be32 *)(th + 1), tp, &opts); skb_shinfo(skb)->gso_type = sk->sk_gso_type; - if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0)) - tcp_ecn_send(sk, skb, tcp_header_size); - + if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) { + th->window = htons(tcp_select_window(sk)); + tcp_ecn_send(sk, skb, th, tcp_header_size); + } else { + /* RFC1323: The window in SYN & SYN/ACK segments + * is never scaled. + */ + th->window = htons(min(tp->rcv_wnd, 65535U)); + } #ifdef CONFIG_TCP_MD5SIG /* Calculate the MD5 hash, as we have all we need now */ if (md5) { @@ -1111,11 +1109,17 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de tcp_verify_left_out(tp); } +static bool tcp_has_tx_tstamp(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->txstamp_ack || + (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP); +} + static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) { struct skb_shared_info *shinfo = skb_shinfo(skb); - if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) && + if (unlikely(tcp_has_tx_tstamp(skb)) && !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) { struct skb_shared_info *shinfo2 = skb_shinfo(skb2); u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP; @@ -1123,9 +1127,17 @@ static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) shinfo->tx_flags &= ~tsflags; shinfo2->tx_flags |= tsflags; swap(shinfo->tskey, shinfo2->tskey); + TCP_SKB_CB(skb2)->txstamp_ack = TCP_SKB_CB(skb)->txstamp_ack; + TCP_SKB_CB(skb)->txstamp_ack = 0; } } +static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2) +{ + TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor; + TCP_SKB_CB(skb)->eor = 0; +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. @@ -1171,6 +1183,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->tcp_flags = flags; TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; + tcp_skb_fragment_eor(skb, buff); if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { /* Copy and checksum data tail into the new buffer. */ @@ -1731,6 +1744,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; + tcp_skb_fragment_eor(skb, buff); + buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); tcp_fragment_tstamp(skb, buff); @@ -2204,14 +2219,13 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Thanks to skb fast clones, we can detect if a prior transmit of * a packet is still in a qdisc or driver queue. * In this case, there is very little point doing a retransmit ! - * Note: This is called from BH context only. */ static bool skb_still_in_host_queue(const struct sock *sk, const struct sk_buff *skb) { if (unlikely(skb_fclone_busy(sk, skb))) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); return true; } return false; @@ -2266,14 +2280,14 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; - if (__tcp_retransmit_skb(sk, skb)) + if (__tcp_retransmit_skb(sk, skb, 1)) goto rearm_timer; /* Record snd_nxt for loss detection. */ tp->tlp_high_seq = tp->snd_nxt; probe_sent: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); /* Reset s.t. tcp_rearm_rto will restart timer from now */ inet_csk(sk)->icsk_pending = 0; rearm_timer: @@ -2444,14 +2458,15 @@ u32 __tcp_select_window(struct sock *sk) void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb) { - const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb); - u8 tsflags = next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; - - if (unlikely(tsflags)) { + if (unlikely(tcp_has_tx_tstamp(next_skb))) { + const struct skb_shared_info *next_shinfo = + skb_shinfo(next_skb); struct skb_shared_info *shinfo = skb_shinfo(skb); - shinfo->tx_flags |= tsflags; + shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; shinfo->tskey = next_shinfo->tskey; + TCP_SKB_CB(skb)->txstamp_ack |= + TCP_SKB_CB(next_skb)->txstamp_ack; } } @@ -2490,6 +2505,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) * packet counting does not break. */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; + TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor; /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); @@ -2541,6 +2557,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (!tcp_can_collapse(sk, skb)) break; + if (!tcp_skb_can_collapse_to(to)) + break; + space -= skb->len; if (first) { @@ -2567,17 +2586,17 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, * state updates are done by the caller. Returns non-zero if an * error occurred which prevented the send. */ -int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) { - struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); unsigned int cur_mss; - int err; + int diff, len, err; + - /* Inconslusive MTU probe */ - if (icsk->icsk_mtup.probe_size) { + /* Inconclusive MTU probe */ + if (icsk->icsk_mtup.probe_size) icsk->icsk_mtup.probe_size = 0; - } /* Do not sent more than we queued. 1/4 is reserved for possible * copying overhead: fragmentation, tunneling, mangling etc. @@ -2610,30 +2629,27 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->seq != tp->snd_una) return -EAGAIN; - if (skb->len > cur_mss) { - if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC)) + len = cur_mss * segs; + if (skb->len > len) { + if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC)) return -ENOMEM; /* We'll try again later. */ } else { - int oldpcount = tcp_skb_pcount(skb); + if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; - if (unlikely(oldpcount > 1)) { - if (skb_unclone(skb, GFP_ATOMIC)) - return -ENOMEM; - tcp_init_tso_segs(skb, cur_mss); - tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); - } + diff = tcp_skb_pcount(skb); + tcp_set_skb_tso_segs(skb, cur_mss); + diff -= tcp_skb_pcount(skb); + if (diff) + tcp_adjust_pcount(sk, skb, diff); + if (skb->len < cur_mss) + tcp_retrans_try_collapse(sk, skb, cur_mss); } /* RFC3168, section 6.1.1.1. ECN fallback */ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN) tcp_ecn_clear_syn(sk, skb); - tcp_retrans_try_collapse(sk, skb, cur_mss); - - /* Make a copy, if the first transmission SKB clone we made - * is still in somebody's hands, else make a clone. - */ - /* make sure skb->data is aligned on arches that require it * and check if ack-trimming & collapsing extended the headroom * beyond what csum_start can cover. @@ -2651,20 +2667,22 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) } if (likely(!err)) { + segs = tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; /* Update global TCP statistics. */ - TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - tp->total_retrans++; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + tp->total_retrans += segs; } return err; } -int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) { struct tcp_sock *tp = tcp_sk(sk); - int err = __tcp_retransmit_skb(sk, skb); + int err = __tcp_retransmit_skb(sk, skb, segs); if (err == 0) { #if FASTRETRANS_DEBUG > 0 @@ -2680,7 +2698,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) tp->retrans_stamp = tcp_skb_timestamp(skb); } else if (err != -EBUSY) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } if (tp->undo_retrans < 0) @@ -2755,6 +2773,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) tcp_for_write_queue_from(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; + int segs; if (skb == tcp_send_head(sk)) break; @@ -2762,14 +2781,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (!hole) tp->retransmit_skb_hint = skb; - /* Assume this retransmit will generate - * only one packet for congestion window - * calculation purposes. This works because - * tcp_retransmit_skb() will chop up the - * packet to be MSS sized and all the - * packet counting works out. - */ - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) + segs = tp->snd_cwnd - tcp_packets_in_flight(tp); + if (segs <= 0) return; if (fwd_rexmitting) { @@ -2806,10 +2819,10 @@ begin_fwd: if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; - if (tcp_retransmit_skb(sk, skb)) + if (tcp_retransmit_skb(sk, skb, segs)) return; - NET_INC_STATS_BH(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); @@ -2962,7 +2975,7 @@ int tcp_send_synack(struct sock *sk) struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req) + enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); const struct tcp_sock *tp = tcp_sk(sk); @@ -2982,14 +2995,22 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* Reserve space for headers. */ skb_reserve(skb, MAX_TCP_HEADER); - if (attach_req) { + switch (synack_type) { + case TCP_SYNACK_NORMAL: skb_set_owner_w(skb, req_to_sk(req)); - } else { + break; + case TCP_SYNACK_COOKIE: + /* Under synflood, we do not attach skb to a socket, + * to avoid false sharing. + */ + break; + case TCP_SYNACK_FASTOPEN: /* sk is a const pointer, because we want to express multiple * cpu might call us concurrently. * sk->sk_wmem_alloc in an atomic, we can promote to rw. */ skb_set_owner_w(skb, (struct sock *)sk); + break; } skb_dst_set(skb, dst); @@ -3017,7 +3038,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); - th = tcp_hdr(skb); + th = (struct tcphdr *)skb->data; memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; @@ -3038,7 +3059,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, th->window = htons(min(req->rsk_rcv_wnd, 65535U)); tcp_options_write((__be32 *)(th + 1), NULL, &opts); th->doff = (tcp_header_size >> 2); - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ @@ -3534,10 +3555,10 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) int res; tcp_rsk(req)->txhash = net_tx_rndhash(); - res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true); + res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL); if (!res) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); } return res; } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 5353085fd0b2..e36df4fcfeba 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -65,8 +65,8 @@ int tcp_rack_mark_lost(struct sock *sk) if (scb->sacked & TCPCB_SACKED_RETRANS) { scb->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPLOSTRETRANSMIT); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPLOSTRETRANSMIT); } } else if (!(scb->sacked & TCPCB_RETRANS)) { /* Original data are sent sequentially so stop early diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 49bc474f8e35..debdd8b33e69 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -30,7 +30,7 @@ static void tcp_write_err(struct sock *sk) sk->sk_error_report(sk); tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); } /* Do not allow orphaned sockets to eat all our resources. @@ -68,7 +68,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC); tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; } return 0; @@ -162,8 +162,8 @@ static int tcp_write_timeout(struct sock *sk) if (tp->syn_fastopen || tp->syn_data) tcp_fastopen_cache_set(sk, 0, NULL, true, 0); if (tp->syn_data && icsk->icsk_retransmits == 1) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENACTIVEFAIL); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; syn_set = true; @@ -178,8 +178,8 @@ static int tcp_write_timeout(struct sock *sk) tp->bytes_acked <= tp->rx_opt.mss_clamp) { tcp_fastopen_cache_set(sk, 0, NULL, true, 0); if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENACTIVEFAIL); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); } /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -209,6 +209,7 @@ static int tcp_write_timeout(struct sock *sk) return 0; } +/* Called with BH disabled */ void tcp_delack_timer_handler(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -228,7 +229,7 @@ void tcp_delack_timer_handler(struct sock *sk) if (!skb_queue_empty(&tp->ucopy.prequeue)) { struct sk_buff *skb; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) sk_backlog_rcv(sk, skb); @@ -248,7 +249,7 @@ void tcp_delack_timer_handler(struct sock *sk) icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_send_ack(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); } out: @@ -265,7 +266,7 @@ static void tcp_delack_timer(unsigned long data) tcp_delack_timer_handler(sk); } else { inet_csk(sk)->icsk_ack.blocked = 1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) sock_hold(sk); @@ -404,7 +405,7 @@ void tcp_retransmit_timer(struct sock *sk) goto out; } tcp_enter_loss(sk); - tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); + tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1); __sk_dst_reset(sk); goto out_reset_timer; } @@ -431,12 +432,12 @@ void tcp_retransmit_timer(struct sock *sk) } else { mib_idx = LINUX_MIB_TCPTIMEOUTS; } - NET_INC_STATS_BH(sock_net(sk), mib_idx); + __NET_INC_STATS(sock_net(sk), mib_idx); } tcp_enter_loss(sk); - if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { + if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, * do not backoff. */ @@ -493,6 +494,7 @@ out_reset_timer: out:; } +/* Called with BH disabled */ void tcp_write_timer_handler(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -549,7 +551,7 @@ void tcp_syn_ack_timeout(const struct request_sock *req) { struct net *net = read_pnet(&inet_rsk(req)->ireq_net); - NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS); + __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS); } EXPORT_SYMBOL(tcp_syn_ack_timeout); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 13951c4087d4..4c4bac1b5eab 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -107,16 +107,16 @@ EXPORT_SYMBOL_GPL(tcp_vegas_init); * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us) +void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct vegas *vegas = inet_csk_ca(sk); u32 vrtt; - if (rtt_us < 0) + if (sample->rtt_us < 0) return; /* Never allow zero rtt or baseRTT */ - vrtt = rtt_us + 1; + vrtt = sample->rtt_us + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index ef9da5306c68..248cfc0ff9ae 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -17,7 +17,7 @@ struct vegas { void tcp_vegas_init(struct sock *sk); void tcp_vegas_state(struct sock *sk, u8 ca_state); -void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); +void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample); void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 0d094b995cd9..40171e163cff 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -69,16 +69,17 @@ static void tcp_veno_init(struct sock *sk) } /* Do rtt sampling needed for Veno. */ -static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us) +static void tcp_veno_pkts_acked(struct sock *sk, + const struct ack_sample *sample) { struct veno *veno = inet_csk_ca(sk); u32 vrtt; - if (rtt_us < 0) + if (sample->rtt_us < 0) return; /* Never allow zero rtt or baseRTT */ - vrtt = rtt_us + 1; + vrtt = sample->rtt_us + 1; /* Filter to find propagation delay: */ if (vrtt < veno->basertt) diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index c10732e39837..4b03a2e2a050 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -99,12 +99,13 @@ static void westwood_filter(struct westwood *w, u32 delta) * Called after processing group of packets. * but all westwood needs is the last sample of srtt. */ -static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, s32 rtt) +static void tcp_westwood_pkts_acked(struct sock *sk, + const struct ack_sample *sample) { struct westwood *w = inet_csk_ca(sk); - if (rtt > 0) - w->rtt = usecs_to_jiffies(rtt); + if (sample->rtt_us > 0) + w->rtt = usecs_to_jiffies(sample->rtt_us); } /* diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 3e6a472e6b88..028eb046ea40 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -56,15 +56,16 @@ static void tcp_yeah_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) +static void tcp_yeah_pkts_acked(struct sock *sk, + const struct ack_sample *sample) { const struct inet_connection_sock *icsk = inet_csk(sk); struct yeah *yeah = inet_csk_ca(sk); if (icsk->icsk_ca_state == TCP_CA_Open) - yeah->pkts_acked = pkts_acked; + yeah->pkts_acked = sample->pkts_acked; - tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); + tcp_vegas_pkts_acked(sk, sample); } static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a2e7f55a1f61..2e3ebfe5549e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -143,10 +143,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, unsigned int log) { struct sock *sk2; - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); - sk_nulls_for_each(sk2, node, &hslot->head) { + sk_for_each(sk2, &hslot->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && (bitmap || udp_sk(sk2)->udp_port_hash == num) && @@ -177,12 +176,11 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, bool match_wildcard)) { struct sock *sk2; - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); int res = 0; spin_lock(&hslot2->lock); - udp_portaddr_for_each_entry(sk2, node, &hslot2->head) { + udp_portaddr_for_each_entry(sk2, &hslot2->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && (udp_sk(sk2)->udp_port_hash == num) && @@ -207,11 +205,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot, bool match_wildcard)) { struct net *net = sock_net(sk); - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); struct sock *sk2; - sk_nulls_for_each(sk2, node, &hslot->head) { + sk_for_each(sk2, &hslot->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && sk2->sk_family == sk->sk_family && @@ -333,22 +330,23 @@ found: goto fail_unlock; } - sk_nulls_add_node_rcu(sk, &hslot->head); + sk_add_node_rcu(sk, &hslot->head); hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock(&hslot2->lock); if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && - sk->sk_family == AF_INET6) - hlist_nulls_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node, - &hslot2->head); + sk->sk_family == AF_INET6) + hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node, + &hslot2->head); else - hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, - &hslot2->head); + hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &hslot2->head); hslot2->count++; spin_unlock(&hslot2->lock); } + sock_set_flag(sk, SOCK_RCU_FREE); error = 0; fail_unlock: spin_unlock_bh(&hslot->lock); @@ -502,37 +500,27 @@ static struct sock *udp4_lib_lookup2(struct net *net, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; -begin: result = NULL; badness = 0; - udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + badness = score; + result = sk; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -540,23 +528,6 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot2) - goto begin; - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score2(result, net, saddr, sport, - daddr, hnum, dif) < badness)) { - sock_put(result); - goto begin; - } - } return result; } @@ -568,15 +539,12 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int dif, struct udp_table *udptable, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; - rcu_read_lock(); if (hslot->count > 10) { hash2 = udp4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; @@ -598,35 +566,27 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, htonl(INADDR_ANY), hnum, dif, hslot2, slot2, skb); } - rcu_read_unlock(); return result; } begin: result = NULL; badness = 0; - sk_nulls_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + result = sk; + badness = score; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -634,25 +594,6 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto begin; - - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score(result, net, saddr, hnum, sport, - daddr, dport, dif) < badness)) { - sock_put(result); - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(__udp4_lib_lookup); @@ -663,18 +604,36 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, { const struct iphdr *iph = ip_hdr(skb); - return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, + return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), udptable, skb); } +struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport) +{ + return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table); +} +EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb); + +/* Must be called under rcu_read_lock(). + * Does increment socket refcount. + */ +#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ + IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, - &udp_table, NULL); + struct sock *sk; + + sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, + dif, &udp_table, NULL); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + return sk; } EXPORT_SYMBOL_GPL(udp4_lib_lookup); +#endif static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, @@ -723,7 +682,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) iph->saddr, uh->source, skb->dev->ifindex, udptable, NULL); if (!sk) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; /* No socket for error */ } @@ -776,7 +735,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk->sk_err = err; sk->sk_error_report(sk); out: - sock_put(sk); + return; } void udp_err(struct sk_buff *skb, u32 info) @@ -917,13 +876,13 @@ send: err = ip_send_skb(sock_net(sk), skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; } } else - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -1032,15 +991,13 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) */ connected = 1; } - ipc.addr = inet->inet_saddr; + ipc.sockc.tsflags = sk->sk_tsflags; + ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - sock_tx_timestamp(sk, &ipc.tx_flags); - if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc, - sk->sk_family == AF_INET6); + err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); if (unlikely(err)) { kfree(ipc.opt); return err; @@ -1065,6 +1022,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) saddr = ipc.addr; ipc.addr = faddr = daddr; + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); + if (ipc.opt && ipc.opt->opt.srr) { if (!daddr) return -EINVAL; @@ -1192,8 +1151,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -1277,10 +1236,10 @@ static unsigned int first_packet_length(struct sock *sk) spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, - IS_UDPLITE(sk)); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, + IS_UDPLITE(sk)); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); __skb_queue_tail(&list_kill, skb); @@ -1316,14 +1275,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) { unsigned int amount = first_packet_length(sk); - if (amount) - /* - * We will only return the amount - * of this packet since that is all - * that will be read. - */ - amount -= sizeof(struct udphdr); - return put_user(amount, (int __user *)arg); } @@ -1347,7 +1298,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; unsigned int ulen, copied; - int peeked, off = 0; + int peeked, peeking, off; int err; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; @@ -1357,15 +1308,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, return ip_recv_error(sk, msg, len, addr_len); try_again: + peeking = off = sk_peek_offset(sk, flags); skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, &off, &err); if (!skb) - goto out; + return err; - ulen = skb->len - sizeof(struct udphdr); + ulen = skb->len; copied = len; - if (copied > ulen) - copied = ulen; + if (copied > ulen - off) + copied = ulen - off; else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; @@ -1375,18 +1327,16 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) { checksum_valid = !udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } if (checksum_valid || skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), - msg, copied); + err = skb_copy_datagram_msg(skb, off, msg, copied); else { - err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), - msg); + err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; @@ -1396,15 +1346,16 @@ try_again: trace_kfree_skb(skb, udp_recvmsg); if (!peeked) { atomic_inc(&sk->sk_drops); - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); } - goto out_free; + skb_free_datagram_locked(sk, skb); + return err; } if (!peeked) - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_ts_and_drops(msg, sk, skb); @@ -1417,22 +1368,20 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr)); + ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr) + off); err = copied; if (flags & MSG_TRUNC) err = ulen; -out_free: - skb_free_datagram_locked(sk, skb); -out: + __skb_free_datagram_locked(sk, skb, peeking ? -err : err); return err; csum_copy_err: slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) { - UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); - UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } unlock_sock_fast(sk, slow); @@ -1479,13 +1428,13 @@ void udp_lib_unhash(struct sock *sk) spin_lock_bh(&hslot->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - if (sk_nulls_del_node_init_rcu(sk)) { + if (sk_del_node_init_rcu(sk)) { hslot->count--; inet_sk(sk)->inet_num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_lock(&hslot2->lock); - hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); hslot2->count--; spin_unlock(&hslot2->lock); } @@ -1518,12 +1467,12 @@ void udp_lib_rehash(struct sock *sk, u16 newhash) if (hslot2 != nhslot2) { spin_lock(&hslot2->lock); - hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); hslot2->count--; spin_unlock(&hslot2->lock); spin_lock(&nhslot2->lock); - hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, &nhslot2->head); nhslot2->count++; spin_unlock(&nhslot2->lock); @@ -1553,15 +1502,15 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_incoming_cpu_update(sk); } - rc = sock_queue_rcv_skb(sk, skb); + rc = __sock_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, - is_udplite); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); trace_udp_fail_queue_rcv_skb(rc, sk); return -1; @@ -1625,9 +1574,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); + __UDP_INC_STATS(sock_net(sk), + UDP_MIB_INDATAGRAMS, + is_udplite); return -ret; } } @@ -1669,13 +1618,17 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter) && - udp_lib_checksum_complete(skb)) - goto csum_error; + if (rcu_access_pointer(sk->sk_filter)) { + if (udp_lib_checksum_complete(skb)) + goto csum_error; + if (sk_filter(sk, skb)) + goto drop; + } + udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, - is_udplite); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); goto drop; } @@ -1694,43 +1647,14 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return rc; csum_error: - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); kfree_skb(skb); return -1; } -static void flush_stack(struct sock **stack, unsigned int count, - struct sk_buff *skb, unsigned int final) -{ - unsigned int i; - struct sk_buff *skb1 = NULL; - struct sock *sk; - - for (i = 0; i < count; i++) { - sk = stack[i]; - if (likely(!skb1)) - skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); - - if (!skb1) { - atomic_inc(&sk->sk_drops); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); - } - - if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) - skb1 = NULL; - - sock_put(sk); - } - if (unlikely(skb1)) - kfree_skb(skb1); -} - /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. */ @@ -1754,14 +1678,14 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udp_table *udptable, int proto) { - struct sock *sk, *stack[256 / sizeof(struct sock *)]; - struct hlist_nulls_node *node; + struct sock *sk, *first = NULL; unsigned short hnum = ntohs(uh->dest); struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); - int dif = skb->dev->ifindex; - unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); - bool inner_flushed = false; + unsigned int offset = offsetof(typeof(*sk), sk_node); + int dif = skb->dev->ifindex; + struct hlist_node *node; + struct sk_buff *nskb; if (use_hash2) { hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & @@ -1772,23 +1696,28 @@ start_lookup: offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } - spin_lock(&hslot->lock); - sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { - if (__udp_is_mcast_sock(net, sk, - uh->dest, daddr, - uh->source, saddr, - dif, hnum)) { - if (unlikely(count == ARRAY_SIZE(stack))) { - flush_stack(stack, count, skb, ~0); - inner_flushed = true; - count = 0; - } - stack[count++] = sk; - sock_hold(sk); + sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { + if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr, + uh->source, saddr, dif, hnum)) + continue; + + if (!first) { + first = sk; + continue; } - } + nskb = skb_clone(skb, GFP_ATOMIC); - spin_unlock(&hslot->lock); + if (unlikely(!nskb)) { + atomic_inc(&sk->sk_drops); + __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + __UDP_INC_STATS(net, UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + continue; + } + if (udp_queue_rcv_skb(sk, nskb) > 0) + consume_skb(nskb); + } /* Also lookup *:port if we are using hash2 and haven't done so yet. */ if (use_hash2 && hash2 != hash2_any) { @@ -1796,16 +1725,13 @@ start_lookup: goto start_lookup; } - /* - * do the slow work with no lock held - */ - if (count) { - flush_stack(stack, count, skb, count - 1); + if (first) { + if (udp_queue_rcv_skb(first, skb) > 0) + consume_skb(skb); } else { - if (!inner_flushed) - UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); - consume_skb(skb); + kfree_skb(skb); + __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); } return 0; } @@ -1902,7 +1828,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, inet_compute_pseudo); ret = udp_queue_rcv_skb(sk, skb); - sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 @@ -1920,7 +1845,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -1947,9 +1872,9 @@ csum_error: proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), ulen); - UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); drop: - UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -1963,49 +1888,24 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, int dif) { struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(loc_port); - unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); + unsigned int slot = udp_hashfn(net, hnum, udp_table.mask); struct udp_hslot *hslot = &udp_table.hash[slot]; /* Do not bother scanning a too big list */ if (hslot->count > 10) return NULL; - rcu_read_lock(); -begin: - count = 0; result = NULL; - sk_nulls_for_each_rcu(sk, node, &hslot->head) { - if (__udp_is_mcast_sock(net, sk, - loc_port, loc_addr, - rmt_port, rmt_addr, - dif, hnum)) { + sk_for_each_rcu(sk, &hslot->head) { + if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr, + rmt_port, rmt_addr, dif, hnum)) { + if (result) + return NULL; result = sk; - ++count; - } - } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto begin; - - if (result) { - if (count != 1 || - unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(!__udp_is_mcast_sock(net, result, - loc_port, loc_addr, - rmt_port, rmt_addr, - dif, hnum))) { - sock_put(result); - result = NULL; } } - rcu_read_unlock(); + return result; } @@ -2018,37 +1918,22 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, __be16 rmt_port, __be32 rmt_addr, int dif) { - struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(loc_port); unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); + struct sock *sk; - rcu_read_lock(); - result = NULL; - udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { - if (INET_MATCH(sk, net, acookie, - rmt_addr, loc_addr, ports, dif)) - result = sk; + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { + if (INET_MATCH(sk, net, acookie, rmt_addr, + loc_addr, ports, dif)) + return sk; /* Only check first socket in chain */ break; } - - if (result) { - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(!INET_MATCH(sk, net, acookie, - rmt_addr, loc_addr, - ports, dif))) { - sock_put(result); - result = NULL; - } - } - rcu_read_unlock(); - return result; + return NULL; } void udp_v4_early_demux(struct sk_buff *skb) @@ -2056,7 +1941,7 @@ void udp_v4_early_demux(struct sk_buff *skb) struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct udphdr *uh; - struct sock *sk; + struct sock *sk = NULL; struct dst_entry *dst; int dif = skb->dev->ifindex; int ours; @@ -2088,11 +1973,9 @@ void udp_v4_early_demux(struct sk_buff *skb) } else if (skb->pkt_type == PACKET_HOST) { sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); - } else { - return; } - if (!sk) + if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) return; skb->sk = sk; @@ -2392,14 +2275,13 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) for (state->bucket = start; state->bucket <= state->udp_table->mask; ++state->bucket) { - struct hlist_nulls_node *node; struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; - if (hlist_nulls_empty(&hslot->head)) + if (hlist_empty(&hslot->head)) continue; spin_lock_bh(&hslot->lock); - sk_nulls_for_each(sk, node, &hslot->head) { + sk_for_each(sk, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == state->family) @@ -2418,7 +2300,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) struct net *net = seq_file_net(seq); do { - sk = sk_nulls_next(sk); + sk = sk_next(sk); } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { @@ -2627,12 +2509,12 @@ void __init udp_table_init(struct udp_table *table, const char *name) table->hash2 = table->hash + (table->mask + 1); for (i = 0; i <= table->mask; i++) { - INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); + INIT_HLIST_HEAD(&table->hash[i].head); table->hash[i].count = 0; spin_lock_init(&table->hash[i].lock); } for (i = 0; i <= table->mask; i++) { - INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i); + INIT_HLIST_HEAD(&table->hash2[i].head); table->hash2[i].count = 0; spin_lock_init(&table->hash2[i].lock); } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index df1966f3b6ec..3d5ccf4b1412 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -36,10 +36,11 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, const struct inet_diag_req_v2 *req) { int err = -EINVAL; - struct sock *sk; + struct sock *sk = NULL; struct sk_buff *rep; struct net *net = sock_net(in_skb->sk); + rcu_read_lock(); if (req->sdiag_family == AF_INET) sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, @@ -54,9 +55,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_dport, req->id.idiag_if, tbl, NULL); #endif - else - goto out_nosk; - + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + rcu_read_unlock(); err = -ENOENT; if (!sk) goto out_nosk; @@ -96,24 +97,23 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { - int num, s_num, slot, s_slot; struct net *net = sock_net(skb->sk); + int num, s_num, slot, s_slot; s_slot = cb->args[0]; num = s_num = cb->args[1]; for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) { - struct sock *sk; - struct hlist_nulls_node *node; struct udp_hslot *hslot = &table->hash[slot]; + struct sock *sk; num = 0; - if (hlist_nulls_empty(&hslot->head)) + if (hlist_empty(&hslot->head)) continue; spin_lock_bh(&hslot->lock); - sk_nulls_for_each(sk, node, &hslot->head) { + sk_for_each(sk, &hslot->head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index e330c0e56b11..6b7459c92bb2 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -14,18 +14,6 @@ #include <net/udp.h> #include <net/protocol.h> -static DEFINE_SPINLOCK(udp_offload_lock); -static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; - -#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock)) - -struct udp_offload_priv { - struct udp_offload *offload; - possible_net_t net; - struct rcu_head rcu; - struct udp_offload_priv __rcu *next; -}; - static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, @@ -51,8 +39,11 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * 16 bit length field due to the header being added outside of an * IP or IPv6 frame that was already limited to 64K - 1. */ - partial = csum_sub(csum_unfold(uh->check), - (__force __wsum)htonl(skb->len)); + if (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) + partial = (__force __wsum)uh->len; + else + partial = (__force __wsum)htonl(skb->len); + partial = csum_sub(csum_unfold(uh->check), partial); /* setup inner skb. */ skb->encapsulation = 0; @@ -101,7 +92,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, udp_offset = outer_hlen - tnl_hlen; skb = segs; do { - __be16 len; + unsigned int len; if (remcsum) skb->ip_summed = CHECKSUM_NONE; @@ -119,14 +110,26 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_set_transport_header(skb, udp_offset); - len = htons(skb->len - udp_offset); + len = skb->len - udp_offset; uh = udp_hdr(skb); - uh->len = len; + + /* If we are only performing partial GSO the inner header + * will be using a length value equal to only one MSS sized + * segment instead of the entire frame. + */ + if (skb_is_gso(skb)) { + uh->len = htons(skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)uh); + } else { + uh->len = htons(len); + } if (!need_csum) continue; - uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len)); + uh->check = ~csum_fold(csum_add(partial, + (__force __wsum)htonl(len))); if (skb->encapsulation || !offload_csum) { uh->check = gso_make_checksum(skb, ~uh->check); @@ -179,6 +182,7 @@ out_unlock: return segs; } +EXPORT_SYMBOL(skb_udp_tunnel_segment); static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) @@ -253,64 +257,14 @@ out: return segs; } -int udp_add_offload(struct net *net, struct udp_offload *uo) -{ - struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC); - - if (!new_offload) - return -ENOMEM; - - write_pnet(&new_offload->net, net); - new_offload->offload = uo; - - spin_lock(&udp_offload_lock); - new_offload->next = udp_offload_base; - rcu_assign_pointer(udp_offload_base, new_offload); - spin_unlock(&udp_offload_lock); - - return 0; -} -EXPORT_SYMBOL(udp_add_offload); - -static void udp_offload_free_routine(struct rcu_head *head) -{ - struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu); - kfree(ou_priv); -} - -void udp_del_offload(struct udp_offload *uo) -{ - struct udp_offload_priv __rcu **head = &udp_offload_base; - struct udp_offload_priv *uo_priv; - - spin_lock(&udp_offload_lock); - - uo_priv = udp_deref_protected(*head); - for (; uo_priv != NULL; - uo_priv = udp_deref_protected(*head)) { - if (uo_priv->offload == uo) { - rcu_assign_pointer(*head, - udp_deref_protected(uo_priv->next)); - goto unlock; - } - head = &uo_priv->next; - } - pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port)); -unlock: - spin_unlock(&udp_offload_lock); - if (uo_priv) - call_rcu(&uo_priv->rcu, udp_offload_free_routine); -} -EXPORT_SYMBOL(udp_del_offload); - struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, - struct udphdr *uh) + struct udphdr *uh, udp_lookup_t lookup) { - struct udp_offload_priv *uo_priv; struct sk_buff *p, **pp = NULL; struct udphdr *uh2; unsigned int off = skb_gro_offset(skb); int flush = 1; + struct sock *sk; if (NAPI_GRO_CB(skb)->encap_mark || (skb->ip_summed != CHECKSUM_PARTIAL && @@ -322,13 +276,10 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, NAPI_GRO_CB(skb)->encap_mark = 1; rcu_read_lock(); - uo_priv = rcu_dereference(udp_offload_base); - for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { - if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && - uo_priv->offload->port == uh->dest && - uo_priv->offload->callbacks.gro_receive) - goto unflush; - } + sk = (*lookup)(skb, uh->source, uh->dest); + + if (sk && udp_sk(sk)->gro_receive) + goto unflush; goto out_unlock; unflush: @@ -352,9 +303,7 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); - NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - pp = uo_priv->offload->callbacks.gro_receive(head, skb, - uo_priv->offload); + pp = udp_sk(sk)->gro_receive(sk, head, skb); out_unlock: rcu_read_unlock(); @@ -362,6 +311,7 @@ out: NAPI_GRO_CB(skb)->flush |= flush; return pp; } +EXPORT_SYMBOL(udp_gro_receive); static struct sk_buff **udp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) @@ -383,19 +333,20 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head, inet_gro_compute_pseudo); skip: NAPI_GRO_CB(skb)->is_ipv6 = 0; - return udp_gro_receive(head, skb, uh); + return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb); flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; } -int udp_gro_complete(struct sk_buff *skb, int nhoff) +int udp_gro_complete(struct sk_buff *skb, int nhoff, + udp_lookup_t lookup) { - struct udp_offload_priv *uo_priv; __be16 newlen = htons(skb->len - nhoff); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); int err = -ENOSYS; + struct sock *sk; uh->len = newlen; @@ -405,22 +356,10 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) skb->encapsulation = 1; rcu_read_lock(); - - uo_priv = rcu_dereference(udp_offload_base); - for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { - if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && - uo_priv->offload->port == uh->dest && - uo_priv->offload->callbacks.gro_complete) - break; - } - - if (uo_priv) { - NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - err = uo_priv->offload->callbacks.gro_complete(skb, - nhoff + sizeof(struct udphdr), - uo_priv->offload); - } - + sk = (*lookup)(skb, uh->source, uh->dest); + if (sk && udp_sk(sk)->gro_complete) + err = udp_sk(sk)->gro_complete(sk, skb, + nhoff + sizeof(struct udphdr)); rcu_read_unlock(); if (skb->remcsum_offload) @@ -428,6 +367,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) return err; } +EXPORT_SYMBOL(udp_gro_complete); static int udp4_gro_complete(struct sk_buff *skb, int nhoff) { @@ -442,7 +382,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; } - return udp_gro_complete(skb, nhoff); + return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb); } static const struct net_offload udpv4_offload = { diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 96599d1a1318..47f12c73d959 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -69,6 +69,8 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->encap_type = cfg->encap_type; udp_sk(sk)->encap_rcv = cfg->encap_rcv; udp_sk(sk)->encap_destroy = cfg->encap_destroy; + udp_sk(sk)->gro_receive = cfg->gro_receive; + udp_sk(sk)->gro_complete = cfg->gro_complete; udp_tunnel_encap_enable(sock); } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 11e875ffd7ac..3f8411328de5 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -218,6 +218,7 @@ config IPV6_GRE tristate "IPv6: GRE tunnel" select IPV6_TUNNEL select NET_IP_TUNNEL + depends on NET_IPGRE_DEMUX ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2fbd90bf8d33..5e9d6bf4aaca 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,9 +8,10 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ - exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o + exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ + udp_offload.o -ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o +ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8ec4b3089e20..47f837a58e0a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -359,7 +359,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64; ndev->cnf.mtu6 = dev->mtu; - ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); if (!ndev->nd_parms) { kfree(ndev); @@ -4995,15 +4994,13 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) { struct inet6_ifaddr *ifp; struct net_device *dev = idev->dev; - bool update_rs = false; + bool clear_token, update_rs = false; struct in6_addr ll_addr; ASSERT_RTNL(); if (!token) return -EINVAL; - if (ipv6_addr_any(token)) - return -EINVAL; if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) return -EINVAL; if (!ipv6_accept_ra(idev)) @@ -5018,10 +5015,13 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) write_unlock_bh(&idev->lock); + clear_token = ipv6_addr_any(token); + if (clear_token) + goto update_lft; + if (!idev->dead && (idev->if_flags & IF_READY) && !ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE | IFA_F_OPTIMISTIC)) { - /* If we're not ready, then normal ifup will take care * of this. Otherwise, we need to request our rs here. */ @@ -5029,6 +5029,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) update_rs = true; } +update_lft: write_lock_bh(&idev->lock); if (update_rs) { @@ -5618,376 +5619,366 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } -static struct addrconf_sysctl_table -{ - struct ctl_table_header *sysctl_header; - struct ctl_table addrconf_vars[DEVCONF_MAX+1]; -} addrconf_sysctl __read_mostly = { - .sysctl_header = NULL, - .addrconf_vars = { - { - .procname = "forwarding", - .data = &ipv6_devconf.forwarding, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_forward, - }, - { - .procname = "hop_limit", - .data = &ipv6_devconf.hop_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_hop_limit, - }, - { - .procname = "mtu", - .data = &ipv6_devconf.mtu6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_mtu, - }, - { - .procname = "accept_ra", - .data = &ipv6_devconf.accept_ra, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_redirects", - .data = &ipv6_devconf.accept_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "autoconf", - .data = &ipv6_devconf.autoconf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "dad_transmits", - .data = &ipv6_devconf.dad_transmits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "router_solicitations", - .data = &ipv6_devconf.rtr_solicits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "router_solicitation_interval", - .data = &ipv6_devconf.rtr_solicit_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "router_solicitation_delay", - .data = &ipv6_devconf.rtr_solicit_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "force_mld_version", - .data = &ipv6_devconf.force_mld_version, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "mldv1_unsolicited_report_interval", - .data = - &ipv6_devconf.mldv1_unsolicited_report_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, - { - .procname = "mldv2_unsolicited_report_interval", - .data = - &ipv6_devconf.mldv2_unsolicited_report_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, - { - .procname = "use_tempaddr", - .data = &ipv6_devconf.use_tempaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "temp_valid_lft", - .data = &ipv6_devconf.temp_valid_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "temp_prefered_lft", - .data = &ipv6_devconf.temp_prefered_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "regen_max_retry", - .data = &ipv6_devconf.regen_max_retry, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "max_desync_factor", - .data = &ipv6_devconf.max_desync_factor, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "max_addresses", - .data = &ipv6_devconf.max_addresses, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_defrtr", - .data = &ipv6_devconf.accept_ra_defrtr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_min_hop_limit", - .data = &ipv6_devconf.accept_ra_min_hop_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_pinfo", - .data = &ipv6_devconf.accept_ra_pinfo, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, +static const struct ctl_table addrconf_sysctl[] = { + { + .procname = "forwarding", + .data = &ipv6_devconf.forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_forward, + }, + { + .procname = "hop_limit", + .data = &ipv6_devconf.hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_hop_limit, + }, + { + .procname = "mtu", + .data = &ipv6_devconf.mtu6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_mtu, + }, + { + .procname = "accept_ra", + .data = &ipv6_devconf.accept_ra, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_redirects", + .data = &ipv6_devconf.accept_redirects, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "autoconf", + .data = &ipv6_devconf.autoconf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "dad_transmits", + .data = &ipv6_devconf.dad_transmits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "router_solicitations", + .data = &ipv6_devconf.rtr_solicits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "router_solicitation_interval", + .data = &ipv6_devconf.rtr_solicit_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "router_solicitation_delay", + .data = &ipv6_devconf.rtr_solicit_delay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "force_mld_version", + .data = &ipv6_devconf.force_mld_version, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "mldv1_unsolicited_report_interval", + .data = + &ipv6_devconf.mldv1_unsolicited_report_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, + { + .procname = "mldv2_unsolicited_report_interval", + .data = + &ipv6_devconf.mldv2_unsolicited_report_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, + { + .procname = "use_tempaddr", + .data = &ipv6_devconf.use_tempaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "temp_valid_lft", + .data = &ipv6_devconf.temp_valid_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "temp_prefered_lft", + .data = &ipv6_devconf.temp_prefered_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "regen_max_retry", + .data = &ipv6_devconf.regen_max_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "max_desync_factor", + .data = &ipv6_devconf.max_desync_factor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "max_addresses", + .data = &ipv6_devconf.max_addresses, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_defrtr", + .data = &ipv6_devconf.accept_ra_defrtr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_min_hop_limit", + .data = &ipv6_devconf.accept_ra_min_hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_pinfo", + .data = &ipv6_devconf.accept_ra_pinfo, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IPV6_ROUTER_PREF - { - .procname = "accept_ra_rtr_pref", - .data = &ipv6_devconf.accept_ra_rtr_pref, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "router_probe_interval", - .data = &ipv6_devconf.rtr_probe_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, + { + .procname = "accept_ra_rtr_pref", + .data = &ipv6_devconf.accept_ra_rtr_pref, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "router_probe_interval", + .data = &ipv6_devconf.rtr_probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, #ifdef CONFIG_IPV6_ROUTE_INFO - { - .procname = "accept_ra_rt_info_max_plen", - .data = &ipv6_devconf.accept_ra_rt_info_max_plen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, + { + .procname = "accept_ra_rt_info_max_plen", + .data = &ipv6_devconf.accept_ra_rt_info_max_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #endif - { - .procname = "proxy_ndp", - .data = &ipv6_devconf.proxy_ndp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_proxy_ndp, - }, - { - .procname = "accept_source_route", - .data = &ipv6_devconf.accept_source_route, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, + { + .procname = "proxy_ndp", + .data = &ipv6_devconf.proxy_ndp, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_proxy_ndp, + }, + { + .procname = "accept_source_route", + .data = &ipv6_devconf.accept_source_route, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - { - .procname = "optimistic_dad", - .data = &ipv6_devconf.optimistic_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - - }, - { - .procname = "use_optimistic", - .data = &ipv6_devconf.use_optimistic, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - - }, + { + .procname = "optimistic_dad", + .data = &ipv6_devconf.optimistic_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "use_optimistic", + .data = &ipv6_devconf.use_optimistic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #ifdef CONFIG_IPV6_MROUTE - { - .procname = "mc_forwarding", - .data = &ipv6_devconf.mc_forwarding, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, + { + .procname = "mc_forwarding", + .data = &ipv6_devconf.mc_forwarding, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, #endif - { - .procname = "disable_ipv6", - .data = &ipv6_devconf.disable_ipv6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_disable, - }, - { - .procname = "accept_dad", - .data = &ipv6_devconf.accept_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "force_tllao", - .data = &ipv6_devconf.force_tllao, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "ndisc_notify", - .data = &ipv6_devconf.ndisc_notify, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "suppress_frag_ndisc", - .data = &ipv6_devconf.suppress_frag_ndisc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "accept_ra_from_local", - .data = &ipv6_devconf.accept_ra_from_local, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_mtu", - .data = &ipv6_devconf.accept_ra_mtu, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "stable_secret", - .data = &ipv6_devconf.stable_secret, - .maxlen = IPV6_MAX_STRLEN, - .mode = 0600, - .proc_handler = addrconf_sysctl_stable_secret, - }, - { - .procname = "use_oif_addrs_only", - .data = &ipv6_devconf.use_oif_addrs_only, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ignore_routes_with_linkdown", - .data = &ipv6_devconf.ignore_routes_with_linkdown, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, - }, - { - .procname = "drop_unicast_in_l2_multicast", - .data = &ipv6_devconf.drop_unicast_in_l2_multicast, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "drop_unsolicited_na", - .data = &ipv6_devconf.drop_unsolicited_na, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "keep_addr_on_down", - .data = &ipv6_devconf.keep_addr_on_down, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - - }, - { - /* sentinel */ - } + { + .procname = "disable_ipv6", + .data = &ipv6_devconf.disable_ipv6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_disable, + }, + { + .procname = "accept_dad", + .data = &ipv6_devconf.accept_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "force_tllao", + .data = &ipv6_devconf.force_tllao, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "ndisc_notify", + .data = &ipv6_devconf.ndisc_notify, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec }, + { + .procname = "suppress_frag_ndisc", + .data = &ipv6_devconf.suppress_frag_ndisc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "accept_ra_from_local", + .data = &ipv6_devconf.accept_ra_from_local, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_mtu", + .data = &ipv6_devconf.accept_ra_mtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "stable_secret", + .data = &ipv6_devconf.stable_secret, + .maxlen = IPV6_MAX_STRLEN, + .mode = 0600, + .proc_handler = addrconf_sysctl_stable_secret, + }, + { + .procname = "use_oif_addrs_only", + .data = &ipv6_devconf.use_oif_addrs_only, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ignore_routes_with_linkdown", + .data = &ipv6_devconf.ignore_routes_with_linkdown, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, + }, + { + .procname = "drop_unicast_in_l2_multicast", + .data = &ipv6_devconf.drop_unicast_in_l2_multicast, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "drop_unsolicited_na", + .data = &ipv6_devconf.drop_unsolicited_na, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "keep_addr_on_down", + .data = &ipv6_devconf.keep_addr_on_down, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, + { + /* sentinel */ + } }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, struct inet6_dev *idev, struct ipv6_devconf *p) { int i; - struct addrconf_sysctl_table *t; + struct ctl_table *table; char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; - t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); - if (!t) + table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL); + if (!table) goto out; - for (i = 0; t->addrconf_vars[i].data; i++) { - t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf; - t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ - t->addrconf_vars[i].extra2 = net; + for (i = 0; table[i].data; i++) { + table[i].data += (char *)p - (char *)&ipv6_devconf; + table[i].extra1 = idev; /* embedded; no ref */ + table[i].extra2 = net; } snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); - t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); - if (!t->sysctl_header) + p->sysctl_header = register_net_sysctl(net, path, table); + if (!p->sysctl_header) goto free; - p->sysctl = t; return 0; free: - kfree(t); + kfree(table); out: return -ENOBUFS; } static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) { - struct addrconf_sysctl_table *t; + struct ctl_table *table; - if (!p->sysctl) + if (!p->sysctl_header) return; - t = p->sysctl; - p->sysctl = NULL; - unregister_net_sysctl_table(t->sysctl_header); - kfree(t); + table = p->sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(p->sysctl_header); + p->sysctl_header = NULL; + kfree(table); } static int addrconf_sysctl_register(struct inet6_dev *idev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b11c37cfd67c..bfa86f040c16 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -64,6 +64,8 @@ #include <asm/uaccess.h> #include <linux/mroute6.h> +#include "ip6_offload.h" + MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); @@ -561,6 +563,7 @@ const struct proto_ops inet6_dgram_ops = { .recvmsg = inet_recvmsg, /* ok */ .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, @@ -958,6 +961,10 @@ static int __init inet6_init(void) if (err) goto udplitev6_fail; + err = udpv6_offload_init(); + if (err) + goto udpv6_offload_fail; + err = tcpv6_init(); if (err) goto tcpv6_fail; @@ -987,6 +994,8 @@ pingv6_fail: ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: + udpv6_offload_exit(); +udpv6_offload_fail: udplitev6_exit(); udplitev6_fail: udpv6_exit(); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9dd3882fe6bf..37874e2f30ed 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -450,9 +450,10 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) copied = len; } err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto out_free_skb; - + if (unlikely(err)) { + kfree_skb(skb); + return err; + } sock_recv_timestamp(msg, sk, skb); serr = SKB_EXT_ERR(skb); @@ -509,8 +510,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_ERRQUEUE; err = copied; -out_free_skb: - kfree_skb(skb); + consume_skb(skb); out: return err; } @@ -727,13 +727,13 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl); int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, - struct ipv6_txoptions *opt, - int *hlimit, int *tclass, int *dontfrag) + struct ipcm6_cookie *ipc6, struct sockcm_cookie *sockc) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; struct ipv6_rt_hdr *rthdr; struct ipv6_opt_hdr *hdr; + struct ipv6_txoptions *opt = ipc6->opt; int len; int err = 0; @@ -745,6 +745,13 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } + if (cmsg->cmsg_level == SOL_SOCKET) { + err = __sock_cmsg_send(sk, msg, cmsg, sockc); + if (err) + return err; + continue; + } + if (cmsg->cmsg_level != SOL_IPV6) continue; @@ -946,8 +953,8 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } - *hlimit = *(int *)CMSG_DATA(cmsg); - if (*hlimit < -1 || *hlimit > 0xff) { + ipc6->hlimit = *(int *)CMSG_DATA(cmsg); + if (ipc6->hlimit < -1 || ipc6->hlimit > 0xff) { err = -EINVAL; goto exit_f; } @@ -967,7 +974,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; err = 0; - *tclass = tc; + ipc6->tclass = tc; break; } @@ -985,7 +992,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; err = 0; - *dontfrag = df; + ipc6->dontfrag = df; break; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index ea7c4d64a00a..8de5dd7aaa05 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -258,8 +258,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -280,8 +280,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return 1; } - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); return -1; } @@ -309,8 +309,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -319,8 +319,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -334,8 +334,8 @@ looped_back: * processed by own */ if (!addr) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -360,8 +360,8 @@ looped_back: goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -379,8 +379,8 @@ looped_back: n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb))); @@ -393,8 +393,8 @@ looped_back: if (skb_cloned(skb)) { /* the copy is a forwarded packet */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; } @@ -416,14 +416,14 @@ looped_back: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -434,8 +434,8 @@ looped_back: } if (ipv6_addr_is_multicast(addr)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -454,8 +454,8 @@ looped_back: if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); kfree_skb(skb); @@ -470,7 +470,7 @@ looped_back: return -1; unknown_rh: - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb)); return -1; @@ -568,28 +568,28 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); - IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); if (pkt_len <= IPV6_MAXPLEN) { - IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return false; } if (ipv6_hdr(skb)->payload_len) { - IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return false; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { - IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INTRUNCATEDPKTS); + __IP6_INC_STATS(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 0013cacf7164..4527285fcaa2 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -400,10 +400,11 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; + struct sockcm_cookie sockc_unused = {0}; + struct ipcm6_cookie ipc6; int iif = 0; int addr_type = 0; int len; - int hlimit; int err = 0; u32 mark = IP6_REPLY_MARK(net, skb->mark); @@ -505,7 +506,10 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (IS_ERR(dst)) goto out; - hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + ipc6.tclass = np->tclass; + ipc6.dontfrag = np->dontfrag; + ipc6.opt = NULL; msg.skb = skb; msg.offset = skb_network_offset(skb); @@ -524,9 +528,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, - np->tclass, NULL, &fl6, (struct rt6_info *)dst, - MSG_DONTWAIT, np->dontfrag); + sizeof(struct icmp6hdr), + &ipc6, &fl6, (struct rt6_info *)dst, + MSG_DONTWAIT, &sockc_unused); if (err) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); @@ -561,10 +565,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct flowi6 fl6; struct icmpv6_msg msg; struct dst_entry *dst; + struct ipcm6_cookie ipc6; int err = 0; - int hlimit; - u8 tclass; u32 mark = IP6_REPLY_MARK(net, skb->mark); + struct sockcm_cookie sockc_unused = {0}; saddr = &ipv6_hdr(skb)->daddr; @@ -604,22 +608,24 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (IS_ERR(dst)) goto out; - hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - idev = __in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; msg.type = ICMPV6_ECHO_REPLY; - tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + ipc6.dontfrag = np->dontfrag; + ipc6.opt = NULL; + err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6, + sizeof(struct icmp6hdr), &ipc6, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, - np->dontfrag); + &sockc_unused); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); + __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, @@ -671,7 +677,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) return; out: - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); } /* @@ -707,7 +713,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS); + __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -725,7 +731,7 @@ static int icmpv6_rcv(struct sk_buff *skb) type = hdr->icmp6_type; - ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type); + ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: @@ -809,9 +815,9 @@ static int icmpv6_rcv(struct sk_buff *skb) return 0; csum_error: - ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); + __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: - ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb(skb); return 0; diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index 28542cb2b387..d08fd2d48a78 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -23,10 +23,76 @@ #include <net/protocol.h> #include <uapi/linux/ila.h> +struct ila_locator { + union { + __u8 v8[8]; + __be16 v16[4]; + __be32 v32[2]; + __be64 v64; + }; +}; + +struct ila_identifier { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 __space:4; + u8 csum_neutral:1; + u8 type:3; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 type:3; + u8 csum_neutral:1; + u8 __space:4; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + u8 __space2[7]; + }; + __u8 v8[8]; + __be16 v16[4]; + __be32 v32[2]; + __be64 v64; + }; +}; + +enum { + ILA_ATYPE_IID = 0, + ILA_ATYPE_LUID, + ILA_ATYPE_VIRT_V4, + ILA_ATYPE_VIRT_UNI_V6, + ILA_ATYPE_VIRT_MULTI_V6, + ILA_ATYPE_RSVD_1, + ILA_ATYPE_RSVD_2, + ILA_ATYPE_RSVD_3, +}; + +#define CSUM_NEUTRAL_FLAG htonl(0x10000000) + +struct ila_addr { + union { + struct in6_addr addr; + struct { + struct ila_locator loc; + struct ila_identifier ident; + }; + }; +}; + +static inline struct ila_addr *ila_a2i(struct in6_addr *addr) +{ + return (struct ila_addr *)addr; +} + +static inline bool ila_addr_is_ila(struct ila_addr *iaddr) +{ + return (iaddr->ident.type != ILA_ATYPE_IID); +} + struct ila_params { - __be64 locator; - __be64 locator_match; + struct ila_locator locator; + struct ila_locator locator_match; __wsum csum_diff; + u8 csum_mode; }; static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) @@ -38,7 +104,14 @@ static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) return csum_partial(diff, sizeof(diff), 0); } -void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); +static inline bool ila_csum_neutral_set(struct ila_identifier ident) +{ + return !!(ident.csum_neutral); +} + +void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); + +void ila_init_saved_csum(struct ila_params *p); int ila_lwt_init(void); void ila_lwt_fini(void); diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 30613050e4ca..0e94042d1289 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -15,20 +15,52 @@ static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) { - if (*(__be64 *)&ip6h->daddr == p->locator_match) + struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); + + if (p->locator_match.v64) return p->csum_diff; else - return compute_csum_diff8((__be32 *)&ip6h->daddr, + return compute_csum_diff8((__be32 *)&iaddr->loc, + (__be32 *)&p->locator); +} + +static void ila_csum_do_neutral(struct ila_addr *iaddr, + struct ila_params *p) +{ + __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; + __wsum diff, fval; + + /* Check if checksum adjust value has been cached */ + if (p->locator_match.v64) { + diff = p->csum_diff; + } else { + diff = compute_csum_diff8((__be32 *)iaddr, (__be32 *)&p->locator); + } + + fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ? + ~CSUM_NEUTRAL_FLAG : CSUM_NEUTRAL_FLAG); + + diff = csum_add(diff, fval); + + *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust))); + + /* Flip the csum-neutral bit. Either we are doing a SIR->ILA + * translation with ILA_CSUM_NEUTRAL_MAP as the csum_method + * and the C-bit is not set, or we are doing an ILA-SIR + * tranlsation and the C-bit is set. + */ + iaddr->ident.csum_neutral ^= 1; } -void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) +static void ila_csum_adjust_transport(struct sk_buff *skb, + struct ila_params *p) { __wsum diff; struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); size_t nhoff = sizeof(struct ipv6hdr); - /* First update checksum */ switch (ip6h->nexthdr) { case NEXTHDR_TCP: if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { @@ -68,7 +100,46 @@ void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) } /* Now change destination address */ - *(__be64 *)&ip6h->daddr = p->locator; + iaddr->loc = p->locator; +} + +void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); + + /* First deal with the transport checksum */ + if (ila_csum_neutral_set(iaddr->ident)) { + /* C-bit is set in the locator indicating that this + * is a locator being translated to a SIR address. + * Perform (receiver) checksum-neutral translation. + */ + ila_csum_do_neutral(iaddr, p); + } else { + switch (p->csum_mode) { + case ILA_CSUM_ADJUST_TRANSPORT: + ila_csum_adjust_transport(skb, p); + break; + case ILA_CSUM_NEUTRAL_MAP: + ila_csum_do_neutral(iaddr, p); + break; + case ILA_CSUM_NO_ACTION: + break; + } + } + + /* Now change destination address */ + iaddr->loc = p->locator; +} + +void ila_init_saved_csum(struct ila_params *p) +{ + if (!p->locator_match.v64) + return; + + p->csum_diff = compute_csum_diff8( + (__be32 *)&p->locator_match, + (__be32 *)&p->locator); } static int __init ila_init(void) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 41f18de5dcc2..1dfb64166d7d 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -26,7 +26,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); return dst->lwtstate->orig_output(net, sk, skb); @@ -42,7 +42,7 @@ static int ila_input(struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); return dst->lwtstate->orig_input(skb); @@ -53,6 +53,7 @@ drop: static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, + [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, }; static int ila_build_state(struct net_device *dev, struct nlattr *nla, @@ -64,11 +65,28 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, size_t encap_len = sizeof(*p); struct lwtunnel_state *newts; const struct fib6_config *cfg6 = cfg; + struct ila_addr *iaddr; int ret; if (family != AF_INET6) return -EINVAL; + if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) { + /* Need to have full locator and at least type field + * included in destination + */ + return -EINVAL; + } + + iaddr = (struct ila_addr *)&cfg6->fc_dst; + + if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) { + /* Don't allow translation for a non-ILA address or checksum + * neutral flag to be set. + */ + return -EINVAL; + } + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy); if (ret < 0) @@ -84,16 +102,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, newts->len = encap_len; p = ila_params_lwtunnel(newts); - p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); + p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); - if (cfg6->fc_dst_len > sizeof(__be64)) { - /* Precompute checksum difference for translation since we - * know both the old locator and the new one. - */ - p->locator_match = *(__be64 *)&cfg6->fc_dst; - p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator_match, (__be32 *)&p->locator); - } + /* Precompute checksum difference for translation since we + * know both the old locator and the new one. + */ + p->locator_match = iaddr->loc; + p->csum_diff = compute_csum_diff8( + (__be32 *)&p->locator_match, (__be32 *)&p->locator); + + if (tb[ILA_ATTR_CSUM_MODE]) + p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]); + + ila_init_saved_csum(p); newts->type = LWTUNNEL_ENCAP_ILA; newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | @@ -109,7 +130,10 @@ static int ila_fill_encap_info(struct sk_buff *skb, { struct ila_params *p = ila_params_lwtunnel(lwtstate); - if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator)) + if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64, + ILA_ATTR_PAD)) + goto nla_put_failure; + if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode)) goto nla_put_failure; return 0; @@ -120,7 +144,9 @@ nla_put_failure: static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) { - return nla_total_size(sizeof(u64)); /* ILA_ATTR_LOCATOR */ + return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */ + nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */ + 0; } static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) @@ -128,7 +154,7 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) struct ila_params *a_p = ila_params_lwtunnel(a); struct ila_params *b_p = ila_params_lwtunnel(b); - return (a_p->locator != b_p->locator); + return (a_p->locator.v64 != b_p->locator.v64); } static const struct lwtunnel_encap_ops ila_encap_ops = { diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 295ca29a23c3..a90e57229c6c 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -11,13 +11,11 @@ struct ila_xlat_params { struct ila_params ip; - __be64 identifier; int ifindex; - unsigned int dir; }; struct ila_map { - struct ila_xlat_params p; + struct ila_xlat_params xp; struct rhash_head node; struct ila_map __rcu *next; struct rcu_head rcu; @@ -66,31 +64,29 @@ static __always_inline void __ila_hash_secret_init(void) net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static inline u32 ila_identifier_hash(__be64 identifier) +static inline u32 ila_locator_hash(struct ila_locator loc) { - u32 *v = (u32 *)&identifier; + u32 *v = (u32 *)loc.v32; return jhash_2words(v[0], v[1], hashrnd); } -static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier) +static inline spinlock_t *ila_get_lock(struct ila_net *ilan, + struct ila_locator loc) { - return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask]; + return &ilan->locks[ila_locator_hash(loc) & ilan->locks_mask]; } -static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc, - int ifindex, unsigned int dir) +static inline int ila_cmp_wildcards(struct ila_map *ila, + struct ila_addr *iaddr, int ifindex) { - return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) || - (ila->p.ifindex && ila->p.ifindex != ifindex) || - !(ila->p.dir & dir); + return (ila->xp.ifindex && ila->xp.ifindex != ifindex); } -static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p) +static inline int ila_cmp_params(struct ila_map *ila, + struct ila_xlat_params *xp) { - return (ila->p.ip.locator_match != p->ip.locator_match) || - (ila->p.ifindex != p->ifindex) || - (ila->p.dir != p->dir); + return (ila->xp.ifindex != xp->ifindex); } static int ila_cmpfn(struct rhashtable_compare_arg *arg, @@ -98,17 +94,14 @@ static int ila_cmpfn(struct rhashtable_compare_arg *arg, { const struct ila_map *ila = obj; - return (ila->p.identifier != *(__be64 *)arg->key); + return (ila->xp.ip.locator_match.v64 != *(__be64 *)arg->key); } static inline int ila_order(struct ila_map *ila) { int score = 0; - if (ila->p.ip.locator_match) - score += 1 << 0; - - if (ila->p.ifindex) + if (ila->xp.ifindex) score += 1 << 1; return score; @@ -117,7 +110,7 @@ static inline int ila_order(struct ila_map *ila) static const struct rhashtable_params rht_params = { .nelem_hint = 1024, .head_offset = offsetof(struct ila_map, node), - .key_offset = offsetof(struct ila_map, p.identifier), + .key_offset = offsetof(struct ila_map, xp.ip.locator_match), .key_len = sizeof(u64), /* identifier */ .max_size = 1048576, .min_size = 256, @@ -136,50 +129,45 @@ static struct genl_family ila_nl_family = { }; static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { - [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, }, [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, - [ILA_ATTR_DIR] = { .type = NLA_U32, }, + [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, }; static int parse_nl_config(struct genl_info *info, - struct ila_xlat_params *p) + struct ila_xlat_params *xp) { - memset(p, 0, sizeof(*p)); - - if (info->attrs[ILA_ATTR_IDENTIFIER]) - p->identifier = (__force __be64)nla_get_u64( - info->attrs[ILA_ATTR_IDENTIFIER]); + memset(xp, 0, sizeof(*xp)); if (info->attrs[ILA_ATTR_LOCATOR]) - p->ip.locator = (__force __be64)nla_get_u64( + xp->ip.locator.v64 = (__force __be64)nla_get_u64( info->attrs[ILA_ATTR_LOCATOR]); if (info->attrs[ILA_ATTR_LOCATOR_MATCH]) - p->ip.locator_match = (__force __be64)nla_get_u64( + xp->ip.locator_match.v64 = (__force __be64)nla_get_u64( info->attrs[ILA_ATTR_LOCATOR_MATCH]); - if (info->attrs[ILA_ATTR_IFINDEX]) - p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); + if (info->attrs[ILA_ATTR_CSUM_MODE]) + xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]); - if (info->attrs[ILA_ATTR_DIR]) - p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]); + if (info->attrs[ILA_ATTR_IFINDEX]) + xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); return 0; } /* Must be called with rcu readlock */ -static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc, +static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr, int ifindex, - unsigned int dir, struct ila_net *ilan) { struct ila_map *ila; - ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params); + ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->loc, + rht_params); while (ila) { - if (!ila_cmp_wildcards(ila, loc, ifindex, dir)) + if (!ila_cmp_wildcards(ila, iaddr, ifindex)) return ila; ila = rcu_access_pointer(ila->next); } @@ -188,15 +176,16 @@ static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc, } /* Must be called with rcu readlock */ -static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p, +static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp, struct ila_net *ilan) { struct ila_map *ila; - ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + ila = rhashtable_lookup_fast(&ilan->rhash_table, + &xp->ip.locator_match, rht_params); while (ila) { - if (!ila_cmp_params(ila, p)) + if (!ila_cmp_params(ila, xp)) return ila; ila = rcu_access_pointer(ila->next); } @@ -221,14 +210,14 @@ static void ila_free_cb(void *ptr, void *arg) } } -static int ila_xlat_addr(struct sk_buff *skb, int dir); +static int ila_xlat_addr(struct sk_buff *skb); static unsigned int ila_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - ila_xlat_addr(skb, ILA_DIR_IN); + ila_xlat_addr(skb); return NF_ACCEPT; } @@ -241,11 +230,11 @@ static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = { }, }; -static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) +static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) { struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_map *ila, *head; - spinlock_t *lock = ila_get_lock(ilan, p->identifier); + spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = 0, order; if (!ilan->hooks_registered) { @@ -264,22 +253,16 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) if (!ila) return -ENOMEM; - ila->p = *p; + ila_init_saved_csum(&xp->ip); - if (p->ip.locator_match) { - /* Precompute checksum difference for translation since we - * know both the old identifier and the new one. - */ - ila->p.ip.csum_diff = compute_csum_diff8( - (__be32 *)&p->ip.locator_match, - (__be32 *)&p->ip.locator); - } + ila->xp = *xp; order = ila_order(ila); spin_lock(lock); - head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + head = rhashtable_lookup_fast(&ilan->rhash_table, + &xp->ip.locator_match, rht_params); if (!head) { /* New entry for the rhash_table */ @@ -289,7 +272,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) struct ila_map *tila = head, *prev = NULL; do { - if (!ila_cmp_params(tila, p)) { + if (!ila_cmp_params(tila, xp)) { err = -EEXIST; goto out; } @@ -326,23 +309,23 @@ out: return err; } -static int ila_del_mapping(struct net *net, struct ila_xlat_params *p) +static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp) { struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_map *ila, *head, *prev; - spinlock_t *lock = ila_get_lock(ilan, p->identifier); + spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = -ENOENT; spin_lock(lock); head = rhashtable_lookup_fast(&ilan->rhash_table, - &p->identifier, rht_params); + &xp->ip.locator_match, rht_params); ila = head; prev = NULL; while (ila) { - if (ila_cmp_params(ila, p)) { + if (ila_cmp_params(ila, xp)) { prev = ila; ila = rcu_dereference_protected(ila->next, lockdep_is_held(lock)); @@ -404,28 +387,28 @@ static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); - struct ila_xlat_params p; + struct ila_xlat_params xp; int err; - err = parse_nl_config(info, &p); + err = parse_nl_config(info, &xp); if (err) return err; - ila_del_mapping(net, &p); + ila_del_mapping(net, &xp); return 0; } static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) { - if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER, - (__force u64)ila->p.identifier) || - nla_put_u64(msg, ILA_ATTR_LOCATOR, - (__force u64)ila->p.ip.locator) || - nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH, - (__force u64)ila->p.ip.locator_match) || - nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) || - nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir)) + if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR, + (__force u64)ila->xp.ip.locator.v64, + ILA_ATTR_PAD) || + nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR_MATCH, + (__force u64)ila->xp.ip.locator_match.v64, + ILA_ATTR_PAD) || + nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) || + nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode)) return -1; return 0; @@ -457,11 +440,11 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) struct net *net = genl_info_net(info); struct ila_net *ilan = net_generic(net, ila_net_id); struct sk_buff *msg; - struct ila_xlat_params p; + struct ila_xlat_params xp; struct ila_map *ila; int ret; - ret = parse_nl_config(info, &p); + ret = parse_nl_config(info, &xp); if (ret) return ret; @@ -471,7 +454,7 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); - ila = ila_lookup_by_params(&p, ilan); + ila = ila_lookup_by_params(&xp, ilan); if (ila) { ret = ila_dump_info(ila, info->snd_portid, @@ -501,7 +484,8 @@ static int ila_nl_dump_start(struct netlink_callback *cb) struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; - return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter); + return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter, + GFP_KERNEL); } static int ila_nl_dump_done(struct netlink_callback *cb) @@ -613,45 +597,32 @@ static struct pernet_operations ila_net_ops = { .size = sizeof(struct ila_net), }; -static int ila_xlat_addr(struct sk_buff *skb, int dir) +static int ila_xlat_addr(struct sk_buff *skb) { struct ila_map *ila; struct ipv6hdr *ip6h = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); struct ila_net *ilan = net_generic(net, ila_net_id); - __be64 identifier, locator_match; - size_t nhoff; + struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); /* Assumes skb contains a valid IPv6 header that is pulled */ - identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8]; - locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0]; - nhoff = sizeof(struct ipv6hdr); + if (!ila_addr_is_ila(iaddr)) { + /* Type indicates this is not an ILA address */ + return 0; + } rcu_read_lock(); - ila = ila_lookup_wildcards(identifier, locator_match, - skb->dev->ifindex, dir, ilan); + ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan); if (ila) - update_ipv6_locator(skb, &ila->p.ip); + ila_update_ipv6_locator(skb, &ila->xp.ip); rcu_read_unlock(); return 0; } -int ila_xlat_incoming(struct sk_buff *skb) -{ - return ila_xlat_addr(skb, ILA_DIR_IN); -} -EXPORT_SYMBOL(ila_xlat_incoming); - -int ila_xlat_outgoing(struct sk_buff *skb) -{ - return ila_xlat_addr(skb, ILA_DIR_OUT); -} -EXPORT_SYMBOL(ila_xlat_outgoing); - int ila_xlat_init(void) { int ret; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 70f2628be6fa..00cf28ad4565 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -69,7 +69,6 @@ struct sock *__inet6_lookup_established(struct net *net, struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - rcu_read_lock(); begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) @@ -90,7 +89,6 @@ begin: out: sk = NULL; found: - rcu_read_unlock(); return sk; } EXPORT_SYMBOL(__inet6_lookup_established); @@ -122,6 +120,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } +/* called with rcu_read_lock() */ struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -129,39 +128,27 @@ struct sock *inet6_lookup_listener(struct net *net, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { - struct sock *sk; - const struct hlist_nulls_node *node; - struct sock *result; - int score, hiscore, matches = 0, reuseport = 0; - bool select_ok = true; - u32 phash = 0; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + int score, hiscore = 0, matches = 0, reuseport = 0; + struct sock *sk, *result = NULL; + u32 phash = 0; - rcu_read_lock(); -begin: - result = NULL; - hiscore = 0; - sk_nulls_for_each(sk, node, &ilb->head) { + sk_for_each(sk, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { - hiscore = score; - result = sk; reuseport = sk->sk_reuseport; if (reuseport) { phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - sk2 = reuseport_select_sock(sk, phash, - skb, doff); - if (sk2) { - result = sk2; - goto found; - } - } + result = reuseport_select_sock(sk, phash, + skb, doff); + if (result) + return result; matches = 1; } + result = sk; + hiscore = score; } else if (score == hiscore && reuseport) { matches++; if (reciprocal_scale(phash, matches) == 0) @@ -169,25 +156,6 @@ begin: phash = next_pseudo_random32(phash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) - goto begin; - if (result) { -found: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) - result = NULL; - else if (unlikely(compute_score(result, net, hnum, daddr, - dif) < hiscore)) { - sock_put(result); - select_ok = false; - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(inet6_lookup_listener); @@ -199,12 +167,12 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, const int dif) { struct sock *sk; + bool refcounted; - local_bh_disable(); sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, - ntohs(dport), dif); - local_bh_enable(); - + ntohs(dport), dif, &refcounted); + if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; return sk; } EXPORT_SYMBOL_GPL(inet6_lookup); @@ -254,7 +222,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { sk_nulls_del_node_init_rcu((struct sock *)tw); - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); + __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ea071fad67a0..1bcef2369d64 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -240,6 +240,7 @@ struct fib6_table *fib6_new_table(struct net *net, u32 id) return tb; } +EXPORT_SYMBOL_GPL(fib6_new_table); struct fib6_table *fib6_get_table(struct net *net, u32 id) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index dc2db4f7b182..b912f0dbaf72 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -372,7 +372,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, if (olen > 0) { struct msghdr msg; struct flowi6 flowi6; - int junk; + struct sockcm_cookie sockc_junk; + struct ipcm6_cookie ipc6; err = -ENOMEM; fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL); @@ -389,8 +390,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, msg.msg_control = (void *)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); - err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, - &junk, &junk, &junk); + ipc6.opt = fl->opt; + err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6, &sockc_junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4e636e60a360..4541fa54035e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -54,6 +54,7 @@ #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/ip6_tunnel.h> +#include <net/gre.h> static bool log_ecn_error = true; @@ -342,7 +343,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, goto failed_free; /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) + if (!(nt->parms.o_flags & TUNNEL_SEQ)) dev->features |= NETIF_F_LLTX; dev_hold(dev); @@ -443,137 +444,41 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, t->err_time = jiffies; } -static int ip6gre_rcv(struct sk_buff *skb) +static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) { const struct ipv6hdr *ipv6h; - u8 *h; - __be16 flags; - __sum16 csum = 0; - __be32 key = 0; - u32 seqno = 0; struct ip6_tnl *tunnel; - int offset = 4; - __be16 gre_proto; - int err; - - if (!pskb_may_pull(skb, sizeof(struct in6_addr))) - goto drop; ipv6h = ipv6_hdr(skb); - h = skb->data; - flags = *(__be16 *)h; - - if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { - /* - Version must be 0. - - We do not support routing headers. - */ - if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop; - - if (flags&GRE_CSUM) { - csum = skb_checksum_simple_validate(skb); - offset += 4; - } - if (flags&GRE_KEY) { - key = *(__be32 *)(h + offset); - offset += 4; - } - if (flags&GRE_SEQ) { - seqno = ntohl(*(__be32 *)(h + offset)); - offset += 4; - } - } - - gre_proto = *(__be16 *)(h + 2); - tunnel = ip6gre_tunnel_lookup(skb->dev, - &ipv6h->saddr, &ipv6h->daddr, key, - gre_proto); + &ipv6h->saddr, &ipv6h->daddr, tpi->key, + tpi->proto); if (tunnel) { - struct pcpu_sw_netstats *tstats; - - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto drop; - - if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) { - tunnel->dev->stats.rx_dropped++; - goto drop; - } - - skb->protocol = gre_proto; - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IPv6 - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IPV6); - if ((*(h + offset) & 0xF0) != 0x40) - offset += 4; - } - - skb->mac_header = skb->network_header; - __pskb_pull(skb, offset); - skb_postpull_rcsum(skb, skb_transport_header(skb), offset); - - if (((flags&GRE_CSUM) && csum) || - (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->dev->stats.rx_crc_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - if (tunnel->parms.i_flags&GRE_SEQ) { - if (!(flags&GRE_SEQ) || - (tunnel->i_seqno && - (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->dev->stats.rx_fifo_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - tunnel->i_seqno = seqno + 1; - } - - /* Warning: All skb pointers will be invalidated! */ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - ipv6h = ipv6_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } + ip6_tnl_rcv(tunnel, skb, tpi, NULL, false); - __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + return PACKET_RCVD; + } - skb_reset_network_header(skb); + return PACKET_REJECT; +} - err = IP6_ECN_decapsulate(ipv6h, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", - &ipv6h->saddr, - ipv6_get_dsfield(ipv6h)); - if (err > 1) { - ++tunnel->dev->stats.rx_frame_errors; - ++tunnel->dev->stats.rx_errors; - goto drop; - } - } +static int gre_rcv(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + bool csum_err = false; + int hdr_len; - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6)); + if (hdr_len < 0) + goto drop; - netif_rx(skb); + if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false)) + goto drop; + if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD) return 0; - } - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); return 0; @@ -584,187 +489,40 @@ struct ipv6_tel_txoption { __u8 dst_opt[8]; }; -static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) +static int gre_handle_offloads(struct sk_buff *skb, bool csum) { - memset(opt, 0, sizeof(struct ipv6_tel_txoption)); - - opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; - opt->dst_opt[3] = 1; - opt->dst_opt[4] = encap_limit; - opt->dst_opt[5] = IPV6_TLV_PADN; - opt->dst_opt[6] = 1; - - opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; - opt->ops.opt_nflen = 8; + return iptunnel_handle_offloads(skb, + csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } -static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, - struct net_device *dev, - __u8 dsfield, - struct flowi6 *fl6, - int encap_limit, - __u32 *pmtu) +static netdev_tx_t __gre6_xmit(struct sk_buff *skb, + struct net_device *dev, __u8 dsfield, + struct flowi6 *fl6, int encap_limit, + __u32 *pmtu, __be16 proto) { struct ip6_tnl *tunnel = netdev_priv(dev); - struct net *net = tunnel->net; - struct net_device *tdev; /* Device to other host */ - struct ipv6hdr *ipv6h; /* Our new IP header */ - unsigned int max_headroom = 0; /* The extra header space needed */ - int gre_hlen; - struct ipv6_tel_txoption opt; - int mtu; - struct dst_entry *dst = NULL, *ndst = NULL; - struct net_device_stats *stats = &tunnel->dev->stats; - int err = -1; - u8 proto; - struct sk_buff *new_skb; - __be16 protocol; + __be16 protocol = (dev->type == ARPHRD_ETHER) ? + htons(ETH_P_TEB) : proto; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; - if (dev->header_ops && dev->type == ARPHRD_IP6GRE) { - gre_hlen = 0; - ipv6h = (struct ipv6hdr *)skb->data; - fl6->daddr = ipv6h->daddr; - } else { - gre_hlen = tunnel->hlen; + if (dev->header_ops && dev->type == ARPHRD_IP6GRE) + fl6->daddr = ((struct ipv6hdr *)skb->data)->daddr; + else fl6->daddr = tunnel->parms.raddr; - } - - if (!fl6->flowi6_mark) - dst = dst_cache_get(&tunnel->dst_cache); - - if (!dst) { - dst = ip6_route_output(net, NULL, fl6); - - if (dst->error) - goto tx_err_link_failure; - dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - dst = NULL; - goto tx_err_link_failure; - } - ndst = dst; - } - - tdev = dst->dev; - if (tdev == dev) { - stats->collisions++; - net_warn_ratelimited("%s: Local routing loop detected!\n", - tunnel->parms.name); - goto tx_err_dst_release; - } - - mtu = dst_mtu(dst) - sizeof(*ipv6h); - if (encap_limit >= 0) { - max_headroom += 8; - mtu -= 8; - } - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { - *pmtu = mtu; - err = -EMSGSIZE; - goto tx_err_dst_release; - } + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) { - tunnel->err_count--; - - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } - - skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); - - max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; - - if (skb_headroom(skb) < max_headroom || skb_shared(skb) || - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - new_skb = skb_realloc_headroom(skb, max_headroom); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; - if (!new_skb) - goto tx_err_dst_release; - - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - consume_skb(skb); - skb = new_skb; - } - - if (!fl6->flowi6_mark && ndst) - dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr); - skb_dst_set(skb, dst); - - proto = NEXTHDR_GRE; - if (encap_limit >= 0) { - init_tel_txopt(&opt, encap_limit); - ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); - } - - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - - skb_push(skb, gre_hlen); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(*ipv6h)); - - /* - * Push down and install the IP header. - */ - ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), - ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); - ipv6h->hop_limit = tunnel->parms.hop_limit; - ipv6h->nexthdr = proto; - ipv6h->saddr = fl6->saddr; - ipv6h->daddr = fl6->daddr; - - ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags; - protocol = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : skb->protocol; - ((__be16 *)(ipv6h + 1))[1] = protocol; - - if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4); - - if (tunnel->parms.o_flags&GRE_SEQ) { - ++tunnel->o_seqno; - *ptr = htonl(tunnel->o_seqno); - ptr--; - } - if (tunnel->parms.o_flags&GRE_KEY) { - *ptr = tunnel->parms.o_key; - ptr--; - } - if (tunnel->parms.o_flags&GRE_CSUM) { - *ptr = 0; - *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1), - skb->len - sizeof(struct ipv6hdr)); - } - } + /* Push GRE header. */ + gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); skb_set_inner_protocol(skb, protocol); - ip6tunnel_xmit(NULL, skb, dev); - return 0; -tx_err_link_failure: - stats->tx_carrier_errors++; - dst_link_failure(skb); -tx_err_dst_release: - dst_release(dst); - return err; + return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, + NEXTHDR_GRE); } static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) @@ -783,7 +541,6 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_GRE; dsfield = ipv4_get_dsfield(iph); @@ -793,7 +550,12 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; - err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); + if (err) + return -1; + + err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + skb->protocol); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) @@ -833,7 +595,6 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_GRE; dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) @@ -843,7 +604,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; - err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) + return -1; + + err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, + &mtu, skb->protocol); if (err != 0) { if (err == -EMSGSIZE) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -887,7 +652,11 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = skb->protocol; - err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); + if (err) + return err; + + err = __gre6_xmit(skb, dev, 0, &fl6, encap_limit, &mtu, skb->protocol); return err; } @@ -931,7 +700,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; - int addend = sizeof(struct ipv6hdr) + 4; + int t_hlen; if (dev->type != ARPHRD_ETHER) { memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); @@ -958,16 +727,11 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) else dev->flags &= ~IFF_POINTOPOINT; - /* Precalculate GRE options length */ - if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { - if (t->parms.o_flags&GRE_CSUM) - addend += 4; - if (t->parms.o_flags&GRE_KEY) - addend += 4; - if (t->parms.o_flags&GRE_SEQ) - addend += 4; - } - t->hlen = addend; + t->tun_hlen = gre_calc_hlen(t->parms.o_flags); + + t->hlen = t->tun_hlen; + + t_hlen = t->hlen + sizeof(struct ipv6hdr); if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & @@ -981,12 +745,15 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) return; if (rt->dst.dev) { - dev->hard_header_len = rt->dst.dev->hard_header_len + addend; + dev->hard_header_len = rt->dst.dev->hard_header_len + + t_hlen; if (set_mtu) { - dev->mtu = rt->dst.dev->mtu - addend; + dev->mtu = rt->dst.dev->mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1028,8 +795,8 @@ static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p, p->link = u->link; p->i_key = u->i_key; p->o_key = u->o_key; - p->i_flags = u->i_flags; - p->o_flags = u->o_flags; + p->i_flags = gre_flags_to_tnl_flags(u->i_flags); + p->o_flags = gre_flags_to_tnl_flags(u->o_flags); memcpy(p->name, u->name, sizeof(u->name)); } @@ -1046,8 +813,8 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, u->link = p->link; u->i_key = p->i_key; u->o_key = p->o_key; - u->i_flags = p->i_flags; - u->o_flags = p->o_flags; + u->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); + u->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); memcpy(u->name, p->name, sizeof(u->name)); } @@ -1061,6 +828,8 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, struct net *net = t->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ign->fb_tunnel_dev) { @@ -1160,15 +929,6 @@ done: return err; } -static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) @@ -1212,7 +972,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_uninit = ip6gre_tunnel_uninit, .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_do_ioctl = ip6gre_tunnel_ioctl, - .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -1228,17 +988,11 @@ static void ip6gre_dev_free(struct net_device *dev) static void ip6gre_tunnel_setup(struct net_device *dev) { - struct ip6_tnl *t; - dev->netdev_ops = &ip6gre_netdev_ops; dev->destructor = ip6gre_dev_free; dev->type = ARPHRD_IP6GRE; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4; - dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4; - t = netdev_priv(dev); - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; + dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); @@ -1248,6 +1002,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) { struct ip6_tnl *tunnel; int ret; + int t_hlen; tunnel = netdev_priv(dev); @@ -1266,6 +1021,17 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) return ret; } + tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); + + tunnel->hlen = tunnel->tun_hlen; + + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + return 0; } @@ -1304,7 +1070,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) static struct inet6_protocol ip6gre_protocol __read_mostly = { - .handler = ip6gre_rcv, + .handler = gre_rcv, .err_handler = ip6gre_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; @@ -1448,10 +1214,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + parms->i_flags = gre_flags_to_tnl_flags( + nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + parms->o_flags = gre_flags_to_tnl_flags( + nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -1500,11 +1268,16 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; +#define GRE6_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + static void ip6gre_tap_setup(struct net_device *dev) { @@ -1538,9 +1311,21 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, nt->net = dev_net(dev); ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= GRE6_FEATURES; + dev->hw_features |= GRE6_FEATURES; + + if (!(nt->parms.o_flags & TUNNEL_SEQ)) { + /* TCP segmentation offload is not supported when we + * generate output sequences. + */ + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + + /* Can use a lockless transmit, unless we generate + * output sequences + */ dev->features |= NETIF_F_LLTX; + } err = register_netdevice(dev); if (err) @@ -1609,8 +1394,6 @@ static size_t ip6gre_get_size(const struct net_device *dev) nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GRE_TTL */ nla_total_size(1) + - /* IFLA_GRE_TOS */ - nla_total_size(1) + /* IFLA_GRE_ENCAP_LIMIT */ nla_total_size(1) + /* IFLA_GRE_FLOWINFO */ @@ -1626,14 +1409,15 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *p = &t->parms; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || - nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || - nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, + gre_tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, + gre_tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || - /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/ nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c05c425c2389..f185cbcda114 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,6 +49,13 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + /* if ingress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_rcv(skb); + if (!skb) + return NET_RX_SUCCESS; + if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; @@ -78,11 +85,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt idev = __in6_dev_get(skb->dev); - IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len); + __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || !idev || unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -109,10 +116,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; - IP6_ADD_STATS_BH(net, idev, - IPSTATS_MIB_NOECTPKTS + + __IP6_ADD_STATS(net, idev, + IPSTATS_MIB_NOECTPKTS + (ipv6_get_dsfield(hdr) & INET_ECN_MASK), - max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); + max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); /* * RFC4291 2.5.3 * A packet received on an interface with a destination address @@ -169,12 +176,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* pkt_len may be zero if Jumbo payload option is present */ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { - IP6_INC_STATS_BH(net, - idev, IPSTATS_MIB_INTRUNCATEDPKTS); + __IP6_INC_STATS(net, + idev, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); goto drop; } hdr = ipv6_hdr(skb); @@ -182,7 +189,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->nexthdr == NEXTHDR_HOP) { if (ipv6_parse_hopopts(skb) < 0) { - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); return NET_RX_DROP; } @@ -197,7 +204,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt net, NULL, skb, dev, NULL, ip6_rcv_finish); err: - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); drop: rcu_read_unlock(); kfree_skb(skb); @@ -259,18 +266,18 @@ resubmit: if (ret > 0) goto resubmit; else if (ret == 0) - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP6_INC_STATS_BH(net, idev, - IPSTATS_MIB_INUNKNOWNPROTOS); + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff); } kfree_skb(skb); } else { - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); consume_skb(skb); } } @@ -278,7 +285,7 @@ resubmit: return 0; discard: - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); kfree_skb(skb); return 0; @@ -297,7 +304,7 @@ int ip6_mc_input(struct sk_buff *skb) const struct ipv6hdr *hdr; bool deliver; - IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev), + __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST, skb->len); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 82e9f3076028..f5eb184e1093 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -63,6 +63,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int proto; struct frag_hdr *fptr; unsigned int unfrag_ip6hlen; + unsigned int payload_len; u8 *prevhdr; int offset = 0; bool encap, udpfrag; @@ -73,6 +74,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | + SKB_GSO_TCPV6 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | @@ -80,7 +83,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_TCPV6 | + SKB_GSO_PARTIAL | 0))) goto out; @@ -117,7 +120,13 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); + if (skb_is_gso(skb)) + payload_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)(ipv6h + 1); + else + payload_len = skb->len - nhoff - sizeof(*ipv6h); + ipv6h->payload_len = htons(payload_len); skb->network_header = (u8 *)ipv6h - skb->head; if (udpfrag) { @@ -239,10 +248,14 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); NAPI_GRO_CB(p)->flush |= flush; - /* Clear flush_id, there's really no concept of ID in IPv6. */ - NAPI_GRO_CB(p)->flush_id = 0; + /* If the previous IP ID value was based on an atomic + * datagram we can overwrite the value and ignore it. + */ + if (NAPI_GRO_CB(skb)->is_atomic) + NAPI_GRO_CB(p)->flush_id = 0; } + NAPI_GRO_CB(skb)->is_atomic = true; NAPI_GRO_CB(skb)->flush |= flush; skb_gro_postpull_rcsum(skb, iph, nlen); @@ -325,8 +338,6 @@ static int __init ipv6_offload_init(void) if (tcpv6_offload_init() < 0) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); - if (udp_offload_init() < 0) - pr_crit("%s: Cannot add UDP protocol offload\n", __func__); if (ipv6_exthdrs_offload_init() < 0) pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h index 2e155c651b35..96b40e41ac53 100644 --- a/net/ipv6/ip6_offload.h +++ b/net/ipv6/ip6_offload.h @@ -12,7 +12,8 @@ #define __ip6_offload_h int ipv6_exthdrs_offload_init(void); -int udp_offload_init(void); +int udpv6_offload_init(void); +int udpv6_offload_exit(void); int tcpv6_offload_init(void); #endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bc972e7152c7..cbf127ae7c67 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -395,8 +395,8 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } @@ -427,8 +427,8 @@ int ip6_forward(struct sk_buff *skb) /* Force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -441,15 +441,15 @@ int ip6_forward(struct sk_buff *skb) if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb_dst(skb); @@ -505,17 +505,17 @@ int ip6_forward(struct sk_buff *skb) /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_FRAGFAILS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INTOOBIGERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), - IPSTATS_MIB_OUTDISCARDS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_OUTDISCARDS); goto drop; } @@ -525,14 +525,14 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); + __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, net, NULL, skb, skb->dev, dst->dev, ip6_forward_finish); error: - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); drop: kfree_skb(skb); return -EINVAL; @@ -1182,12 +1182,12 @@ static void ip6_append_data_mtu(unsigned int *mtu, } static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, - struct inet6_cork *v6_cork, - int hlimit, int tclass, struct ipv6_txoptions *opt, + struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, struct rt6_info *rt, struct flowi6 *fl6) { struct ipv6_pinfo *np = inet6_sk(sk); unsigned int mtu; + struct ipv6_txoptions *opt = ipc6->opt; /* * setup for corking @@ -1229,8 +1229,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, dst_hold(&rt->dst); cork->base.dst = &rt->dst; cork->fl.u.ip6 = *fl6; - v6_cork->hop_limit = hlimit; - v6_cork->tclass = tclass; + v6_cork->hop_limit = ipc6->hlimit; + v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(&rt->dst); @@ -1258,7 +1258,8 @@ static int __ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - unsigned int flags, int dontfrag) + unsigned int flags, struct ipcm6_cookie *ipc6, + const struct sockcm_cookie *sockc) { struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; @@ -1297,7 +1298,7 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - if (cork->length + length > mtu - headersize && dontfrag && + if (cork->length + length > mtu - headersize && ipc6->dontfrag && (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_RAW)) { ipv6_local_rxpmtu(sk, fl6, mtu - headersize + @@ -1329,7 +1330,7 @@ emsgsize: csummode = CHECKSUM_PARTIAL; if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { - sock_tx_timestamp(sk, &tx_flags); + sock_tx_timestamp(sk, sockc->tsflags, &tx_flags); if (tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = sk->sk_tskey++; @@ -1563,9 +1564,10 @@ error: int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, int hlimit, - int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag) + void *from, int length, int transhdrlen, + struct ipcm6_cookie *ipc6, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, + const struct sockcm_cookie *sockc) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -1578,12 +1580,12 @@ int ip6_append_data(struct sock *sk, /* * setup for corking */ - err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, - tclass, opt, rt, fl6); + err = ip6_setup_cork(sk, &inet->cork, &np->cork, + ipc6, rt, fl6); if (err) return err; - exthdrlen = (opt ? opt->opt_flen : 0); + exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; } else { @@ -1593,7 +1595,7 @@ int ip6_append_data(struct sock *sk, return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, &np->cork, sk_page_frag(sk), getfrag, - from, length, transhdrlen, flags, dontfrag); + from, length, transhdrlen, flags, ipc6, sockc); } EXPORT_SYMBOL_GPL(ip6_append_data); @@ -1749,15 +1751,14 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - int hlimit, int tclass, - struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, - int dontfrag) + const struct sockcm_cookie *sockc) { struct inet_cork_full cork; struct inet6_cork v6_cork; struct sk_buff_head queue; - int exthdrlen = (opt ? opt->opt_flen : 0); + int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); int err; if (flags & MSG_PROBE) @@ -1769,17 +1770,17 @@ struct sk_buff *ip6_make_skb(struct sock *sk, cork.base.addr = 0; cork.base.opt = NULL; v6_cork.opt = NULL; - err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); + err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); if (err) return ERR_PTR(err); - if (dontfrag < 0) - dontfrag = inet6_sk(sk)->dontfrag; + if (ipc6->dontfrag < 0) + ipc6->dontfrag = inet6_sk(sk)->dontfrag; err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, - flags, dontfrag); + flags, ipc6, sockc); if (err) { __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); return ERR_PTR(err); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 1f20345cbc97..e79330f214bd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -238,6 +238,7 @@ static void ip6_dev_free(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); free_netdev(dev); @@ -753,97 +754,157 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t, } EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); -/** - * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally - * @skb: received socket buffer - * @protocol: ethernet protocol ID - * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN - * - * Return: 0 - **/ - -static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, - __u8 ipproto, - int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, - const struct ipv6hdr *ipv6h, - struct sk_buff *skb)) +static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + struct metadata_dst *tun_dst, + int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb), + bool log_ecn_err) { - struct ip6_tnl *t; + struct pcpu_sw_netstats *tstats; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u8 tproto; int err; - rcu_read_lock(); - t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); - if (t) { - struct pcpu_sw_netstats *tstats; + if ((!(tpi->flags & TUNNEL_CSUM) && + (tunnel->parms.i_flags & TUNNEL_CSUM)) || + ((tpi->flags & TUNNEL_CSUM) && + !(tunnel->parms.i_flags & TUNNEL_CSUM))) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } - tproto = ACCESS_ONCE(t->parms.proto); - if (tproto != ipproto && tproto != 0) { - rcu_read_unlock(); - goto discard; + if (tunnel->parms.i_flags & TUNNEL_SEQ) { + if (!(tpi->flags & TUNNEL_SEQ) || + (tunnel->i_seqno && + (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; } + tunnel->i_seqno = ntohl(tpi->seq) + 1; + } - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - rcu_read_unlock(); - goto discard; - } + skb->protocol = tpi->proto; - if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { - t->dev->stats.rx_dropped++; - rcu_read_unlock(); - goto discard; + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; } - skb->mac_header = skb->network_header; - skb_reset_network_header(skb); - skb->protocol = htons(protocol); - memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); - - __skb_tunnel_rx(skb, t->dev, t->net); - - err = dscp_ecn_decapsulate(t, ipv6h, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", - &ipv6h->saddr, - ipv6_get_dsfield(ipv6h)); - if (err > 1) { - ++t->dev->stats.rx_frame_errors; - ++t->dev->stats.rx_errors; - rcu_read_unlock(); - goto discard; - } + + ipv6h = ipv6_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } else { + skb->dev = tunnel->dev; + } + + skb_reset_network_header(skb); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + + __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + + err = dscp_ecn_decapsulate(tunnel, ipv6h, skb); + if (unlikely(err)) { + if (log_ecn_err) + net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n", + &ipv6h->saddr, + ipv6_get_dsfield(ipv6h)); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; } + } - tstats = this_cpu_ptr(t->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); - netif_rx(skb); + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); - rcu_read_unlock(); - return 0; + gro_cells_receive(&tunnel->gro_cells, skb); + return 0; + +drop: + kfree_skb(skb); + return 0; +} + +int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + struct metadata_dst *tun_dst, + bool log_ecn_err) +{ + return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate, + log_ecn_err); +} +EXPORT_SYMBOL(ip6_tnl_rcv); + +static const struct tnl_ptk_info tpi_v6 = { + /* no tunnel info required for ipxip6. */ + .proto = htons(ETH_P_IPV6), +}; + +static const struct tnl_ptk_info tpi_v4 = { + /* no tunnel info required for ipxip6. */ + .proto = htons(ETH_P_IP), +}; + +static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, + const struct tnl_ptk_info *tpi, + int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb)) +{ + struct ip6_tnl *t; + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + int ret = -1; + + rcu_read_lock(); + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + + if (t) { + u8 tproto = ACCESS_ONCE(t->parms.proto); + + if (tproto != ipproto && tproto != 0) + goto drop; + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) + goto drop; + if (iptunnel_pull_header(skb, 0, tpi->proto, false)) + goto drop; + ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate, + log_ecn_error); } + rcu_read_unlock(); - return 1; -discard: + return ret; + +drop: + rcu_read_unlock(); kfree_skb(skb); return 0; } static int ip4ip6_rcv(struct sk_buff *skb) { - return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP, - ip4ip6_dscp_ecn_decapsulate); + return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4, + ip4ip6_dscp_ecn_decapsulate); } static int ip6ip6_rcv(struct sk_buff *skb) { - return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6, - ip6ip6_dscp_ecn_decapsulate); + return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6, + ip6ip6_dscp_ecn_decapsulate); } struct ipv6_tel_txoption { @@ -918,13 +979,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); /** - * ip6_tnl_xmit2 - encapsulate packet and send + * ip6_tnl_xmit - encapsulate packet and send * @skb: the outgoing socket buffer * @dev: the outgoing tunnel device * @dsfield: dscp code for outer header - * @fl: flow of tunneled packet + * @fl6: flow of tunneled packet * @encap_limit: encapsulation limit * @pmtu: Path MTU is stored if packet is too big + * @proto: next header value * * Description: * Build new header and do some sanity checks on the packet before sending @@ -936,12 +998,9 @@ EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); * %-EMSGSIZE message too big. return mtu in this case. **/ -static int ip6_tnl_xmit2(struct sk_buff *skb, - struct net_device *dev, - __u8 dsfield, - struct flowi6 *fl6, - int encap_limit, - __u32 *pmtu) +int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, + struct flowi6 *fl6, int encap_limit, __u32 *pmtu, + __u8 proto) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = t->net; @@ -952,7 +1011,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, struct net_device *tdev; int mtu; unsigned int max_headroom = sizeof(struct ipv6hdr); - u8 proto; int err = -1; /* NBMA tunnel */ @@ -1014,12 +1072,23 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, mtu = IPV6_MIN_MTU; if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; } + if (t->err_count > 0) { + if (time_before(jiffies, + t->err_time + IP6TUNNEL_ERR_TIMEO)) { + t->err_count--; + + dst_link_failure(skb); + } else { + t->err_count = 0; + } + } + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); /* @@ -1045,9 +1114,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); skb_dst_set(skb, dst); - skb->transport_header = skb->network_header; - - proto = fl6->flowi6_proto; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); @@ -1058,6 +1124,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb->encapsulation = 1; } + max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + + dst->header_len; + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1076,6 +1147,7 @@ tx_err_dst_release: dst_release(dst); return err; } +EXPORT_SYMBOL(ip6_tnl_xmit); static inline int ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) @@ -1099,7 +1171,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); @@ -1109,7 +1180,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; - err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + IPPROTO_IPIP); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) @@ -1153,7 +1225,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) @@ -1163,7 +1234,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; - err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + IPPROTO_IPV6); if (err != 0) { if (err == -EMSGSIZE) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -1174,7 +1246,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } static netdev_tx_t -ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; @@ -1370,6 +1442,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { @@ -1464,8 +1538,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * %-EINVAL if mtu too small **/ -static int -ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) +int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); @@ -1481,6 +1554,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL(ip6_tnl_change_mtu); int ip6_tnl_get_iflink(const struct net_device *dev) { @@ -1493,7 +1567,7 @@ EXPORT_SYMBOL(ip6_tnl_get_iflink); static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, - .ndo_start_xmit = ip6_tnl_xmit, + .ndo_start_xmit = ip6_tnl_start_xmit, .ndo_do_ioctl = ip6_tnl_ioctl, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats = ip6_get_stats, @@ -1549,13 +1623,25 @@ ip6_tnl_dev_init_gen(struct net_device *dev) return -ENOMEM; ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); - if (ret) { - free_percpu(dev->tstats); - dev->tstats = NULL; - return ret; - } + if (ret) + goto free_stats; + + ret = gro_cells_init(&t->gro_cells, dev); + if (ret) + goto destroy_dst; + + t->hlen = 0; + t->tun_hlen = 0; return 0; + +destroy_dst: + dst_cache_destroy(&t->dst_cache); +free_stats: + free_percpu(dev->tstats); + dev->tstats = NULL; + + return ret; } /** diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index a10e77103c88..f2e2013f8346 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1984,10 +1984,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTFORWDATAGRAMS); - IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTOCTETS, skb->len); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTFORWDATAGRAMS); + __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(net, sk, skb); } @@ -2268,7 +2268,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0) return -EMSGSIZE; rtm->rtm_type = RTN_MULTICAST; @@ -2411,7 +2411,7 @@ static int mr6_msgsize(bool unresolved, int maxvif) + nla_total_size(0) /* RTA_MULTIPATH */ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ - + nla_total_size(sizeof(struct rta_mfc_stats)) + + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; return len; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4449ad1f8114..a9895e15ee9c 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -407,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) break; - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, + lockdep_sock_is_held(sk)); opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); @@ -471,7 +472,8 @@ sticky_done: struct ipv6_txoptions *opt = NULL; struct msghdr msg; struct flowi6 fl6; - int junk; + struct sockcm_cookie sockc_junk; + struct ipcm6_cookie ipc6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = sk->sk_bound_dev_if; @@ -501,9 +503,9 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void *)(opt+1); + ipc6.opt = opt; - retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, - &junk, &junk); + retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6, &sockc_junk); if (retv) goto done; update: @@ -1123,7 +1125,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct ipv6_txoptions *opt; lock_sock(sk); - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, + lockdep_sock_is_held(sk)); len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 86b67b70b626..63e06c3dd319 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -39,34 +39,12 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv6 packet filter"); -/*#define DEBUG_IP_FIREWALL*/ -/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ -/*#define DEBUG_IP_FIREWALL_USER*/ - -#ifdef DEBUG_IP_FIREWALL -#define dprintf(format, args...) pr_info(format , ## args) -#else -#define dprintf(format, args...) -#endif - -#ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) pr_info(format , ## args) -#else -#define duprintf(format, args...) -#endif - #ifdef CONFIG_NETFILTER_DEBUG #define IP_NF_ASSERT(x) WARN_ON(!(x)) #else #define IP_NF_ASSERT(x) #endif -#if 0 -/* All the better to debug you with... */ -#define static -#define inline -#endif - void *ip6t_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(ip6t, IP6T); @@ -100,35 +78,18 @@ ip6_packet_match(const struct sk_buff *skb, if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, &ip6info->src), IP6T_INV_SRCIP) || FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, - &ip6info->dst), IP6T_INV_DSTIP)) { - dprintf("Source or dest mismatch.\n"); -/* - dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, - ipinfo->smsk.s_addr, ipinfo->src.s_addr, - ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : ""); - dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr, - ipinfo->dmsk.s_addr, ipinfo->dst.s_addr, - ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/ + &ip6info->dst), IP6T_INV_DSTIP)) return false; - } ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { - dprintf("VIA in mismatch (%s vs %s).%s\n", - indev, ip6info->iniface, - ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : ""); + if (FWINV(ret != 0, IP6T_INV_VIA_IN)) return false; - } ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { - dprintf("VIA out mismatch (%s vs %s).%s\n", - outdev, ip6info->outiface, - ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : ""); + if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) return false; - } /* ... might want to do something with class and flowlabel here ... */ @@ -145,11 +106,6 @@ ip6_packet_match(const struct sk_buff *skb, } *fragoff = _frag_off; - dprintf("Packet protocol %hi ?= %s%hi.\n", - protohdr, - ip6info->invflags & IP6T_INV_PROTO ? "!":"", - ip6info->proto); - if (ip6info->proto == protohdr) { if (ip6info->invflags & IP6T_INV_PROTO) return false; @@ -169,16 +125,11 @@ ip6_packet_match(const struct sk_buff *skb, static bool ip6_checkentry(const struct ip6t_ip6 *ipv6) { - if (ipv6->flags & ~IP6T_F_MASK) { - duprintf("Unknown flag bits set: %08X\n", - ipv6->flags & ~IP6T_F_MASK); + if (ipv6->flags & ~IP6T_F_MASK) return false; - } - if (ipv6->invflags & ~IP6T_INV_MASK) { - duprintf("Unknown invflag bits set: %08X\n", - ipv6->invflags & ~IP6T_INV_MASK); + if (ipv6->invflags & ~IP6T_INV_MASK) return false; - } + return true; } @@ -446,13 +397,21 @@ ip6t_do_table(struct sk_buff *skb, xt_write_recseq_end(addend); local_bh_enable(); -#ifdef DEBUG_ALLOW_ALL - return NF_ACCEPT; -#else if (acpar.hotdrop) return NF_DROP; else return verdict; -#endif +} + +static bool find_jump_target(const struct xt_table_info *t, + const struct ip6t_entry *target) +{ + struct ip6t_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; } /* Figures out from what hook each rule can be called: returns 0 if @@ -480,11 +439,9 @@ mark_source_chains(const struct xt_table_info *newinfo, = (void *)ip6t_get_target_c(e); int visited = e->comefrom & (1 << hook); - if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { - pr_err("iptables: loop hook %u pos %u %08X.\n", - hook, pos, e->comefrom); + if (e->comefrom & (1 << NF_INET_NUMHOOKS)) return 0; - } + e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ @@ -496,26 +453,13 @@ mark_source_chains(const struct xt_table_info *newinfo, if ((strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < -NF_MAX_VERDICT - 1) { - duprintf("mark_source_chains: bad " - "negative verdict (%i)\n", - t->verdict); + t->verdict < -NF_MAX_VERDICT - 1) return 0; - } /* Return: backtrack through the last big jump. */ do { e->comefrom ^= (1<<NF_INET_NUMHOOKS); -#ifdef DEBUG_IP_FIREWALL_USER - if (e->comefrom - & (1 << NF_INET_NUMHOOKS)) { - duprintf("Back unset " - "on hook %u " - "rule %u\n", - hook, pos); - } -#endif oldpos = pos; pos = e->counters.pcnt; e->counters.pcnt = 0; @@ -532,6 +476,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ip6t_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -540,19 +486,16 @@ mark_source_chains(const struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { - if (newpos > newinfo->size - - sizeof(struct ip6t_entry)) { - duprintf("mark_source_chains: " - "bad verdict (%i)\n", - newpos); - return 0; - } /* This a jump; chase it. */ - duprintf("Jump rule %u -> %u\n", - pos, newpos); + e = (struct ip6t_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ip6t_entry *) (entry0 + newpos); @@ -560,8 +503,7 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = newpos; } } -next: - duprintf("Finished chain %u\n", hook); +next: ; } return 1; } @@ -579,41 +521,15 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) module_put(par.match->me); } -static int -check_entry(const struct ip6t_entry *e) -{ - const struct xt_entry_target *t; - - if (!ip6_checkentry(&e->ipv6)) - return -EINVAL; - - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ip6t_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; - int ret; par->match = m->u.kernel.match; par->matchinfo = m->data; - ret = xt_check_match(par, m->u.match_size - sizeof(*m), - ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); - if (ret < 0) { - duprintf("ip_tables: check failed for `%s'.\n", - par.match->name); - return ret; - } - return 0; + return xt_check_match(par, m->u.match_size - sizeof(*m), + ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); } static int @@ -624,10 +540,9 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, m->u.user.revision); - if (IS_ERR(match)) { - duprintf("find_check_match: `%s' not found\n", m->u.user.name); + if (IS_ERR(match)) return PTR_ERR(match); - } + m->u.kernel.match = match; ret = check_match(m, par); @@ -652,17 +567,11 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name) .hook_mask = e->comefrom, .family = NFPROTO_IPV6, }; - int ret; t = ip6t_get_target(e); - ret = xt_check_target(&par, t->u.target_size - sizeof(*t), - e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO); - if (ret < 0) { - duprintf("ip_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - return ret; - } - return 0; + return xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ipv6.proto, + e->ipv6.invflags & IP6T_INV_PROTO); } static int @@ -675,10 +584,12 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; + unsigned long pcnt; - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) + pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(pcnt)) return -ENOMEM; + e->counters.pcnt = pcnt; j = 0; mtpar.net = net; @@ -697,7 +608,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = PTR_ERR(target); goto cleanup_matches; } @@ -750,19 +660,18 @@ check_entry_size_and_hooks(struct ip6t_entry *e, if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p\n", e); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset - < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) return -EINVAL; - } - err = check_entry(e); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -773,12 +682,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e, if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { - if (!check_underflow(e)) { - pr_debug("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + if (!check_underflow(e)) return -EINVAL; - } + newinfo->underflow[h] = underflows[h]; } } @@ -830,7 +736,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } - duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { @@ -847,27 +752,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, ++newinfo->stacksize; } - if (i != repl->num_entries) { - duprintf("translate_table: %u not %u entries\n", - i, repl->num_entries); + if (i != repl->num_entries) return -EINVAL; - } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ if (!(repl->valid_hooks & (1 << i))) continue; - if (newinfo->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, repl->hook_entry[i]); + if (newinfo->hook_entry[i] == 0xFFFFFFFF) return -EINVAL; - } - if (newinfo->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, repl->underflow[i]); + if (newinfo->underflow[i] == 0xFFFFFFFF) return -EINVAL; - } } if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) @@ -1095,11 +991,8 @@ static int get_info(struct net *net, void __user *user, struct xt_table *t; int ret; - if (*len != sizeof(struct ip6t_getinfo)) { - duprintf("length %u != %zu\n", *len, - sizeof(struct ip6t_getinfo)); + if (*len != sizeof(struct ip6t_getinfo)) return -EINVAL; - } if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; @@ -1157,31 +1050,24 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, struct ip6t_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("get_entries: %u < %zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct ip6t_get_entries) + get.size) { - duprintf("get_entries: %u != %zu\n", - *len, sizeof(get) + get.size); + if (*len != sizeof(struct ip6t_get_entries) + get.size) return -EINVAL; - } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR_OR_NULL(t)) { struct xt_table_info *private = t->private; - duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); - else { - duprintf("get_entries: I've got %u not %u!\n", - private->size, get.size); + else ret = -EAGAIN; - } + module_put(t->me); xt_table_unlock(t); } else @@ -1217,8 +1103,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* You lied! */ if (valid_hooks != t->valid_hooks) { - duprintf("Valid hook crap: %08X vs %08X\n", - valid_hooks, t->valid_hooks); ret = -EINVAL; goto put_module; } @@ -1228,8 +1112,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, goto put_module; /* Update module usage count based on number of rules */ - duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", - oldinfo->number, oldinfo->initial_entries, newinfo->number); if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); @@ -1298,8 +1180,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (ret != 0) goto free_newinfo; - duprintf("ip_tables: Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) @@ -1321,55 +1201,16 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ip6t_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, AF_INET6, name); + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); + t = xt_find_table_lock(net, AF_INET6, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1377,7 +1218,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1456,7 +1297,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ip6t_ip6 *ipv6, int *size) { @@ -1464,11 +1304,9 @@ compat_find_calc_match(struct xt_entry_match *m, match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, m->u.user.revision); - if (IS_ERR(match)) { - duprintf("compat_check_calc_match: `%s' not found\n", - m->u.user.name); + if (IS_ERR(match)) return PTR_ERR(match); - } + m->u.kernel.match = match; *size += xt_compat_match_offset(match); return 0; @@ -1491,35 +1329,29 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; - duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p, limit = %p\n", e, limit); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset < sizeof(struct compat_ip6t_entry) + - sizeof(struct compat_xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + sizeof(struct compat_xt_entry_target)) return -EINVAL; - } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, + e->target_offset, e->next_offset); if (ret) return ret; @@ -1527,7 +1359,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, &e->ipv6, &off); + ret = compat_find_calc_match(ematch, &e->ipv6, &off); if (ret != 0) goto release_matches; ++j; @@ -1537,8 +1369,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", - t->u.user.name); ret = PTR_ERR(target); goto release_matches; } @@ -1550,17 +1380,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1574,18 +1393,17 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct ip6t_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ip6t_entry *)*dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); @@ -1594,11 +1412,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); xt_compat_target_from_user(t, dstptr, size); @@ -1610,183 +1426,79 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; -} - -static int compat_check_entry(struct ip6t_entry *e, struct net *net, - const char *name) -{ - unsigned int j; - int ret = 0; - struct xt_mtchk_param mtpar; - struct xt_entry_match *ematch; - - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) - return -ENOMEM; - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ipv6; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV6; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - - xt_percpu_counter_free(e->counters.pcnt); - - return ret; } static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ip6t_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ip6t_entry *iter0; - struct ip6t_entry *iter1; + struct ip6t_replace repl; unsigned int size; int ret = 0; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); - xt_compat_init_offsets(AF_INET6, number); + xt_compat_init_offsets(AF_INET6, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { - duprintf("translate_compat_table: %u not %u entries\n", - j, number); + if (j != compatr->num_entries) goto out_unlock; - } - - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone. */ xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); - if (ret != 0) - break; - ++i; - if (strcmp(ip6t_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1794,17 +1506,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET6); + xt_compat_unlock(AF_INET6); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET6); - xt_compat_unlock(AF_INET6); - goto out; } static int @@ -1820,8 +1531,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -EFAULT; /* overflow check */ - if (tmp.size >= INT_MAX / num_possible_cpus()) - return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) @@ -1840,15 +1549,10 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; - duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) @@ -1882,7 +1586,6 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, break; default: - duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1932,19 +1635,15 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, struct compat_ip6t_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) { - duprintf("compat_get_entries: %u != %zu\n", - *len, sizeof(get) + get.size); + if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) return -EINVAL; - } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET6); @@ -1952,16 +1651,13 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; - duprintf("t->private->number = %u\n", private->number); ret = compat_table_info(private, &info); - if (!ret && get.size == info.size) { + if (!ret && get.size == info.size) ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); - } else if (!ret) { - duprintf("compat_get_entries: I've got %u not %u!\n", - private->size, get.size); + else if (!ret) ret = -EAGAIN; - } + xt_compat_flush_offsets(AF_INET6); module_put(t->me); xt_table_unlock(t); @@ -2014,7 +1710,6 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) break; default: - duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -2066,7 +1761,6 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } default: - duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -2168,7 +1862,6 @@ icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ - duprintf("Dropping evil ICMP tinygram.\n"); par->hotdrop = true; return false; } diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 3deed5860a42..06bed74cf5ee 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -20,15 +20,16 @@ #include <net/netfilter/nf_conntrack_synproxy.h> static struct ipv6hdr * -synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr, - const struct in6_addr *daddr) +synproxy_build_ip(struct net *net, struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr) { struct ipv6hdr *iph; skb_reset_network_header(skb); iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph)); ip6_flow_hdr(iph, 0, 0); - iph->hop_limit = 64; //XXX + iph->hop_limit = net->ipv6.devconf_all->hop_limit; iph->nexthdr = IPPROTO_TCP; iph->saddr = *saddr; iph->daddr = *daddr; @@ -37,13 +38,12 @@ synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr, } static void -synproxy_send_tcp(const struct synproxy_net *snet, +synproxy_send_tcp(struct net *net, const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct ipv6hdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { - struct net *net = nf_ct_net(snet->tmpl); struct dst_entry *dst; struct flowi6 fl6; @@ -60,7 +60,7 @@ synproxy_send_tcp(const struct synproxy_net *snet, fl6.fl6_dport = nth->dest; security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6)); dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { + if (dst->error) { dst_release(dst); goto free_nskb; } @@ -84,7 +84,7 @@ free_nskb: } static void -synproxy_send_client_synack(const struct synproxy_net *snet, +synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { @@ -103,7 +103,7 @@ synproxy_send_client_synack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr); + niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -121,15 +121,16 @@ synproxy_send_client_synack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void -synproxy_send_server_syn(const struct synproxy_net *snet, +synproxy_send_server_syn(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts, u32 recv_seq) { + struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *nskb; struct ipv6hdr *iph, *niph; struct tcphdr *nth; @@ -144,7 +145,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr); + niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -165,12 +166,12 @@ synproxy_send_server_syn(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } static void -synproxy_send_server_ack(const struct synproxy_net *snet, +synproxy_send_server_ack(struct net *net, const struct ip_ct_tcp *state, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) @@ -189,7 +190,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr); + niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -205,11 +206,11 @@ synproxy_send_server_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void -synproxy_send_client_ack(const struct synproxy_net *snet, +synproxy_send_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { @@ -227,7 +228,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr); + niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -243,15 +244,16 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool -synproxy_recv_client_ack(const struct synproxy_net *snet, +synproxy_recv_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq) { + struct synproxy_net *snet = synproxy_pernet(net); int mss; mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1); @@ -267,7 +269,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); - synproxy_send_server_syn(snet, skb, th, opts, recv_seq); + synproxy_send_server_syn(net, skb, th, opts, recv_seq); return true; } @@ -275,7 +277,8 @@ static unsigned int synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(par->net); + struct net *net = par->net; + struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; @@ -304,12 +307,12 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); - synproxy_send_client_synack(snet, skb, th, &opts); + synproxy_send_client_synack(net, skb, th, &opts); return NF_DROP; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ - synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); + synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq)); return NF_DROP; } @@ -320,7 +323,8 @@ static unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(nhs->net); + struct net *net = nhs->net; + struct synproxy_net *snet = synproxy_pernet(net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; @@ -384,7 +388,7 @@ static unsigned int ipv6_synproxy_hook(void *priv, * therefore we need to add 1 to make the SYN sequence * number match the one of first SYN. */ - if (synproxy_recv_client_ack(snet, skb, th, &opts, + if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq) + 1)) this_cpu_inc(snet->stats->cookie_retrans); @@ -410,12 +414,12 @@ static unsigned int ipv6_synproxy_hook(void *priv, XT_SYNPROXY_OPT_SACK_PERM); swap(opts.tsval, opts.tsecr); - synproxy_send_server_ack(snet, state, skb, th, &opts); + synproxy_send_server_ack(net, state, skb, th, &opts); nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); swap(opts.tsval, opts.tsecr); - synproxy_send_client_ack(snet, skb, th, &opts); + synproxy_send_client_ack(net, skb, th, &opts); consume_skb(skb); return NF_STOLEN; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 4709f657b7b6..a5400223fd74 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -158,7 +158,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.fl6_dport = otcph->source; security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { + if (dst->error) { dst_release(dst); return; } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index c382db7a2e73..3ee3e444a66b 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -58,10 +58,11 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int iif = 0; struct flowi6 fl6; int err; - int hlimit; struct dst_entry *dst; struct rt6_info *rt; struct pingfakehdr pfh; + struct sockcm_cookie junk = {0}; + struct ipcm6_cookie ipc6; pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); @@ -138,13 +139,15 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) pfh.wcheck = 0; pfh.family = AF_INET6; - hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + ipc6.tclass = np->tclass; + ipc6.dontfrag = np->dontfrag; + ipc6.opt = NULL; lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, - 0, hlimit, - np->tclass, NULL, &fl6, rt, - MSG_DONTWAIT, np->dontfrag); + 0, &ipc6, &fl6, rt, + MSG_DONTWAIT, &junk); if (err) { ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fa59dd7a427e..896350df6423 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -745,10 +745,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct dst_entry *dst = NULL; struct raw6_frag_vec rfv; struct flowi6 fl6; + struct sockcm_cookie sockc; + struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; - int hlimit = -1; - int tclass = -1; - int dontfrag = -1; u16 proto; int err; @@ -769,6 +768,11 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_mark = sk->sk_mark; + ipc6.hlimit = -1; + ipc6.tclass = -1; + ipc6.dontfrag = -1; + ipc6.opt = NULL; + if (sin6) { if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -821,13 +825,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (fl6.flowi6_oif == 0) fl6.flowi6_oif = sk->sk_bound_dev_if; + sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); + ipc6.opt = opt; - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -843,7 +848,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!opt) { opt = txopt_get(np); opt_to_free = opt; - } + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -878,14 +883,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) err = PTR_ERR(dst); goto out; } - if (hlimit < 0) - hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + if (ipc6.hlimit < 0) + ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (tclass < 0) - tclass = np->tclass; + if (ipc6.tclass < 0) + ipc6.tclass = np->tclass; - if (dontfrag < 0) - dontfrag = np->dontfrag; + if (ipc6.dontfrag < 0) + ipc6.dontfrag = np->dontfrag; if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -894,10 +899,11 @@ back_from_confirm: if (inet->hdrincl) err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags); else { + ipc6.opt = opt; lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, - len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + len, 0, &ipc6, &fl6, (struct rt6_info *)dst, + msg->msg_flags, &sockc); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e2ea31175ef9..2160d5d009cb 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -145,12 +145,12 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, if (!dev) goto out_rcu_unlock; - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); /* Don't send error if the first segment did not arrive. */ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) @@ -223,8 +223,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - skb_network_header(skb))); @@ -258,8 +258,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); return -1; @@ -361,8 +361,8 @@ found: discard_fq: inet_frag_kill(&fq->q, &ip6_frags); err: - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_REASMFAILS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; } @@ -500,7 +500,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_network_header_len(head)); rcu_read_lock(); - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; fq->q.fragments_tail = NULL; @@ -513,7 +513,7 @@ out_oom: net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); out_fail: rcu_read_lock(); - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); return -1; } @@ -528,7 +528,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ if (hdr->payload_len == 0) @@ -544,8 +544,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (!(fhdr->frag_off & htons(0xFFF9))) { /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); - IP6_INC_STATS_BH(net, - ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); + __IP6_INC_STATS(net, + ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; @@ -566,13 +566,13 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return ret; } - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; fail_hdr: - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); return -1; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6f32944e0223..969913da494f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1190,7 +1190,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct dst_entry *dst; bool any_src; - dst = l3mdev_rt6_dst_by_oif(net, fl6); + dst = l3mdev_get_rt6_dst(net, fl6); if (dst) return dst; @@ -1771,6 +1771,37 @@ static int ip6_convert_metrics(struct mx6_config *mxc, return -EINVAL; } +static struct rt6_info *ip6_nh_lookup_table(struct net *net, + struct fib6_config *cfg, + const struct in6_addr *gw_addr) +{ + struct flowi6 fl6 = { + .flowi6_oif = cfg->fc_ifindex, + .daddr = *gw_addr, + .saddr = cfg->fc_prefsrc, + }; + struct fib6_table *table; + struct rt6_info *rt; + int flags = 0; + + table = fib6_get_table(net, cfg->fc_table); + if (!table) + return NULL; + + if (!ipv6_addr_any(&cfg->fc_prefsrc)) + flags |= RT6_LOOKUP_F_HAS_SADDR; + + rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags); + + /* if table lookup failed, fall back to full lookup */ + if (rt == net->ipv6.ip6_null_entry) { + ip6_rt_put(rt); + rt = NULL; + } + + return rt; +} + static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) { struct net *net = cfg->fc_nlinfo.nl_net; @@ -1942,7 +1973,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) rt->rt6i_gateway = *gw_addr; if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { - struct rt6_info *grt; + struct rt6_info *grt = NULL; /* IPv6 strictly inhibits using not link-local addresses as nexthop address. @@ -1954,7 +1985,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) if (!(gwa_type & IPV6_ADDR_UNICAST)) goto out; - grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); + if (cfg->fc_table) + grt = ip6_nh_lookup_table(net, cfg, gw_addr); + + if (!grt) + grt = rt6_lookup(net, gw_addr, NULL, + cfg->fc_ifindex, 1); err = -EHOSTUNREACH; if (!grt) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 83384308d032..a13d8c114ccb 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -913,10 +913,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT); - if (IS_ERR(skb)) { + if (iptunnel_handle_offloads(skb, SKB_GSO_SIT)) { ip_rt_put(rt); - goto out; + goto tx_error; } if (df) { @@ -992,7 +991,6 @@ tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); -out: dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -1002,15 +1000,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP); - if (IS_ERR(skb)) - goto out; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); return NETDEV_TX_OK; -out: +tx_error: + kfree_skb(skb); dev->stats.tx_errors++; return NETDEV_TX_OK; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index aab91fa86c5e..59c483937aec 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -155,11 +155,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); if (mss == 0) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f443c6b0ce16..79e33e02f11a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -234,7 +234,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -336,8 +336,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->dev->ifindex); if (!sk) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -352,13 +352,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { - NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto out; } @@ -368,7 +368,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -439,7 +439,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req) + enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -452,7 +452,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, IPPROTO_TCP)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, foc, attach_req); + skb = tcp_make_synack(sk, dst, req, foc, synack_type); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, @@ -649,12 +649,12 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk, return false; if (hash_expected && !hash_location) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return true; } if (!hash_expected && hash_location) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return true; } @@ -830,9 +830,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) - TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); + TCP_INC_STATS(net, TCP_MIB_OUTRSTS); return; } @@ -863,6 +863,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) return; #ifdef CONFIG_TCP_MD5SIG + rcu_read_lock(); hash_location = tcp_parse_md5sig_option(th); if (sk && sk_fullsock(sk)) { key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr); @@ -880,16 +881,15 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) th->source, &ipv6h->daddr, ntohs(th->source), tcp_v6_iif(skb)); if (!sk1) - return; + goto out; - rcu_read_lock(); key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr); if (!key) - goto release_sk1; + goto out; genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) - goto release_sk1; + goto out; } #endif @@ -903,11 +903,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG -release_sk1: - if (sk1) { - rcu_read_unlock(); - sock_put(sk1); - } +out: + rcu_read_unlock(); #endif } @@ -972,7 +969,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) &tcp_request_sock_ipv6_ops, sk, skb); drop: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + tcp_listendrop(sk); return 0; /* don't send reset */ } @@ -1173,11 +1170,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * return newsk; out_overflow: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: dst_release(dst); out: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + tcp_listendrop(sk); return NULL; } @@ -1284,8 +1281,8 @@ discard: kfree_skb(skb); return 0; csum_err: - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; @@ -1356,6 +1353,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; const struct ipv6hdr *hdr; + bool refcounted; struct sock *sk; int ret; struct net *net = dev_net(skb->dev); @@ -1366,14 +1364,14 @@ static int tcp_v6_rcv(struct sk_buff *skb) /* * Count it even if it's bad. */ - TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); + __TCP_INC_STATS(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; - th = tcp_hdr(skb); + th = (const struct tcphdr *)skb->data; - if (th->doff < sizeof(struct tcphdr)/4) + if (unlikely(th->doff < sizeof(struct tcphdr)/4)) goto bad_packet; if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; @@ -1381,12 +1379,13 @@ static int tcp_v6_rcv(struct sk_buff *skb) if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) goto csum_error; - th = tcp_hdr(skb); + th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); lookup: sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), - th->source, th->dest, inet6_iif(skb)); + th->source, th->dest, inet6_iif(skb), + &refcounted); if (!sk) goto no_tcp_socket; @@ -1409,6 +1408,7 @@ process: goto lookup; } sock_hold(sk); + refcounted = true; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1426,7 +1426,7 @@ process: } } if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { - NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; } @@ -1459,13 +1459,14 @@ process: } else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf + sk->sk_sndbuf))) { bh_unlock_sock(sk); - NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); + __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); put_and_return: - sock_put(sk); + if (refcounted) + sock_put(sk); return ret ? -1 : 0; no_tcp_socket: @@ -1476,9 +1477,9 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: - TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); + __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: - TCP_INC_STATS_BH(net, TCP_MIB_INERRS); + __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { tcp_v6_send_reset(NULL, skb); } @@ -1488,7 +1489,9 @@ discard_it: return 0; discard_and_relse: - sock_put(sk); + sk_drops_add(sk, skb); + if (refcounted) + sock_put(sk); goto discard_it; do_time_wait: @@ -1519,6 +1522,7 @@ do_time_wait: inet_twsk_deschedule_put(tw); sk = sk2; tcp_v6_restore_cb(skb); + refcounted = false; goto process; } /* Fall through to ACK */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6bc5c664fa46..2ba6a77a8815 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -213,37 +213,28 @@ static struct sock *udp6_lib_lookup2(struct net *net, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; -begin: result = NULL; badness = -1; - udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + result = sk; + badness = score; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -251,27 +242,10 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot2) - goto begin; - - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score2(result, net, saddr, sport, - daddr, hnum, dif) < badness)) { - sock_put(result); - goto begin; - } - } return result; } +/* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, @@ -279,15 +253,12 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; - rcu_read_lock(); if (hslot->count > 10) { hash2 = udp6_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; @@ -309,34 +280,26 @@ struct sock *__udp6_lib_lookup(struct net *net, &in6addr_any, hnum, dif, hslot2, slot2, skb); } - rcu_read_unlock(); return result; } begin: result = NULL; badness = -1; - sk_nulls_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + result = sk; + badness = score; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -344,25 +307,6 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto begin; - - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score(result, net, hnum, saddr, sport, - daddr, dport, dif) < badness)) { - sock_put(result); - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(__udp6_lib_lookup); @@ -371,23 +315,46 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { - struct sock *sk; const struct ipv6hdr *iph = ipv6_hdr(skb); + struct sock *sk; sk = skb_steal_sock(skb); if (unlikely(sk)) return sk; - return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, + return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), udptable, skb); } +struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + + return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, + &iph->daddr, dport, inet6_iif(skb), + &udp_table, skb); +} +EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); + +/* Must be called under rcu_read_lock(). + * Does increment socket refcount. + */ +#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ + IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { - return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); + struct sock *sk; + + sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, + dif, &udp_table, NULL); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + return sk; } EXPORT_SYMBOL_GPL(udp6_lib_lookup); +#endif /* * This should be easy, if there is something there we @@ -401,7 +368,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; - int peeked, off = 0; + int peeked, peeking, off; int err; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; @@ -415,15 +382,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: + peeking = off = sk_peek_offset(sk, flags); skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, &off, &err); if (!skb) - goto out; + return err; - ulen = skb->len - sizeof(struct udphdr); + ulen = skb->len; copied = len; - if (copied > ulen) - copied = ulen; + if (copied > ulen - off) + copied = ulen - off; else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; @@ -435,17 +403,16 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) { checksum_valid = !udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } if (checksum_valid || skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), - msg, copied); + err = skb_copy_datagram_msg(skb, off, msg, copied); else { - err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg); + err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; } @@ -454,23 +421,22 @@ try_again: if (!peeked) { atomic_inc(&sk->sk_drops); if (is_udp4) - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_INERRORS, - is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, + is_udplite); else - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INERRORS, - is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, + is_udplite); } - goto out_free; + skb_free_datagram_locked(sk, skb); + return err; } if (!peeked) { if (is_udp4) - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, + is_udplite); else - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, + is_udplite); } sock_recv_ts_and_drops(msg, sk, skb); @@ -510,24 +476,22 @@ try_again: if (flags & MSG_TRUNC) err = ulen; -out_free: - skb_free_datagram_locked(sk, skb); -out: + __skb_free_datagram_locked(sk, skb, peeking ? -err : err); return err; csum_copy_err: slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) { if (is_udp4) { - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_CSUMERRORS, is_udplite); - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_CSUMERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); } else { - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_CSUMERRORS, is_udplite); - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_CSUMERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); } } unlock_sock_fast(sk, slow); @@ -555,8 +519,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), udptable, skb); if (!sk) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -585,7 +549,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk->sk_err = err; sk->sk_error_report(sk); out: - sock_put(sk); + return; } static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -598,15 +562,15 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_incoming_cpu_update(sk); } - rc = sock_queue_rcv_skb(sk, skb); + rc = __sock_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -662,9 +626,9 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); + __UDP_INC_STATS(sock_net(sk), + UDP_MIB_INDATAGRAMS, + is_udplite); return -ret; } } @@ -692,11 +656,14 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (rcu_access_pointer(sk->sk_filter)) { if (udp_lib_checksum_complete(skb)) goto csum_error; + if (sk_filter(sk, skb)) + goto drop; } + udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); + __UDP6_INC_STATS(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); goto drop; } @@ -715,9 +682,9 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return rc; csum_error: - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); + __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); kfree_skb(skb); return -1; @@ -747,33 +714,6 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, return true; } -static void flush_stack(struct sock **stack, unsigned int count, - struct sk_buff *skb, unsigned int final) -{ - struct sk_buff *skb1 = NULL; - struct sock *sk; - unsigned int i; - - for (i = 0; i < count; i++) { - sk = stack[i]; - if (likely(!skb1)) - skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); - if (!skb1) { - atomic_inc(&sk->sk_drops); - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); - } - - if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0) - skb1 = NULL; - sock_put(sk); - } - if (unlikely(skb1)) - kfree_skb(skb1); -} - static void udp6_csum_zero_error(struct sk_buff *skb) { /* RFC 2460 section 8.1 says that we SHOULD log @@ -792,15 +732,15 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, struct udp_table *udptable, int proto) { - struct sock *sk, *stack[256 / sizeof(struct sock *)]; + struct sock *sk, *first = NULL; const struct udphdr *uh = udp_hdr(skb); - struct hlist_nulls_node *node; unsigned short hnum = ntohs(uh->dest); struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); - int dif = inet6_iif(skb); - unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); + unsigned int offset = offsetof(typeof(*sk), sk_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); - bool inner_flushed = false; + int dif = inet6_iif(skb); + struct hlist_node *node; + struct sk_buff *nskb; if (use_hash2) { hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & @@ -811,27 +751,32 @@ start_lookup: offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } - spin_lock(&hslot->lock); - sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { - if (__udp_v6_is_mcast_sock(net, sk, - uh->dest, daddr, - uh->source, saddr, - dif, hnum) && - /* If zero checksum and no_check is not on for - * the socket then skip it. - */ - (uh->check || udp_sk(sk)->no_check6_rx)) { - if (unlikely(count == ARRAY_SIZE(stack))) { - flush_stack(stack, count, skb, ~0); - inner_flushed = true; - count = 0; - } - stack[count++] = sk; - sock_hold(sk); + sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { + if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr, + uh->source, saddr, dif, hnum)) + continue; + /* If zero checksum and no_check is not on for + * the socket then skip it. + */ + if (!uh->check && !udp_sk(sk)->no_check6_rx) + continue; + if (!first) { + first = sk; + continue; + } + nskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!nskb)) { + atomic_inc(&sk->sk_drops); + __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + __UDP6_INC_STATS(net, UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + continue; } - } - spin_unlock(&hslot->lock); + if (udpv6_queue_rcv_skb(sk, nskb) > 0) + consume_skb(nskb); + } /* Also lookup *:port if we are using hash2 and haven't done so yet. */ if (use_hash2 && hash2 != hash2_any) { @@ -839,13 +784,13 @@ start_lookup: goto start_lookup; } - if (count) { - flush_stack(stack, count, skb, count - 1); + if (first) { + if (udpv6_queue_rcv_skb(first, skb) > 0) + consume_skb(skb); } else { - if (!inner_flushed) - UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); - consume_skb(skb); + kfree_skb(skb); + __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); } return 0; } @@ -853,10 +798,10 @@ start_lookup: int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { + const struct in6_addr *saddr, *daddr; struct net *net = dev_net(skb->dev); - struct sock *sk; struct udphdr *uh; - const struct in6_addr *saddr, *daddr; + struct sock *sk; u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) @@ -910,7 +855,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int ret; if (!uh->check && !udp_sk(sk)->no_check6_rx) { - sock_put(sk); udp6_csum_zero_error(skb); goto csum_error; } @@ -920,7 +864,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ip6_compute_pseudo); ret = udpv6_queue_rcv_skb(sk, skb); - sock_put(sk); /* a return value > 0 means to resubmit the input */ if (ret > 0) @@ -940,7 +883,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); @@ -954,9 +897,9 @@ short_packet: daddr, ntohs(uh->dest)); goto discard; csum_error: - UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: - UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -1068,13 +1011,14 @@ send: err = ip6_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; } - } else - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); + } else { + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); + } return err; } @@ -1118,16 +1062,19 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ip6_flowlabel *flowlabel = NULL; struct flowi6 fl6; struct dst_entry *dst; + struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; int ulen = len; - int hlimit = -1; - int tclass = -1; - int dontfrag = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; int connected = 0; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + struct sockcm_cookie sockc; + + ipc6.hlimit = -1; + ipc6.tclass = -1; + ipc6.dontfrag = -1; /* destination address check */ if (sin6) { @@ -1247,14 +1194,15 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); + ipc6.opt = opt; - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1275,6 +1223,7 @@ do_udp_sendmsg: if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); + ipc6.opt = opt; fl6.flowi6_proto = sk->sk_protocol; if (!ipv6_addr_any(daddr)) @@ -1304,11 +1253,11 @@ do_udp_sendmsg: goto out; } - if (hlimit < 0) - hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + if (ipc6.hlimit < 0) + ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (tclass < 0) - tclass = np->tclass; + if (ipc6.tclass < 0) + ipc6.tclass = np->tclass; if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -1319,9 +1268,9 @@ back_from_confirm: struct sk_buff *skb; skb = ip6_make_skb(sk, getfrag, msg, ulen, - sizeof(struct udphdr), hlimit, tclass, opt, + sizeof(struct udphdr), &ipc6, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, &sockc); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) err = udp_v6_send_skb(skb, &fl6); @@ -1342,13 +1291,12 @@ back_from_confirm: up->pending = AF_INET6; do_append_data: - if (dontfrag < 0) - dontfrag = np->dontfrag; + if (ipc6.dontfrag < 0) + ipc6.dontfrag = np->dontfrag; up->len += ulen; - err = ip6_append_data(sk, getfrag, msg, ulen, - sizeof(struct udphdr), hlimit, tclass, opt, &fl6, - (struct rt6_info *)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); + err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), + &ipc6, &fl6, (struct rt6_info *)dst, + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, &sockc); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) @@ -1391,8 +1339,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 2b0fbe6929e8..5429f6bcf047 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -153,7 +153,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head, skip: NAPI_GRO_CB(skb)->is_ipv6 = 1; - return udp_gro_receive(head, skb, uh); + return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb); flush: NAPI_GRO_CB(skb)->flush = 1; @@ -173,7 +173,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; } - return udp_gro_complete(skb, nhoff); + return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb); } static const struct net_offload udpv6_offload = { @@ -184,7 +184,12 @@ static const struct net_offload udpv6_offload = { }, }; -int __init udp_offload_init(void) +int udpv6_offload_init(void) { return inet6_add_offload(&udpv6_offload, IPPROTO_UDP); } + +int udpv6_offload_exit(void) +{ + return inet6_del_offload(&udpv6_offload, IPPROTO_UDP); +} diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index fcfbe579434a..d8b7267280c3 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -181,7 +181,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, skb = new_skb; } - dev->trans_start = jiffies; + netif_trans_update(dev); len = skb->len; /* Now queue the packet in the transport layer */ diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index cd479903d943..c6f5df1bed12 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -494,10 +494,9 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; struct flowi6 fl6; + struct sockcm_cookie sockc_unused = {0}; + struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; - int hlimit = -1; - int tclass = -1; - int dontfrag = -1; int transhdrlen = 4; /* zero session-id */ int ulen = len + transhdrlen; int err; @@ -519,6 +518,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_mark = sk->sk_mark; + ipc6.hlimit = -1; + ipc6.tclass = -1; + ipc6.dontfrag = -1; + if (lsa) { if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -563,9 +566,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); + ipc6.opt = opt; - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, + &sockc_unused); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -586,6 +590,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); + ipc6.opt = opt; fl6.flowi6_proto = sk->sk_protocol; if (!ipv6_addr_any(daddr)) @@ -610,14 +615,14 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; } - if (hlimit < 0) - hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + if (ipc6.hlimit < 0) + ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (tclass < 0) - tclass = np->tclass; + if (ipc6.tclass < 0) + ipc6.tclass = np->tclass; - if (dontfrag < 0) - dontfrag = np->dontfrag; + if (ipc6.dontfrag < 0) + ipc6.dontfrag = np->dontfrag; if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; @@ -625,9 +630,9 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg, - ulen, transhdrlen, hlimit, tclass, opt, + ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, &sockc_unused); if (err) ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 2caaa84ce92d..1d02e8d20e56 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -346,22 +346,30 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nest == NULL) goto nla_put_failure; - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, - atomic_long_read(&tunnel->stats.tx_packets)) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, - atomic_long_read(&tunnel->stats.tx_bytes)) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, - atomic_long_read(&tunnel->stats.tx_errors)) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, - atomic_long_read(&tunnel->stats.rx_packets)) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, - atomic_long_read(&tunnel->stats.rx_bytes)) || - nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - atomic_long_read(&tunnel->stats.rx_seq_discards)) || - nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - atomic_long_read(&tunnel->stats.rx_oos_packets)) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, - atomic_long_read(&tunnel->stats.rx_errors))) + if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&tunnel->stats.tx_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&tunnel->stats.tx_bytes), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&tunnel->stats.tx_errors), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&tunnel->stats.rx_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&tunnel->stats.rx_bytes), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_SEQ_DISCARDS, + atomic_long_read(&tunnel->stats.rx_seq_discards), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_OOS_PACKETS, + atomic_long_read(&tunnel->stats.rx_oos_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&tunnel->stats.rx_errors), + L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -746,29 +754,38 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl nla_put_u8(skb, L2TP_ATTR_USING_IPSEC, 1)) || #endif (session->reorder_timeout && - nla_put_msecs(skb, L2TP_ATTR_RECV_TIMEOUT, session->reorder_timeout))) + nla_put_msecs(skb, L2TP_ATTR_RECV_TIMEOUT, + session->reorder_timeout, L2TP_ATTR_PAD))) goto nla_put_failure; nest = nla_nest_start(skb, L2TP_ATTR_STATS); if (nest == NULL) goto nla_put_failure; - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, - atomic_long_read(&session->stats.tx_packets)) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, - atomic_long_read(&session->stats.tx_bytes)) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, - atomic_long_read(&session->stats.tx_errors)) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, - atomic_long_read(&session->stats.rx_packets)) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, - atomic_long_read(&session->stats.rx_bytes)) || - nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - atomic_long_read(&session->stats.rx_seq_discards)) || - nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - atomic_long_read(&session->stats.rx_oos_packets)) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, - atomic_long_read(&session->stats.rx_errors))) + if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&session->stats.tx_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&session->stats.tx_bytes), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&session->stats.tx_errors), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&session->stats.rx_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&session->stats.rx_bytes), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_SEQ_DISCARDS, + atomic_long_read(&session->stats.rx_seq_discards), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_OOS_PACKETS, + atomic_long_read(&session->stats.rx_oos_packets), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&session->stats.rx_errors), + L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index e925037fa0df..6651a78e100c 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -97,3 +97,66 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return tb_id; } EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); + +/** + * l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns + * cached route for L3 master device if relevant + * to flow + * @net: network namespace for device index lookup + * @fl6: IPv6 flow struct for lookup + */ + +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, + const struct flowi6 *fl6) +{ + struct dst_entry *dst = NULL; + struct net_device *dev; + + if (fl6->flowi6_oif) { + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); + if (dev && netif_is_l3_slave(dev)) + dev = netdev_master_upper_dev_get_rcu(dev); + + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_get_rt6_dst) + dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); + + rcu_read_unlock(); + } + + return dst; +} +EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst); + +/** + * l3mdev_get_saddr - get source address for a flow based on an interface + * enslaved to an L3 master device + * @net: network namespace for device index lookup + * @ifindex: Interface index + * @fl4: IPv4 flow struct + */ + +int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) +{ + struct net_device *dev; + int rc = 0; + + if (ifindex) { + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev && netif_is_l3_slave(dev)) + dev = netdev_master_upper_dev_get_rcu(dev); + + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_get_saddr) + rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4); + + rcu_read_unlock(); + } + + return rc; +} +EXPORT_SYMBOL_GPL(l3mdev_get_saddr); diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 1a3c7e0f5d0d..29c509c54bb2 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -195,7 +195,7 @@ static int llc_seq_core_show(struct seq_file *seq, void *v) timer_pending(&llc->pf_cycle_timer.timer), timer_pending(&llc->rej_sent_timer.timer), timer_pending(&llc->busy_state_timer.timer), - !!sk->sk_backlog.tail, !!sock_owned_by_user(sk)); + !!sk->sk_backlog.tail, !!sk->sk_lock.owned); out: return 0; } diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 4932e9f243a2..42fa81031dfa 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -935,6 +935,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, size_t len) { struct tid_ampdu_tx *tid_tx; + struct ieee80211_txq *txq; u16 capab, tid; u8 buf_size; bool amsdu; @@ -945,6 +946,10 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); + txq = sta->sta.txq[tid]; + if (!amsdu && txq) + set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags); + mutex_lock(&sta->ampdu_mlme.mtx); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fe1704c4e8fb..0c12e4001f19 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -65,11 +65,13 @@ static int ieee80211_change_iface(struct wiphy *wiphy, return ret; if (type == NL80211_IFTYPE_AP_VLAN && - params && params->use_4addr == 0) + params && params->use_4addr == 0) { RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); - else if (type == NL80211_IFTYPE_STATION && - params && params->use_4addr >= 0) + ieee80211_check_fast_rx_iface(sdata); + } else if (type == NL80211_IFTYPE_STATION && + params && params->use_4addr >= 0) { sdata->u.mgd.use_4addr = params->use_4addr; + } if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) { struct ieee80211_local *local = sdata->local; @@ -732,6 +734,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->vif.bss_conf.dtim_period = params->dtim_period; sdata->vif.bss_conf.enable_beacon = true; + sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p; sdata->vif.bss_conf.ssid_len = params->ssid_len; if (params->ssid_len) @@ -1046,7 +1049,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, int ret = 0; struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata = sta->sdata; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); u32 mask, set; sband = local->hw.wiphy->bands[band]; @@ -1202,6 +1205,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, params->opmode_notif, band); } + if (params->support_p2p_ps >= 0) + sta->sta.support_p2p_ps = params->support_p2p_ps; + if (ieee80211_vif_is_mesh(&sdata->vif)) sta_apply_mesh_params(local, sta, params); @@ -1363,6 +1369,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, rcu_assign_pointer(vlansdata->u.vlan.sta, sta); new_4addr = true; + __ieee80211_check_fast_rx_iface(vlansdata); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && @@ -1499,7 +1506,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, memset(pinfo, 0, sizeof(*pinfo)); - pinfo->generation = mesh_paths_generation; + pinfo->generation = mpath->sdata->u.mesh.mesh_paths_generation; pinfo->filled = MPATH_INFO_FRAME_QLEN | MPATH_INFO_SN | @@ -1577,7 +1584,7 @@ static void mpp_set_pinfo(struct mesh_path *mpath, u8 *mpp, memset(pinfo, 0, sizeof(*pinfo)); memcpy(mpp, mpath->mpp, ETH_ALEN); - pinfo->generation = mpp_paths_generation; + pinfo->generation = mpath->sdata->u.mesh.mpp_paths_generation; } static int ieee80211_get_mpp(struct wiphy *wiphy, struct net_device *dev, @@ -1841,7 +1848,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, struct bss_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - enum ieee80211_band band; + enum nl80211_band band; u32 changed = 0; if (!sdata_dereference(sdata->u.ap.beacon, sdata)) @@ -1860,7 +1867,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } if (!sdata->vif.bss_conf.use_short_slot && - band == IEEE80211_BAND_5GHZ) { + band == NL80211_BAND_5GHZ) { sdata->vif.bss_conf.use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } @@ -1885,6 +1892,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; else sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS; + ieee80211_check_fast_rx_iface(sdata); } if (params->ht_opmode >= 0) { @@ -2089,12 +2097,12 @@ static int ieee80211_leave_ocb(struct wiphy *wiphy, struct net_device *dev) } static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, - int rate[IEEE80211_NUM_BANDS]) + int rate[NUM_NL80211_BANDS]) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(sdata->vif.bss_conf.mcast_rate, rate, - sizeof(int) * IEEE80211_NUM_BANDS); + sizeof(int) * NUM_NL80211_BANDS); return 0; } @@ -2499,7 +2507,7 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return ret; } - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband = wiphy->bands[i]; int j; @@ -3127,7 +3135,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_tx_info *info; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; - enum ieee80211_band band; + enum nl80211_band band; int ret; /* the lock is needed to assign the cookie later */ diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 4ab5c522ceee..b251b2f7f8dd 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -127,6 +127,9 @@ static const char *hw_flag_names[] = { FLAG(BEACON_TX_STATUS), FLAG(NEEDS_UNIQUE_STA_ADDR), FLAG(SUPPORTS_REORDERING_BUFFER), + FLAG(USES_RSS), + FLAG(TX_AMSDU), + FLAG(TX_FRAG_LIST), #undef FLAG }; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 37ea30e0754c..a5ba739cd2a7 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -169,21 +169,21 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \ IEEE80211_IF_FILE_R(name) /* common attributes */ -IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ], +IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[NL80211_BAND_2GHZ], HEX); -IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], +IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[NL80211_BAND_5GHZ], HEX); IEEE80211_IF_FILE(rc_rateidx_mcs_mask_2ghz, - rc_rateidx_mcs_mask[IEEE80211_BAND_2GHZ], HEXARRAY); + rc_rateidx_mcs_mask[NL80211_BAND_2GHZ], HEXARRAY); IEEE80211_IF_FILE(rc_rateidx_mcs_mask_5ghz, - rc_rateidx_mcs_mask[IEEE80211_BAND_5GHZ], HEXARRAY); + rc_rateidx_mcs_mask[NL80211_BAND_5GHZ], HEXARRAY); static ssize_t ieee80211_if_fmt_rc_rateidx_vht_mcs_mask_2ghz( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { int i, len = 0; - const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[IEEE80211_BAND_2GHZ]; + const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[NL80211_BAND_2GHZ]; for (i = 0; i < NL80211_VHT_NSS_MAX; i++) len += scnprintf(buf + len, buflen - len, "%04x ", mask[i]); @@ -199,7 +199,7 @@ static ssize_t ieee80211_if_fmt_rc_rateidx_vht_mcs_mask_5ghz( char *buf, int buflen) { int i, len = 0; - const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[IEEE80211_BAND_5GHZ]; + const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[NL80211_BAND_5GHZ]; for (i = 0; i < NL80211_VHT_NSS_MAX; i++) len += scnprintf(buf + len, buflen - len, "%04x ", mask[i]); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a39512f09f9e..33dfcbc2bf9c 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -3,6 +3,7 @@ * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -51,31 +52,54 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_FILE(aid, sta.aid, D); +static const char * const sta_flag_names[] = { +#define FLAG(F) [WLAN_STA_##F] = #F + FLAG(AUTH), + FLAG(ASSOC), + FLAG(PS_STA), + FLAG(AUTHORIZED), + FLAG(SHORT_PREAMBLE), + FLAG(WDS), + FLAG(CLEAR_PS_FILT), + FLAG(MFP), + FLAG(BLOCK_BA), + FLAG(PS_DRIVER), + FLAG(PSPOLL), + FLAG(TDLS_PEER), + FLAG(TDLS_PEER_AUTH), + FLAG(TDLS_INITIATOR), + FLAG(TDLS_CHAN_SWITCH), + FLAG(TDLS_OFF_CHANNEL), + FLAG(TDLS_WIDER_BW), + FLAG(UAPSD), + FLAG(SP), + FLAG(4ADDR_EVENT), + FLAG(INSERTED), + FLAG(RATE_CONTROL), + FLAG(TOFFSET_KNOWN), + FLAG(MPSP_OWNER), + FLAG(MPSP_RECIPIENT), + FLAG(PS_DELIVER), +#undef FLAG +}; + static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[121]; + char buf[16 * NUM_WLAN_STA_FLAGS], *pos = buf; + char *end = buf + sizeof(buf) - 1; struct sta_info *sta = file->private_data; + unsigned int flg; + + BUILD_BUG_ON(ARRAY_SIZE(sta_flag_names) != NUM_WLAN_STA_FLAGS); + + for (flg = 0; flg < NUM_WLAN_STA_FLAGS; flg++) { + if (test_sta_flag(sta, flg)) + pos += scnprintf(pos, end - pos, "%s\n", + sta_flag_names[flg]); + } -#define TEST(flg) \ - test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : "" - - int res = scnprintf(buf, sizeof(buf), - "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - TEST(AUTH), TEST(ASSOC), TEST(PS_STA), - TEST(PS_DRIVER), TEST(AUTHORIZED), - TEST(SHORT_PREAMBLE), - sta->sta.wme ? "WME\n" : "", - TEST(WDS), TEST(CLEAR_PS_FILT), - TEST(MFP), TEST(BLOCK_BA), TEST(PSPOLL), - TEST(UAPSD), TEST(SP), TEST(TDLS_PEER), - TEST(TDLS_PEER_AUTH), TEST(TDLS_INITIATOR), - TEST(TDLS_CHAN_SWITCH), TEST(TDLS_OFF_CHANNEL), - TEST(4ADDR_EVENT), TEST(INSERTED), - TEST(RATE_CONTROL), TEST(TOFFSET_KNOWN), - TEST(MPSP_OWNER), TEST(MPSP_RECIPIENT)); -#undef TEST - return simple_read_from_buffer(userbuf, count, ppos, buf, res); + return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); } STA_OPS(flags); @@ -151,11 +175,12 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos) { - char _buf[12] = {}, *buf = _buf; + char _buf[25] = {}, *buf = _buf; struct sta_info *sta = file->private_data; bool start, tx; unsigned long tid; - int ret; + char *pos; + int ret, timeout = 5000; if (count > sizeof(_buf)) return -EINVAL; @@ -164,37 +189,48 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu return -EFAULT; buf[sizeof(_buf) - 1] = '\0'; + pos = buf; + buf = strsep(&pos, " "); + if (!buf) + return -EINVAL; - if (strncmp(buf, "tx ", 3) == 0) { - buf += 3; + if (!strcmp(buf, "tx")) tx = true; - } else if (strncmp(buf, "rx ", 3) == 0) { - buf += 3; + else if (!strcmp(buf, "rx")) tx = false; - } else + else return -EINVAL; - if (strncmp(buf, "start ", 6) == 0) { - buf += 6; + buf = strsep(&pos, " "); + if (!buf) + return -EINVAL; + if (!strcmp(buf, "start")) { start = true; if (!tx) return -EINVAL; - } else if (strncmp(buf, "stop ", 5) == 0) { - buf += 5; + } else if (!strcmp(buf, "stop")) { start = false; - } else + } else { return -EINVAL; + } - ret = kstrtoul(buf, 0, &tid); - if (ret) - return ret; + buf = strsep(&pos, " "); + if (!buf) + return -EINVAL; + if (sscanf(buf, "timeout=%d", &timeout) == 1) { + buf = strsep(&pos, " "); + if (!buf || !tx || !start) + return -EINVAL; + } - if (tid >= IEEE80211_NUM_TIDS) + ret = kstrtoul(buf, 0, &tid); + if (ret || tid >= IEEE80211_NUM_TIDS) return -EINVAL; if (tx) { if (start) - ret = ieee80211_start_tx_ba_session(&sta->sta, tid, 5000); + ret = ieee80211_start_tx_ba_session(&sta->sta, tid, + timeout); else ret = ieee80211_stop_tx_ba_session(&sta->sta, tid); } else { @@ -322,14 +358,14 @@ STA_OPS(vht_capa); #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, \ - sta->debugfs.dir, sta, &sta_ ##name## _ops); + sta->debugfs_dir, sta, &sta_ ##name## _ops); #define DEBUGFS_ADD_COUNTER(name, field) \ if (sizeof(sta->field) == sizeof(u32)) \ - debugfs_create_u32(#name, 0400, sta->debugfs.dir, \ + debugfs_create_u32(#name, 0400, sta->debugfs_dir, \ (u32 *) &sta->field); \ else \ - debugfs_create_u64(#name, 0400, sta->debugfs.dir, \ + debugfs_create_u64(#name, 0400, sta->debugfs_dir, \ (u64 *) &sta->field); void ieee80211_sta_debugfs_add(struct sta_info *sta) @@ -339,8 +375,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) struct dentry *stations_dir = sta->sdata->debugfs.subdir_stations; u8 mac[3*ETH_ALEN]; - sta->debugfs.add_has_run = true; - if (!stations_dir) return; @@ -355,8 +389,8 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) * destroyed quickly enough the old station's debugfs * dir might still be around. */ - sta->debugfs.dir = debugfs_create_dir(mac, stations_dir); - if (!sta->debugfs.dir) + sta->debugfs_dir = debugfs_create_dir(mac, stations_dir); + if (!sta->debugfs_dir) return; DEBUGFS_ADD(flags); @@ -372,14 +406,14 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, - sta->debugfs.dir, + sta->debugfs_dir, (u32 *)&sta->driver_buffered_tids); else debugfs_create_x64("driver_buffered_tids", 0400, - sta->debugfs.dir, + sta->debugfs_dir, (u64 *)&sta->driver_buffered_tids); - drv_sta_add_debugfs(local, sdata, &sta->sta, sta->debugfs.dir); + drv_sta_add_debugfs(local, sdata, &sta->sta, sta->debugfs_dir); } void ieee80211_sta_debugfs_remove(struct sta_info *sta) @@ -387,7 +421,7 @@ void ieee80211_sta_debugfs_remove(struct sta_info *sta) struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - drv_sta_remove_debugfs(local, sdata, &sta->sta, sta->debugfs.dir); - debugfs_remove_recursive(sta->debugfs.dir); - sta->debugfs.dir = NULL; + drv_sta_remove_debugfs(local, sdata, &sta->sta, sta->debugfs_dir); + debugfs_remove_recursive(sta->debugfs_dir); + sta->debugfs_dir = NULL; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 18b0d65baff0..184473c257eb 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1,3 +1,8 @@ +/* +* Portions of this file +* Copyright(c) 2016 Intel Deutschland GmbH +*/ + #ifndef __MAC80211_DRIVER_OPS #define __MAC80211_DRIVER_OPS @@ -29,6 +34,16 @@ static inline void drv_tx(struct ieee80211_local *local, local->ops->tx(&local->hw, control, skb); } +static inline void drv_sync_rx_queues(struct ieee80211_local *local, + struct sta_info *sta) +{ + if (local->ops->sync_rx_queues) { + trace_drv_sync_rx_queues(local, sta->sdata, &sta->sta); + local->ops->sync_rx_queues(&local->hw); + trace_drv_return_void(local); + } +} + static inline void drv_get_et_strings(struct ieee80211_sub_if_data *sdata, u32 sset, u8 *data) { diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index fc3238376b39..a31d30713d08 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -126,7 +126,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, } } - if (sband->band == IEEE80211_BAND_2GHZ) { + if (sband->band == NL80211_BAND_2GHZ) { *pos++ = WLAN_EID_DS_PARAMS; *pos++ = 1; *pos++ = ieee80211_frequency_to_channel( @@ -348,11 +348,11 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, * * HT follows these specifications (IEEE 802.11-2012 20.3.18) */ - sdata->vif.bss_conf.use_short_slot = chan->band == IEEE80211_BAND_5GHZ; + sdata->vif.bss_conf.use_short_slot = chan->band == NL80211_BAND_5GHZ; bss_change |= BSS_CHANGED_ERP_SLOT; /* cf. IEEE 802.11 9.2.12 */ - if (chan->band == IEEE80211_BAND_2GHZ && have_higher_than_11mbit) + if (chan->band == NL80211_BAND_2GHZ && have_higher_than_11mbit) sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; else sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; @@ -649,8 +649,6 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, return NULL; } - sta->rx_stats.last_rx = jiffies; - /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | @@ -670,10 +668,11 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { + unsigned long last_active = ieee80211_sta_last_active(sta); + if (sta->sdata == sdata && - time_after(sta->rx_stats.last_rx + - IEEE80211_IBSS_MERGE_INTERVAL, - jiffies)) { + time_is_after_jiffies(last_active + + IEEE80211_IBSS_MERGE_INTERVAL)) { active++; break; } @@ -990,7 +989,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel) { struct sta_info *sta; - enum ieee80211_band band = rx_status->band; + enum nl80211_band band = rx_status->band; enum nl80211_bss_scan_width scan_width; struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; @@ -1110,7 +1109,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; u64 beacon_timestamp, rx_timestamp; u32 supp_rates = 0; - enum ieee80211_band band = rx_status->band; + enum nl80211_band band = rx_status->band; channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); if (!channel) @@ -1236,8 +1235,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, if (!sta) return; - sta->rx_stats.last_rx = jiffies; - /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | @@ -1259,11 +1256,13 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { + unsigned long last_active = ieee80211_sta_last_active(sta); + if (sdata != sta->sdata) continue; - if (time_after(jiffies, sta->rx_stats.last_rx + exp_time) || - (time_after(jiffies, sta->rx_stats.last_rx + exp_rsn) && + if (time_is_before_jiffies(last_active + exp_time) || + (time_is_before_jiffies(last_active + exp_rsn) && sta->sta_state != IEEE80211_STA_AUTHORIZED)) { sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n", sta->sta_state != IEEE80211_STA_AUTHORIZED ? diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 422003540169..9438c9406687 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -696,6 +696,11 @@ struct ieee80211_if_mesh { /* offset from skb->data while building IE */ int meshconf_offset; + + struct mesh_table *mesh_paths; + struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */ + int mesh_paths_generation; + int mpp_paths_generation; }; #ifdef CONFIG_MAC80211_MESH @@ -797,6 +802,7 @@ struct mac80211_qos_map { enum txq_info_flags { IEEE80211_TXQ_STOP, IEEE80211_TXQ_AMPDU, + IEEE80211_TXQ_NO_AMSDU, }; struct txq_info { @@ -890,13 +896,13 @@ struct ieee80211_sub_if_data { struct ieee80211_if_ap *bss; /* bitmap of allowed (non-MCS) rate indexes for rate control */ - u32 rc_rateidx_mask[IEEE80211_NUM_BANDS]; + u32 rc_rateidx_mask[NUM_NL80211_BANDS]; - bool rc_has_mcs_mask[IEEE80211_NUM_BANDS]; - u8 rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN]; + bool rc_has_mcs_mask[NUM_NL80211_BANDS]; + u8 rc_rateidx_mcs_mask[NUM_NL80211_BANDS][IEEE80211_HT_MCS_MASK_LEN]; - bool rc_has_vht_mcs_mask[IEEE80211_NUM_BANDS]; - u16 rc_rateidx_vht_mcs_mask[IEEE80211_NUM_BANDS][NL80211_VHT_NSS_MAX]; + bool rc_has_vht_mcs_mask[NUM_NL80211_BANDS]; + u16 rc_rateidx_vht_mcs_mask[NUM_NL80211_BANDS][NL80211_VHT_NSS_MAX]; union { struct ieee80211_if_ap ap; @@ -951,10 +957,10 @@ sdata_assert_lock(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&sdata->wdev.mtx); } -static inline enum ieee80211_band +static inline enum nl80211_band ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata) { - enum ieee80211_band band = IEEE80211_BAND_2GHZ; + enum nl80211_band band = NL80211_BAND_2GHZ; struct ieee80211_chanctx_conf *chanctx_conf; rcu_read_lock(); @@ -1225,7 +1231,7 @@ struct ieee80211_local { struct cfg80211_scan_request __rcu *scan_req; struct ieee80211_scan_request *hw_scan_req; struct cfg80211_chan_def scan_chandef; - enum ieee80211_band hw_scan_band; + enum nl80211_band hw_scan_band; int scan_channel_idx; int scan_ies_len; int hw_scan_ies_bufsize; @@ -1489,6 +1495,11 @@ u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local); int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, u64 *cookie, gfp_t gfp); +void ieee80211_check_fast_rx(struct sta_info *sta); +void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata); +void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata); +void ieee80211_clear_fast_rx(struct sta_info *sta); + /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, @@ -1727,10 +1738,10 @@ void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band); + enum nl80211_band band); void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band); + enum nl80211_band band); void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_vht_cap *vht_cap); void ieee80211_get_vht_mask_from_cap(__le16 vht_cap, @@ -1758,7 +1769,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, */ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, - enum ieee80211_band current_band, + enum nl80211_band current_band, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie); @@ -1783,7 +1794,7 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) /* utility functions/constants */ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */ -int ieee80211_frame_duration(enum ieee80211_band band, size_t len, +int ieee80211_frame_duration(enum nl80211_band band, size_t len, int rate, int erp, int short_preamble, int shift); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, @@ -1793,12 +1804,12 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum ieee80211_band band); + enum nl80211_band band); static inline void ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum ieee80211_band band) + enum nl80211_band band) { rcu_read_lock(); __ieee80211_tx_skb_tid_band(sdata, skb, tid, band); @@ -1953,7 +1964,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, - enum ieee80211_band band, u32 *basic_rates); + enum nl80211_band band, u32 *basic_rates); int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode); int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, @@ -1976,10 +1987,10 @@ int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, const u8 *srates, int srates_len, u32 *rates); int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic, - enum ieee80211_band band); + enum nl80211_band band); int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic, - enum ieee80211_band band); + enum nl80211_band band); u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo); /* channel management */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e1cb22c16530..c59af3eb9fa4 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1093,7 +1093,7 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) sdata->fragment_next = 0; if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_rmc_free(sdata); + ieee80211_mesh_teardown_sdata(sdata); } static void ieee80211_uninit(struct net_device *dev) @@ -1800,7 +1800,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk, ieee80211_delayed_tailroom_dec); - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[i]; sdata->rc_rateidx_mask[i] = diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 3df7b0392d30..edd6f2945f69 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -338,6 +338,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } else { rcu_assign_pointer(sta->gtk[idx], new); } + ieee80211_check_fast_rx(sta); } else { defunikey = old && old == key_mtx_dereference(sdata->local, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8190bf27ebff..7ee91d6151d1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -558,6 +558,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, if (!ops->set_key) wiphy->flags |= WIPHY_FLAG_IBSS_RSN; + wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_RRM); + wiphy->bss_priv_size = sizeof(struct ieee80211_bss); local = wiphy_priv(wiphy); @@ -799,7 +801,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); int result, i; - enum ieee80211_band band; + enum nl80211_band band; int channels, max_bitrates; bool supp_ht, supp_vht; netdev_features_t feature_whitelist; @@ -854,7 +856,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* Only HW csum features are currently compatible with mac80211 */ feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | - NETIF_F_GSO_SOFTWARE; + NETIF_F_GSO_SOFTWARE | NETIF_F_RXCSUM; if (WARN_ON(hw->netdev_features & ~feature_whitelist)) return -EINVAL; @@ -872,7 +874,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) max_bitrates = 0; supp_ht = false; supp_vht = false; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[band]; @@ -934,7 +936,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (!local->int_scan_req) return -ENOMEM; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!local->hw.wiphy->bands[band]) continue; local->int_scan_req->rates[band] = (u32) -1; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d32cefcb63b0..4c6404e1ad6e 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -25,7 +25,6 @@ bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) void ieee80211s_init(void) { - mesh_pathtbl_init(); mesh_allocated = 1; rm_cache = kmem_cache_create("mesh_rmc", sizeof(struct rmc_entry), 0, 0, NULL); @@ -35,7 +34,6 @@ void ieee80211s_stop(void) { if (!mesh_allocated) return; - mesh_pathtbl_unregister(); kmem_cache_destroy(rm_cache); } @@ -176,22 +174,23 @@ int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) return -ENOMEM; sdata->u.mesh.rmc->idx_mask = RMC_BUCKETS - 1; for (i = 0; i < RMC_BUCKETS; i++) - INIT_LIST_HEAD(&sdata->u.mesh.rmc->bucket[i]); + INIT_HLIST_HEAD(&sdata->u.mesh.rmc->bucket[i]); return 0; } void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) { struct mesh_rmc *rmc = sdata->u.mesh.rmc; - struct rmc_entry *p, *n; + struct rmc_entry *p; + struct hlist_node *n; int i; if (!sdata->u.mesh.rmc) return; for (i = 0; i < RMC_BUCKETS; i++) { - list_for_each_entry_safe(p, n, &rmc->bucket[i], list) { - list_del(&p->list); + hlist_for_each_entry_safe(p, n, &rmc->bucket[i], list) { + hlist_del(&p->list); kmem_cache_free(rm_cache, p); } } @@ -220,16 +219,20 @@ int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, u32 seqnum = 0; int entries = 0; u8 idx; - struct rmc_entry *p, *n; + struct rmc_entry *p; + struct hlist_node *n; + + if (!rmc) + return -1; /* Don't care about endianness since only match matters */ memcpy(&seqnum, &mesh_hdr->seqnum, sizeof(mesh_hdr->seqnum)); idx = le32_to_cpu(mesh_hdr->seqnum) & rmc->idx_mask; - list_for_each_entry_safe(p, n, &rmc->bucket[idx], list) { + hlist_for_each_entry_safe(p, n, &rmc->bucket[idx], list) { ++entries; if (time_after(jiffies, p->exp_time) || entries == RMC_QUEUE_MAX_LEN) { - list_del(&p->list); + hlist_del(&p->list); kmem_cache_free(rm_cache, p); --entries; } else if ((seqnum == p->seqnum) && ether_addr_equal(sa, p->sa)) @@ -243,7 +246,7 @@ int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, p->seqnum = seqnum; p->exp_time = jiffies + RMC_TIMEOUT; memcpy(p->sa, sa, ETH_ALEN); - list_add(&p->list, &rmc->bucket[idx]); + hlist_add_head(&p->list, &rmc->bucket[idx]); return 0; } @@ -412,7 +415,7 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; u8 *pos; @@ -475,7 +478,7 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; u8 *pos; @@ -677,7 +680,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) struct ieee80211_mgmt *mgmt; struct ieee80211_chanctx_conf *chanctx_conf; struct mesh_csa_settings *csa; - enum ieee80211_band band; + enum nl80211_band band; u8 *pos; struct ieee80211_sub_if_data *sdata; int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + @@ -927,7 +930,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings params; struct ieee80211_csa_ie csa_ie; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); int err; u32 sta_flags; @@ -1081,7 +1084,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; size_t baselen; int freq; - enum ieee80211_band band = rx_status->band; + enum nl80211_band band = rx_status->band; /* ignore ProbeResp to foreign address */ if (stype == IEEE80211_STYPE_PROBE_RESP && @@ -1348,12 +1351,6 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) ifmsh->last_preq + msecs_to_jiffies(ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval))) mesh_path_start_discovery(sdata); - if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags)) - mesh_mpath_table_grow(); - - if (test_and_clear_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags)) - mesh_mpp_table_grow(); - if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) ieee80211_mesh_housekeeping(sdata); @@ -1388,6 +1385,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) /* Allocate all mesh structures when creating the first mesh interface. */ if (!mesh_allocated) ieee80211s_init(); + + mesh_pathtbl_init(sdata); + setup_timer(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, (unsigned long) sdata); @@ -1402,3 +1402,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.bssid = zero_addr; } + +void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata) +{ + mesh_rmc_free(sdata); + mesh_pathtbl_unregister(sdata); +} diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 87c017a3b1ce..26b9ccbe1fce 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -21,8 +21,6 @@ /** * enum mesh_path_flags - mac80211 mesh path flags * - * - * * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path * @MESH_PATH_SN_VALID: the mesh path contains a valid destination sequence @@ -32,6 +30,8 @@ * @MESH_PATH_RESOLVED: the mesh path can has been resolved * @MESH_PATH_REQ_QUEUED: there is an unsent path request for this destination * already queued up, waiting for the discovery process to start. + * @MESH_PATH_DELETED: the mesh path has been deleted and should no longer + * be used * * MESH_PATH_RESOLVED is used by the mesh path timer to * decide when to stop or cancel the mesh path discovery. @@ -43,6 +43,7 @@ enum mesh_path_flags { MESH_PATH_FIXED = BIT(3), MESH_PATH_RESOLVED = BIT(4), MESH_PATH_REQ_QUEUED = BIT(5), + MESH_PATH_DELETED = BIT(6), }; /** @@ -51,10 +52,6 @@ enum mesh_path_flags { * * * @MESH_WORK_HOUSEKEEPING: run the periodic mesh housekeeping tasks - * @MESH_WORK_GROW_MPATH_TABLE: the mesh path table is full and needs - * to grow. - * @MESH_WORK_GROW_MPP_TABLE: the mesh portals table is full and needs to - * grow * @MESH_WORK_ROOT: the mesh root station needs to send a frame * @MESH_WORK_DRIFT_ADJUST: time to compensate for clock drift relative to other * mesh nodes @@ -62,8 +59,6 @@ enum mesh_path_flags { */ enum mesh_deferred_task_flags { MESH_WORK_HOUSEKEEPING, - MESH_WORK_GROW_MPATH_TABLE, - MESH_WORK_GROW_MPP_TABLE, MESH_WORK_ROOT, MESH_WORK_DRIFT_ADJUST, MESH_WORK_MBSS_CHANGED, @@ -73,12 +68,16 @@ enum mesh_deferred_task_flags { * struct mesh_path - mac80211 mesh path structure * * @dst: mesh path destination mac address + * @mpp: mesh proxy mac address + * @rhash: rhashtable list pointer + * @gate_list: list pointer for known gates list * @sdata: mesh subif * @next_hop: mesh neighbor to which frames for this destination will be * forwarded * @timer: mesh path discovery timer * @frame_queue: pending queue for frames sent to this destination while the * path is unresolved + * @rcu: rcu head for freeing mesh path * @sn: target sequence number * @metric: current metric to this destination * @hop_count: hops to destination @@ -97,14 +96,16 @@ enum mesh_deferred_task_flags { * @is_gate: the destination station of this path is a mesh gate * * - * The combination of dst and sdata is unique in the mesh path table. Since the - * next_hop STA is only protected by RCU as well, deleting the STA must also - * remove/substitute the mesh_path structure and wait until that is no longer - * reachable before destroying the STA completely. + * The dst address is unique in the mesh path table. Since the mesh_path is + * protected by RCU, deleting the next_hop STA must remove / substitute the + * mesh_path structure and wait until that is no longer reachable before + * destroying the STA completely. */ struct mesh_path { u8 dst[ETH_ALEN]; u8 mpp[ETH_ALEN]; /* used for MPP or MAP */ + struct rhash_head rhash; + struct hlist_node gate_list; struct ieee80211_sub_if_data *sdata; struct sta_info __rcu *next_hop; struct timer_list timer; @@ -128,34 +129,17 @@ struct mesh_path { /** * struct mesh_table * - * @hash_buckets: array of hash buckets of the table - * @hashwlock: array of locks to protect write operations, one per bucket - * @hash_mask: 2^size_order - 1, used to compute hash idx - * @hash_rnd: random value used for hash computations - * @entries: number of entries in the table - * @free_node: function to free nodes of the table - * @copy_node: function to copy nodes of the table - * @size_order: determines size of the table, there will be 2^size_order hash - * buckets * @known_gates: list of known mesh gates and their mpaths by the station. The * gate's mpath may or may not be resolved and active. - * - * rcu_head: RCU head to free the table + * @gates_lock: protects updates to known_gates + * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr + * @entries: number of entries in the table */ struct mesh_table { - /* Number of buckets will be 2^N */ - struct hlist_head *hash_buckets; - spinlock_t *hashwlock; /* One per bucket, for add/del */ - unsigned int hash_mask; /* (2^size_order) - 1 */ - __u32 hash_rnd; /* Used for hash generation */ - atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ - void (*free_node) (struct hlist_node *p, bool free_leafs); - int (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl); - int size_order; - struct hlist_head *known_gates; + struct hlist_head known_gates; spinlock_t gates_lock; - - struct rcu_head rcu_head; + struct rhashtable rhead; + atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ }; /* Recent multicast cache */ @@ -170,20 +154,21 @@ struct mesh_table { * @seqnum: mesh sequence number of the frame * @exp_time: expiration time of the entry, in jiffies * @sa: source address of the frame + * @list: hashtable list pointer * * The Recent Multicast Cache keeps track of the latest multicast frames that * have been received by a mesh interface and discards received multicast frames * that are found in the cache. */ struct rmc_entry { - struct list_head list; - u32 seqnum; + struct hlist_node list; unsigned long exp_time; + u32 seqnum; u8 sa[ETH_ALEN]; }; struct mesh_rmc { - struct list_head bucket[RMC_BUCKETS]; + struct hlist_head bucket[RMC_BUCKETS]; u32 idx_mask; }; @@ -234,6 +219,7 @@ void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); +void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); @@ -299,9 +285,6 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, void mesh_sta_cleanup(struct sta_info *sta); /* Private interfaces */ -/* Mesh tables */ -void mesh_mpath_table_grow(void); -void mesh_mpp_table_grow(void); /* Mesh paths */ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, u8 ttl, const u8 *target, u32 target_sn, @@ -309,8 +292,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); -int mesh_pathtbl_init(void); -void mesh_pathtbl_unregister(void); +int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata); +void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata); int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr); void mesh_path_timer(unsigned long data); void mesh_path_flush_by_nexthop(struct sta_info *sta); @@ -319,8 +302,6 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); -extern int mesh_paths_generation; -extern int mpp_paths_generation; #ifdef CONFIG_MAC80211_MESH static inline diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 002244bca948..8f9c3bde835f 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1012,6 +1012,10 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) goto enddiscovery; spin_lock_bh(&mpath->state_lock); + if (mpath->flags & MESH_PATH_DELETED) { + spin_unlock_bh(&mpath->state_lock); + goto enddiscovery; + } mpath->flags &= ~MESH_PATH_REQ_QUEUED; if (preq_node->flags & PREQ_Q_F_START) { if (mpath->flags & MESH_PATH_RESOLVING) { diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 2ba7aa56b11c..6db2ddfa0695 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -18,11 +18,22 @@ #include "ieee80211_i.h" #include "mesh.h" -/* There will be initially 2^INIT_PATHS_SIZE_ORDER buckets */ -#define INIT_PATHS_SIZE_ORDER 2 +static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); -/* Keep the mean chain length below this constant */ -#define MEAN_CHAIN_LEN 2 +static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) +{ + /* Use last four bytes of hw addr as hash index */ + return jhash_1word(*(u32 *)(addr+2), seed); +} + +static const struct rhashtable_params mesh_rht_params = { + .nelem_hint = 2, + .automatic_shrinking = true, + .key_len = ETH_ALEN, + .key_offset = offsetof(struct mesh_path, dst), + .head_offset = offsetof(struct mesh_path, rhash), + .hashfn = mesh_table_hash, +}; static inline bool mpath_expired(struct mesh_path *mpath) { @@ -31,173 +42,36 @@ static inline bool mpath_expired(struct mesh_path *mpath) !(mpath->flags & MESH_PATH_FIXED); } -struct mpath_node { - struct hlist_node list; - struct rcu_head rcu; - /* This indirection allows two different tables to point to the same - * mesh_path structure, useful when resizing - */ - struct mesh_path *mpath; -}; - -static struct mesh_table __rcu *mesh_paths; -static struct mesh_table __rcu *mpp_paths; /* Store paths for MPP&MAP */ - -int mesh_paths_generation; -int mpp_paths_generation; - -/* This lock will have the grow table function as writer and add / delete nodes - * as readers. RCU provides sufficient protection only when reading the table - * (i.e. doing lookups). Adding or adding or removing nodes requires we take - * the read lock or we risk operating on an old table. The write lock is only - * needed when modifying the number of buckets a table. - */ -static DEFINE_RWLOCK(pathtbl_resize_lock); - - -static inline struct mesh_table *resize_dereference_paths( - struct mesh_table __rcu *table) +static void mesh_path_rht_free(void *ptr, void *tblptr) { - return rcu_dereference_protected(table, - lockdep_is_held(&pathtbl_resize_lock)); -} + struct mesh_path *mpath = ptr; + struct mesh_table *tbl = tblptr; -static inline struct mesh_table *resize_dereference_mesh_paths(void) -{ - return resize_dereference_paths(mesh_paths); + mesh_path_free_rcu(tbl, mpath); } -static inline struct mesh_table *resize_dereference_mpp_paths(void) +static struct mesh_table *mesh_table_alloc(void) { - return resize_dereference_paths(mpp_paths); -} - -/* - * CAREFUL -- "tbl" must not be an expression, - * in particular not an rcu_dereference(), since - * it's used twice. So it is illegal to do - * for_each_mesh_entry(rcu_dereference(...), ...) - */ -#define for_each_mesh_entry(tbl, node, i) \ - for (i = 0; i <= tbl->hash_mask; i++) \ - hlist_for_each_entry_rcu(node, &tbl->hash_buckets[i], list) - - -static struct mesh_table *mesh_table_alloc(int size_order) -{ - int i; struct mesh_table *newtbl; newtbl = kmalloc(sizeof(struct mesh_table), GFP_ATOMIC); if (!newtbl) return NULL; - newtbl->hash_buckets = kzalloc(sizeof(struct hlist_head) * - (1 << size_order), GFP_ATOMIC); - - if (!newtbl->hash_buckets) { - kfree(newtbl); - return NULL; - } - - newtbl->hashwlock = kmalloc(sizeof(spinlock_t) * - (1 << size_order), GFP_ATOMIC); - if (!newtbl->hashwlock) { - kfree(newtbl->hash_buckets); - kfree(newtbl); - return NULL; - } - - newtbl->size_order = size_order; - newtbl->hash_mask = (1 << size_order) - 1; + INIT_HLIST_HEAD(&newtbl->known_gates); atomic_set(&newtbl->entries, 0); - get_random_bytes(&newtbl->hash_rnd, - sizeof(newtbl->hash_rnd)); - for (i = 0; i <= newtbl->hash_mask; i++) - spin_lock_init(&newtbl->hashwlock[i]); spin_lock_init(&newtbl->gates_lock); return newtbl; } -static void __mesh_table_free(struct mesh_table *tbl) +static void mesh_table_free(struct mesh_table *tbl) { - kfree(tbl->hash_buckets); - kfree(tbl->hashwlock); + rhashtable_free_and_destroy(&tbl->rhead, + mesh_path_rht_free, tbl); kfree(tbl); } -static void mesh_table_free(struct mesh_table *tbl, bool free_leafs) -{ - struct hlist_head *mesh_hash; - struct hlist_node *p, *q; - struct mpath_node *gate; - int i; - - mesh_hash = tbl->hash_buckets; - for (i = 0; i <= tbl->hash_mask; i++) { - spin_lock_bh(&tbl->hashwlock[i]); - hlist_for_each_safe(p, q, &mesh_hash[i]) { - tbl->free_node(p, free_leafs); - atomic_dec(&tbl->entries); - } - spin_unlock_bh(&tbl->hashwlock[i]); - } - if (free_leafs) { - spin_lock_bh(&tbl->gates_lock); - hlist_for_each_entry_safe(gate, q, - tbl->known_gates, list) { - hlist_del(&gate->list); - kfree(gate); - } - kfree(tbl->known_gates); - spin_unlock_bh(&tbl->gates_lock); - } - - __mesh_table_free(tbl); -} - -static int mesh_table_grow(struct mesh_table *oldtbl, - struct mesh_table *newtbl) -{ - struct hlist_head *oldhash; - struct hlist_node *p, *q; - int i; - - if (atomic_read(&oldtbl->entries) - < MEAN_CHAIN_LEN * (oldtbl->hash_mask + 1)) - return -EAGAIN; - - newtbl->free_node = oldtbl->free_node; - newtbl->copy_node = oldtbl->copy_node; - newtbl->known_gates = oldtbl->known_gates; - atomic_set(&newtbl->entries, atomic_read(&oldtbl->entries)); - - oldhash = oldtbl->hash_buckets; - for (i = 0; i <= oldtbl->hash_mask; i++) - hlist_for_each(p, &oldhash[i]) - if (oldtbl->copy_node(p, newtbl) < 0) - goto errcopy; - - return 0; - -errcopy: - for (i = 0; i <= newtbl->hash_mask; i++) { - hlist_for_each_safe(p, q, &newtbl->hash_buckets[i]) - oldtbl->free_node(p, 0); - } - return -ENOMEM; -} - -static u32 mesh_table_hash(const u8 *addr, struct ieee80211_sub_if_data *sdata, - struct mesh_table *tbl) -{ - /* Use last four bytes of hw addr and interface index as hash index */ - return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, - tbl->hash_rnd) & tbl->hash_mask; -} - - /** * * mesh_path_assign_nexthop - update mesh path next hop @@ -340,23 +214,15 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; - struct hlist_head *bucket; - struct mpath_node *node; - - bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; - hlist_for_each_entry_rcu(node, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - ether_addr_equal(dst, mpath->dst)) { - if (mpath_expired(mpath)) { - spin_lock_bh(&mpath->state_lock); - mpath->flags &= ~MESH_PATH_ACTIVE; - spin_unlock_bh(&mpath->state_lock); - } - return mpath; - } + + mpath = rhashtable_lookup_fast(&tbl->rhead, dst, mesh_rht_params); + + if (mpath && mpath_expired(mpath)) { + spin_lock_bh(&mpath->state_lock); + mpath->flags &= ~MESH_PATH_ACTIVE; + spin_unlock_bh(&mpath->state_lock); } - return NULL; + return mpath; } /** @@ -371,15 +237,52 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, struct mesh_path * mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { - return mpath_lookup(rcu_dereference(mesh_paths), dst, sdata); + return mpath_lookup(sdata->u.mesh.mesh_paths, dst, sdata); } struct mesh_path * mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { - return mpath_lookup(rcu_dereference(mpp_paths), dst, sdata); + return mpath_lookup(sdata->u.mesh.mpp_paths, dst, sdata); } +static struct mesh_path * +__mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) +{ + int i = 0, ret; + struct mesh_path *mpath = NULL; + struct rhashtable_iter iter; + + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); + if (ret) + return NULL; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto err; + + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) + continue; + if (IS_ERR(mpath)) + break; + if (i++ == idx) + break; + } +err: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + + if (IS_ERR(mpath) || !mpath) + return NULL; + + if (mpath_expired(mpath)) { + spin_lock_bh(&mpath->state_lock); + mpath->flags &= ~MESH_PATH_ACTIVE; + spin_unlock_bh(&mpath->state_lock); + } + return mpath; +} /** * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index @@ -393,25 +296,7 @@ mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) struct mesh_path * mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { - struct mesh_table *tbl = rcu_dereference(mesh_paths); - struct mpath_node *node; - int i; - int j = 0; - - for_each_mesh_entry(tbl, node, i) { - if (sdata && node->mpath->sdata != sdata) - continue; - if (j++ == idx) { - if (mpath_expired(node->mpath)) { - spin_lock_bh(&node->mpath->state_lock); - node->mpath->flags &= ~MESH_PATH_ACTIVE; - spin_unlock_bh(&node->mpath->state_lock); - } - return node->mpath; - } - } - - return NULL; + return __mesh_path_lookup_by_idx(sdata->u.mesh.mesh_paths, idx); } /** @@ -426,19 +311,7 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) struct mesh_path * mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { - struct mesh_table *tbl = rcu_dereference(mpp_paths); - struct mpath_node *node; - int i; - int j = 0; - - for_each_mesh_entry(tbl, node, i) { - if (sdata && node->mpath->sdata != sdata) - continue; - if (j++ == idx) - return node->mpath; - } - - return NULL; + return __mesh_path_lookup_by_idx(sdata->u.mesh.mpp_paths, idx); } /** @@ -448,30 +321,26 @@ mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) int mesh_path_add_gate(struct mesh_path *mpath) { struct mesh_table *tbl; - struct mpath_node *gate, *new_gate; int err; rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); - - hlist_for_each_entry_rcu(gate, tbl->known_gates, list) - if (gate->mpath == mpath) { - err = -EEXIST; - goto err_rcu; - } + tbl = mpath->sdata->u.mesh.mesh_paths; - new_gate = kzalloc(sizeof(struct mpath_node), GFP_ATOMIC); - if (!new_gate) { - err = -ENOMEM; + spin_lock_bh(&mpath->state_lock); + if (mpath->is_gate) { + err = -EEXIST; + spin_unlock_bh(&mpath->state_lock); goto err_rcu; } - mpath->is_gate = true; mpath->sdata->u.mesh.num_gates++; - new_gate->mpath = mpath; - spin_lock_bh(&tbl->gates_lock); - hlist_add_head_rcu(&new_gate->list, tbl->known_gates); - spin_unlock_bh(&tbl->gates_lock); + + spin_lock(&tbl->gates_lock); + hlist_add_head_rcu(&mpath->gate_list, &tbl->known_gates); + spin_unlock(&tbl->gates_lock); + + spin_unlock_bh(&mpath->state_lock); + mpath_dbg(mpath->sdata, "Mesh path: Recorded new gate: %pM. %d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); @@ -485,28 +354,22 @@ err_rcu: * mesh_gate_del - remove a mesh gate from the list of known gates * @tbl: table which holds our list of known gates * @mpath: gate mpath - * - * Locking: must be called inside rcu_read_lock() section */ static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) { - struct mpath_node *gate; - struct hlist_node *q; + lockdep_assert_held(&mpath->state_lock); + if (!mpath->is_gate) + return; - hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) { - if (gate->mpath != mpath) - continue; - spin_lock_bh(&tbl->gates_lock); - hlist_del_rcu(&gate->list); - kfree_rcu(gate, rcu); - spin_unlock_bh(&tbl->gates_lock); - mpath->sdata->u.mesh.num_gates--; - mpath->is_gate = false; - mpath_dbg(mpath->sdata, - "Mesh path: Deleted gate: %pM. %d known gates\n", - mpath->dst, mpath->sdata->u.mesh.num_gates); - break; - } + mpath->is_gate = false; + spin_lock_bh(&tbl->gates_lock); + hlist_del_rcu(&mpath->gate_list); + mpath->sdata->u.mesh.num_gates--; + spin_unlock_bh(&tbl->gates_lock); + + mpath_dbg(mpath->sdata, + "Mesh path: Deleted gate: %pM. %d known gates\n", + mpath->dst, mpath->sdata->u.mesh.num_gates); } /** @@ -518,6 +381,31 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) return sdata->u.mesh.num_gates; } +static +struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata, + const u8 *dst, gfp_t gfp_flags) +{ + struct mesh_path *new_mpath; + + new_mpath = kzalloc(sizeof(struct mesh_path), gfp_flags); + if (!new_mpath) + return NULL; + + memcpy(new_mpath->dst, dst, ETH_ALEN); + eth_broadcast_addr(new_mpath->rann_snd_addr); + new_mpath->is_root = false; + new_mpath->sdata = sdata; + new_mpath->flags = 0; + skb_queue_head_init(&new_mpath->frame_queue); + new_mpath->timer.data = (unsigned long) new_mpath; + new_mpath->timer.function = mesh_path_timer; + new_mpath->exp_time = jiffies; + spin_lock_init(&new_mpath->state_lock); + init_timer(&new_mpath->timer); + + return new_mpath; +} + /** * mesh_path_add - allocate and add a new path to the mesh path table * @dst: destination address of the path (ETH_ALEN length) @@ -530,15 +418,9 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) { - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_local *local = sdata->local; struct mesh_table *tbl; struct mesh_path *mpath, *new_mpath; - struct mpath_node *node, *new_node; - struct hlist_head *bucket; - int grow = 0; - int err; - u32 hash_idx; + int ret; if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ @@ -550,129 +432,44 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0) return ERR_PTR(-ENOSPC); - read_lock_bh(&pathtbl_resize_lock); - tbl = resize_dereference_mesh_paths(); - - hash_idx = mesh_table_hash(dst, sdata, tbl); - bucket = &tbl->hash_buckets[hash_idx]; - - spin_lock(&tbl->hashwlock[hash_idx]); - - hlist_for_each_entry(node, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - ether_addr_equal(dst, mpath->dst)) - goto found; - } - - err = -ENOMEM; - new_mpath = kzalloc(sizeof(struct mesh_path), GFP_ATOMIC); + new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC); if (!new_mpath) - goto err_path_alloc; - - new_node = kmalloc(sizeof(struct mpath_node), GFP_ATOMIC); - if (!new_node) - goto err_node_alloc; - - memcpy(new_mpath->dst, dst, ETH_ALEN); - eth_broadcast_addr(new_mpath->rann_snd_addr); - new_mpath->is_root = false; - new_mpath->sdata = sdata; - new_mpath->flags = 0; - skb_queue_head_init(&new_mpath->frame_queue); - new_node->mpath = new_mpath; - new_mpath->timer.data = (unsigned long) new_mpath; - new_mpath->timer.function = mesh_path_timer; - new_mpath->exp_time = jiffies; - spin_lock_init(&new_mpath->state_lock); - init_timer(&new_mpath->timer); - - hlist_add_head_rcu(&new_node->list, bucket); - if (atomic_inc_return(&tbl->entries) >= - MEAN_CHAIN_LEN * (tbl->hash_mask + 1)) - grow = 1; - - mesh_paths_generation++; - - if (grow) { - set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &sdata->work); - } - mpath = new_mpath; -found: - spin_unlock(&tbl->hashwlock[hash_idx]); - read_unlock_bh(&pathtbl_resize_lock); - return mpath; + return ERR_PTR(-ENOMEM); -err_node_alloc: - kfree(new_mpath); -err_path_alloc: - atomic_dec(&sdata->u.mesh.mpaths); - spin_unlock(&tbl->hashwlock[hash_idx]); - read_unlock_bh(&pathtbl_resize_lock); - return ERR_PTR(err); -} + tbl = sdata->u.mesh.mesh_paths; + do { + ret = rhashtable_lookup_insert_fast(&tbl->rhead, + &new_mpath->rhash, + mesh_rht_params); -static void mesh_table_free_rcu(struct rcu_head *rcu) -{ - struct mesh_table *tbl = container_of(rcu, struct mesh_table, rcu_head); + if (ret == -EEXIST) + mpath = rhashtable_lookup_fast(&tbl->rhead, + dst, + mesh_rht_params); - mesh_table_free(tbl, false); -} + } while (unlikely(ret == -EEXIST && !mpath)); -void mesh_mpath_table_grow(void) -{ - struct mesh_table *oldtbl, *newtbl; - - write_lock_bh(&pathtbl_resize_lock); - oldtbl = resize_dereference_mesh_paths(); - newtbl = mesh_table_alloc(oldtbl->size_order + 1); - if (!newtbl) - goto out; - if (mesh_table_grow(oldtbl, newtbl) < 0) { - __mesh_table_free(newtbl); - goto out; - } - rcu_assign_pointer(mesh_paths, newtbl); - - call_rcu(&oldtbl->rcu_head, mesh_table_free_rcu); - - out: - write_unlock_bh(&pathtbl_resize_lock); -} + if (ret && ret != -EEXIST) + return ERR_PTR(ret); -void mesh_mpp_table_grow(void) -{ - struct mesh_table *oldtbl, *newtbl; - - write_lock_bh(&pathtbl_resize_lock); - oldtbl = resize_dereference_mpp_paths(); - newtbl = mesh_table_alloc(oldtbl->size_order + 1); - if (!newtbl) - goto out; - if (mesh_table_grow(oldtbl, newtbl) < 0) { - __mesh_table_free(newtbl); - goto out; + /* At this point either new_mpath was added, or we found a + * matching entry already in the table; in the latter case + * free the unnecessary new entry. + */ + if (ret == -EEXIST) { + kfree(new_mpath); + new_mpath = mpath; } - rcu_assign_pointer(mpp_paths, newtbl); - call_rcu(&oldtbl->rcu_head, mesh_table_free_rcu); - - out: - write_unlock_bh(&pathtbl_resize_lock); + sdata->u.mesh.mesh_paths_generation++; + return new_mpath; } int mpp_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst, const u8 *mpp) { - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_local *local = sdata->local; struct mesh_table *tbl; - struct mesh_path *mpath, *new_mpath; - struct mpath_node *node, *new_node; - struct hlist_head *bucket; - int grow = 0; - int err = 0; - u32 hash_idx; + struct mesh_path *new_mpath; + int ret; if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ @@ -681,65 +478,19 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, if (is_multicast_ether_addr(dst)) return -ENOTSUPP; - err = -ENOMEM; - new_mpath = kzalloc(sizeof(struct mesh_path), GFP_ATOMIC); - if (!new_mpath) - goto err_path_alloc; + new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC); - new_node = kmalloc(sizeof(struct mpath_node), GFP_ATOMIC); - if (!new_node) - goto err_node_alloc; + if (!new_mpath) + return -ENOMEM; - read_lock_bh(&pathtbl_resize_lock); - memcpy(new_mpath->dst, dst, ETH_ALEN); memcpy(new_mpath->mpp, mpp, ETH_ALEN); - new_mpath->sdata = sdata; - new_mpath->flags = 0; - skb_queue_head_init(&new_mpath->frame_queue); - new_node->mpath = new_mpath; - init_timer(&new_mpath->timer); - new_mpath->exp_time = jiffies; - spin_lock_init(&new_mpath->state_lock); - - tbl = resize_dereference_mpp_paths(); + tbl = sdata->u.mesh.mpp_paths; + ret = rhashtable_lookup_insert_fast(&tbl->rhead, + &new_mpath->rhash, + mesh_rht_params); - hash_idx = mesh_table_hash(dst, sdata, tbl); - bucket = &tbl->hash_buckets[hash_idx]; - - spin_lock(&tbl->hashwlock[hash_idx]); - - err = -EEXIST; - hlist_for_each_entry(node, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - ether_addr_equal(dst, mpath->dst)) - goto err_exists; - } - - hlist_add_head_rcu(&new_node->list, bucket); - if (atomic_inc_return(&tbl->entries) >= - MEAN_CHAIN_LEN * (tbl->hash_mask + 1)) - grow = 1; - - spin_unlock(&tbl->hashwlock[hash_idx]); - read_unlock_bh(&pathtbl_resize_lock); - - mpp_paths_generation++; - - if (grow) { - set_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &sdata->work); - } - return 0; - -err_exists: - spin_unlock(&tbl->hashwlock[hash_idx]); - read_unlock_bh(&pathtbl_resize_lock); - kfree(new_node); -err_node_alloc: - kfree(new_mpath); -err_path_alloc: - return err; + sdata->u.mesh.mpp_paths_generation++; + return ret; } @@ -753,17 +504,26 @@ err_path_alloc: */ void mesh_plink_broken(struct sta_info *sta) { - struct mesh_table *tbl; + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct mesh_table *tbl = sdata->u.mesh.mesh_paths; static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; - struct mpath_node *node; - struct ieee80211_sub_if_data *sdata = sta->sdata; - int i; + struct rhashtable_iter iter; + int ret; - rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, node, i) { - mpath = node->mpath; + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); + if (ret) + return; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto out; + + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) + continue; + if (IS_ERR(mpath)) + break; if (rcu_access_pointer(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && !(mpath->flags & MESH_PATH_FIXED)) { @@ -777,33 +537,30 @@ void mesh_plink_broken(struct sta_info *sta) WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast); } } - rcu_read_unlock(); +out: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } -static void mesh_path_node_reclaim(struct rcu_head *rp) +static void mesh_path_free_rcu(struct mesh_table *tbl, + struct mesh_path *mpath) { - struct mpath_node *node = container_of(rp, struct mpath_node, rcu); + struct ieee80211_sub_if_data *sdata = mpath->sdata; - del_timer_sync(&node->mpath->timer); - kfree(node->mpath); - kfree(node); + spin_lock_bh(&mpath->state_lock); + mpath->flags |= MESH_PATH_RESOLVING | MESH_PATH_DELETED; + mesh_gate_del(tbl, mpath); + spin_unlock_bh(&mpath->state_lock); + del_timer_sync(&mpath->timer); + atomic_dec(&sdata->u.mesh.mpaths); + atomic_dec(&tbl->entries); + kfree_rcu(mpath, rcu); } -/* needs to be called with the corresponding hashwlock taken */ -static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) +static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath) { - struct mesh_path *mpath = node->mpath; - struct ieee80211_sub_if_data *sdata = node->mpath->sdata; - - spin_lock(&mpath->state_lock); - mpath->flags |= MESH_PATH_RESOLVING; - if (mpath->is_gate) - mesh_gate_del(tbl, mpath); - hlist_del_rcu(&node->list); - call_rcu(&node->rcu, mesh_path_node_reclaim); - spin_unlock(&mpath->state_lock); - atomic_dec(&sdata->u.mesh.mpaths); - atomic_dec(&tbl->entries); + rhashtable_remove_fast(&tbl->rhead, &mpath->rhash, mesh_rht_params); + mesh_path_free_rcu(tbl, mpath); } /** @@ -819,65 +576,88 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) */ void mesh_path_flush_by_nexthop(struct sta_info *sta) { - struct mesh_table *tbl; + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct mesh_table *tbl = sdata->u.mesh.mesh_paths; struct mesh_path *mpath; - struct mpath_node *node; - int i; + struct rhashtable_iter iter; + int ret; - rcu_read_lock(); - read_lock_bh(&pathtbl_resize_lock); - tbl = resize_dereference_mesh_paths(); - for_each_mesh_entry(tbl, node, i) { - mpath = node->mpath; - if (rcu_access_pointer(mpath->next_hop) == sta) { - spin_lock(&tbl->hashwlock[i]); - __mesh_path_del(tbl, node); - spin_unlock(&tbl->hashwlock[i]); - } + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); + if (ret) + return; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto out; + + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) + continue; + if (IS_ERR(mpath)) + break; + + if (rcu_access_pointer(mpath->next_hop) == sta) + __mesh_path_del(tbl, mpath); } - read_unlock_bh(&pathtbl_resize_lock); - rcu_read_unlock(); +out: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, const u8 *proxy) { - struct mesh_table *tbl; - struct mesh_path *mpp; - struct mpath_node *node; - int i; + struct mesh_table *tbl = sdata->u.mesh.mpp_paths; + struct mesh_path *mpath; + struct rhashtable_iter iter; + int ret; - rcu_read_lock(); - read_lock_bh(&pathtbl_resize_lock); - tbl = resize_dereference_mpp_paths(); - for_each_mesh_entry(tbl, node, i) { - mpp = node->mpath; - if (ether_addr_equal(mpp->mpp, proxy)) { - spin_lock(&tbl->hashwlock[i]); - __mesh_path_del(tbl, node); - spin_unlock(&tbl->hashwlock[i]); - } + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); + if (ret) + return; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto out; + + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) + continue; + if (IS_ERR(mpath)) + break; + + if (ether_addr_equal(mpath->mpp, proxy)) + __mesh_path_del(tbl, mpath); } - read_unlock_bh(&pathtbl_resize_lock); - rcu_read_unlock(); +out: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } -static void table_flush_by_iface(struct mesh_table *tbl, - struct ieee80211_sub_if_data *sdata) +static void table_flush_by_iface(struct mesh_table *tbl) { struct mesh_path *mpath; - struct mpath_node *node; - int i; + struct rhashtable_iter iter; + int ret; + + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); + if (ret) + return; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto out; - WARN_ON(!rcu_read_lock_held()); - for_each_mesh_entry(tbl, node, i) { - mpath = node->mpath; - if (mpath->sdata != sdata) + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) continue; - spin_lock_bh(&tbl->hashwlock[i]); - __mesh_path_del(tbl, node); - spin_unlock_bh(&tbl->hashwlock[i]); + if (IS_ERR(mpath)) + break; + __mesh_path_del(tbl, mpath); } +out: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } /** @@ -890,16 +670,8 @@ static void table_flush_by_iface(struct mesh_table *tbl, */ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) { - struct mesh_table *tbl; - - rcu_read_lock(); - read_lock_bh(&pathtbl_resize_lock); - tbl = resize_dereference_mesh_paths(); - table_flush_by_iface(tbl, sdata); - tbl = resize_dereference_mpp_paths(); - table_flush_by_iface(tbl, sdata); - read_unlock_bh(&pathtbl_resize_lock); - rcu_read_unlock(); + table_flush_by_iface(sdata->u.mesh.mesh_paths); + table_flush_by_iface(sdata->u.mesh.mpp_paths); } /** @@ -911,37 +683,25 @@ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) * * Returns: 0 if successful */ -static int table_path_del(struct mesh_table __rcu *rcu_tbl, +static int table_path_del(struct mesh_table *tbl, struct ieee80211_sub_if_data *sdata, const u8 *addr) { - struct mesh_table *tbl; struct mesh_path *mpath; - struct mpath_node *node; - struct hlist_head *bucket; - int hash_idx; - int err = 0; - - tbl = resize_dereference_paths(rcu_tbl); - hash_idx = mesh_table_hash(addr, sdata, tbl); - bucket = &tbl->hash_buckets[hash_idx]; - - spin_lock(&tbl->hashwlock[hash_idx]); - hlist_for_each_entry(node, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - ether_addr_equal(addr, mpath->dst)) { - __mesh_path_del(tbl, node); - goto enddel; - } + + rcu_read_lock(); + mpath = rhashtable_lookup_fast(&tbl->rhead, addr, mesh_rht_params); + if (!mpath) { + rcu_read_unlock(); + return -ENXIO; } - err = -ENXIO; -enddel: - spin_unlock(&tbl->hashwlock[hash_idx]); - return err; + __mesh_path_del(tbl, mpath); + rcu_read_unlock(); + return 0; } + /** * mesh_path_del - delete a mesh path from the table * @@ -952,36 +712,13 @@ enddel: */ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) { - int err = 0; + int err; /* flush relevant mpp entries first */ mpp_flush_by_proxy(sdata, addr); - read_lock_bh(&pathtbl_resize_lock); - err = table_path_del(mesh_paths, sdata, addr); - mesh_paths_generation++; - read_unlock_bh(&pathtbl_resize_lock); - - return err; -} - -/** - * mpp_path_del - delete a mesh proxy path from the table - * - * @addr: addr address (ETH_ALEN length) - * @sdata: local subif - * - * Returns: 0 if successful - */ -static int mpp_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) -{ - int err = 0; - - read_lock_bh(&pathtbl_resize_lock); - err = table_path_del(mpp_paths, sdata, addr); - mpp_paths_generation++; - read_unlock_bh(&pathtbl_resize_lock); - + err = table_path_del(sdata->u.mesh.mesh_paths, sdata, addr); + sdata->u.mesh.mesh_paths_generation++; return err; } @@ -1015,39 +752,30 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) struct ieee80211_sub_if_data *sdata = mpath->sdata; struct mesh_table *tbl; struct mesh_path *from_mpath = mpath; - struct mpath_node *gate = NULL; + struct mesh_path *gate; bool copy = false; - struct hlist_head *known_gates; - - rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); - known_gates = tbl->known_gates; - rcu_read_unlock(); - if (!known_gates) - return -EHOSTUNREACH; + tbl = sdata->u.mesh.mesh_paths; - hlist_for_each_entry_rcu(gate, known_gates, list) { - if (gate->mpath->sdata != sdata) - continue; - - if (gate->mpath->flags & MESH_PATH_ACTIVE) { - mpath_dbg(sdata, "Forwarding to %pM\n", gate->mpath->dst); - mesh_path_move_to_queue(gate->mpath, from_mpath, copy); - from_mpath = gate->mpath; + rcu_read_lock(); + hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { + if (gate->flags & MESH_PATH_ACTIVE) { + mpath_dbg(sdata, "Forwarding to %pM\n", gate->dst); + mesh_path_move_to_queue(gate, from_mpath, copy); + from_mpath = gate; copy = true; } else { mpath_dbg(sdata, "Not forwarding to %pM (flags %#x)\n", - gate->mpath->dst, gate->mpath->flags); + gate->dst, gate->flags); } } - hlist_for_each_entry_rcu(gate, known_gates, list) - if (gate->mpath->sdata == sdata) { - mpath_dbg(sdata, "Sending to %pM\n", gate->mpath->dst); - mesh_path_tx_pending(gate->mpath); - } + hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { + mpath_dbg(sdata, "Sending to %pM\n", gate->dst); + mesh_path_tx_pending(gate); + } + rcu_read_unlock(); return (from_mpath == mpath) ? -EHOSTUNREACH : 0; } @@ -1104,118 +832,73 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mesh_path_tx_pending(mpath); } -static void mesh_path_node_free(struct hlist_node *p, bool free_leafs) -{ - struct mesh_path *mpath; - struct mpath_node *node = hlist_entry(p, struct mpath_node, list); - mpath = node->mpath; - hlist_del_rcu(p); - if (free_leafs) { - del_timer_sync(&mpath->timer); - kfree(mpath); - } - kfree(node); -} - -static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) -{ - struct mesh_path *mpath; - struct mpath_node *node, *new_node; - u32 hash_idx; - - new_node = kmalloc(sizeof(struct mpath_node), GFP_ATOMIC); - if (new_node == NULL) - return -ENOMEM; - - node = hlist_entry(p, struct mpath_node, list); - mpath = node->mpath; - new_node->mpath = mpath; - hash_idx = mesh_table_hash(mpath->dst, mpath->sdata, newtbl); - hlist_add_head(&new_node->list, - &newtbl->hash_buckets[hash_idx]); - return 0; -} - -int mesh_pathtbl_init(void) +int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) { struct mesh_table *tbl_path, *tbl_mpp; int ret; - tbl_path = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); + tbl_path = mesh_table_alloc(); if (!tbl_path) return -ENOMEM; - tbl_path->free_node = &mesh_path_node_free; - tbl_path->copy_node = &mesh_path_node_copy; - tbl_path->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); - if (!tbl_path->known_gates) { - ret = -ENOMEM; - goto free_path; - } - INIT_HLIST_HEAD(tbl_path->known_gates); - - tbl_mpp = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); + tbl_mpp = mesh_table_alloc(); if (!tbl_mpp) { ret = -ENOMEM; goto free_path; } - tbl_mpp->free_node = &mesh_path_node_free; - tbl_mpp->copy_node = &mesh_path_node_copy; - tbl_mpp->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); - if (!tbl_mpp->known_gates) { - ret = -ENOMEM; - goto free_mpp; - } - INIT_HLIST_HEAD(tbl_mpp->known_gates); - /* Need no locking since this is during init */ - RCU_INIT_POINTER(mesh_paths, tbl_path); - RCU_INIT_POINTER(mpp_paths, tbl_mpp); + rhashtable_init(&tbl_path->rhead, &mesh_rht_params); + rhashtable_init(&tbl_mpp->rhead, &mesh_rht_params); + + sdata->u.mesh.mesh_paths = tbl_path; + sdata->u.mesh.mpp_paths = tbl_mpp; return 0; -free_mpp: - mesh_table_free(tbl_mpp, true); free_path: - mesh_table_free(tbl_path, true); + mesh_table_free(tbl_path); return ret; } -void mesh_path_expire(struct ieee80211_sub_if_data *sdata) +static +void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, + struct mesh_table *tbl) { - struct mesh_table *tbl; struct mesh_path *mpath; - struct mpath_node *node; - int i; + struct rhashtable_iter iter; + int ret; - rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, node, i) { - if (node->mpath->sdata != sdata) + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_KERNEL); + if (ret) + return; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto out; + + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) continue; - mpath = node->mpath; + if (IS_ERR(mpath)) + break; if ((!(mpath->flags & MESH_PATH_RESOLVING)) && (!(mpath->flags & MESH_PATH_FIXED)) && time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) - mesh_path_del(mpath->sdata, mpath->dst); - } - - tbl = rcu_dereference(mpp_paths); - for_each_mesh_entry(tbl, node, i) { - if (node->mpath->sdata != sdata) - continue; - mpath = node->mpath; - if ((!(mpath->flags & MESH_PATH_FIXED)) && - time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) - mpp_path_del(mpath->sdata, mpath->dst); + __mesh_path_del(tbl, mpath); } +out: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); +} - rcu_read_unlock(); +void mesh_path_expire(struct ieee80211_sub_if_data *sdata) +{ + mesh_path_tbl_expire(sdata, sdata->u.mesh.mesh_paths); + mesh_path_tbl_expire(sdata, sdata->u.mesh.mpp_paths); } -void mesh_pathtbl_unregister(void) +void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata) { - /* no need for locking during exit path */ - mesh_table_free(rcu_dereference_protected(mesh_paths, 1), true); - mesh_table_free(rcu_dereference_protected(mpp_paths, 1), true); + mesh_table_free(sdata->u.mesh.mesh_paths); + mesh_table_free(sdata->u.mesh.mpp_paths); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a07e93c21c9e..79f2a0a13db8 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -61,7 +61,7 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata, s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold; return rssi_threshold == 0 || (sta && - (s8)-ewma_signal_read(&sta->rx_stats.avg_signal) > + (s8)-ewma_signal_read(&sta->rx_stats_avg.signal) > rssi_threshold); } @@ -93,18 +93,18 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta) static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; struct sta_info *sta; u32 erp_rates = 0, changed = 0; int i; bool short_slot = false; - if (band == IEEE80211_BAND_5GHZ) { + if (band == NL80211_BAND_5GHZ) { /* (IEEE 802.11-2012 19.4.5) */ short_slot = true; goto out; - } else if (band != IEEE80211_BAND_2GHZ) + } else if (band != NL80211_BAND_2GHZ) goto out; for (i = 0; i < sband->n_bitrates; i++) @@ -247,7 +247,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.self_prot.action_code = action; if (action != WLAN_SP_MESH_PEERING_CLOSE) { - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); /* capability info */ pos = skb_put(skb, 2); @@ -331,7 +331,9 @@ free: * * @sta: mesh peer link to deactivate * - * All mesh paths with this peer as next hop will be flushed + * Mesh paths with this peer as next hop should be flushed + * by the caller outside of plink_lock. + * * Returns beacon changed flag if the beacon content changed. * * Locking: the caller must hold sta->mesh->plink_lock @@ -346,7 +348,6 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta) if (sta->mesh->plink_state == NL80211_PLINK_ESTAB) changed = mesh_plink_dec_estab_count(sdata); sta->mesh->plink_state = NL80211_PLINK_BLOCKED; - mesh_path_flush_by_nexthop(sta); ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, @@ -374,6 +375,7 @@ u32 mesh_plink_deactivate(struct sta_info *sta) sta->sta.addr, sta->mesh->llid, sta->mesh->plid, sta->mesh->reason); spin_unlock_bh(&sta->mesh->plink_lock); + mesh_path_flush_by_nexthop(sta); return changed; } @@ -383,7 +385,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, bool insert) { struct ieee80211_local *local = sdata->local; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; u32 rates, basic_rates = 0, changed = 0; enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; @@ -748,6 +750,7 @@ u32 mesh_plink_block(struct sta_info *sta) changed = __mesh_plink_deactivate(sta); sta->mesh->plink_state = NL80211_PLINK_BLOCKED; spin_unlock_bh(&sta->mesh->plink_lock); + mesh_path_flush_by_nexthop(sta); return changed; } @@ -797,6 +800,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; enum ieee80211_self_protected_actioncode action = 0; u32 changed = 0; + bool flush = false; mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr, mplstates[sta->mesh->plink_state], mplevents[event]); @@ -885,6 +889,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, changed |= mesh_set_short_slot_time(sdata); mesh_plink_close(sdata, sta, event); action = WLAN_SP_MESH_PEERING_CLOSE; + flush = true; break; case OPN_ACPT: action = WLAN_SP_MESH_PEERING_CONFIRM; @@ -916,6 +921,8 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, break; } spin_unlock_bh(&sta->mesh->plink_lock); + if (flush) + mesh_path_flush_by_nexthop(sta); if (action) { mesh_plink_frame_tx(sdata, sta, action, sta->sta.addr, sta->mesh->llid, sta->mesh->plid, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 281b8d6e5109..8d426f637f58 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -122,15 +122,16 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (unlikely(!sdata->u.mgd.associated)) + if (unlikely(!ifmgd->associated)) return; - ifmgd->probe_send_count = 0; + if (ifmgd->probe_send_count) + ifmgd->probe_send_count = 0; if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) return; - mod_timer(&sdata->u.mgd.conn_mon_timer, + mod_timer(&ifmgd->conn_mon_timer, round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); } @@ -660,7 +661,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) capab = WLAN_CAPABILITY_ESS; - if (sband->band == IEEE80211_BAND_2GHZ) { + if (sband->band == NL80211_BAND_2GHZ) { capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; } @@ -1099,7 +1100,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss = ifmgd->associated; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; - enum ieee80211_band current_band; + enum nl80211_band current_band; struct ieee80211_csa_ie csa_ie; struct ieee80211_channel_switch ch_switch; int res; @@ -1256,11 +1257,11 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, default: WARN_ON_ONCE(1); /* fall through */ - case IEEE80211_BAND_2GHZ: - case IEEE80211_BAND_60GHZ: + case NL80211_BAND_2GHZ: + case NL80211_BAND_60GHZ: chan_increment = 1; break; - case IEEE80211_BAND_5GHZ: + case NL80211_BAND_5GHZ: chan_increment = 4; break; } @@ -1860,7 +1861,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, } use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME); - if (ieee80211_get_sdata_band(sdata) == IEEE80211_BAND_5GHZ) + if (ieee80211_get_sdata_band(sdata) == NL80211_BAND_5GHZ) use_short_slot = true; if (use_protection != bss_conf->use_cts_prot) { @@ -2216,6 +2217,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) const u8 *ssid; u8 *dst = ifmgd->associated->bssid; u8 unicast_limit = max(1, max_probe_tries - 3); + struct sta_info *sta; /* * Try sending broadcast probe requests for the last three @@ -2234,6 +2236,14 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) */ ifmgd->probe_send_count++; + if (dst) { + mutex_lock(&sdata->local->sta_mtx); + sta = sta_info_get(sdata, dst); + if (!WARN_ON(!sta)) + ieee80211_check_fast_rx(sta); + mutex_unlock(&sdata->local->sta_mtx); + } + if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { ifmgd->nullfunc_failed = false; ieee80211_send_nullfunc(sdata->local, sdata, false); @@ -2389,6 +2399,11 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) return; } + /* AP is probably out of range (or not reachable for another reason) so + * remove the bss struct for that AP. + */ + cfg80211_unlink_bss(local->hw.wiphy, ifmgd->associated); + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, true, frame_buf); @@ -4365,7 +4380,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.basic_rates = basic_rates; /* cf. IEEE 802.11 9.2.12 */ - if (cbss->channel->band == IEEE80211_BAND_2GHZ && + if (cbss->channel->band == NL80211_BAND_2GHZ && have_higher_than_11mbit) sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; else diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 0be0aadfc559..88e6ebbbe24f 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -75,8 +75,6 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, if (!sta) return; - sta->rx_stats.last_rx = jiffies; - /* Add only mandatory rates for now */ sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index a4e2f4e67f94..206698bc93f4 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -287,7 +287,7 @@ static void __rate_control_send_low(struct ieee80211_hw *hw, u32 rate_flags = ieee80211_chandef_rate_flags(&hw->conf.chandef); - if ((sband->band == IEEE80211_BAND_2GHZ) && + if ((sband->band == NL80211_BAND_2GHZ) && (info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) rate_flags |= IEEE80211_RATE_ERP_G; diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 624fe5b81615..8d3260785b94 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -96,9 +96,9 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta) { #ifdef CONFIG_MAC80211_DEBUGFS struct rate_control_ref *ref = sta->rate_ctrl; - if (ref && sta->debugfs.dir && ref->ops->add_sta_debugfs) + if (ref && sta->debugfs_dir && ref->ops->add_sta_debugfs) ref->ops->add_sta_debugfs(ref->priv, sta->rate_ctrl_priv, - sta->debugfs.dir); + sta->debugfs_dir); #endif } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index b54f398cda5d..14c5ba3a1b1c 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -436,7 +436,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, static void -calc_rate_durations(enum ieee80211_band band, +calc_rate_durations(enum nl80211_band band, struct minstrel_rate *d, struct ieee80211_rate *rate, struct cfg80211_chan_def *chandef) @@ -579,7 +579,7 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) if (!mi) return NULL; - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { sband = hw->wiphy->bands[i]; if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; @@ -621,7 +621,7 @@ minstrel_init_cck_rates(struct minstrel_priv *mp) u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef); int i, j; - sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ]; + sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ]; if (!sband) return; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 370d677b547b..30fbabf4bcbc 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -883,6 +883,59 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, ratetbl->rate[offset].flags = flags; } +static inline int +minstrel_ht_get_prob_ewma(struct minstrel_ht_sta *mi, int rate) +{ + int group = rate / MCS_GROUP_RATES; + rate %= MCS_GROUP_RATES; + return mi->groups[group].rates[rate].prob_ewma; +} + +static int +minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) +{ + int group = mi->max_prob_rate / MCS_GROUP_RATES; + const struct mcs_group *g = &minstrel_mcs_groups[group]; + int rate = mi->max_prob_rate % MCS_GROUP_RATES; + + /* Disable A-MSDU if max_prob_rate is bad */ + if (mi->groups[group].rates[rate].prob_ewma < MINSTREL_FRAC(50, 100)) + return 1; + + /* If the rate is slower than single-stream MCS1, make A-MSDU limit small */ + if (g->duration[rate] > MCS_DURATION(1, 0, 52)) + return 500; + + /* + * If the rate is slower than single-stream MCS4, limit A-MSDU to usual + * data packet size + */ + if (g->duration[rate] > MCS_DURATION(1, 0, 104)) + return 1600; + + /* + * If the rate is slower than single-stream MCS7, or if the max throughput + * rate success probability is less than 75%, limit A-MSDU to twice the usual + * data packet size + */ + if (g->duration[rate] > MCS_DURATION(1, 0, 260) || + (minstrel_ht_get_prob_ewma(mi, mi->max_tp_rate[0]) < + MINSTREL_FRAC(75, 100))) + return 3200; + + /* + * HT A-MPDU limits maximum MPDU size under BA agreement to 4095 bytes. + * Since aggregation sessions are started/stopped without txq flush, use + * the limit here to avoid the complexity of having to de-aggregate + * packets in the queue. + */ + if (!mi->sta->vht_cap.vht_supported) + return IEEE80211_MAX_MPDU_LEN_HT_BA; + + /* unlimited */ + return 0; +} + static void minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { @@ -907,6 +960,7 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate); } + mi->sta->max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi); rates->rate[i].idx = -1; rate_control_set_rates(mp->hw, mi->sta, rates); } @@ -924,6 +978,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) struct minstrel_rate_stats *mrs; struct minstrel_mcs_group_data *mg; unsigned int sample_dur, sample_group, cur_max_tp_streams; + int tp_rate1, tp_rate2; int sample_idx = 0; if (mi->sample_wait > 0) { @@ -945,14 +1000,22 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mrs = &mg->rates[sample_idx]; sample_idx += sample_group * MCS_GROUP_RATES; + /* Set tp_rate1, tp_rate2 to the highest / second highest max_tp_rate */ + if (minstrel_get_duration(mi->max_tp_rate[0]) > + minstrel_get_duration(mi->max_tp_rate[1])) { + tp_rate1 = mi->max_tp_rate[1]; + tp_rate2 = mi->max_tp_rate[0]; + } else { + tp_rate1 = mi->max_tp_rate[0]; + tp_rate2 = mi->max_tp_rate[1]; + } + /* * Sampling might add some overhead (RTS, no aggregation) - * to the frame. Hence, don't use sampling for the currently - * used rates. + * to the frame. Hence, don't use sampling for the highest currently + * used highest throughput or probability rate. */ - if (sample_idx == mi->max_tp_rate[0] || - sample_idx == mi->max_tp_rate[1] || - sample_idx == mi->max_prob_rate) + if (sample_idx == mi->max_tp_rate[0] || sample_idx == mi->max_prob_rate) return -1; /* @@ -967,10 +1030,10 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) * if the link is working perfectly. */ - cur_max_tp_streams = minstrel_mcs_groups[mi->max_tp_rate[0] / + cur_max_tp_streams = minstrel_mcs_groups[tp_rate1 / MCS_GROUP_RATES].streams; sample_dur = minstrel_get_duration(sample_idx); - if (sample_dur >= minstrel_get_duration(mi->max_tp_rate[1]) && + if (sample_dur >= minstrel_get_duration(tp_rate2) && (cur_max_tp_streams - 1 < minstrel_mcs_groups[sample_group].streams || sample_dur >= minstrel_get_duration(mi->max_prob_rate))) { @@ -1074,7 +1137,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, { int i; - if (sband->band != IEEE80211_BAND_2GHZ) + if (sband->band != NL80211_BAND_2GHZ) return; if (!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) @@ -1272,7 +1335,7 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) int max_rates = 0; int i; - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { sband = hw->wiphy->bands[i]; if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index dc27becb9b71..5e65e838992a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -322,7 +322,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (status->flag & RX_FLAG_5MHZ) channel_flags |= IEEE80211_CHAN_QUARTER; - if (status->band == IEEE80211_BAND_5GHZ) + if (status->band == NL80211_BAND_5GHZ) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ; else if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ; @@ -722,8 +722,8 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) return -1; } -static int iwl80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, - struct sk_buff *skb) +static int ieee80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, + struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; __le16 fc; @@ -1319,13 +1319,52 @@ int ieee80211_sta_ps_transition(struct ieee80211_sta *pubsta, bool start) } EXPORT_SYMBOL(ieee80211_sta_ps_transition); +void ieee80211_sta_pspoll(struct ieee80211_sta *pubsta) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + + if (test_sta_flag(sta, WLAN_STA_SP)) + return; + + if (!test_sta_flag(sta, WLAN_STA_PS_DRIVER)) + ieee80211_sta_ps_deliver_poll_response(sta); + else + set_sta_flag(sta, WLAN_STA_PSPOLL); +} +EXPORT_SYMBOL(ieee80211_sta_pspoll); + +void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *pubsta, u8 tid) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + u8 ac = ieee802_1d_to_ac[tid & 7]; + + /* + * If this AC is not trigger-enabled do nothing. + * + * NB: This could/should check a separate bitmap of trigger- + * enabled queues, but for now we only implement uAPSD w/o + * TSPEC changes to the ACs, so they're always the same. + */ + if (!(sta->sta.uapsd_queues & BIT(ac))) + return; + + /* if we are in a service period, do nothing */ + if (test_sta_flag(sta, WLAN_STA_SP)) + return; + + if (!test_sta_flag(sta, WLAN_STA_PS_DRIVER)) + ieee80211_sta_ps_deliver_uapsd(sta); + else + set_sta_flag(sta, WLAN_STA_UAPSD); +} +EXPORT_SYMBOL(ieee80211_sta_uapsd_trigger); + static ieee80211_rx_result debug_noinline ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_hdr *hdr = (void *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - int tid, ac; if (!rx->sta) return RX_CONTINUE; @@ -1351,12 +1390,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (unlikely(ieee80211_is_pspoll(hdr->frame_control))) { - if (!test_sta_flag(rx->sta, WLAN_STA_SP)) { - if (!test_sta_flag(rx->sta, WLAN_STA_PS_DRIVER)) - ieee80211_sta_ps_deliver_poll_response(rx->sta); - else - set_sta_flag(rx->sta, WLAN_STA_PSPOLL); - } + ieee80211_sta_pspoll(&rx->sta->sta); /* Free PS Poll skb here instead of returning RX_DROP that would * count as an dropped frame. */ @@ -1368,27 +1402,11 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx) ieee80211_has_pm(hdr->frame_control) && (ieee80211_is_data_qos(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control))) { - tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; - ac = ieee802_1d_to_ac[tid & 7]; + u8 tid; - /* - * If this AC is not trigger-enabled do nothing. - * - * NB: This could/should check a separate bitmap of trigger- - * enabled queues, but for now we only implement uAPSD w/o - * TSPEC changes to the ACs, so they're always the same. - */ - if (!(rx->sta->sta.uapsd_queues & BIT(ac))) - return RX_CONTINUE; - - /* if we are in a service period, do nothing */ - if (test_sta_flag(rx->sta, WLAN_STA_SP)) - return RX_CONTINUE; + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; - if (!test_sta_flag(rx->sta, WLAN_STA_PS_DRIVER)) - ieee80211_sta_ps_deliver_uapsd(rx->sta); - else - set_sta_flag(rx->sta, WLAN_STA_UAPSD); + ieee80211_sta_uapsd_trigger(&rx->sta->sta, tid); } return RX_CONTINUE; @@ -1421,16 +1439,9 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control) && - !is_multicast_ether_addr(hdr->addr1)) { - sta->rx_stats.last_rate_idx = - status->rate_idx; - sta->rx_stats.last_rate_flag = - status->flag; - sta->rx_stats.last_rate_vht_flag = - status->vht_flag; - sta->rx_stats.last_rate_vht_nss = - status->vht_nss; - } + !is_multicast_ether_addr(hdr->addr1)) + sta->rx_stats.last_rate = + sta_stats_encode_rate(status); } } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { sta->rx_stats.last_rx = jiffies; @@ -1440,22 +1451,22 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * match the current local configuration when processed. */ sta->rx_stats.last_rx = jiffies; - if (ieee80211_is_data(hdr->frame_control)) { - sta->rx_stats.last_rate_idx = status->rate_idx; - sta->rx_stats.last_rate_flag = status->flag; - sta->rx_stats.last_rate_vht_flag = status->vht_flag; - sta->rx_stats.last_rate_vht_nss = status->vht_nss; - } + if (ieee80211_is_data(hdr->frame_control)) + sta->rx_stats.last_rate = sta_stats_encode_rate(status); } if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_sta_rx_notify(rx->sdata, hdr); sta->rx_stats.fragments++; + + u64_stats_update_begin(&rx->sta->rx_stats.syncp); sta->rx_stats.bytes += rx->skb->len; + u64_stats_update_end(&rx->sta->rx_stats.syncp); + if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { sta->rx_stats.last_signal = status->signal; - ewma_signal_add(&sta->rx_stats.avg_signal, -status->signal); + ewma_signal_add(&sta->rx_stats_avg.signal, -status->signal); } if (status->chains) { @@ -1467,7 +1478,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) continue; sta->rx_stats.chain_signal_last[i] = signal; - ewma_signal_add(&sta->rx_stats.chain_signal_avg[i], + ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], -signal); } } @@ -1586,7 +1597,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_has_protected(fc) && rx->sta->cipher_scheme) { cs = rx->sta->cipher_scheme; - keyid = iwl80211_get_cs_keyid(cs, rx->skb); + keyid = ieee80211_get_cs_keyid(cs, rx->skb); if (unlikely(keyid < 0)) return RX_DROP_UNUSABLE; } @@ -1670,7 +1681,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) hdrlen = ieee80211_hdrlen(fc); if (cs) { - keyidx = iwl80211_get_cs_keyid(cs, rx->skb); + keyidx = ieee80211_get_cs_keyid(cs, rx->skb); if (unlikely(keyidx < 0)) return RX_DROP_UNUSABLE; @@ -2129,6 +2140,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) ieee80211_rx_stats(dev, skb->len); + if (rx->sta) { + /* The seqno index has the same property as needed + * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS + * for non-QoS-data frames. Here we know it's a data + * frame, so count MSDUs. + */ + u64_stats_update_begin(&rx->sta->rx_stats.syncp); + rx->sta->rx_stats.msdu[rx->seqno_idx]++; + u64_stats_update_end(&rx->sta->rx_stats.syncp); + } + if ((sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && @@ -2415,15 +2437,6 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return RX_DROP_MONITOR; - if (rx->sta) { - /* The seqno index has the same property as needed - * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS - * for non-QoS-data frames. Here we know it's a data - * frame, so count MSDUs. - */ - rx->sta->rx_stats.msdu[rx->seqno_idx]++; - } - /* * Send unexpected-4addr-frame event to hostapd. For older versions, * also drop the frame to cooked monitor interfaces. @@ -2474,14 +2487,14 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) rx->skb->dev = dev; - if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && + if (!ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS) && + local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && !is_multicast_ether_addr( ((struct ethhdr *)rx->skb->data)->h_dest) && (!local->scanning && - !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))) { - mod_timer(&local->dynamic_ps_timer, jiffies + - msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); - } + !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))) + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); ieee80211_deliver_skb(rx); @@ -2828,7 +2841,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) switch (mgmt->u.action.u.measurement.action_code) { case WLAN_ACTION_SPCT_MSR_REQ: - if (status->band != IEEE80211_BAND_5GHZ) + if (status->band != NL80211_BAND_5GHZ) break; if (len < (IEEE80211_MIN_ACTION_SIZE + @@ -3201,7 +3214,7 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, res = rxh(rx); \ if (res != RX_CONTINUE) \ goto rxh_next; \ - } while (0); + } while (0) /* Lock here to avoid hitting all of the data used in the RX * path (e.g. key data, station data, ...) concurrently when @@ -3219,30 +3232,30 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, */ rx->skb = skb; - CALL_RXH(ieee80211_rx_h_check_more_data) - CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll) - CALL_RXH(ieee80211_rx_h_sta_process) - CALL_RXH(ieee80211_rx_h_decrypt) - CALL_RXH(ieee80211_rx_h_defragment) - CALL_RXH(ieee80211_rx_h_michael_mic_verify) + CALL_RXH(ieee80211_rx_h_check_more_data); + CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll); + CALL_RXH(ieee80211_rx_h_sta_process); + CALL_RXH(ieee80211_rx_h_decrypt); + CALL_RXH(ieee80211_rx_h_defragment); + CALL_RXH(ieee80211_rx_h_michael_mic_verify); /* must be after MMIC verify so header is counted in MPDU mic */ #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&rx->sdata->vif)) CALL_RXH(ieee80211_rx_h_mesh_fwding); #endif - CALL_RXH(ieee80211_rx_h_amsdu) - CALL_RXH(ieee80211_rx_h_data) + CALL_RXH(ieee80211_rx_h_amsdu); + CALL_RXH(ieee80211_rx_h_data); /* special treatment -- needs the queue */ res = ieee80211_rx_h_ctrl(rx, frames); if (res != RX_CONTINUE) goto rxh_next; - CALL_RXH(ieee80211_rx_h_mgmt_check) - CALL_RXH(ieee80211_rx_h_action) - CALL_RXH(ieee80211_rx_h_userspace_mgmt) - CALL_RXH(ieee80211_rx_h_action_return) - CALL_RXH(ieee80211_rx_h_mgmt) + CALL_RXH(ieee80211_rx_h_mgmt_check); + CALL_RXH(ieee80211_rx_h_action); + CALL_RXH(ieee80211_rx_h_userspace_mgmt); + CALL_RXH(ieee80211_rx_h_action_return); + CALL_RXH(ieee80211_rx_h_mgmt); rxh_next: ieee80211_rx_handlers_result(rx, res); @@ -3265,10 +3278,10 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) res = rxh(rx); \ if (res != RX_CONTINUE) \ goto rxh_next; \ - } while (0); + } while (0) - CALL_RXH(ieee80211_rx_h_check_dup) - CALL_RXH(ieee80211_rx_h_check) + CALL_RXH(ieee80211_rx_h_check_dup); + CALL_RXH(ieee80211_rx_h_check); ieee80211_rx_reorder_ampdu(rx, &reorder_release); @@ -3513,6 +3526,351 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) return false; } +void ieee80211_check_fast_rx(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct ieee80211_key *key; + struct ieee80211_fast_rx fastrx = { + .dev = sdata->dev, + .vif_type = sdata->vif.type, + .control_port_protocol = sdata->control_port_protocol, + }, *old, *new = NULL; + bool assign = false; + + /* use sparse to check that we don't return without updating */ + __acquire(check_fast_rx); + + BUILD_BUG_ON(sizeof(fastrx.rfc1042_hdr) != sizeof(rfc1042_header)); + BUILD_BUG_ON(sizeof(fastrx.rfc1042_hdr) != ETH_ALEN); + ether_addr_copy(fastrx.rfc1042_hdr, rfc1042_header); + ether_addr_copy(fastrx.vif_addr, sdata->vif.addr); + + fastrx.uses_rss = ieee80211_hw_check(&local->hw, USES_RSS); + + /* fast-rx doesn't do reordering */ + if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) && + !ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER)) + goto clear; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + /* 4-addr is harder to deal with, later maybe */ + if (sdata->u.mgd.use_4addr) + goto clear; + /* software powersave is a huge mess, avoid all of it */ + if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) + goto clear; + if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) && + !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) + goto clear; + if (sta->sta.tdls) { + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2); + fastrx.expected_ds_bits = 0; + } else { + fastrx.sta_notify = sdata->u.mgd.probe_send_count > 0; + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr3); + fastrx.expected_ds_bits = + cpu_to_le16(IEEE80211_FCTL_FROMDS); + } + break; + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_AP: + /* parallel-rx requires this, at least with calls to + * ieee80211_sta_ps_transition() + */ + if (!ieee80211_hw_check(&local->hw, AP_LINK_PS)) + goto clear; + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2); + fastrx.expected_ds_bits = cpu_to_le16(IEEE80211_FCTL_TODS); + + fastrx.internal_forward = + !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && + (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || + !sdata->u.vlan.sta); + break; + default: + goto clear; + } + + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + goto clear; + + rcu_read_lock(); + key = rcu_dereference(sta->ptk[sta->ptk_idx]); + if (key) { + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_TKIP: + /* we don't want to deal with MMIC in fast-rx */ + goto clear_rcu; + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + break; + default: + /* we also don't want to deal with WEP or cipher scheme + * since those require looking up the key idx in the + * frame, rather than assuming the PTK is used + * (we need to revisit this once we implement the real + * PTK index, which is now valid in the spec, but we + * haven't implemented that part yet) + */ + goto clear_rcu; + } + + fastrx.key = true; + fastrx.icv_len = key->conf.icv_len; + } + + assign = true; + clear_rcu: + rcu_read_unlock(); + clear: + __release(check_fast_rx); + + if (assign) + new = kmemdup(&fastrx, sizeof(fastrx), GFP_KERNEL); + + spin_lock_bh(&sta->lock); + old = rcu_dereference_protected(sta->fast_rx, true); + rcu_assign_pointer(sta->fast_rx, new); + spin_unlock_bh(&sta->lock); + + if (old) + kfree_rcu(old, rcu_head); +} + +void ieee80211_clear_fast_rx(struct sta_info *sta) +{ + struct ieee80211_fast_rx *old; + + spin_lock_bh(&sta->lock); + old = rcu_dereference_protected(sta->fast_rx, true); + RCU_INIT_POINTER(sta->fast_rx, NULL); + spin_unlock_bh(&sta->lock); + + if (old) + kfree_rcu(old, rcu_head); +} + +void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + lockdep_assert_held(&local->sta_mtx); + + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata && + (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) + continue; + ieee80211_check_fast_rx(sta); + } +} + +void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->sta_mtx); + __ieee80211_check_fast_rx_iface(sdata); + mutex_unlock(&local->sta_mtx); +} + +static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, + struct ieee80211_fast_rx *fast_rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_hdr *hdr = (void *)skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct sta_info *sta = rx->sta; + int orig_len = skb->len; + int snap_offs = ieee80211_hdrlen(hdr->frame_control); + struct { + u8 snap[sizeof(rfc1042_header)]; + __be16 proto; + } *payload __aligned(2); + struct { + u8 da[ETH_ALEN]; + u8 sa[ETH_ALEN]; + } addrs __aligned(2); + struct ieee80211_sta_rx_stats *stats = &sta->rx_stats; + + if (fast_rx->uses_rss) + stats = this_cpu_ptr(sta->pcpu_rx_stats); + + /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write + * to a common data structure; drivers can implement that per queue + * but we don't have that information in mac80211 + */ + if (!(status->flag & RX_FLAG_DUP_VALIDATED)) + return false; + +#define FAST_RX_CRYPT_FLAGS (RX_FLAG_PN_VALIDATED | RX_FLAG_DECRYPTED) + + /* If using encryption, we also need to have: + * - PN_VALIDATED: similar, but the implementation is tricky + * - DECRYPTED: necessary for PN_VALIDATED + */ + if (fast_rx->key && + (status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS) + return false; + + /* we don't deal with A-MSDU deaggregation here */ + if (status->rx_flags & IEEE80211_RX_AMSDU) + return false; + + if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) + return false; + + if (unlikely(ieee80211_is_frag(hdr))) + return false; + + /* Since our interface address cannot be multicast, this + * implicitly also rejects multicast frames without the + * explicit check. + * + * We shouldn't get any *data* frames not addressed to us + * (AP mode will accept multicast *management* frames), but + * punting here will make it go through the full checks in + * ieee80211_accept_frame(). + */ + if (!ether_addr_equal(fast_rx->vif_addr, hdr->addr1)) + return false; + + if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS)) != + fast_rx->expected_ds_bits) + goto drop; + + /* assign the key to drop unencrypted frames (later) + * and strip the IV/MIC if necessary + */ + if (fast_rx->key && !(status->flag & RX_FLAG_IV_STRIPPED)) { + /* GCMP header length is the same */ + snap_offs += IEEE80211_CCMP_HDR_LEN; + } + + if (!pskb_may_pull(skb, snap_offs + sizeof(*payload))) + goto drop; + payload = (void *)(skb->data + snap_offs); + + if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr)) + return false; + + /* Don't handle these here since they require special code. + * Accept AARP and IPX even though they should come with a + * bridge-tunnel header - but if we get them this way then + * there's little point in discarding them. + */ + if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) || + payload->proto == fast_rx->control_port_protocol)) + return false; + + /* after this point, don't punt to the slowpath! */ + + if (rx->key && !(status->flag & RX_FLAG_MIC_STRIPPED) && + pskb_trim(skb, skb->len - fast_rx->icv_len)) + goto drop; + + if (unlikely(fast_rx->sta_notify)) { + ieee80211_sta_rx_notify(rx->sdata, hdr); + fast_rx->sta_notify = false; + } + + /* statistics part of ieee80211_rx_h_sta_process() */ + stats->last_rx = jiffies; + stats->last_rate = sta_stats_encode_rate(status); + + stats->fragments++; + + if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { + stats->last_signal = status->signal; + if (!fast_rx->uses_rss) + ewma_signal_add(&sta->rx_stats_avg.signal, + -status->signal); + } + + if (status->chains) { + int i; + + stats->chains = status->chains; + for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { + int signal = status->chain_signal[i]; + + if (!(status->chains & BIT(i))) + continue; + + stats->chain_signal_last[i] = signal; + if (!fast_rx->uses_rss) + ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], + -signal); + } + } + /* end of statistics */ + + if (rx->key && !ieee80211_has_protected(hdr->frame_control)) + goto drop; + + /* do the header conversion - first grab the addresses */ + ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs); + ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs); + /* remove the SNAP but leave the ethertype */ + skb_pull(skb, snap_offs + sizeof(rfc1042_header)); + /* push the addresses in front */ + memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs)); + + skb->dev = fast_rx->dev; + + ieee80211_rx_stats(fast_rx->dev, skb->len); + + /* The seqno index has the same property as needed + * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS + * for non-QoS-data frames. Here we know it's a data + * frame, so count MSDUs. + */ + u64_stats_update_begin(&stats->syncp); + stats->msdu[rx->seqno_idx]++; + stats->bytes += orig_len; + u64_stats_update_end(&stats->syncp); + + if (fast_rx->internal_forward) { + struct sta_info *dsta = sta_info_get(rx->sdata, skb->data); + + if (dsta) { + /* + * Send to wireless media and increase priority by 256 + * to keep the received priority instead of + * reclassifying the frame (see cfg80211_classify8021d). + */ + skb->priority += 256; + skb->protocol = htons(ETH_P_802_3); + skb_reset_network_header(skb); + skb_reset_mac_header(skb); + dev_queue_xmit(skb); + return true; + } + } + + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, fast_rx->dev); + memset(skb->cb, 0, sizeof(skb->cb)); + if (rx->napi) + napi_gro_receive(rx->napi, skb); + else + netif_receive_skb(skb); + + return true; + drop: + dev_kfree_skb(skb); + stats->dropped++; + return true; +} + /* * This function returns whether or not the SKB * was destined for RX processing or not, which, @@ -3527,6 +3885,21 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, rx->skb = skb; + /* See if we can do fast-rx; if we have to copy we already lost, + * so punt in that case. We should never have to deliver a data + * frame to multiple interfaces anyway. + * + * We skip the ieee80211_accept_frame() call and do the necessary + * checking inside ieee80211_invoke_fast_rx(). + */ + if (consume && rx->sta) { + struct ieee80211_fast_rx *fast_rx; + + fast_rx = rcu_dereference(rx->sta->fast_rx); + if (fast_rx && ieee80211_invoke_fast_rx(rx, fast_rx)) + return true; + } + if (!ieee80211_accept_frame(rx)) return false; @@ -3552,6 +3925,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, * be called with rcu_read_lock protection. */ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, struct sk_buff *skb, struct napi_struct *napi) { @@ -3561,7 +3935,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, __le16 fc; struct ieee80211_rx_data rx; struct ieee80211_sub_if_data *prev; - struct sta_info *sta, *prev_sta; struct rhash_head *tmp; int err = 0; @@ -3597,7 +3970,14 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, ieee80211_is_beacon(hdr->frame_control))) ieee80211_scan_rx(local, skb); - if (ieee80211_is_data(fc)) { + if (pubsta) { + rx.sta = container_of(pubsta, struct sta_info, sta); + rx.sdata = rx.sta->sdata; + if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) + return; + goto out; + } else if (ieee80211_is_data(fc)) { + struct sta_info *sta, *prev_sta; const struct bucket_table *tbl; prev_sta = NULL; @@ -3671,8 +4051,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, * This is the receive path handler. It is called by a low level driver when an * 802.11 MPDU is received from the hardware. */ -void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, - struct napi_struct *napi) +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, + struct sk_buff *skb, struct napi_struct *napi) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate = NULL; @@ -3681,7 +4061,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, WARN_ON_ONCE(softirq_count() == 0); - if (WARN_ON(status->band >= IEEE80211_NUM_BANDS)) + if (WARN_ON(status->band >= NUM_NL80211_BANDS)) goto drop; sband = local->hw.wiphy->bands[status->band]; @@ -3771,7 +4151,8 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, ieee80211_tpt_led_trig_rx(local, ((struct ieee80211_hdr *)skb->data)->frame_control, skb->len); - __ieee80211_rx_handle_packet(hw, skb, napi); + + __ieee80211_rx_handle_packet(hw, pubsta, skb, napi); rcu_read_unlock(); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index ae980ce8daff..f9648ef9e31f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -66,7 +66,9 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct cfg80211_bss *cbss; struct ieee80211_bss *bss; int clen, srlen; - struct cfg80211_inform_bss bss_meta = {}; + struct cfg80211_inform_bss bss_meta = { + .boottime_ns = rx_status->boottime_ns, + }; bool signal_valid; if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) @@ -270,7 +272,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) n_chans = req->n_channels; } else { do { - if (local->hw_scan_band == IEEE80211_NUM_BANDS) + if (local->hw_scan_band == NUM_NL80211_BANDS) return false; n_chans = 0; @@ -303,6 +305,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) ether_addr_copy(local->hw_scan_req->req.mac_addr, req->mac_addr); ether_addr_copy(local->hw_scan_req->req.mac_addr_mask, req->mac_addr_mask); + ether_addr_copy(local->hw_scan_req->req.bssid, req->bssid); return true; } @@ -482,7 +485,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, int i; struct ieee80211_sub_if_data *sdata; struct cfg80211_scan_request *scan_req; - enum ieee80211_band band = local->hw.conf.chandef.chan->band; + enum nl80211_band band = local->hw.conf.chandef.chan->band; u32 tx_flags; scan_req = rcu_dereference_protected(local->scan_req, @@ -497,7 +500,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, for (i = 0; i < scan_req->n_ssids; i++) ieee80211_send_probe_req( - sdata, local->scan_addr, NULL, + sdata, local->scan_addr, scan_req->bssid, scan_req->ssids[i].ssid, scan_req->ssids[i].ssid_len, scan_req->ie, scan_req->ie_len, scan_req->rates[band], false, @@ -562,6 +565,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, req->n_channels * sizeof(req->channels[0]); local->hw_scan_req->req.ie = ies; local->hw_scan_req->req.flags = req->flags; + eth_broadcast_addr(local->hw_scan_req->req.bssid); local->hw_scan_band = 0; @@ -949,7 +953,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; int ret = -EBUSY, i, n_ch = 0; - enum ieee80211_band band; + enum nl80211_band band; mutex_lock(&local->mtx); @@ -961,7 +965,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, if (!channels) { int max_n; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!local->hw.wiphy->bands[band]) continue; @@ -1081,7 +1085,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct ieee80211_scan_ies sched_scan_ies = {}; struct cfg80211_chan_def chandef; int ret, i, iebufsz, num_bands = 0; - u32 rate_masks[IEEE80211_NUM_BANDS] = {}; + u32 rate_masks[NUM_NL80211_BANDS] = {}; u8 bands_used = 0; u8 *ie; size_t len; @@ -1093,7 +1097,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, if (!local->ops->sched_scan_start) return -ENOTSUPP; - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { if (local->hw.wiphy->bands[i]) { bands_used |= BIT(i); rate_masks[i] = (u32) -1; diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 06e6ac8cc693..2ddc661f0988 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -23,11 +23,11 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, - enum ieee80211_band current_band, + enum nl80211_band current_band, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie) { - enum ieee80211_band new_band; + enum nl80211_band new_band; int new_freq; u8 new_chan_no; struct ieee80211_channel *new_chan; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 861b93ffbe92..5ccfdbd406bd 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2,7 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2015 Intel Deutschland GmbH + * Copyright (C) 2015 - 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -255,6 +255,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif + free_percpu(sta->pcpu_rx_stats); kfree(sta); } @@ -312,6 +313,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (!sta) return NULL; + if (ieee80211_hw_check(hw, USES_RSS)) { + sta->pcpu_rx_stats = + alloc_percpu(struct ieee80211_sta_rx_stats); + if (!sta->pcpu_rx_stats) + goto free; + } + spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); @@ -336,15 +344,17 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sdata = sdata; sta->rx_stats.last_rx = jiffies; + u64_stats_init(&sta->rx_stats.syncp); + sta->sta_state = IEEE80211_STA_NONE; /* Mark TID as unreserved */ sta->reserved_tid = IEEE80211_TID_UNRESERVED; sta->last_connected = ktime_get_seconds(); - ewma_signal_init(&sta->rx_stats.avg_signal); - for (i = 0; i < ARRAY_SIZE(sta->rx_stats.chain_signal_avg); i++) - ewma_signal_init(&sta->rx_stats.chain_signal_avg[i]); + ewma_signal_init(&sta->rx_stats_avg.signal); + for (i = 0; i < ARRAY_SIZE(sta->rx_stats_avg.chain_signal); i++) + ewma_signal_init(&sta->rx_stats_avg.chain_signal[i]); if (local->ops->wake_tx_queue) { void *txq_data; @@ -407,6 +417,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, } } + sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; @@ -879,6 +891,13 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); + /* + * Before removing the station from the driver there might be pending + * rx frames on RSS queues sent prior to the disassociation - wait for + * all such frames to be processed. + */ + drv_sync_rx_queues(local, sta); + ret = sta_info_hash_del(local, sta); if (WARN_ON(ret)) return ret; @@ -1091,10 +1110,12 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { + unsigned long last_active = ieee80211_sta_last_active(sta); + if (sdata != sta->sdata) continue; - if (time_after(jiffies, sta->rx_stats.last_rx + exp_time)) { + if (time_is_before_jiffies(last_active + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); @@ -1764,6 +1785,31 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, } EXPORT_SYMBOL(ieee80211_sta_set_buffered); +static void +ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + bool allow_p2p_go_ps = sdata->vif.p2p; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata || + !test_sta_flag(sta, WLAN_STA_ASSOC)) + continue; + if (!sta->sta.support_p2p_ps) { + allow_p2p_go_ps = false; + break; + } + } + rcu_read_unlock(); + + if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { + sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS); + } +} + int sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state) { @@ -1825,12 +1871,16 @@ int sta_info_move_state(struct sta_info *sta, } else if (sta->sta_state == IEEE80211_STA_ASSOC) { clear_bit(WLAN_STA_ASSOC, &sta->_flags); ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } break; case IEEE80211_STA_ASSOC: if (sta->sta_state == IEEE80211_STA_AUTH) { set_bit(WLAN_STA_ASSOC, &sta->_flags); ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && @@ -1838,6 +1888,7 @@ int sta_info_move_state(struct sta_info *sta, atomic_dec(&sta->sdata->bss->num_mcast_sta); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_clear_fast_xmit(sta); + ieee80211_clear_fast_rx(sta); } break; case IEEE80211_STA_AUTHORIZED: @@ -1848,6 +1899,7 @@ int sta_info_move_state(struct sta_info *sta, atomic_inc(&sta->sdata->bss->num_mcast_sta); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_check_fast_xmit(sta); + ieee80211_check_fast_rx(sta); } break; default: @@ -1894,43 +1946,117 @@ u8 sta_info_tx_streams(struct sta_info *sta) >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; } -static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) +static struct ieee80211_sta_rx_stats * +sta_get_last_rx_stats(struct sta_info *sta) { - rinfo->flags = 0; - - if (sta->rx_stats.last_rate_flag & RX_FLAG_HT) { - rinfo->flags |= RATE_INFO_FLAGS_MCS; - rinfo->mcs = sta->rx_stats.last_rate_idx; - } else if (sta->rx_stats.last_rate_flag & RX_FLAG_VHT) { - rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; - rinfo->nss = sta->rx_stats.last_rate_vht_nss; - rinfo->mcs = sta->rx_stats.last_rate_idx; - } else { + struct ieee80211_sta_rx_stats *stats = &sta->rx_stats; + struct ieee80211_local *local = sta->local; + int cpu; + + if (!ieee80211_hw_check(&local->hw, USES_RSS)) + return stats; + + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpustats; + + cpustats = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + + if (time_after(cpustats->last_rx, stats->last_rx)) + stats = cpustats; + } + + return stats; +} + +static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, + struct rate_info *rinfo) +{ + rinfo->bw = (rate & STA_STATS_RATE_BW_MASK) >> + STA_STATS_RATE_BW_SHIFT; + + if (rate & STA_STATS_RATE_VHT) { + rinfo->flags = RATE_INFO_FLAGS_VHT_MCS; + rinfo->mcs = rate & 0xf; + rinfo->nss = (rate & 0xf0) >> 4; + } else if (rate & STA_STATS_RATE_HT) { + rinfo->flags = RATE_INFO_FLAGS_MCS; + rinfo->mcs = rate & 0xff; + } else if (rate & STA_STATS_RATE_LEGACY) { struct ieee80211_supported_band *sband; - int shift = ieee80211_vif_get_shift(&sta->sdata->vif); u16 brate; - - sband = sta->local->hw.wiphy->bands[ - ieee80211_get_sdata_band(sta->sdata)]; - brate = sband->bitrates[sta->rx_stats.last_rate_idx].bitrate; + unsigned int shift; + + sband = local->hw.wiphy->bands[(rate >> 4) & 0xf]; + brate = sband->bitrates[rate & 0xf].bitrate; + if (rinfo->bw == RATE_INFO_BW_5) + shift = 2; + else if (rinfo->bw == RATE_INFO_BW_10) + shift = 1; + else + shift = 0; rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); } - if (sta->rx_stats.last_rate_flag & RX_FLAG_SHORT_GI) + if (rate & STA_STATS_RATE_SGI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; +} - if (sta->rx_stats.last_rate_flag & RX_FLAG_5MHZ) - rinfo->bw = RATE_INFO_BW_5; - else if (sta->rx_stats.last_rate_flag & RX_FLAG_10MHZ) - rinfo->bw = RATE_INFO_BW_10; - else if (sta->rx_stats.last_rate_flag & RX_FLAG_40MHZ) - rinfo->bw = RATE_INFO_BW_40; - else if (sta->rx_stats.last_rate_vht_flag & RX_VHT_FLAG_80MHZ) - rinfo->bw = RATE_INFO_BW_80; - else if (sta->rx_stats.last_rate_vht_flag & RX_VHT_FLAG_160MHZ) - rinfo->bw = RATE_INFO_BW_160; +static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) +{ + u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate); + + if (rate == STA_STATS_RATE_INVALID) + rinfo->flags = 0; else - rinfo->bw = RATE_INFO_BW_20; + sta_stats_decode_rate(sta->local, rate, rinfo); +} + +static void sta_set_tidstats(struct sta_info *sta, + struct cfg80211_tid_stats *tidstats, + int tid) +{ + struct ieee80211_local *local = sta->local; + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { + unsigned int start; + + do { + start = u64_stats_fetch_begin(&sta->rx_stats.syncp); + tidstats->rx_msdu = sta->rx_stats.msdu[tid]; + } while (u64_stats_fetch_retry(&sta->rx_stats.syncp, start)); + + tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); + } + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { + tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); + tidstats->tx_msdu = sta->tx_stats.msdu[tid]; + } + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && + ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { + tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); + tidstats->tx_msdu_retries = sta->status_stats.msdu_retries[tid]; + } + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && + ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { + tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); + tidstats->tx_msdu_failed = sta->status_stats.msdu_failed[tid]; + } +} + +static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) +{ + unsigned int start; + u64 value; + + do { + start = u64_stats_fetch_begin(&rxstats->syncp); + value = rxstats->bytes; + } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + + return value; } void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) @@ -1939,7 +2065,10 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_local *local = sdata->local; struct rate_control_ref *ref = NULL; u32 thr = 0; - int i, ac; + int i, ac, cpu; + struct ieee80211_sta_rx_stats *last_rxstats; + + last_rxstats = sta_get_last_rx_stats(sta); if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) ref = local->rate_ctrl; @@ -1968,7 +2097,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->connected_time = ktime_get_seconds() - sta->last_connected; sinfo->inactive_time = - jiffies_to_msecs(jiffies - sta->rx_stats.last_rx); + jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta)); if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) | BIT(NL80211_STA_INFO_TX_BYTES)))) { @@ -1987,12 +2116,30 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | BIT(NL80211_STA_INFO_RX_BYTES)))) { - sinfo->rx_bytes = sta->rx_stats.bytes; + sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats); + + if (sta->pcpu_rx_stats) { + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpurxs; + + cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + sinfo->rx_bytes += sta_get_stats_bytes(cpurxs); + } + } + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { sinfo->rx_packets = sta->rx_stats.packets; + if (sta->pcpu_rx_stats) { + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpurxs; + + cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + sinfo->rx_packets += cpurxs->packets; + } + } sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); } @@ -2007,6 +2154,14 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } sinfo->rx_dropped_misc = sta->rx_stats.dropped; + if (sta->pcpu_rx_stats) { + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpurxs; + + cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + sinfo->rx_packets += cpurxs->dropped; + } + } if (sdata->vif.type == NL80211_IFTYPE_STATION && !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { @@ -2018,29 +2173,36 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { - sinfo->signal = (s8)sta->rx_stats.last_signal; + sinfo->signal = (s8)last_rxstats->last_signal; sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } - if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { + if (!sta->pcpu_rx_stats && + !(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = - -ewma_signal_read(&sta->rx_stats.avg_signal); + -ewma_signal_read(&sta->rx_stats_avg.signal); sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); } } - if (sta->rx_stats.chains && + /* for the average - if pcpu_rx_stats isn't set - rxstats must point to + * the sta->rx_stats struct, so the check here is fine with and without + * pcpu statistics + */ + if (last_rxstats->chains && !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { - sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | - BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); + sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL); + if (!sta->pcpu_rx_stats) + sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); + + sinfo->chains = last_rxstats->chains; - sinfo->chains = sta->rx_stats.chains; for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { sinfo->chain_signal[i] = - sta->rx_stats.chain_signal_last[i]; + last_rxstats->chain_signal_last[i]; sinfo->chain_signal_avg[i] = - -ewma_signal_read(&sta->rx_stats.chain_signal_avg[i]); + -ewma_signal_read(&sta->rx_stats_avg.chain_signal[i]); } } @@ -2059,33 +2221,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { struct cfg80211_tid_stats *tidstats = &sinfo->pertid[i]; - if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { - tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); - tidstats->rx_msdu = sta->rx_stats.msdu[i]; - } - - if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { - tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); - tidstats->tx_msdu = sta->tx_stats.msdu[i]; - } - - if (!(tidstats->filled & - BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && - ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { - tidstats->filled |= - BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); - tidstats->tx_msdu_retries = - sta->status_stats.msdu_retries[i]; - } - - if (!(tidstats->filled & - BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && - ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { - tidstats->filled |= - BIT(NL80211_TID_STATS_TX_MSDU_FAILED); - tidstats->tx_msdu_failed = - sta->status_stats.msdu_failed[i]; - } + sta_set_tidstats(sta, tidstats, i); } if (ieee80211_vif_is_mesh(&sdata->vif)) { @@ -2154,3 +2290,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->expected_throughput = thr; } } + +unsigned long ieee80211_sta_last_active(struct sta_info *sta) +{ + struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); + + if (time_after(stats->last_rx, sta->status_stats.last_ack)) + return stats->last_rx; + return sta->status_stats.last_ack; +} diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 62193f4bc37b..c8b8ccc370eb 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -1,7 +1,7 @@ /* * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015-2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -18,6 +18,7 @@ #include <linux/average.h> #include <linux/etherdevice.h> #include <linux/rhashtable.h> +#include <linux/u64_stats_sync.h> #include "key.h" /** @@ -69,6 +70,8 @@ * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP. * @WLAN_STA_PS_DELIVER: station woke up, but we're still blocking TX * until pending frames are delivered + * + * @NUM_WLAN_STA_FLAGS: number of defined flags */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH, @@ -97,6 +100,8 @@ enum ieee80211_sta_info_flags { WLAN_STA_MPSP_OWNER, WLAN_STA_MPSP_RECIPIENT, WLAN_STA_PS_DELIVER, + + NUM_WLAN_STA_FLAGS, }; #define ADDBA_RESP_INTERVAL HZ @@ -281,6 +286,40 @@ struct ieee80211_fast_tx { }; /** + * struct ieee80211_fast_rx - RX fastpath information + * @dev: netdevice for reporting the SKB + * @vif_type: (P2P-less) interface type of the original sdata (sdata->vif.type) + * @vif_addr: interface address + * @rfc1042_hdr: copy of the RFC 1042 SNAP header (to have in cache) + * @control_port_protocol: control port protocol copied from sdata + * @expected_ds_bits: from/to DS bits expected + * @icv_len: length of the MIC if present + * @key: bool indicating encryption is expected (key is set) + * @sta_notify: notify the MLME code (once) + * @internal_forward: forward froms internally on AP/VLAN type interfaces + * @uses_rss: copy of USES_RSS hw flag + * @da_offs: offset of the DA in the header (for header conversion) + * @sa_offs: offset of the SA in the header (for header conversion) + * @rcu_head: RCU head for freeing this structure + */ +struct ieee80211_fast_rx { + struct net_device *dev; + enum nl80211_iftype vif_type; + u8 vif_addr[ETH_ALEN] __aligned(2); + u8 rfc1042_hdr[6] __aligned(2); + __be16 control_port_protocol; + __le16 expected_ds_bits; + u8 icv_len; + u8 key:1, + sta_notify:1, + internal_forward:1, + uses_rss:1; + u8 da_offs, sa_offs; + + struct rcu_head rcu_head; +}; + +/** * struct mesh_sta - mesh STA information * @plink_lock: serialize access to plink fields * @llid: Local link ID @@ -330,6 +369,21 @@ struct mesh_sta { DECLARE_EWMA(signal, 1024, 8) +struct ieee80211_sta_rx_stats { + unsigned long packets; + unsigned long last_rx; + unsigned long num_duplicates; + unsigned long fragments; + unsigned long dropped; + int last_signal; + u8 chains; + s8 chain_signal_last[IEEE80211_MAX_CHAINS]; + u16 last_rate; + struct u64_stats_sync syncp; + u64 bytes; + u64 msdu[IEEE80211_NUM_TIDS + 1]; +}; + /** * struct sta_info - STA information * @@ -371,7 +425,7 @@ DECLARE_EWMA(signal, 1024, 8) * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers * @mesh: mesh STA information - * @debugfs: debug filesystem info + * @debugfs_dir: debug filesystem directory dentry * @dead: set to true when sta is unlinked * @removed: set to true when sta is being removed from sta_list * @uploaded: set to true when sta is uploaded to the driver @@ -385,10 +439,13 @@ DECLARE_EWMA(signal, 1024, 8) * @cipher_scheme: optional cipher scheme for this station * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) * @fast_tx: TX fastpath information + * @fast_rx: RX fastpath information * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. * @tx_stats: TX statistics * @rx_stats: RX statistics + * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs + * this (by advertising the USES_RSS hw flag) * @status_stats: TX status statistics */ struct sta_info { @@ -408,6 +465,8 @@ struct sta_info { spinlock_t lock; struct ieee80211_fast_tx __rcu *fast_tx; + struct ieee80211_fast_rx __rcu *fast_rx; + struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats; #ifdef CONFIG_MAC80211_MESH struct mesh_sta *mesh; @@ -437,24 +496,11 @@ struct sta_info { long last_connected; /* Updated from RX path only, no locking requirements */ + struct ieee80211_sta_rx_stats rx_stats; struct { - unsigned long packets; - u64 bytes; - unsigned long last_rx; - unsigned long num_duplicates; - unsigned long fragments; - unsigned long dropped; - int last_signal; - struct ewma_signal avg_signal; - u8 chains; - s8 chain_signal_last[IEEE80211_MAX_CHAINS]; - struct ewma_signal chain_signal_avg[IEEE80211_MAX_CHAINS]; - int last_rate_idx; - u32 last_rate_flag; - u32 last_rate_vht_flag; - u8 last_rate_vht_nss; - u64 msdu[IEEE80211_NUM_TIDS + 1]; - } rx_stats; + struct ewma_signal signal; + struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS]; + } rx_stats_avg; /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; @@ -467,6 +513,7 @@ struct sta_info { unsigned long last_tdls_pkt_time; u64 msdu_retries[IEEE80211_NUM_TIDS + 1]; u64 msdu_failed[IEEE80211_NUM_TIDS + 1]; + unsigned long last_ack; } status_stats; /* Updated from TX path only, no locking requirements */ @@ -485,10 +532,7 @@ struct sta_info { u8 timer_to_tid[IEEE80211_NUM_TIDS]; #ifdef CONFIG_MAC80211_DEBUGFS - struct sta_info_debugfsdentries { - struct dentry *dir; - bool add_has_run; - } debugfs; + struct dentry *debugfs_dir; #endif enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; @@ -676,4 +720,44 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); +unsigned long ieee80211_sta_last_active(struct sta_info *sta); + +#define STA_STATS_RATE_INVALID 0 +#define STA_STATS_RATE_VHT 0x8000 +#define STA_STATS_RATE_HT 0x4000 +#define STA_STATS_RATE_LEGACY 0x2000 +#define STA_STATS_RATE_SGI 0x1000 +#define STA_STATS_RATE_BW_SHIFT 9 +#define STA_STATS_RATE_BW_MASK (0x7 << STA_STATS_RATE_BW_SHIFT) + +static inline u16 sta_stats_encode_rate(struct ieee80211_rx_status *s) +{ + u16 r = s->rate_idx; + + if (s->vht_flag & RX_VHT_FLAG_80MHZ) + r |= RATE_INFO_BW_80 << STA_STATS_RATE_BW_SHIFT; + else if (s->vht_flag & RX_VHT_FLAG_160MHZ) + r |= RATE_INFO_BW_160 << STA_STATS_RATE_BW_SHIFT; + else if (s->flag & RX_FLAG_40MHZ) + r |= RATE_INFO_BW_40 << STA_STATS_RATE_BW_SHIFT; + else if (s->flag & RX_FLAG_10MHZ) + r |= RATE_INFO_BW_10 << STA_STATS_RATE_BW_SHIFT; + else if (s->flag & RX_FLAG_5MHZ) + r |= RATE_INFO_BW_5 << STA_STATS_RATE_BW_SHIFT; + else + r |= RATE_INFO_BW_20 << STA_STATS_RATE_BW_SHIFT; + + if (s->flag & RX_FLAG_SHORT_GI) + r |= STA_STATS_RATE_SGI; + + if (s->flag & RX_FLAG_VHT) + r |= STA_STATS_RATE_VHT | (s->vht_nss << 4); + else if (s->flag & RX_FLAG_HT) + r |= STA_STATS_RATE_HT; + else + r |= STA_STATS_RATE_LEGACY | (s->band << 4); + + return r; +} + #endif /* STA_INFO_H */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 8b1b2ea03eb5..c6d5c724e032 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -188,7 +188,7 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) struct ieee80211_sub_if_data *sdata = sta->sdata; if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) - sta->rx_stats.last_rx = jiffies; + sta->status_stats.last_ack = jiffies; if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *) skb->data; @@ -647,7 +647,7 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, sta->status_stats.retry_count += retry_count; if (acked) { - sta->rx_stats.last_rx = jiffies; + sta->status_stats.last_ack = jiffies; if (sta->status_stats.lost_packets) sta->status_stats.lost_packets = 0; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index a29ea813b7d5..1c7d45a6d93e 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -47,7 +47,7 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata, NL80211_FEATURE_TDLS_CHANNEL_SWITCH; bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && !ifmgd->tdls_wider_bw_prohibited; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; bool vht = sband && sband->vht_cap.vht_supported; u8 *pos = (void *)skb_put(skb, 10); @@ -184,7 +184,7 @@ static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata, if (status_code != 0) return 0; - if (ieee80211_get_sdata_band(sdata) == IEEE80211_BAND_2GHZ) { + if (ieee80211_get_sdata_band(sdata) == NL80211_BAND_2GHZ) { return WLAN_CAPABILITY_SHORT_SLOT_TIME | WLAN_CAPABILITY_SHORT_PREAMBLE; } @@ -357,7 +357,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, u8 action_code, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; struct ieee80211_sta_ht_cap ht_cap; @@ -544,7 +544,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t offset = 0, noffset; struct sta_info *sta, *ap_sta; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); u8 *pos; mutex_lock(&local->sta_mtx); @@ -611,7 +611,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); /* only include VHT-operation if not on the 2.4GHz band */ - if (band != IEEE80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) { + if (band != NL80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) { /* * if both peers support WIDER_BW, we can expand the chandef to * a wider compatible one, up to 80MHz @@ -1773,7 +1773,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, u8 target_channel, oper_class; bool local_initiator; struct sta_info *sta; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_tdls_data *tf = (void *)skb->data; struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); int baselen = offsetof(typeof(*tf), u.chan_switch_req.variable); @@ -1805,10 +1805,10 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, if ((oper_class == 112 || oper_class == 2 || oper_class == 3 || oper_class == 4 || oper_class == 5 || oper_class == 6) && target_channel < 14) - band = IEEE80211_BAND_5GHZ; + band = NL80211_BAND_5GHZ; else - band = target_channel < 14 ? IEEE80211_BAND_2GHZ : - IEEE80211_BAND_5GHZ; + band = target_channel < 14 ? NL80211_BAND_2GHZ : + NL80211_BAND_5GHZ; freq = ieee80211_channel_to_frequency(target_channel, band); if (freq == 0) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 2b0a17ee907a..77e4c53baefb 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1,3 +1,8 @@ +/* +* Portions of this file +* Copyright(c) 2016 Intel Deutschland GmbH +*/ + #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) #define __MAC80211_DRIVER_TRACE @@ -396,7 +401,7 @@ TRACE_EVENT(drv_bss_info_changed, __field(u32, sync_device_ts) __field(u8, sync_dtim_count) __field(u32, basic_rates) - __array(int, mcast_rate, IEEE80211_NUM_BANDS) + __array(int, mcast_rate, NUM_NL80211_BANDS) __field(u16, ht_operation_mode) __field(s32, cqm_rssi_thold); __field(s32, cqm_rssi_hyst); @@ -899,6 +904,13 @@ DEFINE_EVENT(sta_event, drv_sta_pre_rcu_remove, TP_ARGS(local, sdata, sta) ); +DEFINE_EVENT(sta_event, drv_sync_rx_queues, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); + DEFINE_EVENT(sta_event, drv_sta_rate_tbl_update, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1253,8 +1265,8 @@ TRACE_EVENT(drv_set_bitrate_mask, TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - __entry->legacy_2g = mask->control[IEEE80211_BAND_2GHZ].legacy; - __entry->legacy_5g = mask->control[IEEE80211_BAND_5GHZ].legacy; + __entry->legacy_2g = mask->control[NL80211_BAND_2GHZ].legacy; + __entry->legacy_5g = mask->control[NL80211_BAND_5GHZ].legacy; ), TP_printk( diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 21f6602395f7..203044379ce0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -150,7 +150,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, rate = DIV_ROUND_UP(r->bitrate, 1 << shift); switch (sband->band) { - case IEEE80211_BAND_2GHZ: { + case NL80211_BAND_2GHZ: { u32 flag; if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) flag = IEEE80211_RATE_MANDATORY_G; @@ -160,13 +160,13 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, mrate = r->bitrate; break; } - case IEEE80211_BAND_5GHZ: + case NL80211_BAND_5GHZ: if (r->flags & IEEE80211_RATE_MANDATORY_A) mrate = r->bitrate; break; - case IEEE80211_BAND_60GHZ: + case NL80211_BAND_60GHZ: /* TODO, for now fall through */ - case IEEE80211_NUM_BANDS: + case NUM_NL80211_BANDS: WARN_ON(1); break; } @@ -1329,6 +1329,10 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, out: spin_unlock_bh(&txqi->queue.lock); + if (skb && skb_has_frag_list(skb) && + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) + skb_linearize(skb); + return skb; } EXPORT_SYMBOL(ieee80211_tx_dequeue); @@ -1696,7 +1700,9 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, bool rate_found = false; u8 rate_retries = 0; u16 rate_flags = 0; - u8 mcs_known, mcs_flags; + u8 mcs_known, mcs_flags, mcs_bw; + u16 vht_known; + u8 vht_mcs = 0, vht_nss = 0; int i; info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | @@ -1772,11 +1778,38 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, mcs_flags & IEEE80211_RADIOTAP_MCS_SGI) rate_flags |= IEEE80211_TX_RC_SHORT_GI; + mcs_bw = mcs_flags & IEEE80211_RADIOTAP_MCS_BW_MASK; if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_BW && - mcs_flags & IEEE80211_RADIOTAP_MCS_BW_40) + mcs_bw == IEEE80211_RADIOTAP_MCS_BW_40) rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; break; + case IEEE80211_RADIOTAP_VHT: + vht_known = get_unaligned_le16(iterator.this_arg); + rate_found = true; + + rate_flags = IEEE80211_TX_RC_VHT_MCS; + if ((vht_known & IEEE80211_RADIOTAP_VHT_KNOWN_GI) && + (iterator.this_arg[2] & + IEEE80211_RADIOTAP_VHT_FLAG_SGI)) + rate_flags |= IEEE80211_TX_RC_SHORT_GI; + if (vht_known & + IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH) { + if (iterator.this_arg[3] == 1) + rate_flags |= + IEEE80211_TX_RC_40_MHZ_WIDTH; + else if (iterator.this_arg[3] == 4) + rate_flags |= + IEEE80211_TX_RC_80_MHZ_WIDTH; + else if (iterator.this_arg[3] == 11) + rate_flags |= + IEEE80211_TX_RC_160_MHZ_WIDTH; + } + + vht_mcs = iterator.this_arg[4] >> 4; + vht_nss = iterator.this_arg[4] & 0xF; + break; + /* * Please update the file * Documentation/networking/mac80211-injection.txt @@ -1802,6 +1835,9 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, if (rate_flags & IEEE80211_TX_RC_MCS) { info->control.rates[0].idx = rate; + } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) { + ieee80211_rate_set_vht(info->control.rates, vht_mcs, + vht_nss); } else { for (i = 0; i < sband->n_bitrates; i++) { if (rate * 5 != sband->bitrates[i].bitrate) @@ -1812,6 +1848,9 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, } } + if (info->control.rates[0].idx < 0) + info->control.flags &= ~IEEE80211_TX_CTRL_RATE_INJECT; + info->control.rates[0].flags = rate_flags; info->control.rates[0].count = min_t(u8, rate_retries + 1, local->hw.max_rate_tries); @@ -2099,7 +2138,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, u16 info_id = 0; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_sub_if_data *ap_sdata; - enum ieee80211_band band; + enum nl80211_band band; int ret; if (IS_ERR(sta)) @@ -2186,7 +2225,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } if (mppath && mpath) - mesh_path_del(mpath->sdata, mpath->dst); + mesh_path_del(sdata, mpath->dst); } /* @@ -2772,6 +2811,154 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta) kfree_rcu(fast_tx, rcu_head); } +static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local, + struct sk_buff *skb, int headroom, + int *subframe_len) +{ + int amsdu_len = *subframe_len + sizeof(struct ethhdr); + int padding = (4 - amsdu_len) & 3; + + if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) { + I802_DEBUG_INC(local->tx_expand_skb_head); + + if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) { + wiphy_debug(local->hw.wiphy, + "failed to reallocate TX buffer\n"); + return false; + } + } + + if (padding) { + *subframe_len += padding; + memset(skb_put(skb, padding), 0, padding); + } + + return true; +} + +static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr; + struct ethhdr amsdu_hdr; + int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header); + int subframe_len = skb->len - hdr_len; + void *data; + u8 *qc; + + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) + return false; + + if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) + return true; + + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr), + &subframe_len)) + return false; + + amsdu_hdr.h_proto = cpu_to_be16(subframe_len); + memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN); + memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN); + + data = skb_push(skb, sizeof(amsdu_hdr)); + memmove(data, data + sizeof(amsdu_hdr), hdr_len); + memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr)); + + hdr = data; + qc = ieee80211_get_qos_ctl(hdr); + *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; + + info->control.flags |= IEEE80211_TX_CTRL_AMSDU; + + return true; +} + +static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi; + struct sk_buff **frag_tail, *head; + int subframe_len = skb->len - ETH_ALEN; + u8 max_subframes = sta->sta.max_amsdu_subframes; + int max_frags = local->hw.max_tx_fragments; + int max_amsdu_len = sta->sta.max_amsdu_len; + __be16 len; + void *data; + bool ret = false; + int n = 1, nfrags; + + if (!ieee80211_hw_check(&local->hw, TX_AMSDU)) + return false; + + if (!txq) + return false; + + txqi = to_txq_info(txq); + if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags)) + return false; + + if (sta->sta.max_rc_amsdu_len) + max_amsdu_len = min_t(int, max_amsdu_len, + sta->sta.max_rc_amsdu_len); + + spin_lock_bh(&txqi->queue.lock); + + head = skb_peek_tail(&txqi->queue); + if (!head) + goto out; + + if (skb->len + head->len > max_amsdu_len) + goto out; + + if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) + goto out; + + nfrags = 1 + skb_shinfo(skb)->nr_frags; + nfrags += 1 + skb_shinfo(head)->nr_frags; + frag_tail = &skb_shinfo(head)->frag_list; + while (*frag_tail) { + nfrags += 1 + skb_shinfo(*frag_tail)->nr_frags; + frag_tail = &(*frag_tail)->next; + n++; + } + + if (max_subframes && n > max_subframes) + goto out; + + if (max_frags && nfrags > max_frags) + goto out; + + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2, + &subframe_len)) + goto out; + + ret = true; + data = skb_push(skb, ETH_ALEN + 2); + memmove(data, data + ETH_ALEN + 2, 2 * ETH_ALEN); + + data += 2 * ETH_ALEN; + len = cpu_to_be16(subframe_len); + memcpy(data, &len, 2); + memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header)); + + head->len += skb->len; + head->data_len += skb->len; + *frag_tail = skb; + +out: + spin_unlock_bh(&txqi->queue.lock); + + return ret; +} + static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, struct net_device *dev, struct sta_info *sta, struct ieee80211_fast_tx *fast_tx, @@ -2826,6 +3013,10 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, ieee80211_tx_stats(dev, skb->len + extra_head); + if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && + ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) + return true; + /* will not be crypto-handled beyond what we do here, so use false * as the may-encrypt argument for the resize to not account for * more room than we already have in 'extra_head' @@ -3406,7 +3597,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, struct sk_buff *skb = NULL; struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata = NULL; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_tx_rate_control txrc; struct ieee80211_chanctx_conf *chanctx_conf; int csa_off_base = 0; @@ -3974,7 +4165,7 @@ EXPORT_SYMBOL(ieee80211_unreserve_tid); void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum ieee80211_band band) + enum nl80211_band band) { int ac = ieee802_1d_to_ac[tid & 7]; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 7390de4946a9..905003f75c4d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -59,7 +59,7 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx) } } -int ieee80211_frame_duration(enum ieee80211_band band, size_t len, +int ieee80211_frame_duration(enum nl80211_band band, size_t len, int rate, int erp, int short_preamble, int shift) { @@ -77,7 +77,7 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len, * is assumed to be 0 otherwise. */ - if (band == IEEE80211_BAND_5GHZ || erp) { + if (band == NL80211_BAND_5GHZ || erp) { /* * OFDM: * @@ -129,7 +129,7 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len, /* Exported duration function for driver use */ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum ieee80211_band band, + enum nl80211_band band, size_t frame_len, struct ieee80211_rate *rate) { @@ -1129,7 +1129,7 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); use_11b = (chanctx_conf && - chanctx_conf->def.chan->band == IEEE80211_BAND_2GHZ) && + chanctx_conf->def.chan->band == NL80211_BAND_2GHZ) && !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE); rcu_read_unlock(); @@ -1301,7 +1301,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, u8 *buffer, size_t buffer_len, const u8 *ie, size_t ie_len, - enum ieee80211_band band, + enum nl80211_band band, u32 rate_mask, struct cfg80211_chan_def *chandef, size_t *offset) @@ -1375,7 +1375,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, pos += ext_rates_len; } - if (chandef->chan && sband->band == IEEE80211_BAND_2GHZ) { + if (chandef->chan && sband->band == NL80211_BAND_2GHZ) { if (end - pos < 3) goto out_err; *pos++ = WLAN_EID_DS_PARAMS; @@ -1479,7 +1479,7 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, memset(ie_desc, 0, sizeof(*ie_desc)); - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { if (bands_used & BIT(i)) { pos += ieee80211_build_preq_ies_band(local, buffer + pos, @@ -1522,7 +1522,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; int ies_len; - u32 rate_masks[IEEE80211_NUM_BANDS] = {}; + u32 rate_masks[NUM_NL80211_BANDS] = {}; struct ieee80211_scan_ies dummy_ie_desc; /* @@ -1582,7 +1582,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, - enum ieee80211_band band, u32 *basic_rates) + enum nl80211_band band, u32 *basic_rates) { struct ieee80211_supported_band *sband; size_t num_rates; @@ -2520,7 +2520,7 @@ int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic, - enum ieee80211_band band) + enum nl80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -2565,7 +2565,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic, - enum ieee80211_band band) + enum nl80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -2711,7 +2711,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, if (status->flag & RX_FLAG_MACTIME_PLCP_START) { /* TODO: handle HT/VHT preambles */ - if (status->band == IEEE80211_BAND_5GHZ) { + if (status->band == NL80211_BAND_5GHZ) { ts += 20 << shift; mpdu_offset += 2; } else if (status->flag & RX_FLAG_SHORTPRE) { @@ -2724,8 +2724,9 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, rate = cfg80211_calculate_bitrate(&ri); if (WARN_ONCE(!rate, - "Invalid bitrate: flags=0x%x, idx=%d, vht_nss=%d\n", - status->flag, status->rate_idx, status->vht_nss)) + "Invalid bitrate: flags=0x%llx, idx=%d, vht_nss=%d\n", + (unsigned long long)status->flag, status->rate_idx, + status->vht_nss)) return 0; /* rewind from end of MPDU */ diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index e590e2ef9eaf..ee715764a828 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -418,7 +418,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band) + enum nl80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -504,7 +504,7 @@ EXPORT_SYMBOL_GPL(ieee80211_update_mu_groups); void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band) + enum nl80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 18848258adde..b48c1e13e281 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -504,25 +504,31 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, !ieee80211_is_robust_mgmt_frame(skb)) return RX_CONTINUE; - data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; - if (!rx->sta || data_len < 0) - return RX_DROP_UNUSABLE; - if (status->flag & RX_FLAG_DECRYPTED) { if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN)) return RX_DROP_UNUSABLE; + if (status->flag & RX_FLAG_MIC_STRIPPED) + mic_len = 0; } else { if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; } + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; + if (!rx->sta || data_len < 0) + return RX_DROP_UNUSABLE; + if (!(status->flag & RX_FLAG_PN_VALIDATED)) { + int res; + ccmp_hdr2pn(pn, skb->data + hdrlen); queue = rx->security_idx; - if (memcmp(pn, key->u.ccmp.rx_pn[queue], - IEEE80211_CCMP_PN_LEN) <= 0) { + res = memcmp(pn, key->u.ccmp.rx_pn[queue], + IEEE80211_CCMP_PN_LEN); + if (res < 0 || + (!res && !(status->flag & RX_FLAG_ALLOW_SAME_PN))) { key->u.ccmp.replays++; return RX_DROP_UNUSABLE; } @@ -720,8 +726,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u8 pn[IEEE80211_GCMP_PN_LEN]; - int data_len; - int queue; + int data_len, queue, mic_len = IEEE80211_GCMP_MIC_LEN; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -729,26 +734,31 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) !ieee80211_is_robust_mgmt_frame(skb)) return RX_CONTINUE; - data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - - IEEE80211_GCMP_MIC_LEN; - if (!rx->sta || data_len < 0) - return RX_DROP_UNUSABLE; - if (status->flag & RX_FLAG_DECRYPTED) { if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_GCMP_HDR_LEN)) return RX_DROP_UNUSABLE; + if (status->flag & RX_FLAG_MIC_STRIPPED) + mic_len = 0; } else { if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; } + data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len; + if (!rx->sta || data_len < 0) + return RX_DROP_UNUSABLE; + if (!(status->flag & RX_FLAG_PN_VALIDATED)) { + int res; + gcmp_hdr2pn(pn, skb->data + hdrlen); queue = rx->security_idx; - if (memcmp(pn, key->u.gcmp.rx_pn[queue], - IEEE80211_GCMP_PN_LEN) <= 0) { + res = memcmp(pn, key->u.gcmp.rx_pn[queue], + IEEE80211_GCMP_PN_LEN); + if (res < 0 || + (!res && !(status->flag & RX_FLAG_ALLOW_SAME_PN))) { key->u.gcmp.replays++; return RX_DROP_UNUSABLE; } @@ -772,7 +782,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) } /* Remove GCMP header and MIC */ - if (pskb_trim(skb, skb->len - IEEE80211_GCMP_MIC_LEN)) + if (pskb_trim(skb, skb->len - mic_len)) return RX_DROP_UNUSABLE; memmove(skb->data + IEEE80211_GCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_GCMP_HDR_LEN); diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 0183b32da942..bbcf60465e5c 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -31,6 +31,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TCP_ECN))) goto out; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 85ca189bdc3d..2cb3c626cd43 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -104,6 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key) spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } +static void ip_vs_conn_expire(unsigned long data); /* * Returns hash value for IPVS connection entry @@ -453,10 +454,16 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af, } EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); +static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp) +{ + __ip_vs_conn_put(cp); + ip_vs_conn_expire((unsigned long)cp); +} + /* * Put back the conn and restart its timer with its timeout */ -void ip_vs_conn_put(struct ip_vs_conn *cp) +static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp) { unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ? 0 : cp->timeout; @@ -465,6 +472,16 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) __ip_vs_conn_put(cp); } +void ip_vs_conn_put(struct ip_vs_conn *cp) +{ + if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && + (atomic_read(&cp->refcnt) == 1) && + !timer_pending(&cp->timer)) + /* expire connection immediately */ + __ip_vs_conn_put_notimer(cp); + else + __ip_vs_conn_put_timer(cp); +} /* * Fill a no_client_port connection with a client port number @@ -819,7 +836,8 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->control) ip_vs_control_del(cp); - if (cp->flags & IP_VS_CONN_F_NFCT) { + if ((cp->flags & IP_VS_CONN_F_NFCT) && + !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) { /* Do not access conntracks during subsys cleanup * because nf_conntrack_find_get can not be used after * conntrack cleanup for the net. @@ -834,7 +852,10 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); - call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + ip_vs_conn_rcu_free(&cp->rcu_head); + else + call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); atomic_dec(&ipvs->conn_count); return; } @@ -850,7 +871,7 @@ static void ip_vs_conn_expire(unsigned long data) if (ipvs->sync_state & IP_VS_STATE_MASTER) ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs)); - ip_vs_conn_put(cp); + __ip_vs_conn_put_timer(cp); } /* Modify timer, so that it expires as soon as possible. @@ -1240,6 +1261,16 @@ static inline int todrop_entry(struct ip_vs_conn *cp) return 1; } +static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp) +{ + struct ip_vs_service *svc; + + if (!cp->dest) + return false; + svc = rcu_dereference(cp->dest->svc); + return svc && (svc->flags & IP_VS_SVC_F_ONEPACKET); +} + /* Called from keventd and must protect itself from softirqs */ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) { @@ -1254,11 +1285,16 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask; hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->flags & IP_VS_CONN_F_TEMPLATE) - /* connection template */ - continue; if (cp->ipvs != ipvs) continue; + if (cp->flags & IP_VS_CONN_F_TEMPLATE) { + if (atomic_read(&cp->n_control) || + !ip_vs_conn_ops_mode(cp)) + continue; + else + /* connection template of OPS */ + goto try_drop; + } if (cp->protocol == IPPROTO_TCP) { switch(cp->state) { case IP_VS_TCP_S_SYN_RECV: @@ -1286,6 +1322,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) continue; } } else { +try_drop: if (!todrop_entry(cp)) continue; } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b9a4082afa3a..1207f20d24e4 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -68,6 +68,7 @@ EXPORT_SYMBOL(ip_vs_conn_put); #ifdef CONFIG_IP_VS_DEBUG EXPORT_SYMBOL(ip_vs_get_debug_level); #endif +EXPORT_SYMBOL(ip_vs_new_conn_out); static int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ @@ -611,7 +612,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ret = cp->packet_xmit(skb, cp, pd->pp, iph); /* do not touch skb anymore */ - atomic_inc(&cp->in_pkts); + if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control) + atomic_inc(&cp->control->in_pkts); + else + atomic_inc(&cp->in_pkts); ip_vs_conn_put(cp); return ret; } @@ -1100,6 +1104,143 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp, } } +/* Generic function to create new connections for outgoing RS packets + * + * Pre-requisites for successful connection creation: + * 1) Virtual Service is NOT fwmark based: + * In fwmark-VS actual vaddr and vport are unknown to IPVS + * 2) Real Server and Virtual Service were NOT configured without port: + * This is to allow match of different VS to the same RS ip-addr + */ +struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, + __be16 cport) +{ + struct ip_vs_conn_param param; + struct ip_vs_conn *ct = NULL, *cp = NULL; + const union nf_inet_addr *vaddr, *daddr, *caddr; + union nf_inet_addr snet; + __be16 vport; + unsigned int flags; + + EnterFunction(12); + vaddr = &svc->addr; + vport = svc->port; + daddr = &iph->saddr; + caddr = &iph->daddr; + + /* check pre-requisites are satisfied */ + if (svc->fwmark) + return NULL; + if (!vport || !dport) + return NULL; + + /* for persistent service first create connection template */ + if (svc->flags & IP_VS_SVC_F_PERSISTENT) { + /* apply netmask the same way ingress-side does */ +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + ipv6_addr_prefix(&snet.in6, &caddr->in6, + (__force __u32)svc->netmask); + else +#endif + snet.ip = caddr->ip & svc->netmask; + /* fill params and create template if not existent */ + if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol, + &snet, 0, vaddr, + vport, ¶m) < 0) + return NULL; + ct = ip_vs_ct_in_get(¶m); + if (!ct) { + ct = ip_vs_conn_new(¶m, dest->af, daddr, dport, + IP_VS_CONN_F_TEMPLATE, dest, 0); + if (!ct) { + kfree(param.pe_data); + return NULL; + } + ct->timeout = svc->timeout; + } else { + kfree(param.pe_data); + } + } + + /* connection flags */ + flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) && + iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; + /* create connection */ + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, + caddr, cport, vaddr, vport, ¶m); + cp = ip_vs_conn_new(¶m, dest->af, daddr, dport, flags, dest, 0); + if (!cp) { + if (ct) + ip_vs_conn_put(ct); + return NULL; + } + if (ct) { + ip_vs_control_add(cp, ct); + ip_vs_conn_put(ct); + } + ip_vs_conn_stats(cp, svc); + + /* return connection (will be used to handle outgoing packet) */ + IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u " + "d:%s:%u conn->flags:%X conn->refcnt:%d\n", + ip_vs_fwd_tag(cp), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + cp->flags, atomic_read(&cp->refcnt)); + LeaveFunction(12); + return cp; +} + +/* Handle outgoing packets which are considered requests initiated by + * real servers, so that subsequent responses from external client can be + * routed to the right real server. + * Used also for outgoing responses in OPS mode. + * + * Connection management is handled by persistent-engine specific callback. + */ +static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, + struct netns_ipvs *ipvs, + int af, struct sk_buff *skb, + const struct ip_vs_iphdr *iph) +{ + struct ip_vs_dest *dest; + struct ip_vs_conn *cp = NULL; + __be16 _ports[2], *pptr; + + if (hooknum == NF_INET_LOCAL_IN) + return NULL; + + pptr = frag_safe_skb_hp(skb, iph->len, + sizeof(_ports), _ports, iph); + if (!pptr) + return NULL; + + rcu_read_lock(); + dest = ip_vs_find_real_service(ipvs, af, iph->protocol, + &iph->saddr, pptr[0]); + if (dest) { + struct ip_vs_service *svc; + struct ip_vs_pe *pe; + + svc = rcu_dereference(dest->svc); + if (svc) { + pe = rcu_dereference(svc->pe); + if (pe && pe->conn_out) + cp = pe->conn_out(svc, dest, skb, iph, + pptr[0], pptr[1]); + } + } + rcu_read_unlock(); + + return cp; +} + /* Handle response packets: rewrite addresses and send away... */ static unsigned int @@ -1245,6 +1386,22 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in if (likely(cp)) return handle_response(af, skb, pd, cp, &iph, hooknum); + + /* Check for real-server-started requests */ + if (atomic_read(&ipvs->conn_out_counter)) { + /* Currently only for UDP: + * connection oriented protocols typically use + * ephemeral ports for outgoing connections, so + * related incoming responses would not match any VS + */ + if (pp->protocol == IPPROTO_UDP) { + cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph); + if (likely(cp)) + return handle_response(af, skb, pd, cp, &iph, + hooknum); + } + } + if (sysctl_nat_icmp_send(ipvs) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || @@ -1837,6 +1994,9 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int if (ipvs->sync_state & IP_VS_STATE_MASTER) ip_vs_sync_conn(ipvs, cp, pkts); + else if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control) + /* increment is done inside ip_vs_sync_conn too */ + atomic_inc(&cp->control->in_pkts); ip_vs_conn_put(cp); return ret; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 404b2a4f4b5b..c3c809b2e712 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -567,6 +567,36 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, return false; } +/* Find real service record by <proto,addr,port>. + * In case of multiple records with the same <proto,addr,port>, only + * the first found record is returned. + * + * To be called under RCU lock. + */ +struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, + __u16 protocol, + const union nf_inet_addr *daddr, + __be16 dport) +{ + unsigned int hash; + struct ip_vs_dest *dest; + + /* Check for "full" addressed entries */ + hash = ip_vs_rs_hashkey(af, daddr, dport); + + hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { + if (dest->port == dport && + dest->af == af && + ip_vs_addr_equal(af, &dest->addr, daddr) && + (dest->protocol == protocol || dest->vfwmark)) { + /* HIT */ + return dest; + } + } + + return NULL; +} + /* Lookup destination by {addr,port} in the given service * Called under RCU lock. */ @@ -1253,6 +1283,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, atomic_inc(&ipvs->ftpsvc_counter); else if (svc->port == 0) atomic_inc(&ipvs->nullsvc_counter); + if (svc->pe && svc->pe->conn_out) + atomic_inc(&ipvs->conn_out_counter); ip_vs_start_estimator(ipvs, &svc->stats); @@ -1293,6 +1325,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) struct ip_vs_scheduler *sched = NULL, *old_sched; struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; + bool new_pe_conn_out, old_pe_conn_out; /* * Lookup the scheduler, by 'u->sched_name' @@ -1355,8 +1388,16 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) svc->netmask = u->netmask; old_pe = rcu_dereference_protected(svc->pe, 1); - if (pe != old_pe) + if (pe != old_pe) { rcu_assign_pointer(svc->pe, pe); + /* check for optional methods in new pe */ + new_pe_conn_out = (pe && pe->conn_out) ? true : false; + old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false; + if (new_pe_conn_out && !old_pe_conn_out) + atomic_inc(&svc->ipvs->conn_out_counter); + if (old_pe_conn_out && !new_pe_conn_out) + atomic_dec(&svc->ipvs->conn_out_counter); + } out: ip_vs_scheduler_put(old_sched); @@ -1389,6 +1430,8 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* Unbind persistence engine, keep svc->pe */ old_pe = rcu_dereference_protected(svc->pe, 1); + if (old_pe && old_pe->conn_out) + atomic_dec(&ipvs->conn_out_counter); ip_vs_pe_put(old_pe); /* @@ -2875,8 +2918,10 @@ static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) || nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) || nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) || - nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, + IPVS_STATS_ATTR_PAD) || nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) || nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) || nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) || @@ -2900,16 +2945,26 @@ static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type, if (!nl_stats) return -EMSGSIZE; - if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) || - nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) || - nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) || - nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) || - nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) || - nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps)) + if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps, + IPVS_STATS_ATTR_PAD) || + nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps, + IPVS_STATS_ATTR_PAD)) goto nla_put_failure; nla_nest_end(skb, nl_stats); @@ -3957,6 +4012,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) (unsigned long) ipvs); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); + atomic_set(&ipvs->conn_out_counter, 0); /* procfs stats */ ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 30434fb133df..f04fd8df210b 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -93,6 +93,10 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) return; + /* Never alter conntrack for OPS conns (no reply is expected) */ + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + return; + /* Alter reply only in original direction */ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return; diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 0a6eb5c0d9e9..d07ef9e31c12 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -143,6 +143,20 @@ static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf) return cp->pe_data_len; } +static struct ip_vs_conn * +ip_vs_sip_conn_out(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, + __be16 cport) +{ + if (likely(iph->protocol == IPPROTO_UDP)) + return ip_vs_new_conn_out(svc, dest, skb, iph, dport, cport); + /* currently no need to handle other than UDP */ + return NULL; +} + static struct ip_vs_pe ip_vs_sip_pe = { .name = "sip", @@ -153,6 +167,7 @@ static struct ip_vs_pe ip_vs_sip_pe = .ct_match = ip_vs_sip_ct_match, .hashkey_raw = ip_vs_sip_hashkey_raw, .show_pe_data = ip_vs_sip_show_pe_data, + .conn_out = ip_vs_sip_conn_out, }; static int __init ip_vs_sip_init(void) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index dc196a0f501d..6d19d2eeaa60 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -1013,8 +1013,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)); - if (IS_ERR(skb)) + if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af))) goto tx_error; skb->transport_header = skb->network_header; @@ -1105,8 +1104,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)); - if (IS_ERR(skb)) + if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af))) goto tx_error; skb->transport_header = skb->network_header; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e27fd17c6743..db2312eeb2a4 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -12,6 +12,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> @@ -52,6 +54,7 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> +#include <net/netns/hash.h> #define NF_CONNTRACK_VERSION "0.5.0" @@ -66,6 +69,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks); __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); +struct hlist_nulls_head *nf_conntrack_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_hash); + +static __read_mostly struct kmem_cache *nf_conntrack_cachep; +static __read_mostly spinlock_t nf_conntrack_locks_all_lock; +static __read_mostly seqcount_t nf_conntrack_generation; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; @@ -105,7 +114,7 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1, spin_lock_nested(&nf_conntrack_locks[h1], SINGLE_DEPTH_NESTING); } - if (read_seqcount_retry(&net->ct.generation, sequence)) { + if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { nf_conntrack_double_unlock(h1, h2); return true; } @@ -139,43 +148,43 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); -unsigned int nf_conntrack_hash_rnd __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd); +static unsigned int nf_conntrack_hash_rnd __read_mostly; -static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple) +static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, + const struct net *net) { unsigned int n; + u32 seed; + + get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd)); /* The direction must be ignored, so we hash everything up to the * destination ports (which is a multiple of 4) and treat the last * three bytes manually. */ + seed = nf_conntrack_hash_rnd ^ net_hash_mix(net); n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); - return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^ + return jhash2((u32 *)tuple, n, seed ^ (((__force __u16)tuple->dst.u.all << 16) | tuple->dst.protonum)); } -static u32 __hash_bucket(u32 hash, unsigned int size) -{ - return reciprocal_scale(hash, size); -} - -static u32 hash_bucket(u32 hash, const struct net *net) +static u32 scale_hash(u32 hash) { - return __hash_bucket(hash, net->ct.htable_size); + return reciprocal_scale(hash, nf_conntrack_htable_size); } -static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, - unsigned int size) +static u32 __hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple, + unsigned int size) { - return __hash_bucket(hash_conntrack_raw(tuple), size); + return reciprocal_scale(hash_conntrack_raw(tuple, net), size); } -static inline u_int32_t hash_conntrack(const struct net *net, - const struct nf_conntrack_tuple *tuple) +static u32 hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, net->ct.htable_size); + return scale_hash(hash_conntrack_raw(tuple, net)); } bool @@ -356,7 +365,7 @@ destroy_conntrack(struct nf_conntrack *nfct) } rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); - if (l4proto && l4proto->destroy) + if (l4proto->destroy) l4proto->destroy(ct); rcu_read_unlock(); @@ -391,7 +400,7 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) local_bh_disable(); do { - sequence = read_seqcount_begin(&net->ct.generation); + sequence = read_seqcount_begin(&nf_conntrack_generation); hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); reply_hash = hash_conntrack(net, @@ -443,7 +452,8 @@ static void death_by_timeout(unsigned long ul_conntrack) static inline bool nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone) + const struct nf_conntrack_zone *zone, + const struct net *net) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -452,7 +462,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, */ return nf_ct_tuple_equal(tuple, &h->tuple) && nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && - nf_ct_is_confirmed(ct); + nf_ct_is_confirmed(ct) && + net_eq(net, nf_ct_net(ct)); } /* @@ -465,21 +476,23 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple, u32 hash) { struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int bucket = hash_bucket(hash, net); + unsigned int bucket, sequence; - /* Disable BHs the entire time since we normally need to disable them - * at least once for the stats anyway. - */ - local_bh_disable(); begin: - hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) { - if (nf_ct_key_equal(h, tuple, zone)) { - NF_CT_STAT_INC(net, found); - local_bh_enable(); + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + bucket = scale_hash(hash); + ct_hash = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { + if (nf_ct_key_equal(h, tuple, zone, net)) { + NF_CT_STAT_INC_ATOMIC(net, found); return h; } - NF_CT_STAT_INC(net, searched); + NF_CT_STAT_INC_ATOMIC(net, searched); } /* * if the nulls value we got at the end of this lookup is @@ -487,10 +500,9 @@ begin: * We probably met an item that was moved to another chain. */ if (get_nulls_value(n) != bucket) { - NF_CT_STAT_INC(net, search_restart); + NF_CT_STAT_INC_ATOMIC(net, search_restart); goto begin; } - local_bh_enable(); return NULL; } @@ -512,7 +524,7 @@ begin: !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { - if (unlikely(!nf_ct_key_equal(h, tuple, zone))) { + if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) { nf_ct_put(ct); goto begin; } @@ -528,7 +540,7 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { return __nf_conntrack_find_get(net, zone, tuple, - hash_conntrack_raw(tuple)); + hash_conntrack_raw(tuple, net)); } EXPORT_SYMBOL_GPL(nf_conntrack_find_get); @@ -536,12 +548,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int reply_hash) { - struct net *net = nf_ct_net(ct); - hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &net->ct.hash[hash]); + &nf_conntrack_hash[hash]); hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, - &net->ct.hash[reply_hash]); + &nf_conntrack_hash[reply_hash]); } int @@ -558,7 +568,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) local_bh_disable(); do { - sequence = read_seqcount_begin(&net->ct.generation); + sequence = read_seqcount_begin(&nf_conntrack_generation); hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); reply_hash = hash_conntrack(net, @@ -566,17 +576,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); /* See if there's one in the list already, including reverse */ - hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) - if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple) && - nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, - NF_CT_DIRECTION(h))) + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) + if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + zone, net)) goto out; - hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) - if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple) && - nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, - NF_CT_DIRECTION(h))) + + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) + if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + zone, net)) goto out; add_timer(&ct->timeout); @@ -597,6 +604,62 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); +static inline void nf_ct_acct_update(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int len) +{ + struct nf_conn_acct *acct; + + acct = nf_conn_acct_find(ct); + if (acct) { + struct nf_conn_counter *counter = acct->counter; + + atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); + atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes); + } +} + +static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + const struct nf_conn *loser_ct) +{ + struct nf_conn_acct *acct; + + acct = nf_conn_acct_find(loser_ct); + if (acct) { + struct nf_conn_counter *counter = acct->counter; + unsigned int bytes; + + /* u32 should be fine since we must have seen one packet. */ + bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes); + nf_ct_acct_update(ct, ctinfo, bytes); + } +} + +/* Resolve race on insertion if this protocol allows this. */ +static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_tuple_hash *h) +{ + /* This is the conntrack entry already in hashes that won race. */ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + struct nf_conntrack_l4proto *l4proto; + + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + if (l4proto->allow_clash && + !nf_ct_is_dying(ct) && + atomic_inc_not_zero(&ct->ct_general.use)) { + nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct); + nf_conntrack_put(skb->nfct); + /* Assign conntrack already in hashes to this skbuff. Don't + * modify skb->nfctinfo to ensure consistent stateful filtering. + */ + skb->nfct = &ct->ct_general; + return NF_ACCEPT; + } + NF_CT_STAT_INC(net, drop); + return NF_DROP; +} + /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) @@ -611,6 +674,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) enum ip_conntrack_info ctinfo; struct net *net; unsigned int sequence; + int ret = NF_DROP; ct = nf_ct_get(skb, &ctinfo); net = nf_ct_net(ct); @@ -626,10 +690,10 @@ __nf_conntrack_confirm(struct sk_buff *skb) local_bh_disable(); do { - sequence = read_seqcount_begin(&net->ct.generation); + sequence = read_seqcount_begin(&nf_conntrack_generation); /* reuse the hash saved before */ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; - hash = hash_bucket(hash, net); + hash = scale_hash(hash); reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -653,23 +717,22 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ nf_ct_del_from_dying_or_unconfirmed_list(ct); - if (unlikely(nf_ct_is_dying(ct))) - goto out; + if (unlikely(nf_ct_is_dying(ct))) { + nf_ct_add_to_dying_list(ct); + goto dying; + } /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) - if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple) && - nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, - NF_CT_DIRECTION(h))) + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) + if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + zone, net)) goto out; - hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) - if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple) && - nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, - NF_CT_DIRECTION(h))) + + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) + if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + zone, net)) goto out; /* Timer relative to confirmation time, not original @@ -708,10 +771,12 @@ __nf_conntrack_confirm(struct sk_buff *skb) out: nf_ct_add_to_dying_list(ct); + ret = nf_ct_resolve_clash(net, skb, ctinfo, h); +dying: nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert_failed); local_bh_enable(); - return NF_DROP; + return ret; } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); @@ -724,29 +789,31 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_head *ct_hash; + unsigned int hash, sequence; struct hlist_nulls_node *n; struct nf_conn *ct; - unsigned int hash; zone = nf_ct_zone(ignored_conntrack); - hash = hash_conntrack(net, tuple); - /* Disable BHs the entire time since we need to disable them at - * least once for the stats anyway. - */ - rcu_read_lock_bh(); - hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { + rcu_read_lock(); + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hash = hash_conntrack(net, tuple); + ct_hash = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (ct != ignored_conntrack && - nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) { - NF_CT_STAT_INC(net, found); - rcu_read_unlock_bh(); + nf_ct_key_equal(h, tuple, zone, net)) { + NF_CT_STAT_INC_ATOMIC(net, found); + rcu_read_unlock(); return 1; } - NF_CT_STAT_INC(net, searched); + NF_CT_STAT_INC_ATOMIC(net, searched); } - rcu_read_unlock_bh(); + rcu_read_unlock(); return 0; } @@ -760,71 +827,63 @@ static noinline int early_drop(struct net *net, unsigned int _hash) { /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; - struct nf_conn *ct = NULL, *tmp; + struct nf_conn *tmp; struct hlist_nulls_node *n; - unsigned int i = 0, cnt = 0; - int dropped = 0; - unsigned int hash, sequence; + unsigned int i, hash, sequence; + struct nf_conn *ct = NULL; spinlock_t *lockp; + bool ret = false; + + i = 0; local_bh_disable(); restart: - sequence = read_seqcount_begin(&net->ct.generation); - hash = hash_bucket(_hash, net); - for (; i < net->ct.htable_size; i++) { + sequence = read_seqcount_begin(&nf_conntrack_generation); + for (; i < NF_CT_EVICTION_RANGE; i++) { + hash = scale_hash(_hash++); lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS]; nf_conntrack_lock(lockp); - if (read_seqcount_retry(&net->ct.generation, sequence)) { + if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { spin_unlock(lockp); goto restart; } - hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], - hnnode) { + hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], + hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); - if (!test_bit(IPS_ASSURED_BIT, &tmp->status) && - !nf_ct_is_dying(tmp) && - atomic_inc_not_zero(&tmp->ct_general.use)) { + + if (test_bit(IPS_ASSURED_BIT, &tmp->status) || + !net_eq(nf_ct_net(tmp), net) || + nf_ct_is_dying(tmp)) + continue; + + if (atomic_inc_not_zero(&tmp->ct_general.use)) { ct = tmp; break; } - cnt++; } - hash = (hash + 1) % net->ct.htable_size; spin_unlock(lockp); - - if (ct || cnt >= NF_CT_EVICTION_RANGE) + if (ct) break; - } + local_bh_enable(); if (!ct) - return dropped; + return false; - if (del_timer(&ct->timeout)) { + /* kill only if in same netns -- might have moved due to + * SLAB_DESTROY_BY_RCU rules + */ + if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) { if (nf_ct_delete(ct, 0, 0)) { - dropped = 1; NF_CT_STAT_INC_ATOMIC(net, early_drop); + ret = true; } } - nf_ct_put(ct); - return dropped; -} -void init_nf_conntrack_hash_rnd(void) -{ - unsigned int rand; - - /* - * Why not initialize nf_conntrack_rnd in a "init()" function ? - * Because there isn't enough entropy when system initializing, - * and we initialize it as late as possible. - */ - do { - get_random_bytes(&rand, sizeof(rand)); - } while (!rand); - cmpxchg(&nf_conntrack_hash_rnd, 0, rand); + nf_ct_put(ct); + return ret; } static struct nf_conn * @@ -836,12 +895,6 @@ __nf_conntrack_alloc(struct net *net, { struct nf_conn *ct; - if (unlikely(!nf_conntrack_hash_rnd)) { - init_nf_conntrack_hash_rnd(); - /* recompute the hash as nf_conntrack_hash_rnd is initialized */ - hash = hash_conntrack_raw(orig); - } - /* We don't want any race condition at early drop stage */ atomic_inc(&net->ct.count); @@ -858,7 +911,7 @@ __nf_conntrack_alloc(struct net *net, * Do not use kmem_cache_zalloc(), as this cache uses * SLAB_DESTROY_BY_RCU. */ - ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); + ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); if (ct == NULL) goto out; @@ -885,7 +938,7 @@ __nf_conntrack_alloc(struct net *net, atomic_set(&ct->ct_general.use, 0); return ct; out_free: - kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + kmem_cache_free(nf_conntrack_cachep, ct); out: atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); @@ -912,7 +965,7 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); nf_ct_ext_free(ct); - kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + kmem_cache_free(nf_conntrack_cachep, ct); smp_mb__before_atomic(); atomic_dec(&net->ct.count); } @@ -966,7 +1019,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!l4proto->new(ct, skb, dataoff, timeouts)) { nf_conntrack_free(ct); - pr_debug("init conntrack: can't track with proto module\n"); + pr_debug("can't track with proto module\n"); return NULL; } @@ -988,7 +1041,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, spin_lock(&nf_conntrack_expect_lock); exp = nf_ct_find_expectation(net, zone, tuple); if (exp) { - pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", + pr_debug("expectation arrives ct=%p exp=%p\n", ct, exp); /* Welcome, Mr. Bond. We've been expecting you... */ __set_bit(IPS_EXPECTED_BIT, &ct->status); @@ -1053,13 +1106,13 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, net, &tuple, l3proto, l4proto)) { - pr_debug("resolve_normal_ct: Can't get tuple\n"); + pr_debug("Can't get tuple\n"); return NULL; } /* look for tuple match */ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); - hash = hash_conntrack_raw(&tuple); + hash = hash_conntrack_raw(&tuple, net); h = __nf_conntrack_find_get(net, zone, &tuple, hash); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, @@ -1079,14 +1132,13 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } else { /* Once we've had two way comms, always ESTABLISHED. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - pr_debug("nf_conntrack_in: normal packet for %p\n", ct); + pr_debug("normal packet for %p\n", ct); *ctinfo = IP_CT_ESTABLISHED; } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { - pr_debug("nf_conntrack_in: related packet for %p\n", - ct); + pr_debug("related packet for %p\n", ct); *ctinfo = IP_CT_RELATED; } else { - pr_debug("nf_conntrack_in: new packet for %p\n", ct); + pr_debug("new packet for %p\n", ct); *ctinfo = IP_CT_NEW; } *set_reply = 0; @@ -1269,17 +1321,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, } acct: - if (do_acct) { - struct nf_conn_acct *acct; - - acct = nf_conn_acct_find(ct); - if (acct) { - struct nf_conn_counter *counter = acct->counter; - - atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); - atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes); - } - } + if (do_acct) + nf_ct_acct_update(ct, ctinfo, skb->len); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); @@ -1288,18 +1331,8 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, const struct sk_buff *skb, int do_acct) { - if (do_acct) { - struct nf_conn_acct *acct; - - acct = nf_conn_acct_find(ct); - if (acct) { - struct nf_conn_counter *counter = acct->counter; - - atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); - atomic64_add(skb->len - skb_network_offset(skb), - &counter[CTINFO2DIR(ctinfo)].bytes); - } - } + if (do_acct) + nf_ct_acct_update(ct, ctinfo, skb->len); if (del_timer(&ct->timeout)) { ct->timeout.function((unsigned long)ct); @@ -1395,16 +1428,17 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), int cpu; spinlock_t *lockp; - for (; *bucket < net->ct.htable_size; (*bucket)++) { + for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; local_bh_disable(); nf_conntrack_lock(lockp); - if (*bucket < net->ct.htable_size) { - hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { + if (*bucket < nf_conntrack_htable_size) { + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); - if (iter(ct, data)) + if (net_eq(nf_ct_net(ct), net) && + iter(ct, data)) goto found; } } @@ -1442,6 +1476,9 @@ void nf_ct_iterate_cleanup(struct net *net, might_sleep(); + if (atomic_read(&net->ct.count) == 0) + return; + while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) @@ -1493,6 +1530,8 @@ void nf_conntrack_cleanup_end(void) while (untrack_refs() > 0) schedule(); + nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); + #ifdef CONFIG_NF_CONNTRACK_ZONES nf_ct_extend_unregister(&nf_ct_zone_extend); #endif @@ -1543,15 +1582,12 @@ i_see_dead_people: } list_for_each_entry(net, net_exit_list, exit_list) { - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); nf_conntrack_proto_pernet_fini(net); nf_conntrack_helper_pernet_fini(net); nf_conntrack_ecache_pernet_fini(net); nf_conntrack_tstamp_pernet_fini(net); nf_conntrack_acct_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); - kmem_cache_destroy(net->ct.nf_conntrack_cachep); - kfree(net->ct.slabname); free_percpu(net->ct.stat); free_percpu(net->ct.pcpu_lists); } @@ -1606,7 +1642,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) local_bh_disable(); nf_conntrack_all_lock(); - write_seqcount_begin(&init_net.ct.generation); + write_seqcount_begin(&nf_conntrack_generation); /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections @@ -1614,26 +1650,28 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) * though since that required taking the locks. */ - for (i = 0; i < init_net.ct.htable_size; i++) { - while (!hlist_nulls_empty(&init_net.ct.hash[i])) { - h = hlist_nulls_entry(init_net.ct.hash[i].first, - struct nf_conntrack_tuple_hash, hnnode); + for (i = 0; i < nf_conntrack_htable_size; i++) { + while (!hlist_nulls_empty(&nf_conntrack_hash[i])) { + h = hlist_nulls_entry(nf_conntrack_hash[i].first, + struct nf_conntrack_tuple_hash, hnnode); ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize); + bucket = __hash_conntrack(nf_ct_net(ct), + &h->tuple, hashsize); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } - old_size = init_net.ct.htable_size; - old_hash = init_net.ct.hash; + old_size = nf_conntrack_htable_size; + old_hash = nf_conntrack_hash; - init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; - init_net.ct.hash = hash; + nf_conntrack_hash = hash; + nf_conntrack_htable_size = hashsize; - write_seqcount_end(&init_net.ct.generation); + write_seqcount_end(&nf_conntrack_generation); nf_conntrack_all_unlock(); local_bh_enable(); + synchronize_net(); nf_ct_free_hashtable(old_hash, old_size); return 0; } @@ -1654,7 +1692,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); int nf_conntrack_init_start(void) { int max_factor = 8; - int i, ret, cpu; + int ret = -ENOMEM; + int i, cpu; + + seqcount_init(&nf_conntrack_generation); for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_conntrack_locks[i]); @@ -1681,8 +1722,19 @@ int nf_conntrack_init_start(void) * entries. */ max_factor = 4; } + + nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1); + if (!nf_conntrack_hash) + return -ENOMEM; + nf_conntrack_max = max_factor * nf_conntrack_htable_size; + nf_conntrack_cachep = kmem_cache_create("nf_conntrack", + sizeof(struct nf_conn), 0, + SLAB_DESTROY_BY_RCU, NULL); + if (!nf_conntrack_cachep) + goto err_cachep; + printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); @@ -1759,6 +1811,9 @@ err_tstamp: err_acct: nf_conntrack_expect_fini(); err_expect: + kmem_cache_destroy(nf_conntrack_cachep); +err_cachep: + nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); return ret; } @@ -1778,12 +1833,10 @@ void nf_conntrack_init_end(void) int nf_conntrack_init_net(struct net *net) { - static atomic64_t unique_id; int ret = -ENOMEM; int cpu; atomic_set(&net->ct.count, 0); - seqcount_init(&net->ct.generation); net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); if (!net->ct.pcpu_lists) @@ -1801,25 +1854,6 @@ int nf_conntrack_init_net(struct net *net) if (!net->ct.stat) goto err_pcpu_lists; - net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu", - (u64)atomic64_inc_return(&unique_id)); - if (!net->ct.slabname) - goto err_slabname; - - net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, - sizeof(struct nf_conn), 0, - SLAB_DESTROY_BY_RCU, NULL); - if (!net->ct.nf_conntrack_cachep) { - printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - goto err_cache; - } - - net->ct.htable_size = nf_conntrack_htable_size; - net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1); - if (!net->ct.hash) { - printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); - goto err_hash; - } ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) goto err_expect; @@ -1851,12 +1885,6 @@ err_tstamp: err_acct: nf_conntrack_expect_pernet_fini(net); err_expect: - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); -err_hash: - kmem_cache_destroy(net->ct.nf_conntrack_cachep); -err_cache: - kfree(net->ct.slabname); -err_slabname: free_percpu(net->ct.stat); err_pcpu_lists: free_percpu(net->ct.pcpu_lists); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 4e78c57b818f..d28011b42845 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -113,6 +113,60 @@ static void ecache_work(struct work_struct *work) schedule_delayed_work(&ctnet->ecache_dwork, delay); } +int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, + u32 portid, int report) +{ + int ret = 0; + struct net *net = nf_ct_net(ct); + struct nf_ct_event_notifier *notify; + struct nf_conntrack_ecache *e; + + rcu_read_lock(); + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); + if (!notify) + goto out_unlock; + + e = nf_ct_ecache_find(ct); + if (!e) + goto out_unlock; + + if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { + struct nf_ct_event item = { + .ct = ct, + .portid = e->portid ? e->portid : portid, + .report = report + }; + /* This is a resent of a destroy event? If so, skip missed */ + unsigned long missed = e->portid ? 0 : e->missed; + + if (!((eventmask | missed) & e->ctmask)) + goto out_unlock; + + ret = notify->fcn(eventmask | missed, &item); + if (unlikely(ret < 0 || missed)) { + spin_lock_bh(&ct->lock); + if (ret < 0) { + /* This is a destroy event that has been + * triggered by a process, we store the PORTID + * to include it in the retransmission. + */ + if (eventmask & (1 << IPCT_DESTROY) && + e->portid == 0 && portid != 0) + e->portid = portid; + else + e->missed |= eventmask; + } else { + e->missed &= ~missed; + } + spin_unlock_bh(&ct->lock); + } + } +out_unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); + /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ void nf_ct_deliver_cached_events(struct nf_conn *ct) @@ -167,6 +221,36 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); +void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp, + u32 portid, int report) + +{ + struct net *net = nf_ct_exp_net(exp); + struct nf_exp_event_notifier *notify; + struct nf_conntrack_ecache *e; + + rcu_read_lock(); + notify = rcu_dereference(net->ct.nf_expect_event_cb); + if (!notify) + goto out_unlock; + + e = nf_ct_ecache_find(exp->master); + if (!e) + goto out_unlock; + + if (e->expmask & (1 << event)) { + struct nf_exp_event item = { + .exp = exp, + .portid = portid, + .report = report + }; + notify->fcn(1 << event, &item); + } +out_unlock: + rcu_read_unlock(); +} + int nf_conntrack_register_notifier(struct net *net, struct nf_ct_event_notifier *new) { diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 278927ab0948..9e3693128313 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -24,6 +24,7 @@ #include <linux/moduleparam.h> #include <linux/export.h> #include <net/net_namespace.h> +#include <net/netns/hash.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -35,9 +36,13 @@ unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); +struct hlist_head *nf_ct_expect_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_expect_hash); + unsigned int nf_ct_expect_max __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; +static unsigned int nf_ct_expect_hashrnd __read_mostly; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, @@ -72,21 +77,32 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect) nf_ct_expect_put(exp); } -static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple) +static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple) { - unsigned int hash; + unsigned int hash, seed; - if (unlikely(!nf_conntrack_hash_rnd)) { - init_nf_conntrack_hash_rnd(); - } + get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd)); + + seed = nf_ct_expect_hashrnd ^ net_hash_mix(n); hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | - (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd); + (__force __u16)tuple->dst.u.all) ^ seed); return reciprocal_scale(hash, nf_ct_expect_hsize); } +static bool +nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_expect *i, + const struct nf_conntrack_zone *zone, + const struct net *net) +{ + return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + net_eq(net, nf_ct_net(i->master)) && + nf_ct_zone_equal_any(i->master, zone); +} + struct nf_conntrack_expect * __nf_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, @@ -98,10 +114,9 @@ __nf_ct_expect_find(struct net *net, if (!net->ct.expect_count) return NULL; - h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) { - if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - nf_ct_zone_equal_any(i->master, zone)) + h = nf_ct_expect_dst_hash(net, tuple); + hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) { + if (nf_ct_exp_equal(tuple, i, zone, net)) return i; } return NULL; @@ -139,11 +154,10 @@ nf_ct_find_expectation(struct net *net, if (!net->ct.expect_count) return NULL; - h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) { + h = nf_ct_expect_dst_hash(net, tuple); + hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && - nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - nf_ct_zone_equal_any(i->master, zone)) { + nf_ct_exp_equal(tuple, i, zone, net)) { exp = i; break; } @@ -223,6 +237,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, } return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && + net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) && nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } @@ -232,6 +247,7 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, return a->master == b->master && a->class == b->class && nf_ct_tuple_equal(&a->tuple, &b->tuple) && nf_ct_tuple_mask_equal(&a->mask, &b->mask) && + net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) && nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } @@ -342,7 +358,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) struct nf_conn_help *master_help = nfct_help(exp->master); struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(exp); - unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); + unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple); /* two references : one for hash insert, one for the timer */ atomic_add(2, &exp->use); @@ -350,7 +366,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) hlist_add_head(&exp->lnode, &master_help->expectations); master_help->expecting[exp->class]++; - hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); + hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); net->ct.expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, @@ -401,8 +417,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) ret = -ESHUTDOWN; goto out; } - h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) { + h = nf_ct_expect_dst_hash(net, &expect->tuple); + hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { if (del_timer(&i->timeout)) { nf_ct_unlink_expect(i); @@ -468,12 +484,11 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { - struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); if (n) return n; } @@ -483,14 +498,13 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { - struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(hlist_next_rcu(head)); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); } return head; } @@ -623,28 +637,13 @@ module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); int nf_conntrack_expect_pernet_init(struct net *net) { - int err = -ENOMEM; - net->ct.expect_count = 0; - net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); - if (net->ct.expect_hash == NULL) - goto err1; - - err = exp_proc_init(net); - if (err < 0) - goto err2; - - return 0; -err2: - nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); -err1: - return err; + return exp_proc_init(net); } void nf_conntrack_expect_pernet_fini(struct net *net) { exp_proc_remove(net); - nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); } int nf_conntrack_expect_init(void) @@ -660,6 +659,13 @@ int nf_conntrack_expect_init(void) 0, 0, NULL); if (!nf_ct_expect_cachep) return -ENOMEM; + + nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); + if (!nf_ct_expect_hash) { + kmem_cache_destroy(nf_ct_expect_cachep); + return -ENOMEM; + } + return 0; } @@ -667,4 +673,5 @@ void nf_conntrack_expect_fini(void) { rcu_barrier(); /* Wait for call_rcu() before destroy */ kmem_cache_destroy(nf_ct_expect_cachep); + nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3b40ec575cd5..f703adb7e5f7 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -38,10 +38,10 @@ unsigned int nf_ct_helper_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); static unsigned int nf_ct_helper_count __read_mostly; -static bool nf_ct_auto_assign_helper __read_mostly = true; +static bool nf_ct_auto_assign_helper __read_mostly = false; module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644); MODULE_PARM_DESC(nf_conntrack_helper, - "Enable automatic conntrack helper assignment (default 1)"); + "Enable automatic conntrack helper assignment (default 0)"); #ifdef CONFIG_SYSCTL static struct ctl_table helper_sysctl_table[] = { @@ -400,7 +400,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, spin_lock_bh(&nf_conntrack_expect_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, next, - &net->ct.expect_hash[i], hnode) { + &nf_ct_expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((rcu_dereference_protected( help->helper, @@ -424,10 +424,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, spin_unlock_bh(&pcpu->lock); } local_bh_disable(); - for (i = 0; i < net->ct.htable_size; i++) { + for (i = 0; i < nf_conntrack_htable_size; i++) { nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < net->ct.htable_size) { - hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) + if (i < nf_conntrack_htable_size) { + hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) unhelp(h, me); } spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 3ce5c314ea4b..252e6a7cd2f1 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -16,28 +16,11 @@ static spinlock_t nf_connlabels_lock; -static unsigned int label_bits(const struct nf_conn_labels *l) -{ - unsigned int longs = l->words; - return longs * BITS_PER_LONG; -} - -bool nf_connlabel_match(const struct nf_conn *ct, u16 bit) -{ - struct nf_conn_labels *labels = nf_ct_labels_find(ct); - - if (!labels) - return false; - - return bit < label_bits(labels) && test_bit(bit, labels->bits); -} -EXPORT_SYMBOL_GPL(nf_connlabel_match); - int nf_connlabel_set(struct nf_conn *ct, u16 bit) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); - if (!labels || bit >= label_bits(labels)) + if (!labels || BIT_WORD(bit) >= labels->words) return -ENOSPC; if (test_bit(bit, labels->bits)) @@ -50,14 +33,18 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) } EXPORT_SYMBOL_GPL(nf_connlabel_set); -static void replace_u32(u32 *address, u32 mask, u32 new) +static int replace_u32(u32 *address, u32 mask, u32 new) { u32 old, tmp; do { old = *address; tmp = (old & mask) ^ new; + if (old == tmp) + return 0; } while (cmpxchg(address, old, tmp) != old); + + return 1; } int nf_connlabels_replace(struct nf_conn *ct, @@ -66,6 +53,7 @@ int nf_connlabels_replace(struct nf_conn *ct, { struct nf_conn_labels *labels; unsigned int size, i; + int changed = 0; u32 *dst; labels = nf_ct_labels_find(ct); @@ -77,29 +65,27 @@ int nf_connlabels_replace(struct nf_conn *ct, words32 = size / sizeof(u32); dst = (u32 *) labels->bits; - if (words32) { - for (i = 0; i < words32; i++) - replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]); - } + for (i = 0; i < words32; i++) + changed |= replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]); size /= sizeof(u32); for (i = words32; i < size; i++) /* pad */ replace_u32(&dst[i], 0, 0); - nf_conntrack_event_cache(IPCT_LABEL, ct); + if (changed) + nf_conntrack_event_cache(IPCT_LABEL, ct); return 0; } EXPORT_SYMBOL_GPL(nf_connlabels_replace); -int nf_connlabels_get(struct net *net, unsigned int n_bits) +int nf_connlabels_get(struct net *net, unsigned int bits) { size_t words; - if (n_bits > (NF_CT_LABELS_MAX_SIZE * BITS_PER_BYTE)) + words = BIT_WORD(bits) + 1; + if (words > NF_CT_LABELS_MAX_SIZE / sizeof(long)) return -ERANGE; - words = BITS_TO_LONGS(n_bits); - spin_lock(&nf_connlabels_lock); net->ct.labels_used++; if (words > net->ct.label_words) @@ -128,6 +114,8 @@ static struct nf_ct_ext_type labels_extend __read_mostly = { int nf_conntrack_labels_init(void) { + BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX); + spin_lock_init(&nf_connlabels_lock); return nf_ct_extend_register(&labels_extend); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 355e8552fd5b..a18d1ceabad5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -58,10 +58,9 @@ MODULE_LICENSE("GPL"); static char __initdata version[] = "0.93"; -static inline int -ctnetlink_dump_tuples_proto(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l4proto *l4proto) +static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nlattr *nest_parms; @@ -83,10 +82,9 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_tuples_ip(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l3proto *l3proto) +static int ctnetlink_dump_tuples_ip(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_l3proto *l3proto) { int ret = 0; struct nlattr *nest_parms; @@ -106,9 +104,8 @@ nla_put_failure: return -1; } -static int -ctnetlink_dump_tuples(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple) +static int ctnetlink_dump_tuples(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) { int ret; struct nf_conntrack_l3proto *l3proto; @@ -127,9 +124,8 @@ ctnetlink_dump_tuples(struct sk_buff *skb, return ret; } -static inline int -ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype, - const struct nf_conntrack_zone *zone, int dir) +static int ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype, + const struct nf_conntrack_zone *zone, int dir) { if (zone->id == NF_CT_DEFAULT_ZONE_ID || zone->dir != dir) return 0; @@ -141,8 +137,7 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_STATUS, htonl(ct->status))) goto nla_put_failure; @@ -152,8 +147,7 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) { long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ; @@ -168,8 +162,7 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) +static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) { struct nf_conntrack_l4proto *l4proto; struct nlattr *nest_proto; @@ -193,8 +186,8 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_helpinfo(struct sk_buff *skb, + const struct nf_conn *ct) { struct nlattr *nest_helper; const struct nf_conn_help *help = nfct_help(ct); @@ -245,8 +238,10 @@ dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct, if (!nest_count) goto nla_put_failure; - if (nla_put_be64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts)) || - nla_put_be64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes))) + if (nla_put_be64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts), + CTA_COUNTERS_PAD) || + nla_put_be64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes), + CTA_COUNTERS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest_count); @@ -287,9 +282,11 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) if (!nest_count) goto nla_put_failure; - if (nla_put_be64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start)) || + if (nla_put_be64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start), + CTA_TIMESTAMP_PAD) || (tstamp->stop != 0 && nla_put_be64(skb, CTA_TIMESTAMP_STOP, - cpu_to_be64(tstamp->stop)))) + cpu_to_be64(tstamp->stop), + CTA_TIMESTAMP_PAD))) goto nla_put_failure; nla_nest_end(skb, nest_count); @@ -300,8 +297,7 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_MARK -static inline int -ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark))) goto nla_put_failure; @@ -315,8 +311,7 @@ nla_put_failure: #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK -static inline int -ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_secctx; int len, ret; @@ -345,7 +340,7 @@ nla_put_failure: #endif #ifdef CONFIG_NF_CONNTRACK_LABELS -static int ctnetlink_label_size(const struct nf_conn *ct) +static inline int ctnetlink_label_size(const struct nf_conn *ct) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); @@ -380,8 +375,7 @@ ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) -static inline int -ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_parms; @@ -426,8 +420,8 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, + const struct nf_conn *ct) { struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); struct nf_ct_seqadj *seq; @@ -446,8 +440,7 @@ ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) return 0; } -static inline int -ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) goto nla_put_failure; @@ -457,8 +450,7 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)))) goto nla_put_failure; @@ -538,8 +530,7 @@ nla_put_failure: return -1; } -static inline size_t -ctnetlink_proto_size(const struct nf_conn *ct) +static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) { struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -556,19 +547,17 @@ ctnetlink_proto_size(const struct nf_conn *ct) return len; } -static inline size_t -ctnetlink_acct_size(const struct nf_conn *ct) +static inline size_t ctnetlink_acct_size(const struct nf_conn *ct) { if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT)) return 0; return 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ - + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */ - + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */ + + 2 * nla_total_size_64bit(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */ + + 2 * nla_total_size_64bit(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */ ; } -static inline int -ctnetlink_secctx_size(const struct nf_conn *ct) +static inline int ctnetlink_secctx_size(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_SECMARK int len, ret; @@ -584,20 +573,19 @@ ctnetlink_secctx_size(const struct nf_conn *ct) #endif } -static inline size_t -ctnetlink_timestamp_size(const struct nf_conn *ct) +static inline size_t ctnetlink_timestamp_size(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP)) return 0; - return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t)); + return nla_total_size(0) + 2 * nla_total_size_64bit(sizeof(uint64_t)); #else return 0; #endif } -static inline size_t -ctnetlink_nlmsg_size(const struct nf_conn *ct) +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) { return NLMSG_ALIGN(sizeof(struct nfgenmsg)) + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ @@ -628,7 +616,6 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) ; } -#ifdef CONFIG_NF_CONNTRACK_EVENTS static int ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { @@ -837,19 +824,22 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; local_bh_disable(); - for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { + for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; nf_conntrack_lock(lockp); - if (cb->args[0] >= net->ct.htable_size) { + if (cb->args[0] >= nf_conntrack_htable_size) { spin_unlock(lockp); goto out; } - hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]], - hnnode) { + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], + hnnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); + if (!net_eq(net, nf_ct_net(ct))) + continue; + /* Dump entries of a given L3 protocol number. * If it is not specified, ie. l3proto == 0, * then dump everything. */ @@ -891,8 +881,8 @@ out: return skb->len; } -static inline int -ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) +static int ctnetlink_parse_tuple_ip(struct nlattr *attr, + struct nf_conntrack_tuple *tuple) { struct nlattr *tb[CTA_IP_MAX+1]; struct nf_conntrack_l3proto *l3proto; @@ -921,9 +911,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = { [CTA_PROTO_NUM] = { .type = NLA_U8 }, }; -static inline int -ctnetlink_parse_tuple_proto(struct nlattr *attr, - struct nf_conntrack_tuple *tuple) +static int ctnetlink_parse_tuple_proto(struct nlattr *attr, + struct nf_conntrack_tuple *tuple) { struct nlattr *tb[CTA_PROTO_MAX+1]; struct nf_conntrack_l4proto *l4proto; @@ -1050,9 +1039,8 @@ static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { .len = NF_CT_HELPER_NAME_LEN - 1 }, }; -static inline int -ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, - struct nlattr **helpinfo) +static int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, + struct nlattr **helpinfo) { int err; struct nlattr *tb[CTA_HELP_MAX+1]; @@ -1463,8 +1451,8 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) #endif } -static inline int -ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) +static int ctnetlink_change_helper(struct nf_conn *ct, + const struct nlattr * const cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); @@ -1524,8 +1512,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) return -EOPNOTSUPP; } -static inline int -ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) +static int ctnetlink_change_timeout(struct nf_conn *ct, + const struct nlattr * const cda[]) { u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); @@ -1544,8 +1532,8 @@ static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = { [CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED }, }; -static inline int -ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]) +static int ctnetlink_change_protoinfo(struct nf_conn *ct, + const struct nlattr * const cda[]) { const struct nlattr *attr = cda[CTA_PROTOINFO]; struct nlattr *tb[CTA_PROTOINFO_MAX+1]; @@ -1571,8 +1559,8 @@ static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = { [CTA_SEQADJ_OFFSET_AFTER] = { .type = NLA_U32 }, }; -static inline int -change_seq_adj(struct nf_ct_seqadj *seq, const struct nlattr * const attr) +static int change_seq_adj(struct nf_ct_seqadj *seq, + const struct nlattr * const attr) { int err; struct nlattr *cda[CTA_SEQADJ_MAX+1]; @@ -2405,10 +2393,9 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = { * EXPECT ***********************************************************************/ -static inline int -ctnetlink_exp_dump_tuple(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple, - enum ctattr_expect type) +static int ctnetlink_exp_dump_tuple(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple, + enum ctattr_expect type) { struct nlattr *nest_parms; @@ -2425,10 +2412,9 @@ nla_put_failure: return -1; } -static inline int -ctnetlink_exp_dump_mask(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple_mask *mask) +static int ctnetlink_exp_dump_mask(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple_mask *mask) { int ret; struct nf_conntrack_l3proto *l3proto; @@ -2646,10 +2632,14 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]], + hlist_for_each_entry(exp, &nf_ct_expect_hash[cb->args[0]], hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; + + if (!net_eq(nf_ct_net(exp->master), net)) + continue; + if (cb->args[1]) { if (exp != last) continue; @@ -2900,8 +2890,12 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, spin_lock_bh(&nf_conntrack_expect_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, next, - &net->ct.expect_hash[i], + &nf_ct_expect_hash[i], hnode) { + + if (!net_eq(nf_ct_exp_net(exp), net)) + continue; + m_help = nfct_help(exp->master); if (!strcmp(m_help->helper->name, name) && del_timer(&exp->timeout)) { @@ -2918,8 +2912,12 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, spin_lock_bh(&nf_conntrack_expect_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, next, - &net->ct.expect_hash[i], + &nf_ct_expect_hash[i], hnode) { + + if (!net_eq(nf_ct_exp_net(exp), net)) + continue; + if (del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index fce1b1cca32d..399a38fd685a 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -645,7 +645,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE, ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) || nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, - cpu_to_be64(ct->proto.dccp.handshake_seq))) + cpu_to_be64(ct->proto.dccp.handshake_seq), + CTA_PROTOINFO_DCCP_PAD)) goto nla_put_failure; nla_nest_end(skb, nest_parms); spin_unlock_bh(&ct->lock); @@ -660,6 +661,7 @@ static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 }, + [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 9578a7c371ef..1d7ab960a9e6 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -191,13 +191,7 @@ static void sctp_print_tuple(struct seq_file *s, /* Print out the private part of the conntrack. */ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { - enum sctp_conntrack state; - - spin_lock_bh(&ct->lock); - state = ct->proto.sctp.state; - spin_unlock_bh(&ct->lock); - - seq_printf(s, "%s ", sctp_conntrack_names[state]); + seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]); } #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 7cc1d9c22a9f..70c8381641a7 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -313,13 +313,7 @@ static void tcp_print_tuple(struct seq_file *s, /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { - enum tcp_conntrack state; - - spin_lock_bh(&ct->lock); - state = ct->proto.tcp.state; - spin_unlock_bh(&ct->lock); - - seq_printf(s, "%s ", tcp_conntrack_names[state]); + seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); } static unsigned int get_conntrack_index(const struct tcphdr *tcph) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 478f92f834b6..4fd040575ffe 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -309,6 +309,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .l3proto = PF_INET, .l4proto = IPPROTO_UDP, .name = "udp", + .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, .print_tuple = udp_print_tuple, @@ -341,6 +342,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .l3proto = PF_INET6, .l4proto = IPPROTO_UDP, .name = "udp", + .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, .print_tuple = udp_print_tuple, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 1ac8ee13a873..9d692f5adb94 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -274,6 +274,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .l3proto = PF_INET, .l4proto = IPPROTO_UDPLITE, .name = "udplite", + .allow_clash = true, .pkt_to_tuple = udplite_pkt_to_tuple, .invert_tuple = udplite_invert_tuple, .print_tuple = udplite_print_tuple, @@ -306,6 +307,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .l3proto = PF_INET6, .l4proto = IPPROTO_UDPLITE, .name = "udplite", + .allow_clash = true, .pkt_to_tuple = udplite_pkt_to_tuple, .invert_tuple = udplite_invert_tuple, .print_tuple = udplite_print_tuple, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0f1a45bcacb2..f87e84ebcec3 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -54,14 +54,13 @@ struct ct_iter_state { static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) { - struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < net->ct.htable_size; + st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); + n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); if (!is_a_nulls(n)) return n; } @@ -71,18 +70,17 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, struct hlist_nulls_node *head) { - struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(hlist_nulls_next_rcu(head)); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= net->ct.htable_size) + if (++st->bucket >= nf_conntrack_htable_size) return NULL; } head = rcu_dereference( hlist_nulls_first_rcu( - &net->ct.hash[st->bucket])); + &nf_conntrack_hash[st->bucket])); } return head; } @@ -458,7 +456,7 @@ static struct ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &init_net.ct.htable_size, + .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, @@ -512,7 +510,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; - table[2].data = &net->ct.htable_size; table[3].data = &net->ct.sysctl_checksum; table[4].data = &net->ct.sysctl_log_invalid; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 06a9f45771ab..6877a396f8fc 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -38,6 +38,9 @@ static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] __read_mostly; +static struct hlist_head *nf_nat_bysource __read_mostly; +static unsigned int nf_nat_htable_size __read_mostly; +static unsigned int nf_nat_hash_rnd __read_mostly; inline const struct nf_nat_l3proto * __nf_nat_l3proto_find(u8 family) @@ -118,15 +121,17 @@ EXPORT_SYMBOL(nf_xfrm_me_harder); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) { unsigned int hash; + get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd)); + /* Original src, to ensure we map it consistently if poss. */ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), - tuple->dst.protonum ^ nf_conntrack_hash_rnd); + tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n)); - return reciprocal_scale(hash, net->ct.nat_htable_size); + return reciprocal_scale(hash, nf_nat_htable_size); } /* Is this tuple already taken? (not by us) */ @@ -196,9 +201,10 @@ find_appropriate_src(struct net *net, const struct nf_conn_nat *nat; const struct nf_conn *ct; - hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple) && + net_eq(net, nf_ct_net(ct)) && nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, @@ -431,7 +437,7 @@ nf_nat_setup_info(struct nf_conn *ct, nat = nfct_nat(ct); nat->ct = ct; hlist_add_head_rcu(&nat->bysource, - &net->ct.nat_bysource[srchash]); + &nf_nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -819,27 +825,14 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, } #endif -static int __net_init nf_nat_net_init(struct net *net) -{ - /* Leave them the same for the moment. */ - net->ct.nat_htable_size = net->ct.htable_size; - net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0); - if (!net->ct.nat_bysource) - return -ENOMEM; - return 0; -} - static void __net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0); - synchronize_rcu(); - nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { - .init = nf_nat_net_init, .exit = nf_nat_net_exit, }; @@ -852,8 +845,16 @@ static int __init nf_nat_init(void) { int ret; + /* Leave them the same for the moment. */ + nf_nat_htable_size = nf_conntrack_htable_size; + + nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0); + if (!nf_nat_bysource) + return -ENOMEM; + ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); return ret; } @@ -877,6 +878,7 @@ static int __init nf_nat_init(void) return 0; cleanup_extend: + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); nf_ct_extend_unregister(&nat_extend); return ret; } @@ -895,6 +897,7 @@ static void __exit nf_nat_cleanup(void) for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); synchronize_net(); + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); } MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2011977cd79d..4d292b933b5c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -944,8 +944,10 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) if (nest == NULL) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts)) || - nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes))) + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts), + NFTA_COUNTER_PAD) || + nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), + NFTA_COUNTER_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -975,7 +977,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle))) + if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle), + NFTA_CHAIN_PAD)) goto nla_put_failure; if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name)) goto nla_put_failure; @@ -1803,13 +1806,15 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) + if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle), + NFTA_RULE_PAD)) goto nla_put_failure; if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { prule = list_entry(rule->list.prev, struct nft_rule, list); if (nla_put_be64(skb, NFTA_RULE_POSITION, - cpu_to_be64(prule->handle))) + cpu_to_be64(prule->handle), + NFTA_RULE_PAD)) goto nla_put_failure; } @@ -2312,7 +2317,7 @@ nft_select_set_ops(const struct nlattr * const nla[], static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_TABLE] = { .type = NLA_STRING }, [NFTA_SET_NAME] = { .type = NLA_STRING, - .len = IFNAMSIZ - 1 }, + .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_SET_FLAGS] = { .type = NLA_U32 }, [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, @@ -2396,7 +2401,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, unsigned long *inuse; unsigned int n = 0, min = 0; - p = strnchr(name, IFNAMSIZ, '%'); + p = strnchr(name, NFT_SET_MAXNAMELEN, '%'); if (p != NULL) { if (p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; @@ -2473,7 +2478,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, } if (set->timeout && - nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout))) + nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout), + NFTA_SET_PAD)) goto nla_put_failure; if (set->gc_int && nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) @@ -2690,7 +2696,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; - char name[IFNAMSIZ]; + char name[NFT_SET_MAXNAMELEN]; unsigned int size; bool create; u64 timeout; @@ -3076,7 +3082,8 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - cpu_to_be64(*nft_set_ext_timeout(ext)))) + cpu_to_be64(*nft_set_ext_timeout(ext)), + NFTA_SET_ELEM_PAD)) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { @@ -3089,7 +3096,8 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, expires = 0; if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, - cpu_to_be64(jiffies_to_msecs(expires)))) + cpu_to_be64(jiffies_to_msecs(expires)), + NFTA_SET_ELEM_PAD)) goto nla_put_failure; } @@ -3367,6 +3375,22 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem) } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); +static int nft_setelem_parse_flags(const struct nft_set *set, + const struct nlattr *attr, u32 *flags) +{ + if (attr == NULL) + return 0; + + *flags = ntohl(nla_get_be32(attr)); + if (*flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + if (!(set->flags & NFT_SET_INTERVAL) && + *flags & NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + + return 0; +} + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { @@ -3380,8 +3404,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; + u32 flags = 0; u64 timeout; - u32 flags; u8 ulen; int err; @@ -3395,17 +3419,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_prepare(&tmpl); - flags = 0; - if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { - flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); - if (flags & ~NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; - if (!(set->flags & NFT_SET_INTERVAL) && - flags & NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); - } + err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags); + if (err < 0) + return err; + if (flags != 0) + nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && @@ -3574,9 +3592,13 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + struct nft_set_ext_tmpl tmpl; struct nft_data_desc desc; struct nft_set_elem elem; + struct nft_set_ext *ext; struct nft_trans *trans; + u32 flags = 0; + void *priv; int err; err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, @@ -3588,6 +3610,14 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_KEY] == NULL) goto err1; + nft_set_ext_prepare(&tmpl); + + err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags); + if (err < 0) + return err; + if (flags != 0) + nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, nla[NFTA_SET_ELEM_KEY]); if (err < 0) @@ -3597,24 +3627,40 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) goto err2; + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, desc.len); + + err = -ENOMEM; + elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0, + GFP_KERNEL); + if (elem.priv == NULL) + goto err2; + + ext = nft_set_elem_ext(set, elem.priv); + if (flags) + *nft_set_ext_flags(ext) = flags; + trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (trans == NULL) { err = -ENOMEM; - goto err2; + goto err3; } - elem.priv = set->ops->deactivate(set, &elem); - if (elem.priv == NULL) { + priv = set->ops->deactivate(set, &elem); + if (priv == NULL) { err = -ENOENT; - goto err3; + goto err4; } + kfree(elem.priv); + elem.priv = priv; nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; -err3: +err4: kfree(trans); +err3: + kfree(elem.priv); err2: nft_data_uninit(&elem.key.val, desc.type); err1: diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index e9e959f65d91..39eb1cc62e91 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -156,7 +156,8 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb, return 0; return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE, - cpu_to_be64(info->rule->handle)); + cpu_to_be64(info->rule->handle), + NFTA_TRACE_PAD); } void nft_trace_notify(struct nft_traceinfo *info) @@ -174,7 +175,7 @@ void nft_trace_notify(struct nft_traceinfo *info) size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(NFT_TABLE_MAXNAMELEN) + nla_total_size(NFT_CHAIN_MAXNAMELEN) + - nla_total_size(sizeof(__be64)) + /* rule handle */ + nla_total_size_64bit(sizeof(__be64)) + /* rule handle */ nla_total_size(sizeof(__be32)) + /* trace type */ nla_total_size(0) + /* VERDICT, nested */ nla_total_size(sizeof(u32)) + /* verdict code */ diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index dbd0803b1827..1b4de4bd6958 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -162,15 +162,18 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, pkts = atomic64_read(&acct->pkts); bytes = atomic64_read(&acct->bytes); } - if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) || - nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || + if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts), + NFACCT_PAD) || + nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes), + NFACCT_PAD) || nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) goto nla_put_failure; if (acct->flags & NFACCT_F_QUOTA) { u64 *quota = (u64 *)acct->data; if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) || - nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota))) + nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota), + NFACCT_PAD)) goto nla_put_failure; } nlmsg_end(skb, nlh); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 2671b9deb103..3c84f14326f5 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -306,10 +306,10 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) int i; local_bh_disable(); - for (i = 0; i < net->ct.htable_size; i++) { + for (i = 0; i < nf_conntrack_htable_size; i++) { nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < net->ct.htable_size) { - hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) + if (i < nf_conntrack_htable_size) { + hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) untimeout(h, timeout); } spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index cb5b630a645b..aa93877ab6e2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -295,6 +295,59 @@ static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata) return seclen; } +static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry) +{ + struct sk_buff *entskb = entry->skb; + u32 nlalen = 0; + + if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb)) + return 0; + + if (skb_vlan_tag_present(entskb)) + nlalen += nla_total_size(nla_total_size(sizeof(__be16)) + + nla_total_size(sizeof(__be16))); + + if (entskb->network_header > entskb->mac_header) + nlalen += nla_total_size((entskb->network_header - + entskb->mac_header)); + + return nlalen; +} + +static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb) +{ + struct sk_buff *entskb = entry->skb; + + if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb)) + return 0; + + if (skb_vlan_tag_present(entskb)) { + struct nlattr *nest; + + nest = nla_nest_start(skb, NFQA_VLAN | NLA_F_NESTED); + if (!nest) + goto nla_put_failure; + + if (nla_put_be16(skb, NFQA_VLAN_TCI, htons(entskb->vlan_tci)) || + nla_put_be16(skb, NFQA_VLAN_PROTO, entskb->vlan_proto)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + } + + if (entskb->mac_header < entskb->network_header) { + int len = (int)(entskb->network_header - entskb->mac_header); + + if (nla_put(skb, NFQA_L2HDR, len, skb_mac_header(entskb))) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -1; +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -334,6 +387,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (entskb->tstamp.tv64) size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + size += nfqnl_get_bridge_size(entry); + if (entry->state.hook <= NF_INET_FORWARD || (entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL)) csum_verify = !skb_csum_unnecessary(entskb); @@ -497,6 +552,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } } + if (nfqnl_put_bridge(entry, skb) < 0) + goto nla_put_failure; + if (entskb->tstamp.tv64) { struct nfqnl_msg_packet_timestamp ts; struct timespec64 kts = ktime_to_timespec64(skb->tstamp); @@ -911,12 +969,18 @@ static struct notifier_block nfqnl_rtnl_notifier = { .notifier_call = nfqnl_rcv_nl_event, }; +static const struct nla_policy nfqa_vlan_policy[NFQA_VLAN_MAX + 1] = { + [NFQA_VLAN_TCI] = { .type = NLA_U16}, + [NFQA_VLAN_PROTO] = { .type = NLA_U16}, +}; + static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, [NFQA_MARK] = { .type = NLA_U32 }, [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, [NFQA_CT] = { .type = NLA_UNSPEC }, [NFQA_EXP] = { .type = NLA_UNSPEC }, + [NFQA_VLAN] = { .type = NLA_NESTED }, }; static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { @@ -1030,6 +1094,40 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, return ct; } +static int nfqa_parse_bridge(struct nf_queue_entry *entry, + const struct nlattr * const nfqa[]) +{ + if (nfqa[NFQA_VLAN]) { + struct nlattr *tb[NFQA_VLAN_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFQA_VLAN_MAX, nfqa[NFQA_VLAN], + nfqa_vlan_policy); + if (err < 0) + return err; + + if (!tb[NFQA_VLAN_TCI] || !tb[NFQA_VLAN_PROTO]) + return -EINVAL; + + entry->skb->vlan_tci = ntohs(nla_get_be16(tb[NFQA_VLAN_TCI])); + entry->skb->vlan_proto = nla_get_be16(tb[NFQA_VLAN_PROTO]); + } + + if (nfqa[NFQA_L2HDR]) { + int mac_header_len = entry->skb->network_header - + entry->skb->mac_header; + + if (mac_header_len != nla_len(nfqa[NFQA_L2HDR])) + return -EINVAL; + else if (mac_header_len > 0) + memcpy(skb_mac_header(entry->skb), + nla_data(nfqa[NFQA_L2HDR]), + mac_header_len); + } + + return 0; +} + static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1045,6 +1143,7 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; struct nfnl_queue_net *q = nfnl_queue_pernet(net); + int err; queue = instance_lookup(q, queue_num); if (!queue) @@ -1071,6 +1170,12 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo); } + if (entry->state.pf == PF_BRIDGE) { + err = nfqa_parse_bridge(entry, nfqa); + if (err < 0) + return err; + } + if (nfqa[NFQA_PAYLOAD]) { u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); int diff = payload_len - entry->skb->len; diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index c9743f78f219..77db8358ab14 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -76,8 +76,10 @@ static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) nft_counter_fetch(priv->counter, &total); - if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) || - nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets))) + if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), + NFTA_COUNTER_PAD) || + nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets), + NFTA_COUNTER_PAD)) goto nla_put_failure; return 0; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d4a4619fcebc..137e308d5b24 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -198,6 +198,14 @@ static void nft_ct_set_eval(const struct nft_expr *expr, } break; #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + case NFT_CT_LABELS: + nf_connlabels_replace(ct, + ®s->data[priv->sreg], + ®s->data[priv->sreg], + NF_CT_LABELS_MAX_SIZE / sizeof(u32)); + break; +#endif default: break; } @@ -365,6 +373,16 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, len = FIELD_SIZEOF(struct nf_conn, mark); break; #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + case NFT_CT_LABELS: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; + len = NF_CT_LABELS_MAX_SIZE; + err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1); + if (err) + return err; + break; +#endif default: return -EOPNOTSUPP; } @@ -384,6 +402,18 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, static void nft_ct_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { + struct nft_ct *priv = nft_expr_priv(expr); + + switch (priv->key) { +#ifdef CONFIG_NF_CONNTRACK_LABELS + case NFT_CT_LABELS: + nf_connlabels_put(ctx->net); + break; +#endif + default: + break; + } + nft_ct_l3proto_module_put(ctx->afi->family); } @@ -484,6 +514,8 @@ static struct nft_expr_type nft_ct_type __read_mostly = { static int __init nft_ct_module_init(void) { + BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE); + return nft_register_expr(&nft_ct_type); } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 9dec3bd1b63c..78d4914fb39c 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -227,7 +227,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout))) + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout), + NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) goto nla_put_failure; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3f9d45d3d9b7..6fa016564f90 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -192,7 +192,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; - err = rhashtable_walk_init(&priv->ht, &hti); + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); iter->err = err; if (err) return; @@ -248,7 +248,7 @@ static void nft_hash_gc(struct work_struct *work) priv = container_of(work, struct nft_hash, gc_work.work); set = nft_set_container_of(priv); - err = rhashtable_walk_init(&priv->ht, &hti); + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); if (err) goto schedule; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 99d18578afc6..070b98938e02 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -97,8 +97,10 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); u64 rate = limit->rate - limit->burst; - if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) || - nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) || + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate), + NFTA_LIMIT_PAD) || + nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs), + NFTA_LIMIT_PAD) || nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) || nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) || nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags))) diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 1c30f41cff5b..f762094af7c1 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -29,6 +29,17 @@ struct nft_rbtree_elem { struct nft_set_ext ext; }; +static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe) +{ + return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && + (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END); +} + +static bool nft_rbtree_equal(const struct nft_set *set, const void *this, + const struct nft_rbtree_elem *interval) +{ + return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; +} static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) @@ -37,6 +48,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, const struct nft_rbtree_elem *rbe, *interval = NULL; const struct rb_node *parent; u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); + const void *this; int d; spin_lock_bh(&nft_rbtree_lock); @@ -44,9 +56,16 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); + this = nft_set_ext_key(&rbe->ext); + d = memcmp(this, key, set->klen); if (d < 0) { parent = parent->rb_left; + /* In case of adjacent ranges, we always see the high + * part of the range in first place, before the low one. + * So don't update interval if the keys are equal. + */ + if (interval && nft_rbtree_equal(set, this, interval)) + continue; interval = rbe; } else if (d > 0) parent = parent->rb_right; @@ -56,9 +75,7 @@ found: parent = parent->rb_left; continue; } - if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && - *nft_set_ext_flags(&rbe->ext) & - NFT_SET_ELEM_INTERVAL_END) + if (nft_rbtree_interval_end(rbe)) goto out; spin_unlock_bh(&nft_rbtree_lock); @@ -98,9 +115,16 @@ static int __nft_rbtree_insert(const struct nft_set *set, else if (d > 0) p = &parent->rb_right; else { - if (nft_set_elem_active(&rbe->ext, genmask)) - return -EEXIST; - p = &parent->rb_left; + if (nft_set_elem_active(&rbe->ext, genmask)) { + if (nft_rbtree_interval_end(rbe) && + !nft_rbtree_interval_end(new)) + p = &parent->rb_left; + else if (!nft_rbtree_interval_end(rbe) && + nft_rbtree_interval_end(new)) + p = &parent->rb_right; + else + return -EEXIST; + } } } rb_link_node(&new->node, parent, p); @@ -145,7 +169,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, { const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; - struct nft_rbtree_elem *rbe; + struct nft_rbtree_elem *rbe, *this = elem->priv; u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; @@ -163,6 +187,15 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, parent = parent->rb_left; continue; } + if (nft_rbtree_interval_end(rbe) && + !nft_rbtree_interval_end(this)) { + parent = parent->rb_left; + continue; + } else if (!nft_rbtree_interval_end(rbe) && + nft_rbtree_interval_end(this)) { + parent = parent->rb_right; + continue; + } nft_set_elem_change_active(set, &rbe->ext); return rbe; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 582c9cfd6567..c69c892231d7 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -416,6 +416,47 @@ int xt_check_match(struct xt_mtchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_match); +/** xt_check_entry_match - check that matches end before start of target + * + * @match: beginning of xt_entry_match + * @target: beginning of this rules target (alleged end of matches) + * @alignment: alignment requirement of match structures + * + * Validates that all matches add up to the beginning of the target, + * and that each match covers at least the base structure size. + * + * Return: 0 on success, negative errno on failure. + */ +static int xt_check_entry_match(const char *match, const char *target, + const size_t alignment) +{ + const struct xt_entry_match *pos; + int length = target - match; + + if (length == 0) /* no matches */ + return 0; + + pos = (struct xt_entry_match *)match; + do { + if ((unsigned long)pos % alignment) + return -EINVAL; + + if (length < (int)sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size < sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size > length) + return -EINVAL; + + length -= pos->u.match_size; + pos = ((void *)((char *)(pos) + (pos)->u.match_size)); + } while (length > 0); + + return 0; +} + #ifdef CONFIG_COMPAT int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { @@ -485,13 +526,14 @@ int xt_compat_match_offset(const struct xt_match *match) } EXPORT_SYMBOL_GPL(xt_compat_match_offset); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, - unsigned int *size) +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; int pad, off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; + char name[sizeof(m->u.user.name)]; m = *dstptr; memcpy(m, cm, sizeof(*cm)); @@ -505,10 +547,12 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, msize += off; m->u.user.match_size = msize; + strlcpy(name, match->name, sizeof(name)); + module_put(match->me); + strncpy(m->u.user.name, name, sizeof(m->u.user.name)); *size += off; *dstptr += msize; - return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); @@ -539,8 +583,125 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_to_user); + +/* non-compat version may have padding after verdict */ +struct compat_xt_standard_target { + struct compat_xt_entry_target t; + compat_uint_t verdict; +}; + +int xt_compat_check_entry_offsets(const void *base, const char *elems, + unsigned int target_offset, + unsigned int next_offset) +{ + long size_of_base_struct = elems - (const char *)base; + const struct compat_xt_entry_target *t; + const char *e = base; + + if (target_offset < size_of_base_struct) + return -EINVAL; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + target_offset + sizeof(struct compat_xt_standard_target) != next_offset) + return -EINVAL; + + /* compat_xt_entry match has less strict aligment requirements, + * otherwise they are identical. In case of padding differences + * we need to add compat version of xt_check_entry_match. + */ + BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match)); + + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct compat_xt_entry_match)); +} +EXPORT_SYMBOL(xt_compat_check_entry_offsets); #endif /* CONFIG_COMPAT */ +/** + * xt_check_entry_offsets - validate arp/ip/ip6t_entry + * + * @base: pointer to arp/ip/ip6t_entry + * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems + * @target_offset: the arp/ip/ip6_t->target_offset + * @next_offset: the arp/ip/ip6_t->next_offset + * + * validates that target_offset and next_offset are sane and that all + * match sizes (if any) align with the target offset. + * + * This function does not validate the targets or matches themselves, it + * only tests that all the offsets and sizes are correct, that all + * match structures are aligned, and that the last structure ends where + * the target structure begins. + * + * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. + * + * The arp/ip/ip6t_entry structure @base must have passed following tests: + * - it must point to a valid memory location + * - base to base + next_offset must be accessible, i.e. not exceed allocated + * length. + * + * A well-formed entry looks like this: + * + * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry + * e->elems[]-----' | | + * matchsize | | + * matchsize | | + * | | + * target_offset---------------------------------' | + * next_offset---------------------------------------------------' + * + * elems[]: flexible array member at end of ip(6)/arpt_entry struct. + * This is where matches (if any) and the target reside. + * target_offset: beginning of target. + * next_offset: start of the next rule; also: size of this rule. + * Since targets have a minimum size, target_offset + minlen <= next_offset. + * + * Every match stores its size, sum of sizes must not exceed target_offset. + * + * Return: 0 on success, negative errno on failure. + */ +int xt_check_entry_offsets(const void *base, + const char *elems, + unsigned int target_offset, + unsigned int next_offset) +{ + long size_of_base_struct = elems - (const char *)base; + const struct xt_entry_target *t; + const char *e = base; + + /* target start is within the ip/ip6/arpt_entry struct */ + if (target_offset < size_of_base_struct) + return -EINVAL; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + target_offset + sizeof(struct xt_standard_target) != next_offset) + return -EINVAL; + + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct xt_entry_match)); +} +EXPORT_SYMBOL(xt_check_entry_offsets); + int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { @@ -591,6 +752,80 @@ int xt_check_target(struct xt_tgchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_target); +/** + * xt_copy_counters_from_user - copy counters and metadata from userspace + * + * @user: src pointer to userspace memory + * @len: alleged size of userspace memory + * @info: where to store the xt_counters_info metadata + * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel + * + * Copies counter meta data from @user and stores it in @info. + * + * vmallocs memory to hold the counters, then copies the counter data + * from @user to the new memory and returns a pointer to it. + * + * If @compat is true, @info gets converted automatically to the 64bit + * representation. + * + * The metadata associated with the counters is stored in @info. + * + * Return: returns pointer that caller has to test via IS_ERR(). + * If IS_ERR is false, caller has to vfree the pointer. + */ +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat) +{ + void *mem; + u64 size; + +#ifdef CONFIG_COMPAT + if (compat) { + /* structures only differ in size due to alignment */ + struct compat_xt_counters_info compat_tmp; + + if (len <= sizeof(compat_tmp)) + return ERR_PTR(-EINVAL); + + len -= sizeof(compat_tmp); + if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) + return ERR_PTR(-EFAULT); + + strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + info->num_counters = compat_tmp.num_counters; + user += sizeof(compat_tmp); + } else +#endif + { + if (len <= sizeof(*info)) + return ERR_PTR(-EINVAL); + + len -= sizeof(*info); + if (copy_from_user(info, user, sizeof(*info)) != 0) + return ERR_PTR(-EFAULT); + + info->name[sizeof(info->name) - 1] = '\0'; + user += sizeof(*info); + } + + size = sizeof(struct xt_counters); + size *= info->num_counters; + + if (size != (u64)len) + return ERR_PTR(-EINVAL); + + mem = vmalloc(len); + if (!mem) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(mem, user, len) == 0) + return mem; + + vfree(mem); + return ERR_PTR(-EFAULT); +} +EXPORT_SYMBOL_GPL(xt_copy_counters_from_user); + #ifdef CONFIG_COMPAT int xt_compat_target_offset(const struct xt_target *target) { @@ -606,6 +841,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; int pad, off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; + char name[sizeof(t->u.user.name)]; t = *dstptr; memcpy(t, ct, sizeof(*ct)); @@ -619,6 +855,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, tsize += off; t->u.user.target_size = tsize; + strlcpy(name, target->name, sizeof(name)); + module_put(target->me); + strncpy(t->u.user.name, name, sizeof(t->u.user.name)); *size += off; *dstptr += tsize; diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index bb9cbeb18868..a79af255561a 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -18,6 +18,16 @@ MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); MODULE_ALIAS("ipt_connlabel"); MODULE_ALIAS("ip6t_connlabel"); +static bool connlabel_match(const struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return false; + + return BIT_WORD(bit) < labels->words && test_bit(bit, labels->bits); +} + static bool connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -33,7 +43,7 @@ connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) if (info->options & XT_CONNLABEL_OP_SET) return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; - return nf_connlabel_match(ct, info->bit) ^ invert; + return connlabel_match(ct, info->bit) ^ invert; } static int connlabel_mt_check(const struct xt_mtchk_param *par) @@ -55,7 +65,7 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) return ret; } - ret = nf_connlabels_get(par->net, info->bit + 1); + ret = nf_connlabels_get(par->net, info->bit); if (ret < 0) nf_ct_l3proto_module_put(par->family); return ret; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 49d14ecad444..b10ade272b50 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -120,9 +120,9 @@ xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, { switch (protocol) { case IPPROTO_TCP: - return __inet_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); + return inet_lookup(net, &tcp_hashinfo, skb, doff, + saddr, sport, daddr, dport, + in->ifindex); case IPPROTO_UDP: return udp4_lib_lookup(net, saddr, sport, daddr, dport, in->ifindex); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 330ebd600f25..627f898c05b9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2059,6 +2059,7 @@ static int netlink_dump(struct sock *sk) struct netlink_callback *cb; struct sk_buff *skb = NULL; struct nlmsghdr *nlh; + struct module *module; int len, err = -ENOBUFS; int alloc_min_size; int alloc_size; @@ -2134,9 +2135,11 @@ static int netlink_dump(struct sock *sk) cb->done(cb); nlk->cb_running = false; + module = cb->module; + skb = cb->skb; mutex_unlock(nlk->cb_mutex); - module_put(cb->module); - consume_skb(cb->skb); + module_put(module); + consume_skb(skb); return 0; errout_skb: @@ -2343,7 +2346,8 @@ static int netlink_walk_start(struct nl_seq_iter *iter) { int err; - err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti); + err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti, + GFP_KERNEL); if (err) { iter->link = MAX_LINKS; return err; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index fbb7a2b57b44..61fff422424f 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -64,18 +64,26 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, return NULL; } -int nci_get_conn_info_by_id(struct nci_dev *ndev, u8 id) +int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, + struct dest_spec_params *params) { struct nci_conn_info *conn_info; list_for_each_entry(conn_info, &ndev->conn_info_list, list) { - if (conn_info->id == id) - return conn_info->conn_id; + if (conn_info->dest_type == dest_type) { + if (!params) + return conn_info->conn_id; + if (conn_info) { + if (params->id == conn_info->dest_params->id && + params->protocol == conn_info->dest_params->protocol) + return conn_info->conn_id; + } + } } return -EINVAL; } -EXPORT_SYMBOL(nci_get_conn_info_by_id); +EXPORT_SYMBOL(nci_get_conn_info_by_dest_type_params); /* ---- NCI requests ---- */ @@ -392,6 +400,83 @@ int nci_core_init(struct nci_dev *ndev) } EXPORT_SYMBOL(nci_core_init); +struct nci_loopback_data { + u8 conn_id; + struct sk_buff *data; +}; + +static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_loopback_data *data = (struct nci_loopback_data *)opt; + + nci_send_data(ndev, data->conn_id, data->data); +} + +static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err) +{ + struct nci_dev *ndev = (struct nci_dev *)context; + struct nci_conn_info *conn_info; + + conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); + if (!conn_info) { + nci_req_complete(ndev, NCI_STATUS_REJECTED); + return; + } + + conn_info->rx_skb = skb; + + nci_req_complete(ndev, NCI_STATUS_OK); +} + +int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len, + struct sk_buff **resp) +{ + int r; + struct nci_loopback_data loopback_data; + struct nci_conn_info *conn_info; + struct sk_buff *skb; + int conn_id = nci_get_conn_info_by_dest_type_params(ndev, + NCI_DESTINATION_NFCC_LOOPBACK, NULL); + + if (conn_id < 0) { + r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCC_LOOPBACK, + 0, 0, NULL); + if (r != NCI_STATUS_OK) + return r; + + conn_id = nci_get_conn_info_by_dest_type_params(ndev, + NCI_DESTINATION_NFCC_LOOPBACK, + NULL); + } + + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) + return -EPROTO; + + /* store cb and context to be used on receiving data */ + conn_info->data_exchange_cb = nci_nfcc_loopback_cb; + conn_info->data_exchange_cb_context = ndev; + + skb = nci_skb_alloc(ndev, NCI_DATA_HDR_SIZE + data_len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, NCI_DATA_HDR_SIZE); + memcpy(skb_put(skb, data_len), data, data_len); + + loopback_data.conn_id = conn_id; + loopback_data.data = skb; + + ndev->cur_conn_id = conn_id; + r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + if (r == NCI_STATUS_OK && resp) + *resp = conn_info->rx_skb; + + return r; +} +EXPORT_SYMBOL(nci_nfcc_loopback); + static int nci_open_device(struct nci_dev *ndev) { int rc = 0; @@ -610,9 +695,6 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, struct nci_core_conn_create_cmd *cmd; struct core_conn_create_data data; - if (!number_destination_params) - return -EINVAL; - data.length = params_len + sizeof(struct nci_core_conn_create_cmd); cmd = kzalloc(data.length, GFP_KERNEL); if (!cmd) @@ -620,17 +702,23 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, cmd->destination_type = destination_type; cmd->number_destination_params = number_destination_params; - memcpy(cmd->params, params, params_len); data.cmd = cmd; - if (params->length > 0) - ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; - else - ndev->cur_id = 0; + if (params) { + memcpy(cmd->params, params, params_len); + if (params->length > 0) + memcpy(&ndev->cur_params, + ¶ms->value[DEST_SPEC_PARAMS_ID_INDEX], + sizeof(struct dest_spec_params)); + else + ndev->cur_params.id = 0; + } else { + ndev->cur_params.id = 0; + } + ndev->cur_dest_type = destination_type; - r = __nci_request(ndev, nci_core_conn_create_req, - (unsigned long)&data, + r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data, msecs_to_jiffies(NCI_CMD_TIMEOUT)); kfree(cmd); return r; @@ -646,6 +734,7 @@ static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt) int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) { + ndev->cur_conn_id = conn_id; return __nci_request(ndev, nci_core_conn_close_req, conn_id, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 2ada2b39e355..1e8c1a12aaec 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -734,7 +734,7 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, * “HCI Access”, even if the HCI Network contains multiple NFCEEs. */ ndev->hci_dev->nfcee_id = nfcee_ntf->nfcee_id; - ndev->cur_id = nfcee_ntf->nfcee_id; + ndev->cur_params.id = nfcee_ntf->nfcee_id; nci_req_complete(ndev, status); } diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 9b6eb913d801..e3bbf1937d0e 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -226,7 +226,7 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u8 status = skb->data[0]; - struct nci_conn_info *conn_info; + struct nci_conn_info *conn_info = NULL; struct nci_core_conn_create_rsp *rsp; pr_debug("status 0x%x\n", status); @@ -241,7 +241,17 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, goto exit; } - conn_info->id = ndev->cur_id; + conn_info->dest_params = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(struct dest_spec_params), + GFP_KERNEL); + if (!conn_info->dest_params) { + status = NCI_STATUS_REJECTED; + goto free_conn_info; + } + + conn_info->dest_type = ndev->cur_dest_type; + conn_info->dest_params->id = ndev->cur_params.id; + conn_info->dest_params->protocol = ndev->cur_params.protocol; conn_info->conn_id = rsp->conn_id; /* Note: data_exchange_cb and data_exchange_cb_context need to @@ -251,7 +261,7 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, INIT_LIST_HEAD(&conn_info->list); list_add(&conn_info->list, &ndev->conn_info_list); - if (ndev->cur_id == ndev->hci_dev->nfcee_id) + if (ndev->cur_params.id == ndev->hci_dev->nfcee_id) ndev->hci_dev->conn_info = conn_info; conn_info->conn_id = rsp->conn_id; @@ -259,7 +269,11 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, atomic_set(&conn_info->credits_cnt, rsp->credits_cnt); } +free_conn_info: + if (status == NCI_STATUS_REJECTED) + devm_kfree(&ndev->nfc_dev->dev, conn_info); exit: + nci_req_complete(ndev, status); } @@ -271,7 +285,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, pr_debug("status 0x%x\n", status); if (status == NCI_STATUS_OK) { - conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_id); + conn_info = nci_get_conn_info_by_conn_id(ndev, + ndev->cur_conn_id); if (conn_info) { list_del(&conn_info->list); devm_kfree(&ndev->nfc_dev->dev, conn_info); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 10c84d882881..3d5feede962d 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -439,20 +439,12 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, u8 protonum; l3proto = __nf_ct_l3proto_find(l3num); - if (!l3proto) { - pr_debug("ovs_ct_find_existing: Can't get l3proto\n"); - return NULL; - } if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum) <= 0) { pr_debug("ovs_ct_find_existing: Can't get protonum\n"); return NULL; } l4proto = __nf_ct_l4proto_find(l3num, protonum); - if (!l4proto) { - pr_debug("ovs_ct_find_existing: Can't get l4proto\n"); - return NULL; - } if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, net, &tuple, l3proto, l4proto)) { pr_debug("ovs_ct_find_existing: Can't get tuple\n"); @@ -1358,7 +1350,7 @@ void ovs_ct_init(struct net *net) unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); - if (nf_connlabels_get(net, n_bits)) { + if (nf_connlabels_get(net, n_bits - 1)) { ovs_net->xt_label = false; OVS_NLERR(true, "Failed to set connlabel length"); } else { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0cc66a4e492d..856bd8dba676 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -738,9 +738,9 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, len += nla_total_size(acts->orig_len); return len - + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ - + nla_total_size(8); /* OVS_FLOW_ATTR_USED */ + + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */ } /* Called with ovs_mutex or RCU read lock. */ @@ -754,11 +754,14 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow, ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); if (used && - nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) + nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used), + OVS_FLOW_ATTR_PAD)) return -EMSGSIZE; if (stats.n_packets && - nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) + nla_put_64bit(skb, OVS_FLOW_ATTR_STATS, + sizeof(struct ovs_flow_stats), &stats, + OVS_FLOW_ATTR_PAD)) return -EMSGSIZE; if ((u8)ntohs(tcp_flags) && @@ -1434,8 +1437,8 @@ static size_t ovs_dp_cmd_msg_size(void) size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); msgsize += nla_total_size(IFNAMSIZ); - msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); - msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats)); + msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats)); + msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats)); msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ return msgsize; @@ -1462,13 +1465,13 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, goto nla_put_failure; get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); - if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), - &dp_stats)) + if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), + &dp_stats, OVS_DP_ATTR_PAD)) goto nla_put_failure; - if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS, - sizeof(struct ovs_dp_megaflow_stats), - &dp_megaflow_stats)) + if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS, + sizeof(struct ovs_dp_megaflow_stats), + &dp_megaflow_stats, OVS_DP_ATTR_PAD)) goto nla_put_failure; if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) @@ -1837,8 +1840,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, goto nla_put_failure; ovs_vport_get_stats(vport, &vport_stats); - if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), - &vport_stats)) + if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, + sizeof(struct ovs_vport_stats), &vport_stats, + OVS_VPORT_ATTR_PAD)) goto nla_put_failure; if (ovs_vport_get_upcall_portids(vport, skb)) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 689c17264221..0bb650f4f219 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -261,7 +261,7 @@ size_t ovs_tun_key_attr_size(void) /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider * updating this function. */ - return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + return nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ @@ -720,7 +720,8 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, unsigned short tun_proto) { if (output->tun_flags & TUNNEL_KEY && - nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) + nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id, + OVS_TUNNEL_KEY_ATTR_PAD)) return -EMSGSIZE; switch (tun_proto) { case AF_INET: diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 7c8b90bf0e54..2ee48e447b72 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -165,11 +165,10 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | - IFF_PHONY_HEADROOM; + IFF_PHONY_HEADROOM | IFF_NO_QUEUE; netdev->destructor = internal_dev_destructor; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; - netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 18d0becbc46d..4040eb92d9c9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1837,6 +1837,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); struct sk_buff *skb = NULL; struct net_device *dev; + struct sockcm_cookie sockc; __be16 proto = 0; int err; int extra_len = 0; @@ -1925,12 +1926,21 @@ retry: goto out_unlock; } + sockc.tsflags = 0; + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) { + err = -EINVAL; + goto out_unlock; + } + } + skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2042,6 +2052,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; + bool is_drop_n_account = false; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -2130,6 +2141,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, return 0; drop_n_acct: + is_drop_n_account = true; spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_drops++; atomic_inc(&sk->sk_drops); @@ -2141,7 +2153,10 @@ drop_n_restore: skb->len = skb_len; } drop: - consume_skb(skb); + if (!is_drop_n_account) + consume_skb(skb); + else + kfree_skb(skb); return 0; } @@ -2160,6 +2175,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sk_buff *copy_skb = NULL; struct timespec ts; __u32 ts_status; + bool is_drop_n_account = false; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. * We may add members to them until current aligned size without forcing @@ -2367,10 +2383,14 @@ drop_n_restore: skb->len = skb_len; } drop: - kfree_skb(skb); + if (!is_drop_n_account) + consume_skb(skb); + else + kfree_skb(skb); return 0; drop_n_account: + is_drop_n_account = true; po->stats.stats1.tp_drops++; spin_unlock(&sk->sk_receive_queue.lock); @@ -2486,7 +2506,8 @@ static int packet_snd_vnet_gso(struct sk_buff *skb, static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, void *data, int tp_len, - __be16 proto, unsigned char *addr, int hlen, int copylen) + __be16 proto, unsigned char *addr, int hlen, int copylen, + const struct sockcm_cookie *sockc) { union tpacket_uhdr ph; int to_write, offset, len, nr_frags, len_max; @@ -2500,7 +2521,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; - sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); skb_shinfo(skb)->destructor_arg = ph.raw; skb_reserve(skb, hlen); @@ -2624,6 +2645,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) struct sk_buff *skb; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; + struct sockcm_cookie sockc; __be16 proto; int err, reserve = 0; void *ph; @@ -2655,6 +2677,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); } + sockc.tsflags = 0; + if (msg->msg_controllen) { + err = sock_cmsg_send(&po->sk, msg, &sockc); + if (unlikely(err)) + goto out; + } + err = -ENXIO; if (unlikely(dev == NULL)) goto out; @@ -2712,7 +2741,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) goto out_status; } tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, - addr, hlen, copylen); + addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && !po->has_vnet_hdr && @@ -2851,6 +2880,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + sockc.tsflags = 0; sockc.mark = sk->sk_mark; if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); @@ -2908,7 +2938,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig new file mode 100644 index 000000000000..b83c6807a5ae --- /dev/null +++ b/net/qrtr/Kconfig @@ -0,0 +1,24 @@ +# Qualcomm IPC Router configuration +# + +config QRTR + tristate "Qualcomm IPC Router support" + depends on ARCH_QCOM || COMPILE_TEST + ---help--- + Say Y if you intend to use Qualcomm IPC router protocol. The + protocol is used to communicate with services provided by other + hardware blocks in the system. + + In order to do service lookups, a userspace daemon is required to + maintain a service listing. + +if QRTR + +config QRTR_SMD + tristate "SMD IPC Router channels" + depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + ---help--- + Say Y here to support SMD based ipcrouter channels. SMD is the + most common transport for IPC Router. + +endif # QRTR diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile new file mode 100644 index 000000000000..ab09e40f7c74 --- /dev/null +++ b/net/qrtr/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_QRTR) := qrtr.o + +obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o +qrtr-smd-y := smd.o diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c new file mode 100644 index 000000000000..c985ecbe9bd6 --- /dev/null +++ b/net/qrtr/qrtr.c @@ -0,0 +1,1007 @@ +/* + * Copyright (c) 2015, Sony Mobile Communications Inc. + * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/qrtr.h> +#include <linux/termios.h> /* For TIOCINQ/OUTQ */ + +#include <net/sock.h> + +#include "qrtr.h" + +#define QRTR_PROTO_VER 1 + +/* auto-bind range */ +#define QRTR_MIN_EPH_SOCKET 0x4000 +#define QRTR_MAX_EPH_SOCKET 0x7fff + +enum qrtr_pkt_type { + QRTR_TYPE_DATA = 1, + QRTR_TYPE_HELLO = 2, + QRTR_TYPE_BYE = 3, + QRTR_TYPE_NEW_SERVER = 4, + QRTR_TYPE_DEL_SERVER = 5, + QRTR_TYPE_DEL_CLIENT = 6, + QRTR_TYPE_RESUME_TX = 7, + QRTR_TYPE_EXIT = 8, + QRTR_TYPE_PING = 9, +}; + +/** + * struct qrtr_hdr - (I|R)PCrouter packet header + * @version: protocol version + * @type: packet type; one of QRTR_TYPE_* + * @src_node_id: source node + * @src_port_id: source port + * @confirm_rx: boolean; whether a resume-tx packet should be send in reply + * @size: length of packet, excluding this header + * @dst_node_id: destination node + * @dst_port_id: destination port + */ +struct qrtr_hdr { + __le32 version; + __le32 type; + __le32 src_node_id; + __le32 src_port_id; + __le32 confirm_rx; + __le32 size; + __le32 dst_node_id; + __le32 dst_port_id; +} __packed; + +#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) +#define QRTR_NODE_BCAST ((unsigned int)-1) +#define QRTR_PORT_CTRL ((unsigned int)-2) + +struct qrtr_sock { + /* WARNING: sk must be the first member */ + struct sock sk; + struct sockaddr_qrtr us; + struct sockaddr_qrtr peer; +}; + +static inline struct qrtr_sock *qrtr_sk(struct sock *sk) +{ + BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0); + return container_of(sk, struct qrtr_sock, sk); +} + +static unsigned int qrtr_local_nid = -1; + +/* for node ids */ +static RADIX_TREE(qrtr_nodes, GFP_KERNEL); +/* broadcast list */ +static LIST_HEAD(qrtr_all_nodes); +/* lock for qrtr_nodes, qrtr_all_nodes and node reference */ +static DEFINE_MUTEX(qrtr_node_lock); + +/* local port allocation management */ +static DEFINE_IDR(qrtr_ports); +static DEFINE_MUTEX(qrtr_port_lock); + +/** + * struct qrtr_node - endpoint node + * @ep_lock: lock for endpoint management and callbacks + * @ep: endpoint + * @ref: reference count for node + * @nid: node id + * @rx_queue: receive queue + * @work: scheduled work struct for recv work + * @item: list item for broadcast list + */ +struct qrtr_node { + struct mutex ep_lock; + struct qrtr_endpoint *ep; + struct kref ref; + unsigned int nid; + + struct sk_buff_head rx_queue; + struct work_struct work; + struct list_head item; +}; + +/* Release node resources and free the node. + * + * Do not call directly, use qrtr_node_release. To be used with + * kref_put_mutex. As such, the node mutex is expected to be locked on call. + */ +static void __qrtr_node_release(struct kref *kref) +{ + struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); + + if (node->nid != QRTR_EP_NID_AUTO) + radix_tree_delete(&qrtr_nodes, node->nid); + + list_del(&node->item); + mutex_unlock(&qrtr_node_lock); + + skb_queue_purge(&node->rx_queue); + kfree(node); +} + +/* Increment reference to node. */ +static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node) +{ + if (node) + kref_get(&node->ref); + return node; +} + +/* Decrement reference to node and release as necessary. */ +static void qrtr_node_release(struct qrtr_node *node) +{ + if (!node) + return; + kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock); +} + +/* Pass an outgoing packet socket buffer to the endpoint driver. */ +static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb) +{ + int rc = -ENODEV; + + mutex_lock(&node->ep_lock); + if (node->ep) + rc = node->ep->xmit(node->ep, skb); + else + kfree_skb(skb); + mutex_unlock(&node->ep_lock); + + return rc; +} + +/* Lookup node by id. + * + * callers must release with qrtr_node_release() + */ +static struct qrtr_node *qrtr_node_lookup(unsigned int nid) +{ + struct qrtr_node *node; + + mutex_lock(&qrtr_node_lock); + node = radix_tree_lookup(&qrtr_nodes, nid); + node = qrtr_node_acquire(node); + mutex_unlock(&qrtr_node_lock); + + return node; +} + +/* Assign node id to node. + * + * This is mostly useful for automatic node id assignment, based on + * the source id in the incoming packet. + */ +static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid) +{ + if (node->nid != QRTR_EP_NID_AUTO || nid == QRTR_EP_NID_AUTO) + return; + + mutex_lock(&qrtr_node_lock); + radix_tree_insert(&qrtr_nodes, nid, node); + node->nid = nid; + mutex_unlock(&qrtr_node_lock); +} + +/** + * qrtr_endpoint_post() - post incoming data + * @ep: endpoint handle + * @data: data pointer + * @len: size of data in bytes + * + * Return: 0 on success; negative error code on failure + */ +int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) +{ + struct qrtr_node *node = ep->node; + const struct qrtr_hdr *phdr = data; + struct sk_buff *skb; + unsigned int psize; + unsigned int size; + unsigned int type; + unsigned int ver; + unsigned int dst; + + if (len < QRTR_HDR_SIZE || len & 3) + return -EINVAL; + + ver = le32_to_cpu(phdr->version); + size = le32_to_cpu(phdr->size); + type = le32_to_cpu(phdr->type); + dst = le32_to_cpu(phdr->dst_port_id); + + psize = (size + 3) & ~3; + + if (ver != QRTR_PROTO_VER) + return -EINVAL; + + if (len != psize + QRTR_HDR_SIZE) + return -EINVAL; + + if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA) + return -EINVAL; + + skb = netdev_alloc_skb(NULL, len); + if (!skb) + return -ENOMEM; + + skb_reset_transport_header(skb); + memcpy(skb_put(skb, len), data, len); + + skb_queue_tail(&node->rx_queue, skb); + schedule_work(&node->work); + + return 0; +} +EXPORT_SYMBOL_GPL(qrtr_endpoint_post); + +/* Allocate and construct a resume-tx packet. */ +static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, + u32 dst_node, u32 port) +{ + const int pkt_len = 20; + struct qrtr_hdr *hdr; + struct sk_buff *skb; + u32 *buf; + + skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); + if (!skb) + return NULL; + skb_reset_transport_header(skb); + + hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE); + hdr->version = cpu_to_le32(QRTR_PROTO_VER); + hdr->type = cpu_to_le32(QRTR_TYPE_RESUME_TX); + hdr->src_node_id = cpu_to_le32(src_node); + hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL); + hdr->confirm_rx = cpu_to_le32(0); + hdr->size = cpu_to_le32(pkt_len); + hdr->dst_node_id = cpu_to_le32(dst_node); + hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); + + buf = (u32 *)skb_put(skb, pkt_len); + memset(buf, 0, pkt_len); + buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); + buf[1] = cpu_to_le32(src_node); + buf[2] = cpu_to_le32(port); + + return skb; +} + +static struct qrtr_sock *qrtr_port_lookup(int port); +static void qrtr_port_put(struct qrtr_sock *ipc); + +/* Handle and route a received packet. + * + * This will auto-reply with resume-tx packet as necessary. + */ +static void qrtr_node_rx_work(struct work_struct *work) +{ + struct qrtr_node *node = container_of(work, struct qrtr_node, work); + struct sk_buff *skb; + + while ((skb = skb_dequeue(&node->rx_queue)) != NULL) { + const struct qrtr_hdr *phdr; + u32 dst_node, dst_port; + struct qrtr_sock *ipc; + u32 src_node; + int confirm; + + phdr = (const struct qrtr_hdr *)skb_transport_header(skb); + src_node = le32_to_cpu(phdr->src_node_id); + dst_node = le32_to_cpu(phdr->dst_node_id); + dst_port = le32_to_cpu(phdr->dst_port_id); + confirm = !!phdr->confirm_rx; + + qrtr_node_assign(node, src_node); + + ipc = qrtr_port_lookup(dst_port); + if (!ipc) { + kfree_skb(skb); + } else { + if (sock_queue_rcv_skb(&ipc->sk, skb)) + kfree_skb(skb); + + qrtr_port_put(ipc); + } + + if (confirm) { + skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port); + if (!skb) + break; + if (qrtr_node_enqueue(node, skb)) + break; + } + } +} + +/** + * qrtr_endpoint_register() - register a new endpoint + * @ep: endpoint to register + * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment + * Return: 0 on success; negative error code on failure + * + * The specified endpoint must have the xmit function pointer set on call. + */ +int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid) +{ + struct qrtr_node *node; + + if (!ep || !ep->xmit) + return -EINVAL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + INIT_WORK(&node->work, qrtr_node_rx_work); + kref_init(&node->ref); + mutex_init(&node->ep_lock); + skb_queue_head_init(&node->rx_queue); + node->nid = QRTR_EP_NID_AUTO; + node->ep = ep; + + qrtr_node_assign(node, nid); + + mutex_lock(&qrtr_node_lock); + list_add(&node->item, &qrtr_all_nodes); + mutex_unlock(&qrtr_node_lock); + ep->node = node; + + return 0; +} +EXPORT_SYMBOL_GPL(qrtr_endpoint_register); + +/** + * qrtr_endpoint_unregister - unregister endpoint + * @ep: endpoint to unregister + */ +void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) +{ + struct qrtr_node *node = ep->node; + + mutex_lock(&node->ep_lock); + node->ep = NULL; + mutex_unlock(&node->ep_lock); + + qrtr_node_release(node); + ep->node = NULL; +} +EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister); + +/* Lookup socket by port. + * + * Callers must release with qrtr_port_put() + */ +static struct qrtr_sock *qrtr_port_lookup(int port) +{ + struct qrtr_sock *ipc; + + if (port == QRTR_PORT_CTRL) + port = 0; + + mutex_lock(&qrtr_port_lock); + ipc = idr_find(&qrtr_ports, port); + if (ipc) + sock_hold(&ipc->sk); + mutex_unlock(&qrtr_port_lock); + + return ipc; +} + +/* Release acquired socket. */ +static void qrtr_port_put(struct qrtr_sock *ipc) +{ + sock_put(&ipc->sk); +} + +/* Remove port assignment. */ +static void qrtr_port_remove(struct qrtr_sock *ipc) +{ + int port = ipc->us.sq_port; + + if (port == QRTR_PORT_CTRL) + port = 0; + + __sock_put(&ipc->sk); + + mutex_lock(&qrtr_port_lock); + idr_remove(&qrtr_ports, port); + mutex_unlock(&qrtr_port_lock); +} + +/* Assign port number to socket. + * + * Specify port in the integer pointed to by port, and it will be adjusted + * on return as necesssary. + * + * Port may be: + * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET] + * <QRTR_MIN_EPH_SOCKET: Specified; requires CAP_NET_ADMIN + * >QRTR_MIN_EPH_SOCKET: Specified; available to all + */ +static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) +{ + int rc; + + mutex_lock(&qrtr_port_lock); + if (!*port) { + rc = idr_alloc(&qrtr_ports, ipc, + QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET + 1, + GFP_ATOMIC); + if (rc >= 0) + *port = rc; + } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { + rc = -EACCES; + } else if (*port == QRTR_PORT_CTRL) { + rc = idr_alloc(&qrtr_ports, ipc, 0, 1, GFP_ATOMIC); + } else { + rc = idr_alloc(&qrtr_ports, ipc, *port, *port + 1, GFP_ATOMIC); + if (rc >= 0) + *port = rc; + } + mutex_unlock(&qrtr_port_lock); + + if (rc == -ENOSPC) + return -EADDRINUSE; + else if (rc < 0) + return rc; + + sock_hold(&ipc->sk); + + return 0; +} + +/* Bind socket to address. + * + * Socket should be locked upon call. + */ +static int __qrtr_bind(struct socket *sock, + const struct sockaddr_qrtr *addr, int zapped) +{ + struct qrtr_sock *ipc = qrtr_sk(sock->sk); + struct sock *sk = sock->sk; + int port; + int rc; + + /* rebinding ok */ + if (!zapped && addr->sq_port == ipc->us.sq_port) + return 0; + + port = addr->sq_port; + rc = qrtr_port_assign(ipc, &port); + if (rc) + return rc; + + /* unbind previous, if any */ + if (!zapped) + qrtr_port_remove(ipc); + ipc->us.sq_port = port; + + sock_reset_flag(sk, SOCK_ZAPPED); + + return 0; +} + +/* Auto bind to an ephemeral port. */ +static int qrtr_autobind(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct sockaddr_qrtr addr; + + if (!sock_flag(sk, SOCK_ZAPPED)) + return 0; + + addr.sq_family = AF_QIPCRTR; + addr.sq_node = qrtr_local_nid; + addr.sq_port = 0; + + return __qrtr_bind(sock, &addr, 1); +} + +/* Bind socket to specified sockaddr. */ +static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len) +{ + DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); + struct qrtr_sock *ipc = qrtr_sk(sock->sk); + struct sock *sk = sock->sk; + int rc; + + if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) + return -EINVAL; + + if (addr->sq_node != ipc->us.sq_node) + return -EINVAL; + + lock_sock(sk); + rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED)); + release_sock(sk); + + return rc; +} + +/* Queue packet to local peer socket. */ +static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) +{ + const struct qrtr_hdr *phdr; + struct qrtr_sock *ipc; + + phdr = (const struct qrtr_hdr *)skb_transport_header(skb); + + ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id)); + if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ + kfree_skb(skb); + return -ENODEV; + } + + if (sock_queue_rcv_skb(&ipc->sk, skb)) { + qrtr_port_put(ipc); + kfree_skb(skb); + return -ENOSPC; + } + + qrtr_port_put(ipc); + + return 0; +} + +/* Queue packet for broadcast. */ +static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) +{ + struct sk_buff *skbn; + + mutex_lock(&qrtr_node_lock); + list_for_each_entry(node, &qrtr_all_nodes, item) { + skbn = skb_clone(skb, GFP_KERNEL); + if (!skbn) + break; + skb_set_owner_w(skbn, skb->sk); + qrtr_node_enqueue(node, skbn); + } + mutex_unlock(&qrtr_node_lock); + + qrtr_local_enqueue(node, skb); + + return 0; +} + +static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) +{ + DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); + int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *); + struct qrtr_sock *ipc = qrtr_sk(sock->sk); + struct sock *sk = sock->sk; + struct qrtr_node *node; + struct qrtr_hdr *hdr; + struct sk_buff *skb; + size_t plen; + int rc; + + if (msg->msg_flags & ~(MSG_DONTWAIT)) + return -EINVAL; + + if (len > 65535) + return -EMSGSIZE; + + lock_sock(sk); + + if (addr) { + if (msg->msg_namelen < sizeof(*addr)) { + release_sock(sk); + return -EINVAL; + } + + if (addr->sq_family != AF_QIPCRTR) { + release_sock(sk); + return -EINVAL; + } + + rc = qrtr_autobind(sock); + if (rc) { + release_sock(sk); + return rc; + } + } else if (sk->sk_state == TCP_ESTABLISHED) { + addr = &ipc->peer; + } else { + release_sock(sk); + return -ENOTCONN; + } + + node = NULL; + if (addr->sq_node == QRTR_NODE_BCAST) { + enqueue_fn = qrtr_bcast_enqueue; + } else if (addr->sq_node == ipc->us.sq_node) { + enqueue_fn = qrtr_local_enqueue; + } else { + enqueue_fn = qrtr_node_enqueue; + node = qrtr_node_lookup(addr->sq_node); + if (!node) { + release_sock(sk); + return -ECONNRESET; + } + } + + plen = (len + 3) & ~3; + skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE, + msg->msg_flags & MSG_DONTWAIT, &rc); + if (!skb) + goto out_node; + + skb_reset_transport_header(skb); + skb_put(skb, len + QRTR_HDR_SIZE); + + hdr = (struct qrtr_hdr *)skb_transport_header(skb); + hdr->version = cpu_to_le32(QRTR_PROTO_VER); + hdr->src_node_id = cpu_to_le32(ipc->us.sq_node); + hdr->src_port_id = cpu_to_le32(ipc->us.sq_port); + hdr->confirm_rx = cpu_to_le32(0); + hdr->size = cpu_to_le32(len); + hdr->dst_node_id = cpu_to_le32(addr->sq_node); + hdr->dst_port_id = cpu_to_le32(addr->sq_port); + + rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE, + &msg->msg_iter, len); + if (rc) { + kfree_skb(skb); + goto out_node; + } + + if (plen != len) { + skb_pad(skb, plen - len); + skb_put(skb, plen - len); + } + + if (ipc->us.sq_port == QRTR_PORT_CTRL) { + if (len < 4) { + rc = -EINVAL; + kfree_skb(skb); + goto out_node; + } + + /* control messages already require the type as 'command' */ + skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4); + } else { + hdr->type = cpu_to_le32(QRTR_TYPE_DATA); + } + + rc = enqueue_fn(node, skb); + if (rc >= 0) + rc = len; + +out_node: + qrtr_node_release(node); + release_sock(sk); + + return rc; +} + +static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); + const struct qrtr_hdr *phdr; + struct sock *sk = sock->sk; + struct sk_buff *skb; + int copied, rc; + + lock_sock(sk); + + if (sock_flag(sk, SOCK_ZAPPED)) { + release_sock(sk); + return -EADDRNOTAVAIL; + } + + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, + flags & MSG_DONTWAIT, &rc); + if (!skb) { + release_sock(sk); + return rc; + } + + phdr = (const struct qrtr_hdr *)skb_transport_header(skb); + copied = le32_to_cpu(phdr->size); + if (copied > size) { + copied = size; + msg->msg_flags |= MSG_TRUNC; + } + + rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied); + if (rc < 0) + goto out; + rc = copied; + + if (addr) { + addr->sq_family = AF_QIPCRTR; + addr->sq_node = le32_to_cpu(phdr->src_node_id); + addr->sq_port = le32_to_cpu(phdr->src_port_id); + msg->msg_namelen = sizeof(*addr); + } + +out: + skb_free_datagram(sk, skb); + release_sock(sk); + + return rc; +} + +static int qrtr_connect(struct socket *sock, struct sockaddr *saddr, + int len, int flags) +{ + DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); + struct qrtr_sock *ipc = qrtr_sk(sock->sk); + struct sock *sk = sock->sk; + int rc; + + if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) + return -EINVAL; + + lock_sock(sk); + + sk->sk_state = TCP_CLOSE; + sock->state = SS_UNCONNECTED; + + rc = qrtr_autobind(sock); + if (rc) { + release_sock(sk); + return rc; + } + + ipc->peer = *addr; + sock->state = SS_CONNECTED; + sk->sk_state = TCP_ESTABLISHED; + + release_sock(sk); + + return 0; +} + +static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, + int *len, int peer) +{ + struct qrtr_sock *ipc = qrtr_sk(sock->sk); + struct sockaddr_qrtr qaddr; + struct sock *sk = sock->sk; + + lock_sock(sk); + if (peer) { + if (sk->sk_state != TCP_ESTABLISHED) { + release_sock(sk); + return -ENOTCONN; + } + + qaddr = ipc->peer; + } else { + qaddr = ipc->us; + } + release_sock(sk); + + *len = sizeof(qaddr); + qaddr.sq_family = AF_QIPCRTR; + + memcpy(saddr, &qaddr, sizeof(qaddr)); + + return 0; +} + +static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct qrtr_sock *ipc = qrtr_sk(sock->sk); + struct sock *sk = sock->sk; + struct sockaddr_qrtr *sq; + struct sk_buff *skb; + struct ifreq ifr; + long len = 0; + int rc = 0; + + lock_sock(sk); + + switch (cmd) { + case TIOCOUTQ: + len = sk->sk_sndbuf - sk_wmem_alloc_get(sk); + if (len < 0) + len = 0; + rc = put_user(len, (int __user *)argp); + break; + case TIOCINQ: + skb = skb_peek(&sk->sk_receive_queue); + if (skb) + len = skb->len - QRTR_HDR_SIZE; + rc = put_user(len, (int __user *)argp); + break; + case SIOCGIFADDR: + if (copy_from_user(&ifr, argp, sizeof(ifr))) { + rc = -EFAULT; + break; + } + + sq = (struct sockaddr_qrtr *)&ifr.ifr_addr; + *sq = ipc->us; + if (copy_to_user(argp, &ifr, sizeof(ifr))) { + rc = -EFAULT; + break; + } + break; + case SIOCGSTAMP: + rc = sock_get_timestamp(sk, argp); + break; + case SIOCADDRT: + case SIOCDELRT: + case SIOCSIFADDR: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + rc = -EINVAL; + break; + default: + rc = -ENOIOCTLCMD; + break; + } + + release_sock(sk); + + return rc; +} + +static int qrtr_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct qrtr_sock *ipc; + + if (!sk) + return 0; + + lock_sock(sk); + + ipc = qrtr_sk(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + + sock_set_flag(sk, SOCK_DEAD); + sock->sk = NULL; + + if (!sock_flag(sk, SOCK_ZAPPED)) + qrtr_port_remove(ipc); + + skb_queue_purge(&sk->sk_receive_queue); + + release_sock(sk); + sock_put(sk); + + return 0; +} + +static const struct proto_ops qrtr_proto_ops = { + .owner = THIS_MODULE, + .family = AF_QIPCRTR, + .bind = qrtr_bind, + .connect = qrtr_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .listen = sock_no_listen, + .sendmsg = qrtr_sendmsg, + .recvmsg = qrtr_recvmsg, + .getname = qrtr_getname, + .ioctl = qrtr_ioctl, + .poll = datagram_poll, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .release = qrtr_release, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static struct proto qrtr_proto = { + .name = "QIPCRTR", + .owner = THIS_MODULE, + .obj_size = sizeof(struct qrtr_sock), +}; + +static int qrtr_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + struct qrtr_sock *ipc; + struct sock *sk; + + if (sock->type != SOCK_DGRAM) + return -EPROTOTYPE; + + sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern); + if (!sk) + return -ENOMEM; + + sock_set_flag(sk, SOCK_ZAPPED); + + sock_init_data(sock, sk); + sock->ops = &qrtr_proto_ops; + + ipc = qrtr_sk(sk); + ipc->us.sq_family = AF_QIPCRTR; + ipc->us.sq_node = qrtr_local_nid; + ipc->us.sq_port = 0; + + return 0; +} + +static const struct nla_policy qrtr_policy[IFA_MAX + 1] = { + [IFA_LOCAL] = { .type = NLA_U32 }, +}; + +static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct nlattr *tb[IFA_MAX + 1]; + struct ifaddrmsg *ifm; + int rc; + + if (!netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + ASSERT_RTNL(); + + rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy); + if (rc < 0) + return rc; + + ifm = nlmsg_data(nlh); + if (!tb[IFA_LOCAL]) + return -EINVAL; + + qrtr_local_nid = nla_get_u32(tb[IFA_LOCAL]); + return 0; +} + +static const struct net_proto_family qrtr_family = { + .owner = THIS_MODULE, + .family = AF_QIPCRTR, + .create = qrtr_create, +}; + +static int __init qrtr_proto_init(void) +{ + int rc; + + rc = proto_register(&qrtr_proto, 1); + if (rc) + return rc; + + rc = sock_register(&qrtr_family); + if (rc) { + proto_unregister(&qrtr_proto); + return rc; + } + + rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL); + + return 0; +} +module_init(qrtr_proto_init); + +static void __exit qrtr_proto_fini(void) +{ + rtnl_unregister(PF_QIPCRTR, RTM_NEWADDR); + sock_unregister(qrtr_family.family); + proto_unregister(&qrtr_proto); +} +module_exit(qrtr_proto_fini); + +MODULE_DESCRIPTION("Qualcomm IPC-router driver"); +MODULE_LICENSE("GPL v2"); diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h new file mode 100644 index 000000000000..2b848718f8fe --- /dev/null +++ b/net/qrtr/qrtr.h @@ -0,0 +1,31 @@ +#ifndef __QRTR_H_ +#define __QRTR_H_ + +#include <linux/types.h> + +struct sk_buff; + +/* endpoint node id auto assignment */ +#define QRTR_EP_NID_AUTO (-1) + +/** + * struct qrtr_endpoint - endpoint handle + * @xmit: Callback for outgoing packets + * + * The socket buffer passed to the xmit function becomes owned by the endpoint + * driver. As such, when the driver is done with the buffer, it should + * call kfree_skb() on failure, or consume_skb() on success. + */ +struct qrtr_endpoint { + int (*xmit)(struct qrtr_endpoint *ep, struct sk_buff *skb); + /* private: not for endpoint use */ + struct qrtr_node *node; +}; + +int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid); + +void qrtr_endpoint_unregister(struct qrtr_endpoint *ep); + +int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len); + +#endif diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c new file mode 100644 index 000000000000..0d11132b3370 --- /dev/null +++ b/net/qrtr/smd.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2015, Sony Mobile Communications Inc. + * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/soc/qcom/smd.h> + +#include "qrtr.h" + +struct qrtr_smd_dev { + struct qrtr_endpoint ep; + struct qcom_smd_channel *channel; + struct device *dev; +}; + +/* from smd to qrtr */ +static int qcom_smd_qrtr_callback(struct qcom_smd_channel *channel, + const void *data, size_t len) +{ + struct qrtr_smd_dev *qdev = qcom_smd_get_drvdata(channel); + int rc; + + if (!qdev) + return -EAGAIN; + + rc = qrtr_endpoint_post(&qdev->ep, data, len); + if (rc == -EINVAL) { + dev_err(qdev->dev, "invalid ipcrouter packet\n"); + /* return 0 to let smd drop the packet */ + rc = 0; + } + + return rc; +} + +/* from qrtr to smd */ +static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) +{ + struct qrtr_smd_dev *qdev = container_of(ep, struct qrtr_smd_dev, ep); + int rc; + + rc = skb_linearize(skb); + if (rc) + goto out; + + rc = qcom_smd_send(qdev->channel, skb->data, skb->len); + +out: + if (rc) + kfree_skb(skb); + else + consume_skb(skb); + return rc; +} + +static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev) +{ + struct qrtr_smd_dev *qdev; + int rc; + + qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL); + if (!qdev) + return -ENOMEM; + + qdev->channel = sdev->channel; + qdev->dev = &sdev->dev; + qdev->ep.xmit = qcom_smd_qrtr_send; + + rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); + if (rc) + return rc; + + qcom_smd_set_drvdata(sdev->channel, qdev); + dev_set_drvdata(&sdev->dev, qdev); + + dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n"); + + return 0; +} + +static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev) +{ + struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); + + qrtr_endpoint_unregister(&qdev->ep); + + dev_set_drvdata(&sdev->dev, NULL); +} + +static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = { + { "IPCRTR" }, + {} +}; + +static struct qcom_smd_driver qcom_smd_qrtr_driver = { + .probe = qcom_smd_qrtr_probe, + .remove = qcom_smd_qrtr_remove, + .callback = qcom_smd_qrtr_callback, + .smd_match_table = qcom_smd_qrtr_smd_match, + .driver = { + .name = "qcom_smd_qrtr", + .owner = THIS_MODULE, + }, +}; + +module_qcom_smd_driver(qcom_smd_qrtr_driver); + +MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver"); +MODULE_LICENSE("GPL v2"); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 27a992154804..d75d8b56a9e3 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -207,22 +207,14 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } if (left && tc->t_tinc_data_rem) { - clone = skb_clone(skb, arg->gfp); + to_copy = min(tc->t_tinc_data_rem, left); + + clone = pskb_extract(skb, offset, to_copy, arg->gfp); if (!clone) { desc->error = -ENOMEM; goto out; } - to_copy = min(tc->t_tinc_data_rem, left); - if (!pskb_pull(clone, offset) || - pskb_trim(clone, to_copy)) { - pr_warn("rds_tcp_data_recv: pull/trim failed " - "left %zu data_rem %zu skb_len %d\n", - left, tc->t_tinc_data_rem, skb->len); - kfree_skb(clone); - desc->error = -ENOMEM; - goto out; - } skb_queue_tail(&tinc->ti_skb_list, clone); rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 03f26e3a6f48..884027f62783 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1141,6 +1141,7 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, { struct rfkill *rfkill; struct rfkill_event ev; + int ret; /* we don't need the 'hard' variable but accept it */ if (count < RFKILL_EVENT_SIZE_V1 - 1) @@ -1155,29 +1156,36 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, if (copy_from_user(&ev, buf, count)) return -EFAULT; - if (ev.op != RFKILL_OP_CHANGE && ev.op != RFKILL_OP_CHANGE_ALL) - return -EINVAL; - if (ev.type >= NUM_RFKILL_TYPES) return -EINVAL; mutex_lock(&rfkill_global_mutex); - if (ev.op == RFKILL_OP_CHANGE_ALL) + switch (ev.op) { + case RFKILL_OP_CHANGE_ALL: rfkill_update_global_state(ev.type, ev.soft); - - list_for_each_entry(rfkill, &rfkill_list, node) { - if (rfkill->idx != ev.idx && ev.op != RFKILL_OP_CHANGE_ALL) - continue; - - if (rfkill->type != ev.type && ev.type != RFKILL_TYPE_ALL) - continue; - - rfkill_set_block(rfkill, ev.soft); + list_for_each_entry(rfkill, &rfkill_list, node) + if (rfkill->type == ev.type || + ev.type == RFKILL_TYPE_ALL) + rfkill_set_block(rfkill, ev.soft); + ret = 0; + break; + case RFKILL_OP_CHANGE: + list_for_each_entry(rfkill, &rfkill_list, node) + if (rfkill->idx == ev.idx && + (rfkill->type == ev.type || + ev.type == RFKILL_TYPE_ALL)) + rfkill_set_block(rfkill, ev.soft); + ret = 0; + break; + default: + ret = -EINVAL; + break; } + mutex_unlock(&rfkill_global_mutex); - return count; + return ret ?: count; } static int rfkill_fop_release(struct inode *inode, struct file *file) diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 23dcef12b986..784c53163b7b 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -30,7 +30,7 @@ config AF_RXRPC_DEBUG config RXKAD - tristate "RxRPC Kerberos security" + bool "RxRPC Kerberos security" depends on AF_RXRPC select CRYPTO select CRYPTO_MANAGER diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index ec126f91276b..e05a06ef2254 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -18,11 +18,12 @@ af-rxrpc-y := \ ar-recvmsg.o \ ar-security.o \ ar-skbuff.o \ - ar-transport.o + ar-transport.o \ + insecure.o \ + misc.o af-rxrpc-$(CONFIG_PROC_FS) += ar-proc.o +af-rxrpc-$(CONFIG_RXKAD) += rxkad.o af-rxrpc-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o - -obj-$(CONFIG_RXKAD) += rxkad.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9d935fa5a2a9..e45e94ca030f 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -806,6 +806,12 @@ static int __init af_rxrpc_init(void) goto error_work_queue; } + ret = rxrpc_init_security(); + if (ret < 0) { + printk(KERN_CRIT "RxRPC: Cannot initialise security\n"); + goto error_security; + } + ret = proto_register(&rxrpc_proto, 1); if (ret < 0) { printk(KERN_CRIT "RxRPC: Cannot register protocol\n"); @@ -853,6 +859,8 @@ error_sock: proto_unregister(&rxrpc_proto); error_proto: destroy_workqueue(rxrpc_workqueue); +error_security: + rxrpc_exit_security(); error_work_queue: kmem_cache_destroy(rxrpc_call_jar); error_call_jar: @@ -883,6 +891,7 @@ static void __exit af_rxrpc_exit(void) remove_proc_entry("rxrpc_conns", init_net.proc_net); remove_proc_entry("rxrpc_calls", init_net.proc_net); destroy_workqueue(rxrpc_workqueue); + rxrpc_exit_security(); kmem_cache_destroy(rxrpc_call_jar); _leave(""); } diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c index 277731a5e67a..e7a7f05f13e2 100644 --- a/net/rxrpc/ar-accept.c +++ b/net/rxrpc/ar-accept.c @@ -108,7 +108,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, goto error; } - conn = rxrpc_incoming_connection(trans, &sp->hdr, GFP_NOIO); + conn = rxrpc_incoming_connection(trans, &sp->hdr); rxrpc_put_transport(trans); if (IS_ERR(conn)) { _debug("no conn"); @@ -116,7 +116,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, goto error; } - call = rxrpc_incoming_call(rx, conn, &sp->hdr, GFP_NOIO); + call = rxrpc_incoming_call(rx, conn, &sp->hdr); rxrpc_put_connection(conn); if (IS_ERR(call)) { _debug("no call"); diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index 16d967075eaf..374478e006e7 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -20,74 +20,6 @@ #include "ar-internal.h" /* - * How long to wait before scheduling ACK generation after seeing a - * packet with RXRPC_REQUEST_ACK set (in jiffies). - */ -unsigned int rxrpc_requested_ack_delay = 1; - -/* - * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). - * - * We use this when we've received new data packets. If those packets aren't - * all consumed within this time we will send a DELAY ACK if an ACK was not - * requested to let the sender know it doesn't need to resend. - */ -unsigned int rxrpc_soft_ack_delay = 1 * HZ; - -/* - * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). - * - * We use this when we've consumed some previously soft-ACK'd packets when - * further packets aren't immediately received to decide when to send an IDLE - * ACK let the other end know that it can free up its Tx buffer space. - */ -unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; - -/* - * Receive window size in packets. This indicates the maximum number of - * unconsumed received packets we're willing to retain in memory. Once this - * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further - * packets. - */ -unsigned int rxrpc_rx_window_size = 32; - -/* - * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet - * made by gluing normal packets together that we're willing to handle. - */ -unsigned int rxrpc_rx_mtu = 5692; - -/* - * The maximum number of fragments in a received jumbo packet that we tell the - * sender that we're willing to handle. - */ -unsigned int rxrpc_rx_jumbo_max = 4; - -static const char *rxrpc_acks(u8 reason) -{ - static const char *const str[] = { - "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", - "IDL", "-?-" - }; - - if (reason >= ARRAY_SIZE(str)) - reason = ARRAY_SIZE(str) - 1; - return str[reason]; -} - -static const s8 rxrpc_ack_priority[] = { - [0] = 0, - [RXRPC_ACK_DELAY] = 1, - [RXRPC_ACK_REQUESTED] = 2, - [RXRPC_ACK_IDLE] = 3, - [RXRPC_ACK_PING_RESPONSE] = 4, - [RXRPC_ACK_DUPLICATE] = 5, - [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, - [RXRPC_ACK_EXCEEDS_WINDOW] = 7, - [RXRPC_ACK_NOSPACE] = 8, -}; - -/* * propose an ACK be sent */ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, @@ -426,7 +358,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) int tail = call->acks_tail, old_tail; int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); - kenter("{%u,%u},%u", call->acks_hard, win, hard); + _enter("{%u,%u},%u", call->acks_hard, win, hard); ASSERTCMP(hard - call->acks_hard, <=, win); @@ -656,7 +588,8 @@ process_further: _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); /* secured packets must be verified and possibly decrypted */ - if (rxrpc_verify_packet(call, skb, _abort_code) < 0) + if (call->conn->security->verify_packet(call, skb, + _abort_code) < 0) goto protocol_error; rxrpc_insert_oos_packet(call, skb); @@ -901,8 +834,8 @@ void rxrpc_process_call(struct work_struct *work) /* there's a good chance we're going to have to send a message, so set * one up in advance */ - msg.msg_name = &call->conn->trans->peer->srx.transport.sin; - msg.msg_namelen = sizeof(call->conn->trans->peer->srx.transport.sin); + msg.msg_name = &call->conn->trans->peer->srx.transport; + msg.msg_namelen = call->conn->trans->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -973,7 +906,7 @@ void rxrpc_process_call(struct work_struct *work) ECONNABORTED, true) < 0) goto no_mem; whdr.type = RXRPC_PACKET_TYPE_ABORT; - data = htonl(call->abort_code); + data = htonl(call->local_abort); iov[1].iov_base = &data; iov[1].iov_len = sizeof(data); genbit = RXRPC_CALL_EV_ABORT; @@ -1036,7 +969,7 @@ void rxrpc_process_call(struct work_struct *work) write_lock_bh(&call->state_lock); if (call->state <= RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = RX_CALL_TIMEOUT; + call->local_abort = RX_CALL_TIMEOUT; set_bit(RXRPC_CALL_EV_ABORT, &call->events); } write_unlock_bh(&call->state_lock); diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c index 7c8d300ade9b..571a41fd5a32 100644 --- a/net/rxrpc/ar-call.c +++ b/net/rxrpc/ar-call.c @@ -411,18 +411,17 @@ found_extant_second: */ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_connection *conn, - struct rxrpc_host_header *hdr, - gfp_t gfp) + struct rxrpc_host_header *hdr) { struct rxrpc_call *call, *candidate; struct rb_node **p, *parent; u32 call_id; - _enter(",%d,,%x", conn->debug_id, gfp); + _enter(",%d", conn->debug_id); ASSERT(rx != NULL); - candidate = rxrpc_alloc_call(gfp); + candidate = rxrpc_alloc_call(GFP_NOIO); if (!candidate) return ERR_PTR(-EBUSY); @@ -682,7 +681,7 @@ void rxrpc_release_call(struct rxrpc_call *call) call->state != RXRPC_CALL_CLIENT_FINAL_ACK) { _debug("+++ ABORTING STATE %d +++\n", call->state); call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = RX_CALL_DEAD; + call->local_abort = RX_CALL_DEAD; set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -758,7 +757,7 @@ static void rxrpc_mark_call_released(struct rxrpc_call *call) if (call->state < RXRPC_CALL_COMPLETE) { _debug("abort call %p", call); call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = RX_CALL_DEAD; + call->local_abort = RX_CALL_DEAD; if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) sched = true; } diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 9942da1edbf6..97f4fae74bca 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -207,6 +207,7 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) INIT_LIST_HEAD(&conn->bundle_link); conn->calls = RB_ROOT; skb_queue_head_init(&conn->rx_queue); + conn->security = &rxrpc_no_security; rwlock_init(&conn->lock); spin_lock_init(&conn->state_lock); atomic_set(&conn->usage, 1); @@ -564,8 +565,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, candidate->debug_id, candidate->trans->debug_id); rxrpc_assign_connection_id(candidate); - if (candidate->security) - candidate->security->prime_packet_security(candidate); + candidate->security->prime_packet_security(candidate); /* leave the candidate lurking in zombie mode attached to the * bundle until we're ready for it */ @@ -619,8 +619,7 @@ interrupted: */ struct rxrpc_connection * rxrpc_incoming_connection(struct rxrpc_transport *trans, - struct rxrpc_host_header *hdr, - gfp_t gfp) + struct rxrpc_host_header *hdr) { struct rxrpc_connection *conn, *candidate = NULL; struct rb_node *p, **pp; @@ -659,7 +658,7 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans, /* not yet present - create a candidate for a new record and then * redo the search */ - candidate = rxrpc_alloc_connection(gfp); + candidate = rxrpc_alloc_connection(GFP_NOIO); if (!candidate) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); @@ -831,7 +830,10 @@ static void rxrpc_destroy_connection(struct rxrpc_connection *conn) ASSERT(RB_EMPTY_ROOT(&conn->calls)); rxrpc_purge_queue(&conn->rx_queue); - rxrpc_clear_conn_security(conn); + conn->security->clear(conn); + key_put(conn->key); + key_put(conn->server_key); + rxrpc_put_transport(conn->trans); kfree(conn); _leave(""); diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c index 1bdaaed8cdc4..5f9563968a5b 100644 --- a/net/rxrpc/ar-connevent.c +++ b/net/rxrpc/ar-connevent.c @@ -40,11 +40,13 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state, write_lock(&call->state_lock); if (call->state <= RXRPC_CALL_COMPLETE) { call->state = state; - call->abort_code = abort_code; - if (state == RXRPC_CALL_LOCALLY_ABORTED) + if (state == RXRPC_CALL_LOCALLY_ABORTED) { + call->local_abort = conn->local_abort; set_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - else + } else { + call->remote_abort = conn->remote_abort; set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); + } rxrpc_queue_call(call); } write_unlock(&call->state_lock); @@ -84,8 +86,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code); - msg.msg_name = &conn->trans->peer->srx.transport.sin; - msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin); + msg.msg_name = &conn->trans->peer->srx.transport; + msg.msg_namelen = conn->trans->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -101,7 +103,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, whdr._rsvd = 0; whdr.serviceId = htons(conn->service_id); - word = htonl(abort_code); + word = htonl(conn->local_abort); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); @@ -112,7 +114,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx CONN ABORT %%%u { %d }", serial, abort_code); + _proto("Tx CONN ABORT %%%u { %d }", serial, conn->local_abort); ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len); if (ret < 0) { @@ -172,15 +174,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: - if (conn->security) - return conn->security->respond_to_challenge( - conn, skb, _abort_code); - return -EPROTO; + return conn->security->respond_to_challenge(conn, skb, + _abort_code); case RXRPC_PACKET_TYPE_RESPONSE: - if (!conn->security) - return -EPROTO; - ret = conn->security->verify_response(conn, skb, _abort_code); if (ret < 0) return ret; @@ -236,8 +233,6 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn) } } - ASSERT(conn->security != NULL); - if (conn->security->issue_challenge(conn) < 0) { abort_code = RX_CALL_DEAD; ret = -ENOMEM; diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 63ed75c40e29..6ff97412a0bb 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -25,12 +25,6 @@ #include <net/net_namespace.h> #include "ar-internal.h" -const char *rxrpc_pkts[] = { - "?00", - "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG", - "?09", "?10", "?11", "?12", "VERSION", "?14", "?15" -}; - /* * queue a packet for recvmsg to pass to userspace * - the caller must hold a lock on call->lock @@ -199,7 +193,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, /* if the packet need security things doing to it, then it goes down * the slow path */ - if (call->conn->security) + if (call->conn->security_ix) goto enqueue_packet; sp->call = call; @@ -355,7 +349,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) write_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_REMOTELY_ABORTED; - call->abort_code = abort_code; + call->remote_abort = abort_code; set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); rxrpc_queue_call(call); } @@ -428,7 +422,7 @@ protocol_error: protocol_error_locked: if (call->state <= RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = RX_PROTOCOL_ERROR; + call->local_abort = RX_PROTOCOL_ERROR; set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -500,7 +494,7 @@ protocol_error: write_lock_bh(&call->state_lock); if (call->state <= RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = RX_PROTOCOL_ERROR; + call->local_abort = RX_PROTOCOL_ERROR; set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -612,9 +606,9 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) struct rxrpc_wire_header whdr; /* dig out the RxRPC connection details */ - if (skb_copy_bits(skb, sizeof(struct udphdr), &whdr, sizeof(whdr)) < 0) + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) return -EBADMSG; - if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(whdr))) + if (!pskb_pull(skb, sizeof(whdr))) BUG(); memset(sp, 0, sizeof(*sp)); @@ -704,12 +698,12 @@ void rxrpc_data_ready(struct sock *sk) if (skb_checksum_complete(skb)) { rxrpc_free_skb(skb); rxrpc_put_local(local); - UDP_INC_STATS_BH(&init_net, UDP_MIB_INERRORS, 0); + __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); _leave(" [CSUM failed]"); return; } - UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0); + __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); /* The socket buffer we have is owned by UDP, with UDP's data all over * it, but we really want our own data there. diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index cd6cdbe87125..f0b807a163fa 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <net/sock.h> #include <rxrpc/packet.h> #if 0 @@ -124,11 +125,15 @@ enum rxrpc_command { * RxRPC security module interface */ struct rxrpc_security { - struct module *owner; /* providing module */ - struct list_head link; /* link in master list */ const char *name; /* name of this service */ u8 security_index; /* security type provided */ + /* Initialise a security service */ + int (*init)(void); + + /* Clean up a security service */ + void (*exit)(void); + /* initialise a connection's security */ int (*init_connection_security)(struct rxrpc_connection *); @@ -268,7 +273,7 @@ struct rxrpc_connection { struct rb_root calls; /* calls on this connection */ struct sk_buff_head rx_queue; /* received conn-level packets */ struct rxrpc_call *channels[RXRPC_MAXCALLS]; /* channels (active calls) */ - struct rxrpc_security *security; /* applied security module */ + const struct rxrpc_security *security; /* applied security module */ struct key *key; /* security for this connection (client) */ struct key *server_key; /* security for this service */ struct crypto_skcipher *cipher; /* encryption handle */ @@ -289,7 +294,9 @@ struct rxrpc_connection { RXRPC_CONN_LOCALLY_ABORTED, /* - conn aborted locally */ RXRPC_CONN_NETWORK_ERROR, /* - conn terminated by network error */ } state; - int error; /* error code for local abort */ + u32 local_abort; /* local abort code */ + u32 remote_abort; /* remote abort code */ + int error; /* local error incurred */ int debug_id; /* debug ID for printks */ unsigned int call_counter; /* call ID counter */ atomic_t serial; /* packet serial number counter */ @@ -399,7 +406,9 @@ struct rxrpc_call { rwlock_t state_lock; /* lock for state transition */ atomic_t usage; atomic_t sequence; /* Tx data packet sequence counter */ - u32 abort_code; /* local/remote abort code */ + u32 local_abort; /* local abort code */ + u32 remote_abort; /* remote abort code */ + int error; /* local error incurred */ enum rxrpc_call_state state : 8; /* current state of call */ int debug_id; /* debug ID for printks */ u8 channel; /* connection channel occupied by this call */ @@ -453,7 +462,7 @@ static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code) { write_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { - call->abort_code = abort_code; + call->local_abort = abort_code; call->state = RXRPC_CALL_LOCALLY_ABORTED; set_bit(RXRPC_CALL_EV_ABORT, &call->events); } @@ -478,13 +487,6 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * ar-ack.c */ -extern unsigned int rxrpc_requested_ack_delay; -extern unsigned int rxrpc_soft_ack_delay; -extern unsigned int rxrpc_idle_ack_delay; -extern unsigned int rxrpc_rx_window_size; -extern unsigned int rxrpc_rx_mtu; -extern unsigned int rxrpc_rx_jumbo_max; - void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); void rxrpc_process_call(struct work_struct *); @@ -506,7 +508,7 @@ struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *, unsigned long, int, gfp_t); struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_connection *, - struct rxrpc_host_header *, gfp_t); + struct rxrpc_host_header *); struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long); void rxrpc_release_call(struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); @@ -531,8 +533,7 @@ void __exit rxrpc_destroy_all_connections(void); struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *, struct rxrpc_host_header *); extern struct rxrpc_connection * -rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_host_header *, - gfp_t); +rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_host_header *); /* * ar-connevent.c @@ -550,8 +551,6 @@ void rxrpc_UDP_error_handler(struct work_struct *); /* * ar-input.c */ -extern const char *rxrpc_pkts[]; - void rxrpc_data_ready(struct sock *); int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool); void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); @@ -610,14 +609,10 @@ int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* * ar-security.c */ -int rxrpc_register_security(struct rxrpc_security *); -void rxrpc_unregister_security(struct rxrpc_security *); +int __init rxrpc_init_security(void); +void rxrpc_exit_security(void); int rxrpc_init_client_conn_security(struct rxrpc_connection *); int rxrpc_init_server_conn_security(struct rxrpc_connection *); -int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *, size_t, - void *); -int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *, u32 *); -void rxrpc_clear_conn_security(struct rxrpc_connection *); /* * ar-skbuff.c @@ -637,6 +632,33 @@ struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *, struct rxrpc_peer *); /* + * insecure.c + */ +extern const struct rxrpc_security rxrpc_no_security; + +/* + * misc.c + */ +extern unsigned int rxrpc_requested_ack_delay; +extern unsigned int rxrpc_soft_ack_delay; +extern unsigned int rxrpc_idle_ack_delay; +extern unsigned int rxrpc_rx_window_size; +extern unsigned int rxrpc_rx_mtu; +extern unsigned int rxrpc_rx_jumbo_max; + +extern const char *const rxrpc_pkts[]; +extern const s8 rxrpc_ack_priority[]; + +extern const char *rxrpc_acks(u8 reason); + +/* + * rxkad.c + */ +#ifdef CONFIG_RXKAD +extern const struct rxrpc_security rxkad; +#endif + +/* * sysctl.c */ #ifdef CONFIG_SYSCTL diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index d36fb6e1a29c..51cb10062a8d 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -110,7 +110,7 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) if (call->state <= RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = abort_code; + call->local_abort = abort_code; set_bit(RXRPC_CALL_EV_ABORT, &call->events); del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); @@ -663,7 +663,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, size_t pad; /* pad out if we're using security */ - if (conn->security) { + if (conn->security_ix) { pad = conn->security_size + skb->mark; pad = conn->size_align - pad; pad &= conn->size_align - 1; @@ -695,7 +695,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (more && seq & 1) sp->hdr.flags |= RXRPC_REQUEST_ACK; - ret = rxrpc_secure_packet( + ret = conn->security->secure_packet( call, skb, skb->mark, skb->head + sizeof(struct rxrpc_wire_header)); if (ret < 0) diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c index 525b2ba5a8f4..225163bc658d 100644 --- a/net/rxrpc/ar-proc.c +++ b/net/rxrpc/ar-proc.c @@ -80,7 +80,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call->conn->in_clientflag ? "Svc" : "Clt", atomic_read(&call->usage), rxrpc_call_states[call->state], - call->abort_code, + call->remote_abort ?: call->local_abort, call->user_call_ID); return 0; diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 64facba24a45..160f0927aa3e 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -288,7 +288,11 @@ receive_non_data_message: ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code); break; case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = call->abort_code; + abort_code = call->remote_abort; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); + break; + case RXRPC_SKB_MARK_LOCAL_ABORT: + abort_code = call->local_abort; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); break; case RXRPC_SKB_MARK_NET_ERROR: @@ -303,6 +307,7 @@ receive_non_data_message: &abort_code); break; default: + pr_err("RxRPC: Unknown packet mark %u\n", skb->mark); BUG(); break; } @@ -401,9 +406,14 @@ u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_REMOTE_ABORT); - - return sp->call->abort_code; + switch (skb->mark) { + case RXRPC_SKB_MARK_REMOTE_ABORT: + return sp->call->remote_abort; + case RXRPC_SKB_MARK_LOCAL_ABORT: + return sp->call->local_abort; + default: + BUG(); + } } EXPORT_SYMBOL(rxrpc_kernel_get_abort_code); diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c index ceff6394a65f..d223253b22fa 100644 --- a/net/rxrpc/ar-security.c +++ b/net/rxrpc/ar-security.c @@ -22,109 +22,60 @@ static LIST_HEAD(rxrpc_security_methods); static DECLARE_RWSEM(rxrpc_security_sem); -/* - * get an RxRPC security module - */ -static struct rxrpc_security *rxrpc_security_get(struct rxrpc_security *sec) -{ - return try_module_get(sec->owner) ? sec : NULL; -} - -/* - * release an RxRPC security module - */ -static void rxrpc_security_put(struct rxrpc_security *sec) +static const struct rxrpc_security *rxrpc_security_types[] = { + [RXRPC_SECURITY_NONE] = &rxrpc_no_security, +#ifdef CONFIG_RXKAD + [RXRPC_SECURITY_RXKAD] = &rxkad, +#endif +}; + +int __init rxrpc_init_security(void) { - module_put(sec->owner); -} - -/* - * look up an rxrpc security module - */ -static struct rxrpc_security *rxrpc_security_lookup(u8 security_index) -{ - struct rxrpc_security *sec = NULL; - - _enter(""); + int i, ret; - down_read(&rxrpc_security_sem); - - list_for_each_entry(sec, &rxrpc_security_methods, link) { - if (sec->security_index == security_index) { - if (unlikely(!rxrpc_security_get(sec))) - break; - goto out; + for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++) { + if (rxrpc_security_types[i]) { + ret = rxrpc_security_types[i]->init(); + if (ret < 0) + goto failed; } } - sec = NULL; -out: - up_read(&rxrpc_security_sem); - _leave(" = %p [%s]", sec, sec ? sec->name : ""); - return sec; + return 0; + +failed: + for (i--; i >= 0; i--) + if (rxrpc_security_types[i]) + rxrpc_security_types[i]->exit(); + return ret; } -/** - * rxrpc_register_security - register an RxRPC security handler - * @sec: security module - * - * register an RxRPC security handler for use by RxRPC - */ -int rxrpc_register_security(struct rxrpc_security *sec) +void rxrpc_exit_security(void) { - struct rxrpc_security *psec; - int ret; + int i; - _enter(""); - down_write(&rxrpc_security_sem); - - ret = -EEXIST; - list_for_each_entry(psec, &rxrpc_security_methods, link) { - if (psec->security_index == sec->security_index) - goto out; - } - - list_add(&sec->link, &rxrpc_security_methods); - - printk(KERN_NOTICE "RxRPC: Registered security type %d '%s'\n", - sec->security_index, sec->name); - ret = 0; - -out: - up_write(&rxrpc_security_sem); - _leave(" = %d", ret); - return ret; + for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++) + if (rxrpc_security_types[i]) + rxrpc_security_types[i]->exit(); } -EXPORT_SYMBOL_GPL(rxrpc_register_security); - -/** - * rxrpc_unregister_security - unregister an RxRPC security handler - * @sec: security module - * - * unregister an RxRPC security handler +/* + * look up an rxrpc security module */ -void rxrpc_unregister_security(struct rxrpc_security *sec) +static const struct rxrpc_security *rxrpc_security_lookup(u8 security_index) { - - _enter(""); - down_write(&rxrpc_security_sem); - list_del_init(&sec->link); - up_write(&rxrpc_security_sem); - - printk(KERN_NOTICE "RxRPC: Unregistered security type %d '%s'\n", - sec->security_index, sec->name); + if (security_index >= ARRAY_SIZE(rxrpc_security_types)) + return NULL; + return rxrpc_security_types[security_index]; } -EXPORT_SYMBOL_GPL(rxrpc_unregister_security); - /* * initialise the security on a client connection */ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) { + const struct rxrpc_security *sec; struct rxrpc_key_token *token; - struct rxrpc_security *sec; struct key *key = conn->key; int ret; @@ -148,8 +99,7 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) ret = conn->security->init_connection_security(conn); if (ret < 0) { - rxrpc_security_put(conn->security); - conn->security = NULL; + conn->security = &rxrpc_no_security; return ret; } @@ -162,7 +112,7 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) */ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) { - struct rxrpc_security *sec; + const struct rxrpc_security *sec; struct rxrpc_local *local = conn->trans->local; struct rxrpc_sock *rx; struct key *key; @@ -188,14 +138,12 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) /* the service appears to have died */ read_unlock_bh(&local->services_lock); - rxrpc_security_put(sec); _leave(" = -ENOENT"); return -ENOENT; found_service: if (!rx->securities) { read_unlock_bh(&local->services_lock); - rxrpc_security_put(sec); _leave(" = -ENOKEY"); return -ENOKEY; } @@ -205,7 +153,6 @@ found_service: &key_type_rxrpc_s, kdesc); if (IS_ERR(kref)) { read_unlock_bh(&local->services_lock); - rxrpc_security_put(sec); _leave(" = %ld [search]", PTR_ERR(kref)); return PTR_ERR(kref); } @@ -219,46 +166,3 @@ found_service: _leave(" = 0"); return 0; } - -/* - * secure a packet prior to transmission - */ -int rxrpc_secure_packet(const struct rxrpc_call *call, - struct sk_buff *skb, - size_t data_size, - void *sechdr) -{ - if (call->conn->security) - return call->conn->security->secure_packet( - call, skb, data_size, sechdr); - return 0; -} - -/* - * secure a packet prior to transmission - */ -int rxrpc_verify_packet(const struct rxrpc_call *call, struct sk_buff *skb, - u32 *_abort_code) -{ - if (call->conn->security) - return call->conn->security->verify_packet( - call, skb, _abort_code); - return 0; -} - -/* - * clear connection security - */ -void rxrpc_clear_conn_security(struct rxrpc_connection *conn) -{ - _enter("{%d}", conn->debug_id); - - if (conn->security) { - conn->security->clear(conn); - rxrpc_security_put(conn->security); - conn->security = NULL; - } - - key_put(conn->key); - key_put(conn->server_key); -} diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c new file mode 100644 index 000000000000..e571403613c1 --- /dev/null +++ b/net/rxrpc/insecure.c @@ -0,0 +1,83 @@ +/* Null security operations. + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +static int none_init_connection_security(struct rxrpc_connection *conn) +{ + return 0; +} + +static void none_prime_packet_security(struct rxrpc_connection *conn) +{ +} + +static int none_secure_packet(const struct rxrpc_call *call, + struct sk_buff *skb, + size_t data_size, + void *sechdr) +{ + return 0; +} + +static int none_verify_packet(const struct rxrpc_call *call, + struct sk_buff *skb, + u32 *_abort_code) +{ + return 0; +} + +static int none_respond_to_challenge(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 *_abort_code) +{ + *_abort_code = RX_PROTOCOL_ERROR; + return -EPROTO; +} + +static int none_verify_response(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 *_abort_code) +{ + *_abort_code = RX_PROTOCOL_ERROR; + return -EPROTO; +} + +static void none_clear(struct rxrpc_connection *conn) +{ +} + +static int none_init(void) +{ + return 0; +} + +static void none_exit(void) +{ +} + +/* + * RxRPC Kerberos-based security + */ +const struct rxrpc_security rxrpc_no_security = { + .name = "none", + .security_index = RXRPC_SECURITY_NONE, + .init = none_init, + .exit = none_exit, + .init_connection_security = none_init_connection_security, + .prime_packet_security = none_prime_packet_security, + .secure_packet = none_secure_packet, + .verify_packet = none_verify_packet, + .respond_to_challenge = none_respond_to_challenge, + .verify_response = none_verify_response, + .clear = none_clear, +}; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c new file mode 100644 index 000000000000..1afe9876e79f --- /dev/null +++ b/net/rxrpc/misc.c @@ -0,0 +1,89 @@ +/* Miscellaneous bits + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +/* + * How long to wait before scheduling ACK generation after seeing a + * packet with RXRPC_REQUEST_ACK set (in jiffies). + */ +unsigned int rxrpc_requested_ack_delay = 1; + +/* + * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * + * We use this when we've received new data packets. If those packets aren't + * all consumed within this time we will send a DELAY ACK if an ACK was not + * requested to let the sender know it doesn't need to resend. + */ +unsigned int rxrpc_soft_ack_delay = 1 * HZ; + +/* + * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * + * We use this when we've consumed some previously soft-ACK'd packets when + * further packets aren't immediately received to decide when to send an IDLE + * ACK let the other end know that it can free up its Tx buffer space. + */ +unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; + +/* + * Receive window size in packets. This indicates the maximum number of + * unconsumed received packets we're willing to retain in memory. Once this + * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further + * packets. + */ +unsigned int rxrpc_rx_window_size = 32; + +/* + * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet + * made by gluing normal packets together that we're willing to handle. + */ +unsigned int rxrpc_rx_mtu = 5692; + +/* + * The maximum number of fragments in a received jumbo packet that we tell the + * sender that we're willing to handle. + */ +unsigned int rxrpc_rx_jumbo_max = 4; + +const char *const rxrpc_pkts[] = { + "?00", + "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG", + "?09", "?10", "?11", "?12", "VERSION", "?14", "?15" +}; + +const s8 rxrpc_ack_priority[] = { + [0] = 0, + [RXRPC_ACK_DELAY] = 1, + [RXRPC_ACK_REQUESTED] = 2, + [RXRPC_ACK_IDLE] = 3, + [RXRPC_ACK_PING_RESPONSE] = 4, + [RXRPC_ACK_DUPLICATE] = 5, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, + [RXRPC_ACK_EXCEEDS_WINDOW] = 7, + [RXRPC_ACK_NOSPACE] = 8, +}; + +const char *rxrpc_acks(u8 reason) +{ + static const char *const str[] = { + "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", + "IDL", "-?-" + }; + + if (reason >= ARRAY_SIZE(str)) + reason = ARRAY_SIZE(str) - 1; + return str[reason]; +} diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index f0aeb8163688..6b726a046a7d 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -20,7 +20,6 @@ #include <net/sock.h> #include <net/af_rxrpc.h> #include <keys/rxrpc-type.h> -#define rxrpc_debug rxkad_debug #include "ar-internal.h" #define RXKAD_VERSION 2 @@ -31,10 +30,6 @@ #define REALM_SZ 40 /* size of principal's auth domain */ #define SNAME_SZ 40 /* size of service name */ -unsigned int rxrpc_debug; -module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(debug, "rxkad debugging mask"); - struct rxkad_level1_hdr { __be32 data_size; /* true data size (excluding padding) */ }; @@ -44,10 +39,6 @@ struct rxkad_level2_hdr { __be32 checksum; /* decrypted data checksum */ }; -MODULE_DESCRIPTION("RxRPC network protocol type-2 security (Kerberos 4)"); -MODULE_AUTHOR("Red Hat, Inc."); -MODULE_LICENSE("GPL"); - /* * this holds a pinned cipher so that keventd doesn't get called by the cipher * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE @@ -1164,12 +1155,35 @@ static void rxkad_clear(struct rxrpc_connection *conn) } /* + * Initialise the rxkad security service. + */ +static int rxkad_init(void) +{ + /* pin the cipher we need so that the crypto layer doesn't invoke + * keventd to go get it */ + rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(rxkad_ci)) + return PTR_ERR(rxkad_ci); + return 0; +} + +/* + * Clean up the rxkad security service. + */ +static void rxkad_exit(void) +{ + if (rxkad_ci) + crypto_free_skcipher(rxkad_ci); +} + +/* * RxRPC Kerberos-based security */ -static struct rxrpc_security rxkad = { - .owner = THIS_MODULE, +const struct rxrpc_security rxkad = { .name = "rxkad", .security_index = RXRPC_SECURITY_RXKAD, + .init = rxkad_init, + .exit = rxkad_exit, .init_connection_security = rxkad_init_connection_security, .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, @@ -1179,28 +1193,3 @@ static struct rxrpc_security rxkad = { .verify_response = rxkad_verify_response, .clear = rxkad_clear, }; - -static __init int rxkad_init(void) -{ - _enter(""); - - /* pin the cipher we need so that the crypto layer doesn't invoke - * keventd to go get it */ - rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(rxkad_ci)) - return PTR_ERR(rxkad_ci); - - return rxrpc_register_security(&rxkad); -} - -module_init(rxkad_init); - -static __exit void rxkad_exit(void) -{ - _enter(""); - - rxrpc_unregister_security(&rxkad); - crypto_free_skcipher(rxkad_ci); -} - -module_exit(rxkad_exit); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 96066665e376..336774a535c3 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -657,12 +657,15 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, &p->tcfc_lock, &d); + TCA_STATS, + TCA_XSTATS, + &p->tcfc_lock, &d, + TCA_PAD); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - &p->tcfc_lock, &d); + &p->tcfc_lock, &d, TCA_ACT_PAD); if (err < 0) goto errout; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 8c9f1f0459ab..c7123e01c2ca 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -53,9 +53,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); + bpf_compute_data_end(skb); filter_res = BPF_PROG_RUN(filter, skb); __skb_pull(skb, skb->mac_len); } else { + bpf_compute_data_end(skb); filter_res = BPF_PROG_RUN(filter, skb); } rcu_read_unlock(); @@ -156,7 +158,8 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse); tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires); - if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm)) + if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm, + TCA_ACT_BPF_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index c0ed93ce2391..2ba700c765e0 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -163,7 +163,8 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(ci->tcf_tm.expires); - if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t, + TCA_CONNMARK_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index d22426cdebc0..28e934ed038a 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -549,7 +549,7 @@ static int tcf_csum_dump(struct sk_buff *skb, t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(p->tcf_tm.expires); - if (nla_put(skb, TCA_CSUM_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 887fc1f209ff..ec5cc8435238 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -148,6 +148,20 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, return action; } +static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, + u64 lastuse) +{ + struct tcf_gact *gact = a->priv; + int action = READ_ONCE(gact->tcf_action); + struct tcf_t *tm = &gact->tcf_tm; + + _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, packets); + if (action == TC_ACT_SHOT) + this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; + + tm->lastuse = lastuse; +} + static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); @@ -177,7 +191,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); - if (nla_put(skb, TCA_GACT_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_GACT_TM, sizeof(t), &t, TCA_GACT_PAD)) goto nla_put_failure; return skb->len; @@ -207,6 +221,7 @@ static struct tc_action_ops act_gact_ops = { .type = TCA_ACT_GACT, .owner = THIS_MODULE, .act = tcf_gact, + .stats_update = tcf_gact_stats_update, .dump = tcf_gact_dump, .init = tcf_gact_init, .walk = tcf_gact_walker, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 343d011aa818..658046dfe02d 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -556,7 +556,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, t.install = jiffies_to_clock_t(jiffies - ife->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - ife->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(ife->tcf_tm.expires); - if (nla_put(skb, TCA_IFE_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD)) goto nla_put_failure; if (!is_zero_ether_addr(ife->eth_dst)) { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 8b5270008a6e..9f002ada7074 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -280,7 +280,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); - if (nla_put(skb, TCA_IPT_TM, sizeof (tm), &tm)) + if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD)) goto nla_put_failure; kfree(t); return skb->len; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 8f3948dd38b8..128942bc9e42 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -36,14 +36,15 @@ static DEFINE_SPINLOCK(mirred_list_lock); static void tcf_mirred_release(struct tc_action *a, int bind) { struct tcf_mirred *m = to_mirred(a); - struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1); + struct net_device *dev; /* We could be called either in a RCU callback or with RTNL lock held. */ spin_lock_bh(&mirred_list_lock); list_del(&m->tcfm_list); - spin_unlock_bh(&mirred_list_lock); + dev = rcu_dereference_protected(m->tcfm_dev, 1); if (dev) dev_put(dev); + spin_unlock_bh(&mirred_list_lock); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -221,7 +222,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(m->tcf_tm.expires); - if (nla_put(skb, TCA_MIRRED_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 0f65cdfbfb1d..c0a879f940de 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -267,7 +267,7 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(p->tcf_tm.expires); - if (nla_put(skb, TCA_NAT_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 429c3ab65142..c6e18f230af6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -203,7 +203,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(p->tcf_tm.expires); - if (nla_put(skb, TCA_PEDIT_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD)) goto nla_put_failure; kfree(opt); return skb->len; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 3a33fb648a6d..e42f8daca147 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -161,7 +161,7 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(d->tcf_tm.expires); - if (nla_put(skb, TCA_DEF_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 69da5a8f0034..e928802966bc 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -171,7 +171,7 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(d->tcf_tm.expires); - if (nla_put(skb, TCA_SKBEDIT_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index c45f926dafb9..ac4adc812c12 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -185,7 +185,7 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, t.install = jiffies_to_clock_t(jiffies - v->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - v->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(v->tcf_tm.expires); - if (nla_put(skb, TCA_VLAN_TM, sizeof(t), &t)) + if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 425fe6a0eda3..7b342c779da7 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -96,9 +96,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); + bpf_compute_data_end(skb); filter_res = BPF_PROG_RUN(prog->filter, skb); __skb_pull(skb, skb->mac_len); } else { + bpf_compute_data_end(skb); filter_res = BPF_PROG_RUN(prog->filter, skb); } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 2181ffc76638..730aacafc22d 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -210,6 +210,25 @@ static void fl_hw_replace_filter(struct tcf_proto *tp, dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); } +static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_flower_offload offload = {0}; + struct tc_to_netdev tc; + + if (!tc_should_offload(dev, 0)) + return; + + offload.command = TC_CLSFLOWER_STATS; + offload.cookie = (unsigned long)f; + offload.exts = &f->exts; + + tc.type = TC_SETUP_CLSFLOWER; + tc.cls_flower = &offload; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); +} + static bool fl_destroy(struct tcf_proto *tp, bool force) { struct cls_fl_head *head = rtnl_dereference(tp->root); @@ -662,6 +681,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; } + fl_hw_update_stats(tp, f); + if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, sizeof(key->eth.dst)) || diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 563cdad76448..079b43b3c5d2 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -134,6 +134,11 @@ next_knode: j = 0; #endif + if (tc_skip_sw(n->flags)) { + n = rcu_dereference_bh(n->next); + goto next_knode; + } + #ifdef CONFIG_CLS_U32_MARK if ((skb->mark & n->mask) != n->val) { n = rcu_dereference_bh(n->next); @@ -443,13 +448,14 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) } } -static void u32_replace_hw_hnode(struct tcf_proto *tp, +static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; struct tc_to_netdev offload; + int err; offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; @@ -460,9 +466,13 @@ static void u32_replace_hw_hnode(struct tcf_proto *tp, offload.cls_u32->hnode.handle = h->handle; offload.cls_u32->hnode.prio = h->prio; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; } + + return 0; } static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) @@ -485,13 +495,14 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) } } -static void u32_replace_hw_knode(struct tcf_proto *tp, +static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; struct tc_to_netdev offload; + int err; offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; @@ -512,9 +523,13 @@ static void u32_replace_hw_knode(struct tcf_proto *tp, if (n->ht_down) offload.cls_u32->knode.link_handle = n->ht_down->handle; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; } + + return 0; } static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) @@ -845,8 +860,11 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - if (tb[TCA_U32_FLAGS]) + if (tb[TCA_U32_FLAGS]) { flags = nla_get_u32(tb[TCA_U32_FLAGS]); + if (!tc_flags_valid(flags)) + return err; + } n = (struct tc_u_knode *)*arg; if (n) { @@ -871,10 +889,15 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; } + err = u32_replace_hw_knode(tp, new, flags); + if (err) { + u32_destroy_key(tp, new, false); + return err; + } + u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); call_rcu(&n->rcu, u32_delete_key_rcu); - u32_replace_hw_knode(tp, new, flags); return 0; } @@ -978,6 +1001,10 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, struct tc_u_knode __rcu **ins; struct tc_u_knode *pins; + err = u32_replace_hw_knode(tp, n, flags); + if (err) + goto errhw; + ins = &ht->ht[TC_U32_HASH(handle)]; for (pins = rtnl_dereference(*ins); pins; ins = &pins->next, pins = rtnl_dereference(*ins)) @@ -986,11 +1013,11 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->next, pins); rcu_assign_pointer(*ins, n); - u32_replace_hw_knode(tp, n, flags); *arg = (unsigned long)n; return 0; } +errhw: #ifdef CONFIG_CLS_U32_MARK free_percpu(n->pcpu_success); errout: @@ -1140,9 +1167,10 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, gpf->kcnts[i] += pf->kcnts[i]; } - if (nla_put(skb, TCA_U32_PCNT, - sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64), - gpf)) { + if (nla_put_64bit(skb, TCA_U32_PCNT, + sizeof(struct tc_u32_pcnt) + + n->sel.nkeys * sizeof(u64), + gpf, TCA_U32_PAD)) { kfree(gpf); goto nla_put_failure; } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index f2aabc0089da..a309a07ccb35 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -796,7 +796,7 @@ struct meta_type_ops { int (*dump)(struct sk_buff *, struct meta_value *, int); }; -static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = { +static const struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = { [TCF_META_TYPE_VAR] = { .destroy = meta_var_destroy, .compare = meta_var_compare, @@ -812,7 +812,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = { } }; -static inline struct meta_type_ops *meta_type_ops(struct meta_value *v) +static inline const struct meta_type_ops *meta_type_ops(struct meta_value *v) { return &__meta_type_ops[meta_type(v)]; } @@ -870,7 +870,7 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m, static void meta_delete(struct meta_match *meta) { if (meta) { - struct meta_type_ops *ops = meta_type_ops(&meta->lvalue); + const struct meta_type_ops *ops = meta_type_ops(&meta->lvalue); if (ops && ops->destroy) { ops->destroy(&meta->lvalue); @@ -964,7 +964,7 @@ static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em) { struct meta_match *meta = (struct meta_match *) em->data; struct tcf_meta_hdr hdr; - struct meta_type_ops *ops; + const struct meta_type_ops *ops; memset(&hdr, 0, sizeof(hdr)); memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left)); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 3b180ff72f79..64f71a2155f3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1365,7 +1365,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - qdisc_root_sleeping_lock(q), &d) < 0) + qdisc_root_sleeping_lock(q), &d, + TCA_PAD) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -1679,7 +1680,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - qdisc_root_sleeping_lock(q), &d) < 0) + qdisc_root_sleeping_lock(q), &d, + TCA_PAD) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 9b7e2980ee5c..dddf3bb65a32 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -49,6 +49,8 @@ #include <linux/prefetch.h> #include <net/pkt_sched.h> #include <net/codel.h> +#include <net/codel_impl.h> +#include <net/codel_qdisc.h> #define DEFAULT_CODEL_LIMIT 1000 @@ -64,20 +66,33 @@ struct codel_sched_data { * to dequeue a packet from queue. Note: backlog is handled in * codel, we dont need to reduce it here. */ -static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) +static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) { + struct Qdisc *sch = ctx; struct sk_buff *skb = __skb_dequeue(&sch->q); + if (skb) + sch->qstats.backlog -= qdisc_pkt_len(skb); + prefetch(&skb->end); /* we'll need skb_shinfo() */ return skb; } +static void drop_func(struct sk_buff *skb, void *ctx) +{ + struct Qdisc *sch = ctx; + + qdisc_drop(skb, sch); +} + static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) { struct codel_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue); + skb = codel_dequeue(sch, &sch->qstats.backlog, &q->params, &q->vars, + &q->stats, qdisc_pkt_len, codel_get_enqueue_time, + drop_func, dequeue_func); /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, * or HTB crashes. Defer it for next round. @@ -173,9 +188,10 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = DEFAULT_CODEL_LIMIT; - codel_params_init(&q->params, sch); + codel_params_init(&q->params); codel_vars_init(&q->vars); codel_stats_init(&q->stats); + q->params.mtu = psched_mtu(qdisc_dev(sch)); if (opt) { int err = codel_change(sch, opt); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index d3fc8f9dd3d4..6883a8971562 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -24,6 +24,8 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/codel.h> +#include <net/codel_impl.h> +#include <net/codel_qdisc.h> /* Fair Queue CoDel. * @@ -57,8 +59,12 @@ struct fq_codel_sched_data { u32 flows_cnt; /* number of flows */ u32 perturbation; /* hash perturbation */ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ + u32 drop_batch_size; + u32 memory_limit; struct codel_params cparams; struct codel_stats cstats; + u32 memory_usage; + u32 drop_overmemory; u32 drop_overlimit; u32 new_flow_count; @@ -133,17 +139,21 @@ static inline void flow_queue_add(struct fq_codel_flow *flow, skb->next = NULL; } -static unsigned int fq_codel_drop(struct Qdisc *sch) +static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets) { struct fq_codel_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; unsigned int maxbacklog = 0, idx = 0, i, len; struct fq_codel_flow *flow; + unsigned int threshold; + unsigned int mem = 0; - /* Queue is full! Find the fat flow and drop packet from it. + /* Queue is full! Find the fat flow and drop packet(s) from it. * This might sound expensive, but with 1024 flows, we scan * 4KB of memory, and we dont need to handle a complex tree * in fast path (packet queue/enqueue) with many cache misses. + * In stress mode, we'll try to drop 64 packets from the flow, + * amortizing this linear lookup to one cache line per drop. */ for (i = 0; i < q->flows_cnt; i++) { if (q->backlogs[i] > maxbacklog) { @@ -151,15 +161,26 @@ static unsigned int fq_codel_drop(struct Qdisc *sch) idx = i; } } + + /* Our goal is to drop half of this fat flow backlog */ + threshold = maxbacklog >> 1; + flow = &q->flows[idx]; - skb = dequeue_head(flow); - len = qdisc_pkt_len(skb); + len = 0; + i = 0; + do { + skb = dequeue_head(flow); + len += qdisc_pkt_len(skb); + mem += skb->truesize; + kfree_skb(skb); + } while (++i < max_packets && len < threshold); + + flow->dropped += i; q->backlogs[idx] -= len; - sch->q.qlen--; - qdisc_qstats_drop(sch); - qdisc_qstats_backlog_dec(sch, skb); - kfree_skb(skb); - flow->dropped++; + q->memory_usage -= mem; + sch->qstats.drops += i; + sch->qstats.backlog -= len; + sch->q.qlen -= i; return idx; } @@ -168,16 +189,17 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch) unsigned int prev_backlog; prev_backlog = sch->qstats.backlog; - fq_codel_drop(sch); + fq_codel_drop(sch, 1U); return prev_backlog - sch->qstats.backlog; } static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); - unsigned int idx, prev_backlog; + unsigned int idx, prev_backlog, prev_qlen; struct fq_codel_flow *flow; int uninitialized_var(ret); + bool memory_limited; idx = fq_codel_classify(skb, sch, &ret); if (idx == 0) { @@ -200,28 +222,38 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow->deficit = q->quantum; flow->dropped = 0; } - if (++sch->q.qlen <= sch->limit) + q->memory_usage += skb->truesize; + memory_limited = q->memory_usage > q->memory_limit; + if (++sch->q.qlen <= sch->limit && !memory_limited) return NET_XMIT_SUCCESS; prev_backlog = sch->qstats.backlog; - q->drop_overlimit++; - /* Return Congestion Notification only if we dropped a packet - * from this flow. + prev_qlen = sch->q.qlen; + + /* fq_codel_drop() is quite expensive, as it performs a linear search + * in q->backlogs[] to find a fat flow. + * So instead of dropping a single packet, drop half of its backlog + * with a 64 packets limit to not add a too big cpu spike here. */ - if (fq_codel_drop(sch) == idx) - return NET_XMIT_CN; + ret = fq_codel_drop(sch, q->drop_batch_size); + + q->drop_overlimit += prev_qlen - sch->q.qlen; + if (memory_limited) + q->drop_overmemory += prev_qlen - sch->q.qlen; + /* As we dropped packet(s), better let upper stack know this */ + qdisc_tree_reduce_backlog(sch, prev_qlen - sch->q.qlen, + prev_backlog - sch->qstats.backlog); - /* As we dropped a packet, better let upper stack know this */ - qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); - return NET_XMIT_SUCCESS; + return ret == idx ? NET_XMIT_CN : NET_XMIT_SUCCESS; } /* This is the specific function called from codel_dequeue() * to dequeue a packet from queue. Note: backlog is handled in * codel, we dont need to reduce it here. */ -static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) +static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) { + struct Qdisc *sch = ctx; struct fq_codel_sched_data *q = qdisc_priv(sch); struct fq_codel_flow *flow; struct sk_buff *skb = NULL; @@ -230,11 +262,20 @@ static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) if (flow->head) { skb = dequeue_head(flow); q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb); + q->memory_usage -= skb->truesize; sch->q.qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); } return skb; } +static void drop_func(struct sk_buff *skb, void *ctx) +{ + struct Qdisc *sch = ctx; + + qdisc_drop(skb, sch); +} + static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); @@ -263,8 +304,9 @@ begin: prev_ecn_mark = q->cstats.ecn_mark; prev_backlog = sch->qstats.backlog; - skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats, - dequeue); + skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams, + &flow->cvars, &q->cstats, qdisc_pkt_len, + codel_get_enqueue_time, drop_func, dequeue_func); flow->dropped += q->cstats.drop_count - prev_drop_count; flow->dropped += q->cstats.ecn_mark - prev_ecn_mark; @@ -313,6 +355,7 @@ static void fq_codel_reset(struct Qdisc *sch) } memset(q->backlogs, 0, q->flows_cnt * sizeof(u32)); sch->q.qlen = 0; + q->memory_usage = 0; } static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { @@ -323,6 +366,8 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { [TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 }, [TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 }, [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, + [TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 }, + [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 }, }; static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) @@ -374,7 +419,14 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_CODEL_QUANTUM]) q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); - while (sch->q.qlen > sch->limit) { + if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]) + q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])); + + if (tb[TCA_FQ_CODEL_MEMORY_LIMIT]) + q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT])); + + while (sch->q.qlen > sch->limit || + q->memory_usage > q->memory_limit) { struct sk_buff *skb = fq_codel_dequeue(sch); q->cstats.drop_len += qdisc_pkt_len(skb); @@ -419,13 +471,16 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = 10*1024; q->flows_cnt = 1024; + q->memory_limit = 32 << 20; /* 32 MBytes */ + q->drop_batch_size = 64; q->quantum = psched_mtu(qdisc_dev(sch)); q->perturbation = prandom_u32(); INIT_LIST_HEAD(&q->new_flows); INIT_LIST_HEAD(&q->old_flows); - codel_params_init(&q->cparams, sch); + codel_params_init(&q->cparams); codel_stats_init(&q->cstats); q->cparams.ecn = true; + q->cparams.mtu = psched_mtu(qdisc_dev(sch)); if (opt) { int err = fq_codel_change(sch, opt); @@ -476,6 +531,10 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) q->cparams.ecn) || nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM, q->quantum) || + nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE, + q->drop_batch_size) || + nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT, + q->memory_limit) || nla_put_u32(skb, TCA_FQ_CODEL_FLOWS, q->flows_cnt)) goto nla_put_failure; @@ -504,6 +563,8 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.qdisc_stats.ecn_mark = q->cstats.ecn_mark; st.qdisc_stats.new_flow_count = q->new_flow_count; st.qdisc_stats.ce_mark = q->cstats.ce_mark; + st.qdisc_stats.memory_usage = q->memory_usage; + st.qdisc_stats.drop_overmemory = q->drop_overmemory; list_for_each(pos, &q->new_flows) st.qdisc_stats.new_flows_len++; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 80742edea96f..269dd71b3828 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -108,35 +108,6 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, return skb; } -static inline int handle_dev_cpu_collision(struct sk_buff *skb, - struct netdev_queue *dev_queue, - struct Qdisc *q) -{ - int ret; - - if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { - /* - * Same CPU holding the lock. It may be a transient - * configuration error, when hard_start_xmit() recurses. We - * detect it by checking xmit owner and drop the packet when - * deadloop is detected. Return OK to try the next skb. - */ - kfree_skb_list(skb); - net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n", - dev_queue->dev->name); - ret = qdisc_qlen(q); - } else { - /* - * Another cpu is holding lock, requeue & delay xmits for - * some time. - */ - __this_cpu_inc(softnet_data.cpu_collision); - ret = dev_requeue_skb(skb, q); - } - - return ret; -} - /* * Transmit possibly several skbs, and handle the return status as * required. Holding the __QDISC___STATE_RUNNING bit guarantees that @@ -174,9 +145,6 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, if (dev_xmit_complete(ret)) { /* Driver sent out skb successfully or skb was consumed */ ret = qdisc_qlen(q); - } else if (ret == NETDEV_TX_LOCKED) { - /* Driver try lock failed */ - ret = handle_dev_cpu_collision(skb, txq, q); } else { /* Driver returned NETDEV_TX_BUSY - requeue skb */ if (unlikely(ret != NETDEV_TX_BUSY)) @@ -259,13 +227,12 @@ unsigned long dev_trans_start(struct net_device *dev) if (is_vlan_dev(dev)) dev = vlan_dev_real_dev(dev); - res = dev->trans_start; - for (i = 0; i < dev->num_tx_queues; i++) { + res = netdev_get_tx_queue(dev, 0)->trans_start; + for (i = 1; i < dev->num_tx_queues; i++) { val = netdev_get_tx_queue(dev, i)->trans_start; if (val && time_after(val, res)) res = val; } - dev->trans_start = res; return res; } @@ -288,10 +255,7 @@ static void dev_watchdog(unsigned long arg) struct netdev_queue *txq; txq = netdev_get_tx_queue(dev, i); - /* - * old device drivers set dev->trans_start - */ - trans_start = txq->trans_start ? : dev->trans_start; + trans_start = txq->trans_start; if (netif_xmit_stopped(txq) && time_after(jiffies, (trans_start + dev->watchdog_timeo))) { @@ -807,7 +771,7 @@ void dev_activate(struct net_device *dev) transition_one_qdisc(dev, dev_ingress_queue(dev), NULL); if (need_watchdog) { - dev->trans_start = jiffies; + netif_trans_update(dev); dev_watchdog_up(dev); } } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 87b02ed3d5f2..f6bf5818ed4d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1122,10 +1122,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) && - nla_put_u64(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps)) + nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps, + TCA_HTB_PAD)) goto nla_put_failure; if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) && - nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps)) + nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps, + TCA_HTB_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 4befe97a9034..205bed00dd34 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1051,7 +1051,8 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; if (q->rate >= (1ULL << 32)) { - if (nla_put_u64(skb, TCA_NETEM_RATE64, q->rate)) + if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate, + TCA_NETEM_PAD)) goto nla_put_failure; rate.rate = ~0U; } else { diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index c2fbde742f37..83b90b584fae 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -472,11 +472,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if (q->rate.rate_bytes_ps >= (1ULL << 32) && - nla_put_u64(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps)) + nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps, + TCA_TBF_PAD)) goto nla_put_failure; if (tbf_peak_present(q) && q->peak.rate_bytes_ps >= (1ULL << 32) && - nla_put_u64(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps)) + nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps, + TCA_TBF_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 71c1a598d9bc..d9c04dc1b3f3 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -99,5 +99,9 @@ config SCTP_COOKIE_HMAC_SHA1 select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1 select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1 +config INET_SCTP_DIAG + depends on INET_DIAG + def_tristate INET_DIAG + endif # IP_SCTP diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 3b4ffb021cf1..0fca5824ad0e 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_IP_SCTP) += sctp.o obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o +obj-$(CONFIG_INET_SCTP_DIAG) += sctp_diag.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ protocol.o endpointola.o associola.o \ diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 958ef5f33f4b..1eb94bf18ef4 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -239,7 +239,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, offset = 0; if ((whole > 1) || (whole && over)) - SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); /* Create chunks for all the full sized DATA chunks. */ for (i = 0, len = first_len; i < whole; i++) { diff --git a/net/sctp/input.c b/net/sctp/input.c index 00b8445364e3..a701527a9480 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -84,7 +84,7 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb) if (val != cmp) { /* CRC failure, dump it. */ - SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS); + __SCTP_INC_STATS(net, SCTP_MIB_CHECKSUMERRORS); return -1; } return 0; @@ -122,7 +122,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) goto discard_it; - SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); + __SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS); if (skb_linearize(skb)) goto discard_it; @@ -208,7 +208,7 @@ int sctp_rcv(struct sk_buff *skb) */ if (!asoc) { if (sctp_rcv_ootb(skb)) { - SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES); + __SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); goto discard_release; } } @@ -264,9 +264,9 @@ int sctp_rcv(struct sk_buff *skb) skb = NULL; /* sctp_chunk_free already freed the skb */ goto discard_release; } - SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG); + __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_BACKLOG); } else { - SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ); + __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); } @@ -281,7 +281,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: - SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS); + __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; @@ -532,7 +532,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); *app = asoc; *tpp = transport; @@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; } /* Warning: The sock lock is held. Remember to call diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 7e8a16c77039..9d87bba0ff1d 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -89,10 +89,12 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) * Eventually, we should clean up inqueue to not rely * on the BH related data structures. */ + local_bh_disable(); list_add_tail(&chunk->list, &q->in_chunk_list); if (chunk->asoc) chunk->asoc->stats.ipackets++; q->immediate.func(&q->immediate); + local_bh_enable(); } /* Peek at the next chunk on the inqeue. */ @@ -163,6 +165,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) chunk->singleton = 1; ch = (sctp_chunkhdr_t *) chunk->skb->data; chunk->data_accepted = 0; + + if (chunk->asoc) + sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb); } chunk->chunk_hdr = ch; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ce46f1c7f133..0657d18a85bf 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -162,7 +162,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS); goto out; } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 5cfac8d5d3b3..4cb5aedfe3ee 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -280,82 +280,38 @@ void sctp_eps_proc_exit(struct net *net) struct sctp_ht_iter { struct seq_net_private p; struct rhashtable_iter hti; + int start_fail; }; -static struct sctp_transport *sctp_transport_get_next(struct seq_file *seq) +static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; - struct sctp_transport *t; - - t = rhashtable_walk_next(&iter->hti); - for (; t; t = rhashtable_walk_next(&iter->hti)) { - if (IS_ERR(t)) { - if (PTR_ERR(t) == -EAGAIN) - continue; - break; - } + int err = sctp_transport_walk_start(&iter->hti); - if (net_eq(sock_net(t->asoc->base.sk), seq_file_net(seq)) && - t->asoc->peer.primary_path == t) - break; + if (err) { + iter->start_fail = 1; + return ERR_PTR(err); } - return t; + return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); } -static struct sctp_transport *sctp_transport_get_idx(struct seq_file *seq, - loff_t pos) -{ - void *obj = SEQ_START_TOKEN; - - while (pos && (obj = sctp_transport_get_next(seq)) && !IS_ERR(obj)) - pos--; - - return obj; -} - -static int sctp_transport_walk_start(struct seq_file *seq) +static void sctp_transport_seq_stop(struct seq_file *seq, void *v) { struct sctp_ht_iter *iter = seq->private; - int err; - - err = rhashtable_walk_init(&sctp_transport_hashtable, &iter->hti); - if (err) - return err; - - err = rhashtable_walk_start(&iter->hti); - return err == -EAGAIN ? 0 : err; + if (iter->start_fail) + return; + sctp_transport_walk_stop(&iter->hti); } -static void sctp_transport_walk_stop(struct seq_file *seq) +static void *sctp_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; - rhashtable_walk_stop(&iter->hti); - rhashtable_walk_exit(&iter->hti); -} - -static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) -{ - int err = sctp_transport_walk_start(seq); - - if (err) - return ERR_PTR(err); - - return sctp_transport_get_idx(seq, *pos); -} - -static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) -{ - sctp_transport_walk_stop(seq); -} - -static void *sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ ++*pos; - return sctp_transport_get_next(seq); + return sctp_transport_get_next(seq_file_net(seq), &iter->hti); } /* Display sctp associations (/proc/net/sctp/assocs). */ @@ -416,9 +372,9 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) } static const struct seq_operations sctp_assoc_ops = { - .start = sctp_assocs_seq_start, - .next = sctp_assocs_seq_next, - .stop = sctp_assocs_seq_stop, + .start = sctp_transport_seq_start, + .next = sctp_transport_seq_next, + .stop = sctp_transport_seq_stop, .show = sctp_assocs_seq_show, }; @@ -455,28 +411,6 @@ void sctp_assocs_proc_exit(struct net *net) remove_proc_entry("assocs", net->sctp.proc_net_sctp); } -static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) -{ - int err = sctp_transport_walk_start(seq); - - if (err) - return ERR_PTR(err); - - return sctp_transport_get_idx(seq, *pos); -} - -static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - - return sctp_transport_get_next(seq); -} - -static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) -{ - sctp_transport_walk_stop(seq); -} - static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { struct sctp_association *assoc; @@ -550,9 +484,9 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) } static const struct seq_operations sctp_remaddr_ops = { - .start = sctp_remaddr_seq_start, - .next = sctp_remaddr_seq_next, - .stop = sctp_remaddr_seq_stop, + .start = sctp_transport_seq_start, + .next = sctp_transport_seq_next, + .stop = sctp_transport_seq_stop, .show = sctp_remaddr_seq_show, }; diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c new file mode 100644 index 000000000000..8e3e769dc9ea --- /dev/null +++ b/net/sctp/sctp_diag.c @@ -0,0 +1,500 @@ +#include <linux/module.h> +#include <linux/inet_diag.h> +#include <linux/sock_diag.h> +#include <net/sctp/sctp.h> + +extern void inet_diag_msg_common_fill(struct inet_diag_msg *r, + struct sock *sk); +extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns); + +static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info); + +/* define some functions to make asoc/ep fill look clean */ +static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, + struct sock *sk, + struct sctp_association *asoc) +{ + union sctp_addr laddr, paddr; + struct dst_entry *dst; + + laddr = list_entry(asoc->base.bind_addr.address_list.next, + struct sctp_sockaddr_entry, list)->a; + paddr = asoc->peer.primary_path->ipaddr; + dst = asoc->peer.primary_path->dst; + + r->idiag_family = sk->sk_family; + r->id.idiag_sport = htons(asoc->base.bind_addr.port); + r->id.idiag_dport = htons(asoc->peer.port); + r->id.idiag_if = dst ? dst->dev->ifindex : 0; + sock_diag_save_cookie(sk, r->id.idiag_cookie); + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + *(struct in6_addr *)r->id.idiag_src = laddr.v6.sin6_addr; + *(struct in6_addr *)r->id.idiag_dst = paddr.v6.sin6_addr; + } else +#endif + { + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + + r->id.idiag_src[0] = laddr.v4.sin_addr.s_addr; + r->id.idiag_dst[0] = paddr.v4.sin_addr.s_addr; + } + + r->idiag_state = asoc->state; + r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; + r->idiag_retrans = asoc->rtx_data_chunks; + r->idiag_expires = jiffies_to_msecs( + asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] - jiffies); +} + +static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, + struct list_head *address_list) +{ + struct sctp_sockaddr_entry *laddr; + int addrlen = sizeof(struct sockaddr_storage); + int addrcnt = 0; + struct nlattr *attr; + void *info = NULL; + + list_for_each_entry_rcu(laddr, address_list, list) + addrcnt++; + + attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt); + if (!attr) + return -EMSGSIZE; + + info = nla_data(attr); + list_for_each_entry_rcu(laddr, address_list, list) { + memcpy(info, &laddr->a, addrlen); + info += addrlen; + } + + return 0; +} + +static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb, + struct sctp_association *asoc) +{ + int addrlen = sizeof(struct sockaddr_storage); + struct sctp_transport *from; + struct nlattr *attr; + void *info = NULL; + + attr = nla_reserve(skb, INET_DIAG_PEERS, + addrlen * asoc->peer.transport_count); + if (!attr) + return -EMSGSIZE; + + info = nla_data(attr); + list_for_each_entry(from, &asoc->peer.transport_addr_list, + transports) { + memcpy(info, &from->ipaddr, addrlen); + info += addrlen; + } + + return 0; +} + +/* sctp asoc/ep fill*/ +static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, + struct sk_buff *skb, + const struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, + int portid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + struct sctp_endpoint *ep = sctp_sk(sk)->ep; + struct list_head *addr_list; + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + int ext = req->idiag_ext; + struct sctp_infox infox; + void *info = NULL; + + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), + nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + BUG_ON(!sk_fullsock(sk)); + + if (asoc) { + inet_diag_msg_sctpasoc_fill(r, sk, asoc); + } else { + inet_diag_msg_common_fill(r, sk); + r->idiag_state = sk->sk_state; + r->idiag_timer = 0; + r->idiag_retrans = 0; + } + + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) + goto errout; + + if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) { + u32 mem[SK_MEMINFO_VARS]; + int amt; + + if (asoc && asoc->ep->sndbuf_policy) + amt = asoc->sndbuf_used; + else + amt = sk_wmem_alloc_get(sk); + mem[SK_MEMINFO_WMEM_ALLOC] = amt; + if (asoc && asoc->ep->rcvbuf_policy) + amt = atomic_read(&asoc->rmem_alloc); + else + amt = sk_rmem_alloc_get(sk); + mem[SK_MEMINFO_RMEM_ALLOC] = amt; + mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; + mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; + mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); + mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); + + if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0) + goto errout; + } + + if (ext & (1 << (INET_DIAG_INFO - 1))) { + struct nlattr *attr; + + attr = nla_reserve_64bit(skb, INET_DIAG_INFO, + sizeof(struct sctp_info), + INET_DIAG_PAD); + if (!attr) + goto errout; + + info = nla_data(attr); + } + infox.sctpinfo = (struct sctp_info *)info; + infox.asoc = asoc; + sctp_diag_get_info(sk, r, &infox); + + addr_list = asoc ? &asoc->base.bind_addr.address_list + : &ep->base.bind_addr.address_list; + if (inet_diag_msg_sctpladdrs_fill(skb, addr_list)) + goto errout; + + if (asoc && (ext & (1 << (INET_DIAG_CONG - 1)))) + if (nla_put_string(skb, INET_DIAG_CONG, "reno") < 0) + goto errout; + + if (asoc && inet_diag_msg_sctpaddrs_fill(skb, asoc)) + goto errout; + + nlmsg_end(skb, nlh); + return 0; + +errout: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* callback and param */ +struct sctp_comm_param { + struct sk_buff *skb; + struct netlink_callback *cb; + const struct inet_diag_req_v2 *r; + const struct nlmsghdr *nlh; +}; + +static size_t inet_assoc_attr_size(struct sctp_association *asoc) +{ + int addrlen = sizeof(struct sockaddr_storage); + int addrcnt = 0; + struct sctp_sockaddr_entry *laddr; + + list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list, + list) + addrcnt++; + + return nla_total_size(sizeof(struct sctp_info)) + + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(addrlen * asoc->peer.transport_count) + + nla_total_size(addrlen * addrcnt) + + nla_total_size(sizeof(struct inet_diag_meminfo)) + + nla_total_size(sizeof(struct inet_diag_msg)) + + 64; +} + +static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) +{ + struct sctp_association *assoc = tsp->asoc; + struct sock *sk = tsp->asoc->base.sk; + struct sctp_comm_param *commp = p; + struct sk_buff *in_skb = commp->skb; + const struct inet_diag_req_v2 *req = commp->r; + const struct nlmsghdr *nlh = commp->nlh; + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + int err; + + err = sock_diag_check_cookie(sk, req->id.idiag_cookie); + if (err) + goto out; + + err = -ENOMEM; + rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL); + if (!rep) + goto out; + + lock_sock(sk); + if (sk != assoc->base.sk) { + release_sock(sk); + sk = assoc->base.sk; + lock_sock(sk); + } + err = inet_sctp_diag_fill(sk, assoc, rep, req, + sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0, nlh); + release_sock(sk); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(rep); + goto out; + } + + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, + MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + return err; +} + +static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) +{ + struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct sk_buff *skb = commp->skb; + struct netlink_callback *cb = commp->cb; + const struct inet_diag_req_v2 *r = commp->r; + struct sctp_association *assoc = + list_entry(ep->asocs.next, struct sctp_association, asocs); + int err = 0; + + /* find the ep only once through the transports by this condition */ + if (tsp->asoc != assoc) + goto out; + + if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) + goto out; + + lock_sock(sk); + if (sk != assoc->base.sk) + goto release; + list_for_each_entry(assoc, &ep->asocs, asocs) { + if (cb->args[4] < cb->args[1]) + goto next; + + if (r->id.idiag_sport != htons(assoc->base.bind_addr.port) && + r->id.idiag_sport) + goto next; + if (r->id.idiag_dport != htons(assoc->peer.port) && + r->id.idiag_dport) + goto next; + + if (!cb->args[3] && + inet_sctp_diag_fill(sk, NULL, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, cb->nlh) < 0) { + cb->args[3] = 1; + err = 2; + goto release; + } + cb->args[3] = 1; + + if (inet_sctp_diag_fill(sk, assoc, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) { + err = 2; + goto release; + } +next: + cb->args[4]++; + } + cb->args[1] = 0; + cb->args[2]++; + cb->args[3] = 0; + cb->args[4] = 0; +release: + release_sock(sk); + return err; +out: + cb->args[2]++; + return err; +} + +static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) +{ + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct sk_buff *skb = commp->skb; + struct netlink_callback *cb = commp->cb; + const struct inet_diag_req_v2 *r = commp->r; + struct net *net = sock_net(skb->sk); + struct inet_sock *inet = inet_sk(sk); + int err = 0; + + if (!net_eq(sock_net(sk), net)) + goto out; + + if (cb->args[4] < cb->args[1]) + goto next; + + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next; + + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next; + + if (r->id.idiag_dport != inet->inet_dport && + r->id.idiag_dport) + goto next; + + if (inet_sctp_diag_fill(sk, NULL, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->nlh) < 0) { + err = 2; + goto out; + } +next: + cb->args[4]++; +out: + return err; +} + +/* define the functions for sctp_diag_handler*/ +static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info) +{ + struct sctp_infox *infox = (struct sctp_infox *)info; + + if (infox->asoc) { + r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc); + r->idiag_wqueue = infox->asoc->sndbuf_used; + } else { + r->idiag_rqueue = sk->sk_ack_backlog; + r->idiag_wqueue = sk->sk_max_ack_backlog; + } + if (infox->sctpinfo) + sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo); +} + +static int sctp_diag_dump_one(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + union sctp_addr laddr, paddr; + struct sctp_comm_param commp = { + .skb = in_skb, + .r = req, + .nlh = nlh, + }; + + if (req->sdiag_family == AF_INET) { + laddr.v4.sin_port = req->id.idiag_sport; + laddr.v4.sin_addr.s_addr = req->id.idiag_src[0]; + laddr.v4.sin_family = AF_INET; + + paddr.v4.sin_port = req->id.idiag_dport; + paddr.v4.sin_addr.s_addr = req->id.idiag_dst[0]; + paddr.v4.sin_family = AF_INET; + } else { + laddr.v6.sin6_port = req->id.idiag_sport; + memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64); + laddr.v6.sin6_family = AF_INET6; + + paddr.v6.sin6_port = req->id.idiag_dport; + memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64); + paddr.v6.sin6_family = AF_INET6; + } + + return sctp_transport_lookup_process(sctp_tsp_dump_one, + net, &laddr, &paddr, &commp); +} + +static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + u32 idiag_states = r->idiag_states; + struct net *net = sock_net(skb->sk); + struct sctp_comm_param commp = { + .skb = skb, + .cb = cb, + .r = r, + }; + + /* eps hashtable dumps + * args: + * 0 : if it will traversal listen sock + * 1 : to record the sock pos of this time's traversal + * 4 : to work as a temporary variable to traversal list + */ + if (cb->args[0] == 0) { + if (!(idiag_states & TCPF_LISTEN)) + goto skip; + if (sctp_for_each_endpoint(sctp_ep_dump, &commp)) + goto done; +skip: + cb->args[0] = 1; + cb->args[1] = 0; + cb->args[4] = 0; + } + + /* asocs by transport hashtable dump + * args: + * 1 : to record the assoc pos of this time's traversal + * 2 : to record the transport pos of this time's traversal + * 3 : to mark if we have dumped the ep info of the current asoc + * 4 : to work as a temporary variable to traversal list + */ + if (!(idiag_states & ~TCPF_LISTEN)) + goto done; + sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp); +done: + cb->args[1] = cb->args[4]; + cb->args[4] = 0; +} + +static const struct inet_diag_handler sctp_diag_handler = { + .dump = sctp_diag_dump, + .dump_one = sctp_diag_dump_one, + .idiag_get_info = sctp_diag_get_info, + .idiag_type = IPPROTO_SCTP, + .idiag_info_size = sizeof(struct sctp_info), +}; + +static int __init sctp_diag_init(void) +{ + return inet_diag_register(&sctp_diag_handler); +} + +static void __exit sctp_diag_exit(void) +{ + inet_diag_unregister(&sctp_diag_handler); +} + +module_init(sctp_diag_init); +module_exit(sctp_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 41b081a64752..aa3712259368 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1218,6 +1218,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_cmd_seq_t *commands, gfp_t gfp) { + struct sock *sk = ep->base.sk; + struct sctp_sock *sp = sctp_sk(sk); int error = 0; int force; sctp_cmd_t *cmd; @@ -1738,6 +1740,10 @@ out: error = sctp_outq_uncork(&asoc->outqueue, gfp); } else if (local_cork) error = sctp_outq_uncork(&asoc->outqueue, gfp); + + if (sp->data_ready_signalled) + sp->data_ready_signalled = 0; + return error; nomem: error = -ENOMEM; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 878d28eda1a6..777d0324594a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4202,6 +4202,222 @@ static void sctp_shutdown(struct sock *sk, int how) } } +int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, + struct sctp_info *info) +{ + struct sctp_transport *prim; + struct list_head *pos; + int mask; + + memset(info, 0, sizeof(*info)); + if (!asoc) { + struct sctp_sock *sp = sctp_sk(sk); + + info->sctpi_s_autoclose = sp->autoclose; + info->sctpi_s_adaptation_ind = sp->adaptation_ind; + info->sctpi_s_pd_point = sp->pd_point; + info->sctpi_s_nodelay = sp->nodelay; + info->sctpi_s_disable_fragments = sp->disable_fragments; + info->sctpi_s_v4mapped = sp->v4mapped; + info->sctpi_s_frag_interleave = sp->frag_interleave; + + return 0; + } + + info->sctpi_tag = asoc->c.my_vtag; + info->sctpi_state = asoc->state; + info->sctpi_rwnd = asoc->a_rwnd; + info->sctpi_unackdata = asoc->unack_data; + info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); + info->sctpi_instrms = asoc->c.sinit_max_instreams; + info->sctpi_outstrms = asoc->c.sinit_num_ostreams; + list_for_each(pos, &asoc->base.inqueue.in_chunk_list) + info->sctpi_inqueue++; + list_for_each(pos, &asoc->outqueue.out_chunk_list) + info->sctpi_outqueue++; + info->sctpi_overall_error = asoc->overall_error_count; + info->sctpi_max_burst = asoc->max_burst; + info->sctpi_maxseg = asoc->frag_point; + info->sctpi_peer_rwnd = asoc->peer.rwnd; + info->sctpi_peer_tag = asoc->c.peer_vtag; + + mask = asoc->peer.ecn_capable << 1; + mask = (mask | asoc->peer.ipv4_address) << 1; + mask = (mask | asoc->peer.ipv6_address) << 1; + mask = (mask | asoc->peer.hostname_address) << 1; + mask = (mask | asoc->peer.asconf_capable) << 1; + mask = (mask | asoc->peer.prsctp_capable) << 1; + mask = (mask | asoc->peer.auth_capable); + info->sctpi_peer_capable = mask; + mask = asoc->peer.sack_needed << 1; + mask = (mask | asoc->peer.sack_generation) << 1; + mask = (mask | asoc->peer.zero_window_announced); + info->sctpi_peer_sack = mask; + + info->sctpi_isacks = asoc->stats.isacks; + info->sctpi_osacks = asoc->stats.osacks; + info->sctpi_opackets = asoc->stats.opackets; + info->sctpi_ipackets = asoc->stats.ipackets; + info->sctpi_rtxchunks = asoc->stats.rtxchunks; + info->sctpi_outofseqtsns = asoc->stats.outofseqtsns; + info->sctpi_idupchunks = asoc->stats.idupchunks; + info->sctpi_gapcnt = asoc->stats.gapcnt; + info->sctpi_ouodchunks = asoc->stats.ouodchunks; + info->sctpi_iuodchunks = asoc->stats.iuodchunks; + info->sctpi_oodchunks = asoc->stats.oodchunks; + info->sctpi_iodchunks = asoc->stats.iodchunks; + info->sctpi_octrlchunks = asoc->stats.octrlchunks; + info->sctpi_ictrlchunks = asoc->stats.ictrlchunks; + + prim = asoc->peer.primary_path; + memcpy(&info->sctpi_p_address, &prim->ipaddr, + sizeof(struct sockaddr_storage)); + info->sctpi_p_state = prim->state; + info->sctpi_p_cwnd = prim->cwnd; + info->sctpi_p_srtt = prim->srtt; + info->sctpi_p_rto = jiffies_to_msecs(prim->rto); + info->sctpi_p_hbinterval = prim->hbinterval; + info->sctpi_p_pathmaxrxt = prim->pathmaxrxt; + info->sctpi_p_sackdelay = jiffies_to_msecs(prim->sackdelay); + info->sctpi_p_ssthresh = prim->ssthresh; + info->sctpi_p_partial_bytes_acked = prim->partial_bytes_acked; + info->sctpi_p_flight_size = prim->flight_size; + info->sctpi_p_error = prim->error_count; + + return 0; +} +EXPORT_SYMBOL_GPL(sctp_get_sctp_info); + +/* use callback to avoid exporting the core structure */ +int sctp_transport_walk_start(struct rhashtable_iter *iter) +{ + int err; + + err = rhashtable_walk_init(&sctp_transport_hashtable, iter, + GFP_KERNEL); + if (err) + return err; + + err = rhashtable_walk_start(iter); + if (err && err != -EAGAIN) { + rhashtable_walk_exit(iter); + return err; + } + + return 0; +} + +void sctp_transport_walk_stop(struct rhashtable_iter *iter) +{ + rhashtable_walk_stop(iter); + rhashtable_walk_exit(iter); +} + +struct sctp_transport *sctp_transport_get_next(struct net *net, + struct rhashtable_iter *iter) +{ + struct sctp_transport *t; + + t = rhashtable_walk_next(iter); + for (; t; t = rhashtable_walk_next(iter)) { + if (IS_ERR(t)) { + if (PTR_ERR(t) == -EAGAIN) + continue; + break; + } + + if (net_eq(sock_net(t->asoc->base.sk), net) && + t->asoc->peer.primary_path == t) + break; + } + + return t; +} + +struct sctp_transport *sctp_transport_get_idx(struct net *net, + struct rhashtable_iter *iter, + int pos) +{ + void *obj = SEQ_START_TOKEN; + + while (pos && (obj = sctp_transport_get_next(net, iter)) && + !IS_ERR(obj)) + pos--; + + return obj; +} + +int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), + void *p) { + int err = 0; + int hash = 0; + struct sctp_ep_common *epb; + struct sctp_hashbucket *head; + + for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize; + hash++, head++) { + read_lock(&head->lock); + sctp_for_each_hentry(epb, &head->chain) { + err = cb(sctp_ep(epb), p); + if (err) + break; + } + read_unlock(&head->lock); + } + + return err; +} +EXPORT_SYMBOL_GPL(sctp_for_each_endpoint); + +int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), + struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr, void *p) +{ + struct sctp_transport *transport; + int err = 0; + + rcu_read_lock(); + transport = sctp_addrs_lookup_transport(net, laddr, paddr); + if (!transport || !sctp_transport_hold(transport)) + goto out; + err = cb(transport, p); + sctp_transport_put(transport); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); + +int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), + struct net *net, int pos, void *p) { + struct rhashtable_iter hti; + void *obj; + int err; + + err = sctp_transport_walk_start(&hti); + if (err) + return err; + + sctp_transport_get_idx(net, &hti, pos); + obj = sctp_transport_get_next(net, &hti); + for (; obj && !IS_ERR(obj); obj = sctp_transport_get_next(net, &hti)) { + struct sctp_transport *transport = obj; + + if (!sctp_transport_hold(transport)) + continue; + err = cb(transport, p); + sctp_transport_put(transport); + if (err) + break; + } + sctp_transport_walk_stop(&hti); + + return err; +} +EXPORT_SYMBOL_GPL(sctp_for_each_transport); + /* 7.2.1 Association Status (SCTP_STATUS) * Applications can retrieve current status information about an @@ -6430,6 +6646,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) poll_wait(file, sk_sleep(sk), wait); + sock_rps_record_flow(sk); + /* A TCP-style listening socket becomes readable when the accept queue * is not empty. */ @@ -6764,13 +6982,11 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, * However, this function was correct in any case. 8) */ if (flags & MSG_PEEK) { - spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) atomic_inc(&skb->users); - spin_unlock_bh(&sk->sk_receive_queue.lock); } else { - skb = skb_dequeue(&sk->sk_receive_queue); + skb = __skb_dequeue(&sk->sk_receive_queue); } if (skb) @@ -7186,6 +7402,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_lingertime = sk->sk_lingertime; newsk->sk_rcvtimeo = sk->sk_rcvtimeo; newsk->sk_sndtimeo = sk->sk_sndtimeo; + newsk->sk_rxhash = sk->sk_rxhash; newinet = inet_sk(newsk); diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ce469d648ffb..ec166d2bd2d9 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -141,7 +141,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) */ if (!skb_queue_empty(&sp->pd_lobby)) { struct list_head *list; - sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue); + skb_queue_splice_tail_init(&sp->pd_lobby, + &sk->sk_receive_queue); list = (struct list_head *)&sctp_sk(sk)->pd_lobby; INIT_LIST_HEAD(list); return 1; @@ -193,6 +194,7 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sock *sk = ulpq->asoc->base.sk; + struct sctp_sock *sp = sctp_sk(sk); struct sk_buff_head *queue, *skb_list; struct sk_buff *skb = sctp_event2skb(event); int clear_pd = 0; @@ -210,7 +212,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) sk_incoming_cpu_update(sk); } /* Check if the user wishes to receive this event. */ - if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe)) + if (!sctp_ulpevent_is_enabled(event, &sp->subscribe)) goto out_free; /* If we are in partial delivery mode, post to the lobby until @@ -218,7 +220,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) * the association the cause of the partial delivery. */ - if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) { + if (atomic_read(&sp->pd_mode) == 0) { queue = &sk->sk_receive_queue; } else { if (ulpq->pd_mode) { @@ -230,7 +232,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if ((event->msg_flags & MSG_NOTIFICATION) || (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK))) - queue = &sctp_sk(sk)->pd_lobby; + queue = &sp->pd_lobby; else { clear_pd = event->msg_flags & MSG_EOR; queue = &sk->sk_receive_queue; @@ -241,10 +243,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) * can queue this to the receive queue instead * of the lobby. */ - if (sctp_sk(sk)->frag_interleave) + if (sp->frag_interleave) queue = &sk->sk_receive_queue; else - queue = &sctp_sk(sk)->pd_lobby; + queue = &sp->pd_lobby; } } @@ -252,7 +254,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) * collected on a list. */ if (skb_list) - sctp_skb_list_tail(skb_list, queue); + skb_queue_splice_tail_init(skb_list, queue); else __skb_queue_tail(queue, skb); @@ -263,8 +265,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if (clear_pd) sctp_ulpq_clear_pd(ulpq); - if (queue == &sk->sk_receive_queue) + if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) { + sp->data_ready_signalled = 1; sk->sk_data_ready(sk); + } return 1; out_free: @@ -1125,11 +1129,13 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *ev = NULL; struct sock *sk; + struct sctp_sock *sp; if (!ulpq->pd_mode) return; sk = ulpq->asoc->base.sk; + sp = sctp_sk(sk); if (sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT, &sctp_sk(sk)->subscribe)) ev = sctp_ulpevent_make_pdapi(ulpq->asoc, @@ -1139,6 +1145,8 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev)); /* If there is data waiting, send it up the socket now. */ - if (sctp_ulpq_clear_pd(ulpq) || ev) + if ((sctp_ulpq_clear_pd(ulpq) || ev) && !sp->data_ready_signalled) { + sp->data_ready_signalled = 1; sk->sk_data_ready(sk); + } } diff --git a/net/socket.c b/net/socket.c index 35e4523edada..e7793f5601ae 100644 --- a/net/socket.c +++ b/net/socket.c @@ -587,22 +587,19 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) +void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) { u8 flags = *tx_flags; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE) + if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) flags |= SKBTX_HW_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) + if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) flags |= SKBTX_SW_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED) + if (tsflags & SOF_TIMESTAMPING_TX_SCHED) flags |= SKBTX_SCHED_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK) - flags |= SKBTX_ACK_TSTAMP; - *tx_flags = flags; } EXPORT_SYMBOL(__sock_tx_timestamp); @@ -709,17 +706,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, - size_t size, int flags) + int flags) { - return sock->ops->recvmsg(sock, msg, size, flags); + return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags); } -int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, - int flags) +int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) { - int err = security_socket_recvmsg(sock, msg, size, flags); + int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags); - return err ?: sock_recvmsg_nosec(sock, msg, size, flags); + return err ?: sock_recvmsg_nosec(sock, msg, flags); } EXPORT_SYMBOL(sock_recvmsg); @@ -746,7 +742,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size); set_fs(KERNEL_DS); - result = sock_recvmsg(sock, msg, size, flags); + result = sock_recvmsg(sock, msg, flags); set_fs(oldfs); return result; } @@ -796,7 +792,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!iov_iter_count(to)) /* Match SYS5 behaviour */ return 0; - res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags); + res = sock_recvmsg(sock, &msg, msg.msg_flags); *to = msg.msg_iter; return res; } @@ -1046,7 +1042,7 @@ static int sock_fasync(int fd, struct file *filp, int on) return -EINVAL; lock_sock(sk); - wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); + wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk)); fasync_helper(fd, filp, on, &wq->fasync_list); if (!wq->fasync_list) @@ -1696,7 +1692,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iocb = NULL; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags); + err = sock_recvmsg(sock, &msg, flags); if (err >= 0 && addr != NULL) { err2 = move_addr_to_user(&address, @@ -2073,7 +2069,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; unsigned long cmsg_ptr; - int total_len, len; + int len; ssize_t err; /* kernel mode address */ @@ -2091,7 +2087,6 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov); if (err < 0) return err; - total_len = iov_iter_count(&msg_sys->msg_iter); cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); @@ -2101,8 +2096,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, - total_len, flags); + err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags); if (err < 0) goto out_freeiov; len = err; diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index de70c78025d7..f217c348b341 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -155,7 +155,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) struct xdr_skb_reader desc; desc.skb = skb; - desc.offset = sizeof(struct udphdr); + desc.offset = 0; desc.count = skb->len - desc.offset; if (skb_csum_unnecessary(skb)) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1413cdcc131c..dadfec66dbd8 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -85,8 +85,7 @@ static void svc_reclassify_socket(struct socket *sock) { struct sock *sk = sock->sk; - WARN_ON_ONCE(sock_owned_by_user(sk)); - if (sock_owned_by_user(sk)) + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) return; switch (sk->sk_family) { @@ -617,7 +616,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) svsk->sk_sk->sk_stamp = skb->tstamp; set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ - len = skb->len - sizeof(struct udphdr); + len = skb->len; rqstp->rq_arg.len = len; rqstp->rq_prot = IPPROTO_UDP; @@ -641,8 +640,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) skb_free_datagram_locked(svsk->sk_sk, skb); } else { /* we can use it in-place */ - rqstp->rq_arg.head[0].iov_base = skb->data + - sizeof(struct udphdr); + rqstp->rq_arg.head[0].iov_base = skb->data; rqstp->rq_arg.head[0].iov_len = len; if (skb_checksum_complete(skb)) goto out_free; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 65e759569e48..b90c5397b5e1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -995,15 +995,14 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, u32 _xid; __be32 *xp; - repsize = skb->len - sizeof(struct udphdr); + repsize = skb->len; if (repsize < 4) { dprintk("RPC: impossible RPC reply size %d!\n", repsize); return; } /* Copy the XID from the skb... */ - xp = skb_header_pointer(skb, sizeof(struct udphdr), - sizeof(_xid), &_xid); + xp = skb_header_pointer(skb, 0, sizeof(_xid), &_xid); if (xp == NULL) return; @@ -1019,11 +1018,11 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, /* Suck it into the iovec, verify checksum if not done by hw. */ if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { - UDPX_INC_STATS_BH(sk, UDP_MIB_INERRORS); + __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); goto out_unlock; } - UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS); + __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); xprt_adjust_cwnd(xprt, task, copied); xprt_complete_rqst(task, copied); @@ -1881,8 +1880,7 @@ static inline void xs_reclassify_socket6(struct socket *sock) static inline void xs_reclassify_socket(int family, struct socket *sock) { - WARN_ON_ONCE(sock_owned_by_user(sock->sk)); - if (sock_owned_by_user(sock->sk)) + if (WARN_ON_ONCE(!sock_allow_reclassification(sock->sk))) return; switch (family) { @@ -1952,6 +1950,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; + sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; sk->sk_allocation = GFP_NOIO; @@ -2138,6 +2137,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; + sock_set_flag(sk, SOCK_FASYNC); sk->sk_allocation = GFP_NOIO; xprt_set_connected(xprt); @@ -2239,6 +2239,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; + sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; sk->sk_allocation = GFP_NOIO; diff --git a/net/switchdev/Kconfig b/net/switchdev/Kconfig index 86a47e17cfaf..651fa201a570 100644 --- a/net/switchdev/Kconfig +++ b/net/switchdev/Kconfig @@ -3,7 +3,7 @@ # config NET_SWITCHDEV - bool "Switch (and switch-ish) device support (EXPERIMENTAL)" + bool "Switch (and switch-ish) device support" depends on INET ---help--- This module provides glue between core networking code and device diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index b7e01d88bdc5..59658b2e9cdf 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1188,6 +1188,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, .dst = dst, .dst_len = dst_len, + .fi = fi, .tos = tos, .type = type, .nlflags = nlflags, @@ -1196,8 +1197,6 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, struct net_device *dev; int err = 0; - memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); - /* Don't offload route if using custom ip rules or if * IPv4 FIB offloading has been disabled completely. */ @@ -1242,6 +1241,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, .dst = dst, .dst_len = dst_len, + .fi = fi, .tos = tos, .type = type, .nlflags = 0, @@ -1250,8 +1250,6 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, struct net_device *dev; int err = 0; - memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); - if (!(fi->fib_flags & RTNH_F_OFFLOAD)) return 0; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 27a5406213c6..6f11c62bc8f9 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -205,6 +205,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, struct tipc_bearer *b; struct tipc_media *m; struct tipc_bearer_names b_names; + struct sk_buff *skb; char addr_string[16]; u32 bearer_id; u32 with_this_prio; @@ -301,7 +302,7 @@ restart: b->net_plane = bearer_id + 'A'; b->priority = priority; - res = tipc_disc_create(net, b, &b->bcast_addr); + res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { bearer_disable(net, b); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", @@ -310,7 +311,8 @@ restart: } rcu_assign_pointer(tn->bearer_list[bearer_id], b); - + if (skb) + tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, tipc_addr_string_fill(addr_string, disc_domain), priority); @@ -335,23 +337,16 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) */ static void bearer_disable(struct net *net, struct tipc_bearer *b) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - u32 i; + struct tipc_net *tn = tipc_net(net); + int bearer_id = b->identity; pr_info("Disabling bearer <%s>\n", b->name); b->media->disable_media(b); - - tipc_node_delete_links(net, b->identity); + tipc_node_delete_links(net, bearer_id); RCU_INIT_POINTER(b->media_ptr, NULL); if (b->link_req) tipc_disc_delete(b->link_req); - - for (i = 0; i < MAX_BEARERS; i++) { - if (b == rtnl_dereference(tn->bearer_list[i])) { - RCU_INIT_POINTER(tn->bearer_list[i], NULL); - break; - } - } + RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); kfree_rcu(b, rcu); } @@ -394,7 +389,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface - * @buf: the packet to be sent + * @skb: the packet to be sent * @b: the bearer through which the packet is to be sent * @dest: peer destination address */ @@ -403,17 +398,21 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, { struct net_device *dev; int delta; + void *tipc_ptr; dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); if (!dev) return 0; + /* Send RESET message even if bearer is detached from device */ + tipc_ptr = rtnl_dereference(dev->tipc_ptr); + if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb)))) + goto drop; + delta = dev->hard_header_len - skb_headroom(skb); if ((delta > 0) && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(skb); - return 0; - } + pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) + goto drop; skb_reset_network_header(skb); skb->dev = dev; @@ -422,6 +421,9 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, dev->dev_addr, skb->len); dev_queue_xmit(skb); return 0; +drop: + kfree_skb(skb); + return 0; } int tipc_bearer_mtu(struct net *net, u32 bearer_id) @@ -450,6 +452,8 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (likely(b)) b->media->send_msg(net, skb, b, dest); + else + kfree_skb(skb); rcu_read_unlock(); } @@ -468,11 +472,11 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, rcu_read_lock(); b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b)) { - skb_queue_walk_safe(xmitq, skb, tmp) { - __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, dst); - } + if (unlikely(!b)) + __skb_queue_purge(xmitq); + skb_queue_walk_safe(xmitq, skb, tmp) { + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, dst); } rcu_read_unlock(); } @@ -490,14 +494,14 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, rcu_read_lock(); b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b)) { - skb_queue_walk_safe(xmitq, skb, tmp) { - hdr = buf_msg(skb); - msg_set_non_seq(hdr, 1); - msg_set_mc_netid(hdr, net_id); - __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, &b->bcast_addr); - } + if (unlikely(!b)) + __skb_queue_purge(xmitq); + skb_queue_walk_safe(xmitq, skb, tmp) { + hdr = buf_msg(skb); + msg_set_non_seq(hdr, 1); + msg_set_mc_netid(hdr, net_id); + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, &b->bcast_addr); } rcu_read_unlock(); } @@ -513,24 +517,21 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, * ignores packets sent using interface multicast, and traffic sent to other * nodes (which can happen if interface is running in promiscuous mode). */ -static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct tipc_bearer *b; rcu_read_lock(); b = rcu_dereference_rtnl(dev->tipc_ptr); - if (likely(b)) { - if (likely(buf->pkt_type <= PACKET_BROADCAST)) { - buf->next = NULL; - tipc_rcv(dev_net(dev), buf, b); - rcu_read_unlock(); - return NET_RX_SUCCESS; - } + if (likely(b && (skb->pkt_type <= PACKET_BROADCAST))) { + skb->next = NULL; + tipc_rcv(dev_net(dev), skb, b); + rcu_read_unlock(); + return NET_RX_SUCCESS; } rcu_read_unlock(); - - kfree_skb(buf); + kfree_skb(skb); return NET_RX_DROP; } @@ -548,9 +549,18 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); + struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; + int i; b = rtnl_dereference(dev->tipc_ptr); + if (!b) { + for (i = 0; i < MAX_BEARERS; b = NULL, i++) { + b = rtnl_dereference(tn->bearer_list[i]); + if (b && (b->media_ptr == dev)) + break; + } + } if (!b) return NOTIFY_DONE; @@ -560,13 +570,20 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, case NETDEV_CHANGE: if (netif_carrier_ok(dev)) break; + case NETDEV_UP: + rcu_assign_pointer(dev->tipc_ptr, b); + break; case NETDEV_GOING_DOWN: + RCU_INIT_POINTER(dev->tipc_ptr, NULL); + synchronize_net(); + tipc_reset_bearer(net, b); + break; case NETDEV_CHANGEMTU: tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: b->media->raw2addr(b, &b->addr, - (char *)dev->dev_addr); + (char *)dev->dev_addr); tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index e31820516774..f686e41b5abb 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -42,8 +42,6 @@ #include <net/genetlink.h> #define MAX_MEDIA 3 -#define MAX_NODES 4096 -#define WSIZE 32 /* Identifiers associated with TIPC message header media address info * - address info field is 32 bytes long @@ -62,16 +60,6 @@ #define TIPC_MEDIA_TYPE_UDP 3 /** - * struct tipc_node_map - set of node identifiers - * @count: # of nodes in set - * @map: bitmap of node identifiers that are in the set - */ -struct tipc_node_map { - u32 count; - u32 map[MAX_NODES / WSIZE]; -}; - -/** * struct tipc_media_addr - destination address used by TIPC bearers * @value: address info (format defined by media) * @media_id: TIPC media type identifier @@ -142,7 +130,6 @@ struct tipc_media { * @identity: array index of this bearer within TIPC bearer array * @link_req: ptr to (optional) structure making periodic link setup requests * @net_plane: network plane ('A' through 'H') currently associated with bearer - * @nodes: indicates which nodes in cluster can be reached through bearer * * Note: media-specific code is responsible for initialization of the fields * indicated below when a bearer is enabled; TIPC's generic bearer code takes @@ -163,8 +150,6 @@ struct tipc_bearer { u32 identity; struct tipc_link_req *link_req; char net_plane; - int node_cnt; - struct tipc_node_map nodes; }; struct tipc_bearer_names { diff --git a/net/tipc/core.c b/net/tipc/core.c index e2bdb07a49a2..fe1b062c4f18 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -112,11 +112,9 @@ static int __init tipc_init(void) pr_info("Activated (version " TIPC_MOD_VER ")\n"); - sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_LOW_IMPORTANCE; - sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_CRITICAL_IMPORTANCE; - sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; + sysctl_tipc_rmem[0] = RCVBUF_MIN; + sysctl_tipc_rmem[1] = RCVBUF_DEF; + sysctl_tipc_rmem[2] = RCVBUF_MAX; err = tipc_netlink_start(); if (err) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index f1e738e80535..ad9d477cc242 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -268,10 +268,9 @@ exit: * Returns 0 if successful, otherwise -errno. */ int tipc_disc_create(struct net *net, struct tipc_bearer *b, - struct tipc_media_addr *dest) + struct tipc_media_addr *dest, struct sk_buff **skb) { struct tipc_link_req *req; - struct sk_buff *skb; req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) @@ -293,9 +292,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); b->link_req = req; - skb = skb_clone(req->buf, GFP_ATOMIC); - if (skb) - tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); + *skb = skb_clone(req->buf, GFP_ATOMIC); return 0; } diff --git a/net/tipc/discover.h b/net/tipc/discover.h index c9b12770c5ed..b80a335389c0 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -40,7 +40,7 @@ struct tipc_link_req; int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, - struct tipc_media_addr *dest); + struct tipc_media_addr *dest, struct sk_buff **skb); void tipc_disc_delete(struct tipc_link_req *req); void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); diff --git a/net/tipc/link.c b/net/tipc/link.c index 7d2bb3e70baa..7059c94f33c5 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -140,6 +140,7 @@ struct tipc_link { char if_name[TIPC_MAX_IF_NAME]; u32 priority; char net_plane; + u16 rst_cnt; /* Failover/synch */ u16 drop_point; @@ -701,40 +702,34 @@ static void link_profile_stats(struct tipc_link *l) /* tipc_link_timeout - perform periodic task as instructed from node timeout */ -/* tipc_link_timeout - perform periodic task as instructed from node timeout - */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { - int rc = 0; - int mtyp = STATE_MSG; - bool xmit = false; - bool prb = false; + int mtyp, rc = 0; + bool state = false; + bool probe = false; + bool setup = false; u16 bc_snt = l->bc_sndlink->snd_nxt - 1; u16 bc_acked = l->bc_rcvlink->acked; - bool bc_up = link_is_up(l->bc_rcvlink); link_profile_stats(l); switch (l->state) { case LINK_ESTABLISHED: case LINK_SYNCHING: - if (!l->silent_intv_cnt) { - if (bc_up && (bc_acked != bc_snt)) - xmit = true; - } else if (l->silent_intv_cnt <= l->abort_limit) { - xmit = true; - prb = true; - } else { - rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - } + if (l->silent_intv_cnt > l->abort_limit) + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + mtyp = STATE_MSG; + state = bc_acked != bc_snt; + probe = l->silent_intv_cnt; l->silent_intv_cnt++; break; case LINK_RESET: - xmit = true; + setup = l->rst_cnt++ <= 4; + setup |= !(l->rst_cnt % 16); mtyp = RESET_MSG; break; case LINK_ESTABLISHING: - xmit = true; + setup = true; mtyp = ACTIVATE_MSG; break; case LINK_PEER_RESET: @@ -745,8 +740,8 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) break; } - if (xmit) - tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, xmitq); + if (state || probe || setup) + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); return rc; } @@ -833,6 +828,7 @@ void tipc_link_reset(struct tipc_link *l) l->rcv_nxt = 1; l->acked = 0; l->silent_intv_cnt = 0; + l->rst_cnt = 0; l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; @@ -1110,12 +1106,12 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) return released; } -/* tipc_link_build_ack_msg: prepare link acknowledge message for transmission +/* tipc_link_build_state_msg: prepare link state message for transmission * * Note that sending of broadcast ack is coordinated among nodes, to reduce * risk of ack storms towards the sender */ -int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { if (!l) return 0; @@ -1140,11 +1136,17 @@ int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { int mtyp = RESET_MSG; + struct sk_buff *skb; if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); + + /* Inform peer that this endpoint is going down if applicable */ + skb = skb_peek_tail(xmitq); + if (skb && (l->state == LINK_RESET)) + msg_set_peer_stopping(buf_msg(skb), 1); } /* tipc_link_build_nack_msg: prepare link nack message for transmission @@ -1219,7 +1221,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, if (!tipc_data_input(l, skb, l->inputq)) rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) - rc |= tipc_link_build_ack_msg(l, xmitq); + rc |= tipc_link_build_state_msg(l, xmitq); if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) break; } while ((skb = __skb_dequeue(defq))); @@ -1411,7 +1413,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->priority = peers_prio; /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ - if ((mtyp == RESET_MSG) || !link_is_up(l)) + if (msg_peer_stopping(hdr)) + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + else if ((mtyp == RESET_MSG) || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ diff --git a/net/tipc/link.h b/net/tipc/link.h index 6a94175ee20a..d7e9d42fcb2d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -123,7 +123,7 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); -int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 55778a0aebf3..024da8af91f0 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -715,6 +715,16 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) msg_set_bits(m, 5, 12, 0x1, r); } +static inline u32 msg_peer_stopping(struct tipc_msg *m) +{ + return msg_bits(m, 5, 13, 0x1); +} + +static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) +{ + msg_set_bits(m, 5, 13, 0x1, s); +} + static inline char *msg_media_addr(struct tipc_msg *m) { return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; @@ -733,16 +743,26 @@ static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n) msg_set_bits(m, 9, 16, 0xffff, n); } -static inline u32 msg_bcast_tag(struct tipc_msg *m) +static inline u32 msg_conn_ack(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } -static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n) +static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n) { msg_set_bits(m, 9, 16, 0xffff, n); } +static inline u32 msg_adv_win(struct tipc_msg *m) +{ + return msg_bits(m, 9, 0, 0xffff); +} + +static inline void msg_set_adv_win(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 9, 0, 0xffff, n); +} + static inline u32 msg_max_pkt(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff) * 4; @@ -779,6 +799,11 @@ static inline bool msg_peer_node_is_up(struct tipc_msg *m) return msg_redundant_link(m); } +static inline bool msg_is_reset(struct tipc_msg *hdr) +{ + return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG); +} + struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index d7d050f44fc1..4dfc5c14f8c3 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -802,7 +802,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, goto out; tipc_tlv_sprintf(msg->rep, "%-10u %s", - nla_get_u32(publ[TIPC_NLA_PUBL_REF]), + nla_get_u32(publ[TIPC_NLA_PUBL_KEY]), scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]); out: tipc_tlv_sprintf(msg->rep, "\n"); diff --git a/net/tipc/node.c b/net/tipc/node.c index 9aaa1bc566ae..e01e2c71b5a1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1,7 +1,7 @@ /* * net/tipc/node.c: TIPC node management routines * - * Copyright (c) 2000-2006, 2012-2015, Ericsson AB + * Copyright (c) 2000-2006, 2012-2016, Ericsson AB * Copyright (c) 2005-2006, 2010-2014, Wind River Systems * All rights reserved. * @@ -191,6 +191,20 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) tipc_node_put(n); return mtu; } + +u16 tipc_node_get_capabilities(struct net *net, u32 addr) +{ + struct tipc_node *n; + u16 caps; + + n = tipc_node_find(net, addr); + if (unlikely(!n)) + return TIPC_NODE_CAPABILITIES; + caps = n->capabilities; + tipc_node_put(n); + return caps; +} + /* * A trivial power-of-two bitmask technique is used for speed, since this * operation is done for every incoming TIPC packet. The number of hash table @@ -304,8 +318,11 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) spin_lock_bh(&tn->node_list_lock); n = tipc_node_find(net, addr); - if (n) + if (n) { + /* Same node may come back with new capabilities */ + n->capabilities = capabilities; goto exit; + } n = kzalloc(sizeof(*n), GFP_ATOMIC); if (!n) { pr_warn("Node creation failed, no memory\n"); @@ -525,7 +542,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, struct tipc_link *ol = node_active_link(n, 0); struct tipc_link *nl = n->links[bearer_id].link; - if (!nl) + if (!nl || tipc_link_is_up(nl)) return; tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT); @@ -545,12 +562,16 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, pr_debug("Established link <%s> on network plane %c\n", tipc_link_name(nl), tipc_link_plane(nl)); + /* Ensure that a STATE message goes first */ + tipc_link_build_state_msg(nl, xmitq); + /* First link? => give it both slots */ if (!ol) { *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); n->action_flags |= TIPC_NOTIFY_NODE_UP; + tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); return; } @@ -581,8 +602,12 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, static void tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { + struct tipc_media_addr *maddr; + tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); + maddr = &n->links[bearer_id].maddr; + tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr); tipc_node_write_unlock(n); } @@ -1279,7 +1304,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /* Broadcast ACKs are sent on a unicast link */ if (rc & TIPC_LINK_SND_BC_ACK) { tipc_node_read_lock(n); - tipc_link_build_ack_msg(le->link, &xmitq); + tipc_link_build_state_msg(le->link, &xmitq); tipc_node_read_unlock(n); } diff --git a/net/tipc/node.h b/net/tipc/node.h index f39d9d06e8bb..8264b3d97dc4 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -45,10 +45,11 @@ /* Optional capabilities supported by this code version */ enum { - TIPC_BCAST_SYNCH = (1 << 1) + TIPC_BCAST_SYNCH = (1 << 1), + TIPC_BLOCK_FLOWCTL = (2 << 1) }; -#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH +#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); @@ -70,6 +71,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); +u16 tipc_node_get_capabilities(struct net *net, u32 addr); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); diff --git a/net/tipc/server.c b/net/tipc/server.c index 2446bfbaa309..7a0af2dc0406 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,6 +86,7 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); +static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -102,6 +103,7 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); + tipc_sock_release(con); sock_release(sock); con->sock = NULL; } @@ -184,26 +186,31 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } +static void tipc_sock_release(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); + + tipc_unregister_callbacks(con); +} + static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - if (con->conid) - s->tipc_conn_shutdown(con->conid, con->usr_data); spin_lock_bh(&s->idr_lock); idr_remove(&s->conn_idr, con->conid); s->idr_in_use--; spin_unlock_bh(&s->idr_lock); - tipc_unregister_callbacks(con); - /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this - * connection from server connection list and set - * sk->sk_user_data to 0 before releasing connection object. + * connection from server connection list. */ kernel_sock_shutdown(con->sock, SHUT_RDWR); diff --git a/net/tipc/server.h b/net/tipc/server.h index 9015faedb1b0..34f8055afa3b 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -53,7 +53,7 @@ * @send_wq: send workqueue * @max_rcvbuf_size: maximum permitted receive message length * @tipc_conn_new: callback will be called when new connection is incoming - * @tipc_conn_shutdown: callback will be called when connection is shut down + * @tipc_conn_release: callback will be called before releasing the connection * @tipc_conn_recvmsg: callback will be called when message arrives * @saddr: TIPC server address * @name: server name @@ -70,7 +70,7 @@ struct tipc_server { struct workqueue_struct *send_wq; int max_rcvbuf_size; void *(*tipc_conn_new)(int conid); - void (*tipc_conn_shutdown)(int conid, void *usr_data); + void (*tipc_conn_release)(int conid, void *usr_data); void (*tipc_conn_recvmsg)(struct net *net, int conid, struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3eeb50a27b89..3b7a79991d55 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -96,8 +96,11 @@ struct tipc_sock { uint conn_timeout; atomic_t dupl_rcvcnt; bool link_cong; - uint sent_unacked; - uint rcv_unacked; + u16 snt_unacked; + u16 snd_win; + u16 peer_caps; + u16 rcv_unacked; + u16 rcv_win; struct sockaddr_tipc remote; struct rhash_head node; struct rcu_head rcu; @@ -227,9 +230,29 @@ static struct tipc_sock *tipc_sk(const struct sock *sk) return container_of(sk, struct tipc_sock, sk); } -static int tsk_conn_cong(struct tipc_sock *tsk) +static bool tsk_conn_cong(struct tipc_sock *tsk) { - return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; + return tsk->snt_unacked >= tsk->snd_win; +} + +/* tsk_blocks(): translate a buffer size in bytes to number of + * advertisable blocks, taking into account the ratio truesize(len)/len + * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ + */ +static u16 tsk_adv_blocks(int len) +{ + return len / FLOWCTL_BLK_SZ / 4; +} + +/* tsk_inc(): increment counter for sent or received data + * - If block based flow control is not supported by peer we + * fall back to message based ditto, incrementing the counter + */ +static u16 tsk_inc(struct tipc_sock *tsk, int msglen) +{ + if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) + return ((msglen / FLOWCTL_BLK_SZ) + 1); + return 1; } /** @@ -377,9 +400,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sk->sk_write_space = tipc_write_space; sk->sk_destruct = tipc_sock_destruct; tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; - tsk->sent_unacked = 0; atomic_set(&tsk->dupl_rcvcnt, 0); + /* Start out with safe limits until we receive an advertised window */ + tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); + tsk->rcv_win = tsk->snd_win; + if (sock->state == SS_READY) { tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) @@ -775,7 +801,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) struct sock *sk = &tsk->sk; struct tipc_msg *hdr = buf_msg(skb); int mtyp = msg_type(hdr); - int conn_cong; + bool conn_cong; /* Ignore if connection cannot be validated: */ if (!tsk_peer_msg(tsk, hdr)) @@ -789,7 +815,9 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) return; } else if (mtyp == CONN_ACK) { conn_cong = tsk_conn_cong(tsk); - tsk->sent_unacked -= msg_msgcnt(hdr); + tsk->snt_unacked -= msg_conn_ack(hdr); + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) + tsk->snd_win = msg_adv_win(hdr); if (conn_cong) sk->sk_write_space(sk); } else if (mtyp != CONN_PROBE_REPLY) { @@ -1020,12 +1048,14 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) u32 dnode; uint mtu, send, sent = 0; struct iov_iter save; + int hlen = MIN_H_SIZE; /* Handle implied connection establishment */ if (unlikely(dest)) { rc = __tipc_sendmsg(sock, m, dsz); + hlen = msg_hdr_sz(mhdr); if (dsz && (dsz == rc)) - tsk->sent_unacked = 1; + tsk->snt_unacked = tsk_inc(tsk, dsz + hlen); return rc; } if (dsz > (uint)INT_MAX) @@ -1054,7 +1084,7 @@ next: if (likely(!tsk_conn_cong(tsk))) { rc = tipc_node_xmit(net, &pktchain, dnode, portid); if (likely(!rc)) { - tsk->sent_unacked++; + tsk->snt_unacked += tsk_inc(tsk, send + hlen); sent += send; if (sent == dsz) return dsz; @@ -1118,6 +1148,13 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); + tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) + return; + + /* Fall back to message based flow control */ + tsk->rcv_win = FLOWCTL_MSG_WIN; + tsk->snd_win = FLOWCTL_MSG_WIN; } /** @@ -1214,7 +1251,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, return 0; } -static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) +static void tipc_sk_send_ack(struct tipc_sock *tsk) { struct net *net = sock_net(&tsk->sk); struct sk_buff *skb = NULL; @@ -1230,7 +1267,14 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!skb) return; msg = buf_msg(skb); - msg_set_msgcnt(msg, ack); + msg_set_conn_ack(msg, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + + /* Adjust to and advertize the correct window limit */ + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) { + tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf); + msg_set_adv_win(msg, tsk->rcv_win); + } tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg)); } @@ -1288,7 +1332,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, long timeo; unsigned int sz; u32 err; - int res; + int res, hlen; /* Catch invalid receive requests */ if (unlikely(!buf_len)) @@ -1313,6 +1357,7 @@ restart: buf = skb_peek(&sk->sk_receive_queue); msg = buf_msg(buf); sz = msg_data_sz(msg); + hlen = msg_hdr_sz(msg); err = msg_errcode(msg); /* Discard an empty non-errored message & try again */ @@ -1335,7 +1380,7 @@ restart: sz = buf_len; m->msg_flags |= MSG_TRUNC; } - res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz); + res = skb_copy_datagram_msg(buf, hlen, m, sz); if (res) goto exit; res = sz; @@ -1347,15 +1392,15 @@ restart: res = -ECONNRESET; } - /* Consume received message (optional) */ - if (likely(!(flags & MSG_PEEK))) { - if ((sock->state != SS_READY) && - (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { - tipc_sk_send_ack(tsk, tsk->rcv_unacked); - tsk->rcv_unacked = 0; - } - tsk_advance_rx_queue(sk); + if (unlikely(flags & MSG_PEEK)) + goto exit; + + if (likely(sock->state != SS_READY)) { + tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); + if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) + tipc_sk_send_ack(tsk); } + tsk_advance_rx_queue(sk); exit: release_sock(sk); return res; @@ -1384,7 +1429,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, int sz_to_copy, target, needed; int sz_copied = 0; u32 err; - int res = 0; + int res = 0, hlen; /* Catch invalid receive attempts */ if (unlikely(!buf_len)) @@ -1410,6 +1455,7 @@ restart: buf = skb_peek(&sk->sk_receive_queue); msg = buf_msg(buf); sz = msg_data_sz(msg); + hlen = msg_hdr_sz(msg); err = msg_errcode(msg); /* Discard an empty non-errored message & try again */ @@ -1434,8 +1480,7 @@ restart: needed = (buf_len - sz_copied); sz_to_copy = (sz <= needed) ? sz : needed; - res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset, - m, sz_to_copy); + res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy); if (res) goto exit; @@ -1457,20 +1502,18 @@ restart: res = -ECONNRESET; } - /* Consume received message (optional) */ - if (likely(!(flags & MSG_PEEK))) { - if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { - tipc_sk_send_ack(tsk, tsk->rcv_unacked); - tsk->rcv_unacked = 0; - } - tsk_advance_rx_queue(sk); - } + if (unlikely(flags & MSG_PEEK)) + goto exit; + + tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); + if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) + tipc_sk_send_ack(tsk); + tsk_advance_rx_queue(sk); /* Loop around if more data is required */ if ((sz_copied < buf_len) && /* didn't get all requested data */ (!skb_queue_empty(&sk->sk_receive_queue) || (sz_copied < target)) && /* and more is ready or required */ - (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */ (!err)) /* and haven't reached a FIN */ goto restart; @@ -1602,30 +1645,33 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) /** * rcvbuf_limit - get proper overload limit of socket receive queue * @sk: socket - * @buf: message + * @skb: message * - * For all connection oriented messages, irrespective of importance, - * the default overload value (i.e. 67MB) is set as limit. + * For connection oriented messages, irrespective of importance, + * default queue limit is 2 MB. * - * For all connectionless messages, by default new queue limits are - * as belows: + * For connectionless messages, queue limits are based on message + * importance as follows: * - * TIPC_LOW_IMPORTANCE (4 MB) - * TIPC_MEDIUM_IMPORTANCE (8 MB) - * TIPC_HIGH_IMPORTANCE (16 MB) - * TIPC_CRITICAL_IMPORTANCE (32 MB) + * TIPC_LOW_IMPORTANCE (2 MB) + * TIPC_MEDIUM_IMPORTANCE (4 MB) + * TIPC_HIGH_IMPORTANCE (8 MB) + * TIPC_CRITICAL_IMPORTANCE (16 MB) * * Returns overload limit according to corresponding message importance */ -static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) +static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(buf); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = buf_msg(skb); - if (msg_connected(msg)) - return sysctl_tipc_rmem[2]; + if (unlikely(!msg_connected(hdr))) + return sk->sk_rcvbuf << msg_importance(hdr); - return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << - msg_importance(msg); + if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) + return sk->sk_rcvbuf; + + return FLOWCTL_MSG_LIM; } /** @@ -1748,7 +1794,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Try backlog, compensating for double-counted bytes */ dcnt = &tipc_sk(sk)->dupl_rcvcnt; - if (sk->sk_backlog.len) + if (!sk->sk_backlog.len) atomic_set(dcnt, 0); lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); if (likely(!sk_add_backlog(sk, skb, lim))) @@ -2807,6 +2853,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], tipc_nl_sock_policy); diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 4241f22069dc..06fb5944cf76 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -1,6 +1,6 @@ /* net/tipc/socket.h: Include file for TIPC socket code * - * Copyright (c) 2014-2015, Ericsson AB + * Copyright (c) 2014-2016, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,10 +38,17 @@ #include <net/sock.h> #include <net/genetlink.h> -#define TIPC_CONNACK_INTV 256 -#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) -#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) +/* Compatibility values for deprecated message based flow control */ +#define FLOWCTL_MSG_WIN 512 +#define FLOWCTL_MSG_LIM ((FLOWCTL_MSG_WIN * 2 + 1) * SKB_TRUESIZE(MAX_MSG_SIZE)) + +#define FLOWCTL_BLK_SZ 1024 + +/* Socket receive buffer sizes */ +#define RCVBUF_MIN (FLOWCTL_BLK_SZ * 512) +#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2) +#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16) + int tipc_socket_init(void); void tipc_socket_stop(void); void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index e6cb386fbf34..0dd02244e21d 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -302,7 +302,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, } /* Handle one termination request for the subscriber */ -static void tipc_subscrb_shutdown_cb(int conid, void *usr_data) +static void tipc_subscrb_release_cb(int conid, void *usr_data) { tipc_subscrb_delete((struct tipc_subscriber *)usr_data); } @@ -326,8 +326,7 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, return tipc_subscrp_cancel(s, subscriber); } - if (s) - tipc_subscrp_subscribe(net, s, subscriber, swap); + tipc_subscrp_subscribe(net, s, subscriber, swap); } /* Handle one request to establish a new subscriber */ @@ -365,7 +364,7 @@ int tipc_topsrv_start(struct net *net) topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb; topsrv->tipc_conn_new = tipc_subscrb_connect_cb; - topsrv->tipc_conn_shutdown = tipc_subscrb_shutdown_cb; + topsrv->tipc_conn_release = tipc_subscrb_release_cb; strncpy(topsrv->name, name, strlen(name) + 1); tn->topsrv = topsrv; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 56214736fe88..4120b7a538be 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -2051,7 +2051,7 @@ static u32 vmci_transport_get_local_cid(void) return vmci_get_context_id(); } -static struct vsock_transport vmci_transport = { +static const struct vsock_transport vmci_transport = { .init = vmci_transport_socket_init, .destruct = vmci_transport_destruct, .release = vmci_transport_release, diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 59cabc9bce69..da49c0b1fd32 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -739,7 +739,7 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, * and thus fail the GO instantiation, consider only the interfaces of * the current registered device. */ - list_for_each_entry(wdev, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { struct ieee80211_channel *other_chan = NULL; int r1, r2; @@ -768,7 +768,7 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, if (chan == other_chan) return true; - if (chan->band != IEEE80211_BAND_5GHZ) + if (chan->band != NL80211_BAND_5GHZ) continue; r1 = cfg80211_get_unii(chan->center_freq); diff --git a/net/wireless/core.c b/net/wireless/core.c index 9f1c4aa851ef..d25c82bc1bbe 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -3,6 +3,7 @@ * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2015 Intel Deutschland GmbH */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -157,7 +158,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, if (!(rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK)) return -EOPNOTSUPP; - list_for_each_entry(wdev, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; @@ -171,7 +172,8 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, /* failed -- clean up to old netns */ net = wiphy_net(&rdev->wiphy); - list_for_each_entry_continue_reverse(wdev, &rdev->wdev_list, + list_for_each_entry_continue_reverse(wdev, + &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; @@ -230,7 +232,7 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) ASSERT_RTNL(); - list_for_each_entry(wdev, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->netdev) { dev_close(wdev->netdev); continue; @@ -298,7 +300,8 @@ void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) kfree(item); spin_unlock_irq(&rdev->destroy_list_lock); - list_for_each_entry_safe(wdev, tmp, &rdev->wdev_list, list) { + list_for_each_entry_safe(wdev, tmp, + &rdev->wiphy.wdev_list, list) { if (nlportid == wdev->owner_nlportid) rdev_del_virtual_intf(rdev, wdev); } @@ -410,7 +413,7 @@ use_default_name: dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); } - INIT_LIST_HEAD(&rdev->wdev_list); + INIT_LIST_HEAD(&rdev->wiphy.wdev_list); INIT_LIST_HEAD(&rdev->beacon_registrations); spin_lock_init(&rdev->beacon_registrations_lock); spin_lock_init(&rdev->bss_lock); @@ -557,7 +560,7 @@ int wiphy_register(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); int res; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_supported_band *sband; bool have_band = false; int i; @@ -626,6 +629,13 @@ int wiphy_register(struct wiphy *wiphy) !rdev->ops->set_mac_acl))) return -EINVAL; + /* assure only valid behaviours are flagged by driver + * hence subtract 2 as bit 0 is invalid. + */ + if (WARN_ON(wiphy->bss_select_support && + (wiphy->bss_select_support & ~(BIT(__NL80211_BSS_SELECT_ATTR_AFTER_LAST) - 2)))) + return -EINVAL; + if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); @@ -640,7 +650,7 @@ int wiphy_register(struct wiphy *wiphy) return res; /* sanity check supported bands/channels */ - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wiphy->bands[band]; if (!sband) continue; @@ -652,7 +662,7 @@ int wiphy_register(struct wiphy *wiphy) * on 60GHz band, there are no legacy rates, so * n_bitrates is 0 */ - if (WARN_ON(band != IEEE80211_BAND_60GHZ && + if (WARN_ON(band != NL80211_BAND_60GHZ && !sband->n_bitrates)) return -EINVAL; @@ -662,7 +672,7 @@ int wiphy_register(struct wiphy *wiphy) * global structure for that. */ if (cfg80211_disable_40mhz_24ghz && - band == IEEE80211_BAND_2GHZ && + band == NL80211_BAND_2GHZ && sband->ht_cap.ht_supported) { sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40; @@ -792,7 +802,7 @@ void wiphy_unregister(struct wiphy *wiphy) nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); rdev->wiphy.registered = false; - WARN_ON(!list_empty(&rdev->wdev_list)); + WARN_ON(!list_empty(&rdev->wiphy.wdev_list)); /* * First remove the hardware from everywhere, this makes @@ -1014,7 +1024,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, spin_lock_init(&wdev->mgmt_registrations_lock); wdev->identifier = ++rdev->wdev_id; - list_add_rcu(&wdev->list, &rdev->wdev_list); + list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list); rdev->devlist_generation++; /* can only change netns with wiphy */ dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/net/wireless/core.h b/net/wireless/core.h index 022ccad06cbe..025b7a5d508b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -50,10 +50,9 @@ struct cfg80211_registered_device { /* wiphy index, internal only */ int wiphy_idx; - /* associated wireless interfaces, protected by rtnl or RCU */ - struct list_head wdev_list; + /* protected by RTNL */ int devlist_generation, wdev_id; - int opencount; /* also protected by devlist_mtx */ + int opencount; wait_queue_head_t dev_wait; struct list_head beacon_registrations; @@ -214,6 +213,7 @@ struct cfg80211_event { const u8 *resp_ie; size_t req_ie_len; size_t resp_ie_len; + struct cfg80211_bss *bss; u16 status; } cr; struct { diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 454157717efa..5d453916a417 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -69,7 +69,7 @@ static ssize_t ht40allow_map_read(struct file *file, struct wiphy *wiphy = file->private_data; char *buf; unsigned int offset = 0, buf_size = PAGE_SIZE, i, r; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_supported_band *sband; buf = kzalloc(buf_size, GFP_KERNEL); @@ -78,7 +78,7 @@ static ssize_t ht40allow_map_read(struct file *file, rtnl_lock(); - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wiphy->bands[band]; if (!sband) continue; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 4c55fab9b4e4..4a4dda53bdf1 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -104,7 +104,7 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct ieee80211_supported_band *sband = rdev->wiphy.bands[params->chandef.chan->band]; int j; - u32 flag = params->chandef.chan->band == IEEE80211_BAND_5GHZ ? + u32 flag = params->chandef.chan->band == NL80211_BAND_5GHZ ? IEEE80211_RATE_MANDATORY_A : IEEE80211_RATE_MANDATORY_B; @@ -236,7 +236,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct cfg80211_cached_keys *ck = NULL; - enum ieee80211_band band; + enum nl80211_band band; int i, err; ASSERT_WDEV_LOCK(wdev); @@ -248,7 +248,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (!wdev->wext.ibss.chandef.chan) { struct ieee80211_channel *new_chan = NULL; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 092300b30c37..fa2066b56f36 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -128,9 +128,9 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!setup->chandef.chan) { /* if we don't have that either, use the first usable channel */ - enum ieee80211_band band; + enum nl80211_band band; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; int i; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index ff328250bc44..c284d883c349 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -726,7 +726,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) wiphy = &rdev->wiphy; rtnl_lock(); - for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { + for (bandid = 0; bandid < NUM_NL80211_BANDS; bandid++) { sband = wiphy->bands[bandid]; if (!sband) continue; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 056a7307862b..d7599014055d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -103,7 +103,7 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) if (have_wdev_id && rdev->wiphy_idx != wiphy_idx) continue; - list_for_each_entry(wdev, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (have_ifidx && wdev->netdev && wdev->netdev->ifindex == ifidx) { result = wdev; @@ -149,7 +149,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32); if (tmp) { /* make sure wdev exists */ - list_for_each_entry(wdev, &tmp->wdev_list, list) { + list_for_each_entry(wdev, &tmp->wiphy.wdev_list, list) { if (wdev->identifier != (u32)wdev_id) continue; found = true; @@ -402,6 +402,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG }, [NL80211_ATTR_PBSS] = { .type = NLA_FLAG }, + [NL80211_ATTR_BSS_SELECT] = { .type = NLA_NESTED }, + [NL80211_ATTR_STA_SUPPORT_P2P_PS] = { .type = NLA_U8 }, }; /* policy for the key attributes */ @@ -486,6 +488,15 @@ nl80211_plan_policy[NL80211_SCHED_SCAN_PLAN_MAX + 1] = { [NL80211_SCHED_SCAN_PLAN_ITERATIONS] = { .type = NLA_U32 }, }; +static const struct nla_policy +nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = { + [NL80211_BSS_SELECT_ATTR_RSSI] = { .type = NLA_FLAG }, + [NL80211_BSS_SELECT_ATTR_BAND_PREF] = { .type = NLA_U32 }, + [NL80211_BSS_SELECT_ATTR_RSSI_ADJUST] = { + .len = sizeof(struct nl80211_bss_select_rssi_adjust) + }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -524,7 +535,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; - list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { + list_for_each_entry(tmp, &(*rdev)->wiphy.wdev_list, list) { if (tmp->identifier == cb->args[1]) { *wdev = tmp; break; @@ -1266,7 +1277,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, struct nlattr *nl_bands, *nl_band; struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_cmds; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_channel *chan; int i; const struct ieee80211_txrx_stypes *mgmt_stypes = @@ -1399,7 +1410,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, goto nla_put_failure; for (band = state->band_start; - band < IEEE80211_NUM_BANDS; band++) { + band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; sband = rdev->wiphy.bands[band]; @@ -1461,7 +1472,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, } nla_nest_end(msg, nl_bands); - if (band < IEEE80211_NUM_BANDS) + if (band < NUM_NL80211_BANDS) state->band_start = band + 1; else state->band_start = 0; @@ -1731,6 +1742,25 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.ext_features)) goto nla_put_failure; + if (rdev->wiphy.bss_select_support) { + struct nlattr *nested; + u32 bss_select_support = rdev->wiphy.bss_select_support; + + nested = nla_nest_start(msg, NL80211_ATTR_BSS_SELECT); + if (!nested) + goto nla_put_failure; + + i = 0; + while (bss_select_support) { + if ((bss_select_support & 1) && + nla_put_flag(msg, i)) + goto nla_put_failure; + i++; + bss_select_support >>= 1; + } + nla_nest_end(msg, nested); + } + /* done */ state->split_start = 0; break; @@ -2399,7 +2429,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFTYPE, wdev->iftype) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ @@ -2459,7 +2490,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * } if_idx = 0; - list_for_each_entry(wdev, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (if_idx < if_start) { if_idx++; continue; @@ -2731,7 +2762,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) spin_lock_init(&wdev->mgmt_registrations_lock); wdev->identifier = ++rdev->wdev_id; - list_add_rcu(&wdev->list, &rdev->wdev_list); + list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list); rdev->devlist_generation++; break; default: @@ -3267,7 +3298,7 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; bool ret = false; - list_for_each_entry(wdev, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) continue; @@ -3463,7 +3494,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } params.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); - if (params.pbss && !rdev->wiphy.bands[IEEE80211_BAND_60GHZ]) + if (params.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) return -EOPNOTSUPP; wdev_lock(wdev); @@ -3724,11 +3755,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, goto nla_put_failure; #define PUT_SINFO(attr, memb, type) do { \ - if (sinfo->filled & BIT(NL80211_STA_INFO_ ## attr) && \ + BUILD_BUG_ON(sizeof(type) == sizeof(u64)); \ + if (sinfo->filled & (1ULL << NL80211_STA_INFO_ ## attr) && \ nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr, \ sinfo->memb)) \ goto nla_put_failure; \ } while (0) +#define PUT_SINFO_U64(attr, memb) do { \ + if (sinfo->filled & (1ULL << NL80211_STA_INFO_ ## attr) && \ + nla_put_u64_64bit(msg, NL80211_STA_INFO_ ## attr, \ + sinfo->memb, NL80211_STA_INFO_PAD)) \ + goto nla_put_failure; \ + } while (0) PUT_SINFO(CONNECTED_TIME, connected_time, u32); PUT_SINFO(INACTIVE_TIME, inactive_time, u32); @@ -3745,11 +3783,12 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, (u32)sinfo->tx_bytes)) goto nla_put_failure; - PUT_SINFO(RX_BYTES64, rx_bytes, u64); - PUT_SINFO(TX_BYTES64, tx_bytes, u64); + PUT_SINFO_U64(RX_BYTES64, rx_bytes); + PUT_SINFO_U64(TX_BYTES64, tx_bytes); PUT_SINFO(LLID, llid, u16); PUT_SINFO(PLID, plid, u16); PUT_SINFO(PLINK_STATE, plink_state, u8); + PUT_SINFO_U64(RX_DURATION, rx_duration); switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: @@ -3817,12 +3856,13 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, &sinfo->sta_flags)) goto nla_put_failure; - PUT_SINFO(T_OFFSET, t_offset, u64); - PUT_SINFO(RX_DROP_MISC, rx_dropped_misc, u64); - PUT_SINFO(BEACON_RX, rx_beacon, u64); + PUT_SINFO_U64(T_OFFSET, t_offset); + PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc); + PUT_SINFO_U64(BEACON_RX, rx_beacon); PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); #undef PUT_SINFO +#undef PUT_SINFO_U64 if (sinfo->filled & BIT(NL80211_STA_INFO_TID_STATS)) { struct nlattr *tidsattr; @@ -3845,19 +3885,19 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, if (!tidattr) goto nla_put_failure; -#define PUT_TIDVAL(attr, memb, type) do { \ +#define PUT_TIDVAL_U64(attr, memb) do { \ if (tidstats->filled & BIT(NL80211_TID_STATS_ ## attr) && \ - nla_put_ ## type(msg, NL80211_TID_STATS_ ## attr, \ - tidstats->memb)) \ + nla_put_u64_64bit(msg, NL80211_TID_STATS_ ## attr, \ + tidstats->memb, NL80211_TID_STATS_PAD)) \ goto nla_put_failure; \ } while (0) - PUT_TIDVAL(RX_MSDU, rx_msdu, u64); - PUT_TIDVAL(TX_MSDU, tx_msdu, u64); - PUT_TIDVAL(TX_MSDU_RETRIES, tx_msdu_retries, u64); - PUT_TIDVAL(TX_MSDU_FAILED, tx_msdu_failed, u64); + PUT_TIDVAL_U64(RX_MSDU, rx_msdu); + PUT_TIDVAL_U64(TX_MSDU, tx_msdu); + PUT_TIDVAL_U64(TX_MSDU_RETRIES, tx_msdu_retries); + PUT_TIDVAL_U64(TX_MSDU_FAILED, tx_msdu_failed); -#undef PUT_TIDVAL +#undef PUT_TIDVAL_U64 nla_nest_end(msg, tidattr); } @@ -3977,6 +4017,10 @@ int cfg80211_check_station_change(struct wiphy *wiphy, statype != CFG80211_STA_AP_CLIENT_UNASSOC) return -EINVAL; + if (params->support_p2p_ps != -1 && + statype != CFG80211_STA_AP_CLIENT_UNASSOC) + return -EINVAL; + if (params->aid && !(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) && statype != CFG80211_STA_AP_CLIENT_UNASSOC) @@ -4270,6 +4314,18 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) else params.listen_interval = -1; + if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) { + u8 tmp; + + tmp = nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]); + if (tmp >= NUM_NL80211_P2P_PS_STATUS) + return -EINVAL; + + params.support_p2p_ps = tmp; + } else { + params.support_p2p_ps = -1; + } + if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; @@ -4393,6 +4449,23 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); + if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) { + u8 tmp; + + tmp = nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]); + if (tmp >= NUM_NL80211_P2P_PS_STATUS) + return -EINVAL; + + params.support_p2p_ps = tmp; + } else { + /* + * if not specified, assume it's supported for P2P GO interface, + * and is NOT supported for AP interface + */ + params.support_p2p_ps = + dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO; + } + if (info->attrs[NL80211_ATTR_PEER_AID]) params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); else @@ -5758,6 +5831,73 @@ static int validate_scan_freqs(struct nlattr *freqs) return n_channels; } +static bool is_band_valid(struct wiphy *wiphy, enum nl80211_band b) +{ + return b < NUM_NL80211_BANDS && wiphy->bands[b]; +} + +static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy, + struct cfg80211_bss_selection *bss_select) +{ + struct nlattr *attr[NL80211_BSS_SELECT_ATTR_MAX + 1]; + struct nlattr *nest; + int err; + bool found = false; + int i; + + /* only process one nested attribute */ + nest = nla_data(nla); + if (!nla_ok(nest, nla_len(nest))) + return -EINVAL; + + err = nla_parse(attr, NL80211_BSS_SELECT_ATTR_MAX, nla_data(nest), + nla_len(nest), nl80211_bss_select_policy); + if (err) + return err; + + /* only one attribute may be given */ + for (i = 0; i <= NL80211_BSS_SELECT_ATTR_MAX; i++) { + if (attr[i]) { + if (found) + return -EINVAL; + found = true; + } + } + + bss_select->behaviour = __NL80211_BSS_SELECT_ATTR_INVALID; + + if (attr[NL80211_BSS_SELECT_ATTR_RSSI]) + bss_select->behaviour = NL80211_BSS_SELECT_ATTR_RSSI; + + if (attr[NL80211_BSS_SELECT_ATTR_BAND_PREF]) { + bss_select->behaviour = NL80211_BSS_SELECT_ATTR_BAND_PREF; + bss_select->param.band_pref = + nla_get_u32(attr[NL80211_BSS_SELECT_ATTR_BAND_PREF]); + if (!is_band_valid(wiphy, bss_select->param.band_pref)) + return -EINVAL; + } + + if (attr[NL80211_BSS_SELECT_ATTR_RSSI_ADJUST]) { + struct nl80211_bss_select_rssi_adjust *adj_param; + + adj_param = nla_data(attr[NL80211_BSS_SELECT_ATTR_RSSI_ADJUST]); + bss_select->behaviour = NL80211_BSS_SELECT_ATTR_RSSI_ADJUST; + bss_select->param.adjust.band = adj_param->band; + bss_select->param.adjust.delta = adj_param->delta; + if (!is_band_valid(wiphy, bss_select->param.adjust.band)) + return -EINVAL; + } + + /* user-space did not provide behaviour attribute */ + if (bss_select->behaviour == __NL80211_BSS_SELECT_ATTR_INVALID) + return -EINVAL; + + if (!(wiphy->bss_select_support & BIT(bss_select->behaviour))) + return -EINVAL; + + return 0; +} + static int nl80211_parse_random_mac(struct nlattr **attrs, u8 *mac_addr, u8 *mac_addr_mask) { @@ -5888,10 +6028,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) i++; } } else { - enum ieee80211_band band; + enum nl80211_band band; /* all channels */ - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { int j; if (!wiphy->bands[band]) continue; @@ -5936,7 +6076,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->ie_len); } - for (i = 0; i < IEEE80211_NUM_BANDS; i++) + for (i = 0; i < NUM_NL80211_BANDS; i++) if (wiphy->bands[i]) request->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; @@ -5945,9 +6085,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SUPP_RATES], tmp) { - enum ieee80211_band band = nla_type(attr); + enum nl80211_band band = nla_type(attr); - if (band < 0 || band >= IEEE80211_NUM_BANDS) { + if (band < 0 || band >= NUM_NL80211_BANDS) { err = -EINVAL; goto out_free; } @@ -5996,6 +6136,12 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); + if (info->attrs[NL80211_ATTR_MAC]) + memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]), + ETH_ALEN); + else + eth_broadcast_addr(request->bssid); + request->wdev = wdev; request->wiphy = &rdev->wiphy; request->scan_start = jiffies; @@ -6129,7 +6275,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_sched_scan_request *request; struct nlattr *attr; int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0; - enum ieee80211_band band; + enum nl80211_band band; size_t ie_len; struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1]; s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF; @@ -6294,7 +6440,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, } } else { /* all channels */ - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { int j; if (!wiphy->bands[band]) continue; @@ -6738,7 +6884,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) goto nla_put_failure; - if (nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + if (nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto nla_put_failure; bss = nla_nest_start(msg, NL80211_ATTR_BSS); @@ -6759,7 +6906,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, */ ies = rcu_dereference(res->ies); if (ies) { - if (nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf)) + if (nla_put_u64_64bit(msg, NL80211_BSS_TSF, ies->tsf, + NL80211_BSS_PAD)) goto fail_unlock_rcu; if (ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS, ies->len, ies->data)) @@ -6769,7 +6917,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, /* and this pointer is always (unless driver didn't know) beacon data */ ies = rcu_dereference(res->beacon_ies); if (ies && ies->from_beacon) { - if (nla_put_u64(msg, NL80211_BSS_BEACON_TSF, ies->tsf)) + if (nla_put_u64_64bit(msg, NL80211_BSS_BEACON_TSF, ies->tsf, + NL80211_BSS_PAD)) goto fail_unlock_rcu; if (ies->len && nla_put(msg, NL80211_BSS_BEACON_IES, ies->len, ies->data)) @@ -6788,8 +6937,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, goto nla_put_failure; if (intbss->ts_boottime && - nla_put_u64(msg, NL80211_BSS_LAST_SEEN_BOOTTIME, - intbss->ts_boottime)) + nla_put_u64_64bit(msg, NL80211_BSS_LAST_SEEN_BOOTTIME, + intbss->ts_boottime, NL80211_BSS_PAD)) goto nla_put_failure; switch (rdev->wiphy.signal_type) { @@ -6909,28 +7058,28 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, nla_put_flag(msg, NL80211_SURVEY_INFO_IN_USE)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME, - survey->time)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME, + survey->time, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_BUSY, - survey->time_busy)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BUSY, + survey->time_busy, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME_EXT_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_EXT_BUSY, - survey->time_ext_busy)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_EXT_BUSY, + survey->time_ext_busy, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME_RX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_RX, - survey->time_rx)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_RX, + survey->time_rx, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME_TX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_TX, - survey->time_tx)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_TX, + survey->time_tx, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; if ((survey->filled & SURVEY_INFO_TIME_SCAN) && - nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_SCAN, - survey->time_scan)) + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_SCAN, + survey->time_scan, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; nla_nest_end(msg, infoattr); @@ -7402,14 +7551,14 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) static bool nl80211_parse_mcast_rate(struct cfg80211_registered_device *rdev, - int mcast_rate[IEEE80211_NUM_BANDS], + int mcast_rate[NUM_NL80211_BANDS], int rateval) { struct wiphy *wiphy = &rdev->wiphy; bool found = false; int band, i; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; sband = wiphy->bands[band]; @@ -7589,7 +7738,7 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - int mcast_rate[IEEE80211_NUM_BANDS]; + int mcast_rate[NUM_NL80211_BANDS]; u32 nla_rate; int err; @@ -7650,8 +7799,8 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, } if (wdev) { - if (nla_put_u64(skb, NL80211_ATTR_WDEV, - wdev_id(wdev))) + if (nla_put_u64_64bit(skb, NL80211_ATTR_WDEV, + wdev_id(wdev), NL80211_ATTR_PAD)) goto nla_put_failure; if (wdev->netdev && nla_put_u32(skb, NL80211_ATTR_IFINDEX, @@ -7922,6 +8071,10 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.mfp = NL80211_MFP_NO; } + if (info->attrs[NL80211_ATTR_PREV_BSSID]) + connect.prev_bssid = + nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { connect.channel = nl80211_get_valid_chan( wiphy, info->attrs[NL80211_ATTR_WIPHY_FREQ]); @@ -7990,13 +8143,29 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) } connect.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); - if (connect.pbss && !rdev->wiphy.bands[IEEE80211_BAND_60GHZ]) { + if (connect.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) { kzfree(connkeys); return -EOPNOTSUPP; } + if (info->attrs[NL80211_ATTR_BSS_SELECT]) { + /* bss selection makes no sense if bssid is set */ + if (connect.bssid) { + kzfree(connkeys); + return -EINVAL; + } + + err = parse_bss_select(info->attrs[NL80211_ATTR_BSS_SELECT], + wiphy, &connect.bss_select); + if (err) { + kzfree(connkeys); + return err; + } + } + wdev_lock(dev->ieee80211_ptr); - err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL); + err = cfg80211_connect(rdev, dev, &connect, connkeys, + connect.prev_bssid); wdev_unlock(dev->ieee80211_ptr); if (err) kzfree(connkeys); @@ -8224,7 +8393,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (err) goto free_msg; - if (nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -8394,7 +8564,7 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, memset(&mask, 0, sizeof(mask)); /* Default to all rates enabled */ - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { sband = rdev->wiphy.bands[i]; if (!sband) @@ -8418,14 +8588,14 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, /* * The nested attribute uses enum nl80211_band as the index. This maps - * directly to the enum ieee80211_band values used in cfg80211. + * directly to the enum nl80211_band values used in cfg80211. */ BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { - enum ieee80211_band band = nla_type(tx_rates); + enum nl80211_band band = nla_type(tx_rates); int err; - if (band < 0 || band >= IEEE80211_NUM_BANDS) + if (band < 0 || band >= NUM_NL80211_BANDS) return -EINVAL; sband = rdev->wiphy.bands[band]; if (sband == NULL) @@ -8636,7 +8806,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) goto free_msg; if (msg) { - if (nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -9922,7 +10093,8 @@ static int nl80211_probe_client(struct sk_buff *skb, if (err) goto free_msg; - if (nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -10220,7 +10392,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, *wdev = NULL; if (cb->args[1]) { - list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { + list_for_each_entry(tmp, &wiphy->wdev_list, list) { if (tmp->identifier == cb->args[1] - 1) { *wdev = tmp; break; @@ -10347,8 +10519,9 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, break; if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - (wdev && nla_put_u64(skb, NL80211_ATTR_WDEV, - wdev_id(wdev)))) { + (wdev && nla_put_u64_64bit(skb, NL80211_ATTR_WDEV, + wdev_id(wdev), + NL80211_ATTR_PAD))) { genlmsg_cancel(skb, hdr); break; } @@ -10590,7 +10763,7 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb, * section 10.22.6.2.1. Disallow 5/10Mhz channels as well for now, the * specification is not defined for them. */ - if (chandef.chan->band == IEEE80211_BAND_2GHZ && + if (chandef.chan->band == NL80211_BAND_2GHZ && chandef.width != NL80211_CHAN_WIDTH_20_NOHT && chandef.width != NL80211_CHAN_WIDTH_20) return -EINVAL; @@ -11555,7 +11728,8 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto nla_put_failure; /* ignore errors and send incomplete event anyway */ @@ -12222,11 +12396,13 @@ static void nl80211_send_remain_on_chan_event( if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq) || nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, NL80211_CHAN_NO_HT) || - nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) + nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) goto nla_put_failure; if (cmd == NL80211_CMD_REMAIN_ON_CHANNEL && @@ -12460,7 +12636,8 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) || (sig_dbm && nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) || @@ -12503,9 +12680,11 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_FRAME, len, buf) || - nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie) || + nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD) || (ack && nla_put_flag(msg, NL80211_ATTR_ACK))) goto nla_put_failure; @@ -12885,7 +13064,8 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = netdev->ieee80211_ptr; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto nla_put_failure; } @@ -12930,7 +13110,8 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || - nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie) || + nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD) || (acked && nla_put_flag(msg, NL80211_ATTR_ACK))) goto nla_put_failure; @@ -13075,7 +13256,8 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, goto free_msg; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto free_msg; if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, @@ -13231,7 +13413,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, sched_scan_req->owner_nlportid == notify->portid) schedule_scan_stop = true; - list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) { + list_for_each_entry_rcu(wdev, &rdev->wiphy.wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); if (wdev->owner_nlportid == notify->portid) @@ -13350,7 +13532,8 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -13383,7 +13566,8 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev) if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) goto out; genlmsg_end(msg, hdr); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 8ae0c04f9fc7..85ff30bee2b9 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1048,7 +1048,7 @@ rdev_start_radar_detection(struct cfg80211_registered_device *rdev, static inline int rdev_set_mcast_rate(struct cfg80211_registered_device *rdev, struct net_device *dev, - int mcast_rate[IEEE80211_NUM_BANDS]) + int mcast_rate[NUM_NL80211_BANDS]) { int ret = -ENOTSUPP; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c5fb317eee68..5dbac3749738 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1546,12 +1546,12 @@ static void reg_process_ht_flags_band(struct wiphy *wiphy, static void reg_process_ht_flags(struct wiphy *wiphy) { - enum ieee80211_band band; + enum nl80211_band band; if (!wiphy) return; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + for (band = 0; band < NUM_NL80211_BANDS; band++) reg_process_ht_flags_band(wiphy, wiphy->bands[band]); } @@ -1639,7 +1639,7 @@ static void reg_leave_invalid_chans(struct wiphy *wiphy) ASSERT_RTNL(); - list_for_each_entry(wdev, &rdev->wdev_list, list) + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) if (!reg_wdev_chan_valid(wiphy, wdev)) cfg80211_leave(rdev, wdev); } @@ -1673,7 +1673,7 @@ static void reg_check_channels(void) static void wiphy_update_regulatory(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { - enum ieee80211_band band; + enum nl80211_band band; struct regulatory_request *lr = get_last_request(); if (ignore_reg_update(wiphy, initiator)) { @@ -1690,7 +1690,7 @@ static void wiphy_update_regulatory(struct wiphy *wiphy, lr->dfs_region = get_cfg80211_regdom()->dfs_region; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + for (band = 0; band < NUM_NL80211_BANDS; band++) handle_band(wiphy, initiator, wiphy->bands[band]); reg_process_beacons(wiphy); @@ -1786,14 +1786,14 @@ static void handle_band_custom(struct wiphy *wiphy, void wiphy_apply_custom_regulatory(struct wiphy *wiphy, const struct ieee80211_regdomain *regd) { - enum ieee80211_band band; + enum nl80211_band band; unsigned int bands_set = 0; WARN(!(wiphy->regulatory_flags & REGULATORY_CUSTOM_REG), "wiphy should have REGULATORY_CUSTOM_REG\n"); wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!wiphy->bands[band]) continue; handle_band_custom(wiphy, wiphy->bands[band], regd); @@ -2228,7 +2228,7 @@ static void reg_process_self_managed_hints(void) struct wiphy *wiphy; const struct ieee80211_regdomain *tmp; const struct ieee80211_regdomain *regd; - enum ieee80211_band band; + enum nl80211_band band; struct regulatory_request request = {}; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { @@ -2246,7 +2246,7 @@ static void reg_process_self_managed_hints(void) rcu_assign_pointer(wiphy->regd, regd); rcu_free_regdom(tmp); - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + for (band = 0; band < NUM_NL80211_BANDS; band++) handle_band_custom(wiphy, wiphy->bands[band], regd); reg_process_ht_flags(wiphy); @@ -2404,7 +2404,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) } EXPORT_SYMBOL(regulatory_hint); -void regulatory_hint_country_ie(struct wiphy *wiphy, enum ieee80211_band band, +void regulatory_hint_country_ie(struct wiphy *wiphy, enum nl80211_band band, const u8 *country_ie, u8 country_ie_len) { char alpha2[2]; @@ -2504,11 +2504,11 @@ static void restore_alpha2(char *alpha2, bool reset_user) static void restore_custom_reg_settings(struct wiphy *wiphy) { struct ieee80211_supported_band *sband; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_channel *chan; int i; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wiphy->bands[band]; if (!sband) continue; @@ -2623,9 +2623,9 @@ void regulatory_hint_disconnect(void) static bool freq_is_chan_12_13_14(u16 freq) { - if (freq == ieee80211_channel_to_frequency(12, IEEE80211_BAND_2GHZ) || - freq == ieee80211_channel_to_frequency(13, IEEE80211_BAND_2GHZ) || - freq == ieee80211_channel_to_frequency(14, IEEE80211_BAND_2GHZ)) + if (freq == ieee80211_channel_to_frequency(12, NL80211_BAND_2GHZ) || + freq == ieee80211_channel_to_frequency(13, NL80211_BAND_2GHZ) || + freq == ieee80211_channel_to_frequency(14, NL80211_BAND_2GHZ)) return true; return false; } @@ -2650,7 +2650,7 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy, if (beacon_chan->beacon_found || beacon_chan->flags & IEEE80211_CHAN_RADAR || - (beacon_chan->band == IEEE80211_BAND_2GHZ && + (beacon_chan->band == NL80211_BAND_2GHZ && !freq_is_chan_12_13_14(beacon_chan->center_freq))) return 0; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 9f495d76eca0..f6ced316b5a4 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -104,7 +104,7 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy, * information for a band the BSS is not present in it will be ignored. */ void regulatory_hint_country_ie(struct wiphy *wiphy, - enum ieee80211_band band, + enum nl80211_band band, const u8 *country_ie, u8 country_ie_len); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 14d5369eb778..ef2955c89a00 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -364,13 +364,16 @@ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) } EXPORT_SYMBOL(cfg80211_find_ie); -const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type, +const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, int len) { struct ieee80211_vendor_ie *ie; const u8 *pos = ies, *end = ies + len; int ie_oui; + if (WARN_ON(oui_type > 0xff)) + return NULL; + while (pos < end) { pos = cfg80211_find_ie(WLAN_EID_VENDOR_SPECIFIC, pos, end - pos); @@ -386,7 +389,8 @@ const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type, goto cont; ie_oui = ie->oui[0] << 16 | ie->oui[1] << 8 | ie->oui[2]; - if (ie_oui == oui && ie->oui_type == oui_type) + if (ie_oui == oui && + (oui_type < 0 || ie->oui_type == oui_type)) return pos; cont: pos += 2 + ie->len; @@ -531,7 +535,7 @@ static int cmp_bss(struct cfg80211_bss *a, } static bool cfg80211_bss_type_match(u16 capability, - enum ieee80211_band band, + enum nl80211_band band, enum ieee80211_bss_type bss_type) { bool ret = true; @@ -540,7 +544,7 @@ static bool cfg80211_bss_type_match(u16 capability, if (bss_type == IEEE80211_BSS_TYPE_ANY) return ret; - if (band == IEEE80211_BAND_60GHZ) { + if (band == NL80211_BAND_60GHZ) { mask = WLAN_CAPABILITY_DMG_TYPE_MASK; switch (bss_type) { case IEEE80211_BSS_TYPE_ESS: @@ -1006,7 +1010,7 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, if (!res) return NULL; - if (channel->band == IEEE80211_BAND_60GHZ) { + if (channel->band == NL80211_BAND_60GHZ) { bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK; if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP || bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) @@ -1089,7 +1093,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, if (!res) return NULL; - if (channel->band == IEEE80211_BAND_60GHZ) { + if (channel->band == NL80211_BAND_60GHZ) { bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK; if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP || bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) @@ -1185,7 +1189,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, struct iw_scan_req *wreq = NULL; struct cfg80211_scan_request *creq = NULL; int i, err, n_channels = 0; - enum ieee80211_band band; + enum nl80211_band band; if (!netif_running(dev)) return -ENETDOWN; @@ -1229,7 +1233,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, /* translate "Scan on frequencies" request */ i = 0; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { int j; if (!wiphy->bands[band]) @@ -1289,10 +1293,12 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq->n_ssids = 0; } - for (i = 0; i < IEEE80211_NUM_BANDS; i++) + for (i = 0; i < NUM_NL80211_BANDS; i++) if (wiphy->bands[i]) creq->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; + eth_broadcast_addr(creq->bssid); + rdev->scan_req = creq; err = rdev_scan(rdev, creq); if (err) { diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 544558171787..584fdc347221 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -81,7 +81,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) return -ENOMEM; if (wdev->conn->params.channel) { - enum ieee80211_band band = wdev->conn->params.channel->band; + enum nl80211_band band = wdev->conn->params.channel->band; struct ieee80211_supported_band *sband = wdev->wiphy->bands[band]; @@ -93,11 +93,11 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) request->rates[band] = (1 << sband->n_bitrates) - 1; } else { int i = 0, j; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_supported_band *bands; struct ieee80211_channel *channel; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { bands = wdev->wiphy->bands[band]; if (!bands) continue; @@ -119,6 +119,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) wdev->conn->params.ssid_len); request->ssids[0].ssid_len = wdev->conn->params.ssid_len; + eth_broadcast_addr(request->bssid); + request->wdev = wdev; request->wiphy = &rdev->wiphy; request->scan_start = jiffies; @@ -221,7 +223,7 @@ void cfg80211_conn_work(struct work_struct *work) rtnl_lock(); - list_for_each_entry(wdev, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; @@ -490,8 +492,18 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, if (!rdev->ops->auth || !rdev->ops->assoc) return -EOPNOTSUPP; - if (wdev->current_bss) - return -EALREADY; + if (wdev->current_bss) { + if (!prev_bssid) + return -EALREADY; + if (prev_bssid && + !ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid)) + return -ENOTCONN; + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); + wdev->current_bss = NULL; + + cfg80211_sme_free(wdev); + } if (WARN_ON(wdev->conn)) return -EINPROGRESS; @@ -605,7 +617,7 @@ static bool cfg80211_is_all_idle(void) * count as new regulatory hints. */ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - list_for_each_entry(wdev, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); if (wdev->conn || wdev->current_bss) is_all_idle = false; @@ -741,19 +753,32 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, kfree(country_ie); } -void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - u16 status, gfp_t gfp) +/* Consumes bss object one way or another */ +void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, + struct cfg80211_bss *bss, const u8 *req_ie, + size_t req_ie_len, const u8 *resp_ie, + size_t resp_ie_len, u16 status, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; + if (bss) { + /* Make sure the bss entry provided by the driver is valid. */ + struct cfg80211_internal_bss *ibss = bss_from_pub(bss); + + if (WARN_ON(list_empty(&ibss->list))) { + cfg80211_put_bss(wdev->wiphy, bss); + return; + } + } + ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); - if (!ev) + if (!ev) { + cfg80211_put_bss(wdev->wiphy, bss); return; + } ev->type = EVENT_CONNECT_RESULT; if (bssid) @@ -768,6 +793,9 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, ev->cr.resp_ie_len = resp_ie_len; memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len); } + if (bss) + cfg80211_hold_bss(bss_from_pub(bss)); + ev->cr.bss = bss; ev->cr.status = status; spin_lock_irqsave(&wdev->event_lock, flags); @@ -775,7 +803,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, spin_unlock_irqrestore(&wdev->event_lock, flags); queue_work(cfg80211_wq, &rdev->event_work); } -EXPORT_SYMBOL(cfg80211_connect_result); +EXPORT_SYMBOL(cfg80211_connect_bss); /* Consumes bss object one way or another */ void __cfg80211_roamed(struct wireless_dev *wdev, diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 9cee0220665d..e46469bc130f 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -91,7 +91,7 @@ static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) { struct wireless_dev *wdev; - list_for_each_entry(wdev, &rdev->wdev_list, list) + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) cfg80211_leave(rdev, wdev); } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 09b242b09bed..3c1091ae6c36 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -110,7 +110,7 @@ conf->dot11MeshHWMPconfirmationInterval; \ } while (0) -#define CHAN_ENTRY __field(enum ieee80211_band, band) \ +#define CHAN_ENTRY __field(enum nl80211_band, band) \ __field(u16, center_freq) #define CHAN_ASSIGN(chan) \ do { \ @@ -125,7 +125,7 @@ #define CHAN_PR_FMT "band: %d, freq: %u" #define CHAN_PR_ARG __entry->band, __entry->center_freq -#define CHAN_DEF_ENTRY __field(enum ieee80211_band, band) \ +#define CHAN_DEF_ENTRY __field(enum nl80211_band, band) \ __field(u32, control_freq) \ __field(u32, width) \ __field(u32, center_freq1) \ @@ -1259,6 +1259,7 @@ TRACE_EVENT(rdev_connect, __field(bool, privacy) __field(u32, wpa_versions) __field(u32, flags) + MAC_ENTRY(prev_bssid) ), TP_fast_assign( WIPHY_ASSIGN; @@ -1270,13 +1271,14 @@ TRACE_EVENT(rdev_connect, __entry->privacy = sme->privacy; __entry->wpa_versions = sme->crypto.wpa_versions; __entry->flags = sme->flags; + MAC_ASSIGN(prev_bssid, sme->prev_bssid); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT ", ssid: %s, auth type: %d, privacy: %s, wpa versions: %u, " - "flags: %u", + "flags: %u, previous bssid: " MAC_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(bssid), __entry->ssid, __entry->auth_type, BOOL_TO_STR(__entry->privacy), - __entry->wpa_versions, __entry->flags) + __entry->wpa_versions, __entry->flags, MAC_PR_ARG(prev_bssid)) ); TRACE_EVENT(rdev_set_cqm_rssi_config, @@ -2645,7 +2647,7 @@ TRACE_EVENT(cfg80211_scan_done, TP_STRUCT__entry( __field(u32, n_channels) __dynamic_array(u8, ie, request ? request->ie_len : 0) - __array(u32, rates, IEEE80211_NUM_BANDS) + __array(u32, rates, NUM_NL80211_BANDS) __field(u32, wdev_id) MAC_ENTRY(wiphy_mac) __field(bool, no_cck) @@ -2656,7 +2658,7 @@ TRACE_EVENT(cfg80211_scan_done, memcpy(__get_dynamic_array(ie), request->ie, request->ie_len); memcpy(__entry->rates, request->rates, - IEEE80211_NUM_BANDS); + NUM_NL80211_BANDS); __entry->wdev_id = request->wdev ? request->wdev->identifier : 0; if (request->wiphy) @@ -2881,25 +2883,25 @@ TRACE_EVENT(rdev_start_radar_detection, TRACE_EVENT(rdev_set_mcast_rate, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - int mcast_rate[IEEE80211_NUM_BANDS]), + int mcast_rate[NUM_NL80211_BANDS]), TP_ARGS(wiphy, netdev, mcast_rate), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY - __array(int, mcast_rate, IEEE80211_NUM_BANDS) + __array(int, mcast_rate, NUM_NL80211_BANDS) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; memcpy(__entry->mcast_rate, mcast_rate, - sizeof(int) * IEEE80211_NUM_BANDS); + sizeof(int) * NUM_NL80211_BANDS); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]", WIPHY_PR_ARG, NETDEV_PR_ARG, - __entry->mcast_rate[IEEE80211_BAND_2GHZ], - __entry->mcast_rate[IEEE80211_BAND_5GHZ], - __entry->mcast_rate[IEEE80211_BAND_60GHZ]) + __entry->mcast_rate[NL80211_BAND_2GHZ], + __entry->mcast_rate[NL80211_BAND_5GHZ], + __entry->mcast_rate[NL80211_BAND_60GHZ]) ); TRACE_EVENT(rdev_set_coalesce, diff --git a/net/wireless/util.c b/net/wireless/util.c index 9f440a9de63b..219bd197039e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -47,7 +47,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, if (WARN_ON(!sband)) return 1; - if (sband->band == IEEE80211_BAND_2GHZ) { + if (sband->band == NL80211_BAND_2GHZ) { if (scan_width == NL80211_BSS_CHAN_WIDTH_5 || scan_width == NL80211_BSS_CHAN_WIDTH_10) mandatory_flag = IEEE80211_RATE_MANDATORY_G; @@ -65,26 +65,26 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, } EXPORT_SYMBOL(ieee80211_mandatory_rates); -int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) +int ieee80211_channel_to_frequency(int chan, enum nl80211_band band) { /* see 802.11 17.3.8.3.2 and Annex J * there are overlapping channel numbers in 5GHz and 2GHz bands */ if (chan <= 0) return 0; /* not supported */ switch (band) { - case IEEE80211_BAND_2GHZ: + case NL80211_BAND_2GHZ: if (chan == 14) return 2484; else if (chan < 14) return 2407 + chan * 5; break; - case IEEE80211_BAND_5GHZ: + case NL80211_BAND_5GHZ: if (chan >= 182 && chan <= 196) return 4000 + chan * 5; else return 5000 + chan * 5; break; - case IEEE80211_BAND_60GHZ: + case NL80211_BAND_60GHZ: if (chan < 5) return 56160 + chan * 2160; break; @@ -116,11 +116,11 @@ EXPORT_SYMBOL(ieee80211_frequency_to_channel); struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, int freq) { - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_supported_band *sband; int i; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wiphy->bands[band]; if (!sband) @@ -137,12 +137,12 @@ struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, EXPORT_SYMBOL(__ieee80211_get_channel); static void set_mandatory_flags_band(struct ieee80211_supported_band *sband, - enum ieee80211_band band) + enum nl80211_band band) { int i, want; switch (band) { - case IEEE80211_BAND_5GHZ: + case NL80211_BAND_5GHZ: want = 3; for (i = 0; i < sband->n_bitrates; i++) { if (sband->bitrates[i].bitrate == 60 || @@ -155,7 +155,7 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband, } WARN_ON(want); break; - case IEEE80211_BAND_2GHZ: + case NL80211_BAND_2GHZ: want = 7; for (i = 0; i < sband->n_bitrates; i++) { if (sband->bitrates[i].bitrate == 10) { @@ -185,12 +185,12 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband, } WARN_ON(want != 0 && want != 3 && want != 6); break; - case IEEE80211_BAND_60GHZ: + case NL80211_BAND_60GHZ: /* check for mandatory HT MCS 1..4 */ WARN_ON(!sband->ht_cap.ht_supported); WARN_ON((sband->ht_cap.mcs.rx_mask[0] & 0x1e) != 0x1e); break; - case IEEE80211_NUM_BANDS: + case NUM_NL80211_BANDS: WARN_ON(1); break; } @@ -198,9 +198,9 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband, void ieee80211_set_bitrate_flags(struct wiphy *wiphy) { - enum ieee80211_band band; + enum nl80211_band band; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + for (band = 0; band < NUM_NL80211_BANDS; band++) if (wiphy->bands[band]) set_mandatory_flags_band(wiphy->bands[band], band); } @@ -950,7 +950,7 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) ev->cr.resp_ie, ev->cr.resp_ie_len, ev->cr.status, ev->cr.status == WLAN_STATUS_SUCCESS, - NULL); + ev->cr.bss); break; case EVENT_ROAMED: __cfg80211_roamed(wdev, ev->rm.bss, ev->rm.req_ie, @@ -986,7 +986,7 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev) ASSERT_RTNL(); - list_for_each_entry(wdev, &rdev->wdev_list, list) + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) cfg80211_process_wdev_events(wdev); } @@ -1399,22 +1399,22 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, EXPORT_SYMBOL(ieee80211_ie_split_ric); bool ieee80211_operating_class_to_band(u8 operating_class, - enum ieee80211_band *band) + enum nl80211_band *band) { switch (operating_class) { case 112: case 115 ... 127: case 128 ... 130: - *band = IEEE80211_BAND_5GHZ; + *band = NL80211_BAND_5GHZ; return true; case 81: case 82: case 83: case 84: - *band = IEEE80211_BAND_2GHZ; + *band = NL80211_BAND_2GHZ; return true; case 180: - *band = IEEE80211_BAND_60GHZ; + *band = NL80211_BAND_60GHZ; return true; } @@ -1560,7 +1560,7 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, if (!beacon_int) return -EINVAL; - list_for_each_entry(wdev, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->beacon_interval) continue; if (wdev->beacon_interval != beacon_int) { @@ -1726,10 +1726,10 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy) { - enum ieee80211_band band; + enum nl80211_band band; unsigned int n_channels = 0; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + for (band = 0; band < NUM_NL80211_BANDS; band++) if (wiphy->bands[band]) n_channels += wiphy->bands[band]->n_channels; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index fd682832a0e3..9f27221c8913 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -25,42 +25,7 @@ int cfg80211_wext_giwname(struct net_device *dev, struct iw_request_info *info, char *name, char *extra) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct ieee80211_supported_band *sband; - bool is_ht = false, is_a = false, is_b = false, is_g = false; - - if (!wdev) - return -EOPNOTSUPP; - - sband = wdev->wiphy->bands[IEEE80211_BAND_5GHZ]; - if (sband) { - is_a = true; - is_ht |= sband->ht_cap.ht_supported; - } - - sband = wdev->wiphy->bands[IEEE80211_BAND_2GHZ]; - if (sband) { - int i; - /* Check for mandatory rates */ - for (i = 0; i < sband->n_bitrates; i++) { - if (sband->bitrates[i].bitrate == 10) - is_b = true; - if (sband->bitrates[i].bitrate == 60) - is_g = true; - } - is_ht |= sband->ht_cap.ht_supported; - } - strcpy(name, "IEEE 802.11"); - if (is_a) - strcat(name, "a"); - if (is_b) - strcat(name, "b"); - if (is_g) - strcat(name, "g"); - if (is_ht) - strcat(name, "n"); - return 0; } EXPORT_WEXT_HANDLER(cfg80211_wext_giwname); @@ -143,7 +108,7 @@ int cfg80211_wext_giwrange(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct iw_range *range = (struct iw_range *) extra; - enum ieee80211_band band; + enum nl80211_band band; int i, c = 0; if (!wdev) @@ -215,7 +180,7 @@ int cfg80211_wext_giwrange(struct net_device *dev, } } - for (band = 0; band < IEEE80211_NUM_BANDS; band ++) { + for (band = 0; band < NUM_NL80211_BANDS; band ++) { struct ieee80211_supported_band *sband; sband = wdev->wiphy->bands[band]; @@ -265,11 +230,11 @@ int cfg80211_wext_freq(struct iw_freq *freq) * -EINVAL for impossible things. */ if (freq->e == 0) { - enum ieee80211_band band = IEEE80211_BAND_2GHZ; + enum nl80211_band band = NL80211_BAND_2GHZ; if (freq->m < 0) return 0; if (freq->m > 14) - band = IEEE80211_BAND_5GHZ; + band = NL80211_BAND_5GHZ; return ieee80211_channel_to_frequency(freq->m, band); } else { int i, div = 1000000; @@ -1245,7 +1210,7 @@ static int cfg80211_wext_siwrate(struct net_device *dev, maxrate = rate->value / 100000; } - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wdev->wiphy->bands[band]; if (sband == NULL) continue; diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index b50ee5d622e1..6250b1cfcde5 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -399,7 +399,10 @@ static int __init wireless_nlevent_init(void) if (err) return err; - return register_netdevice_notifier(&wext_netdev_notifier); + err = register_netdevice_notifier(&wext_netdev_notifier); + if (err) + unregister_pernet_subsys(&wext_pernet_ops); + return err; } subsys_initcall(wireless_nlevent_init); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2cc7af858c6f..d516845e16e3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -809,7 +809,8 @@ static int copy_to_user_state_extra(struct xfrm_state *x, goto out; } if (x->lastused) { - ret = nla_put_u64(skb, XFRMA_LASTUSED, x->lastused); + ret = nla_put_u64_64bit(skb, XFRMA_LASTUSED, x->lastused, + XFRMA_PAD); if (ret) goto out; } @@ -1813,7 +1814,7 @@ static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x) return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) + nla_total_size(replay_size) - + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + + nla_total_size_64bit(sizeof(struct xfrm_lifetime_cur)) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ + nla_total_size(4); /* XFRM_AE_ETHR */ @@ -1848,7 +1849,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct } if (err) goto out_cancel; - err = nla_put(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); + err = nla_put_64bit(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft, + XFRMA_PAD); if (err) goto out_cancel; @@ -2617,7 +2619,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(x->props.extra_flags)); /* Must count x->lastused as it may become non-zero behind our back. */ - l += nla_total_size(sizeof(u64)); + l += nla_total_size_64bit(sizeof(u64)); return l; } |