diff options
-rw-r--r-- | include/net/dst.h | 14 | ||||
-rw-r--r-- | include/net/xfrm.h | 59 | ||||
-rw-r--r-- | include/uapi/linux/if_link.h | 10 | ||||
-rw-r--r-- | include/uapi/linux/xfrm.h | 5 | ||||
-rw-r--r-- | net/core/pktgen.c | 2 | ||||
-rw-r--r-- | net/ipv4/esp4_offload.c | 6 | ||||
-rw-r--r-- | net/ipv6/esp6_offload.c | 6 | ||||
-rw-r--r-- | net/ipv6/xfrm6_mode_ro.c | 2 | ||||
-rw-r--r-- | net/key/af_key.c | 6 | ||||
-rw-r--r-- | net/xfrm/Kconfig | 8 | ||||
-rw-r--r-- | net/xfrm/Makefile | 1 | ||||
-rw-r--r-- | net/xfrm/xfrm_device.c | 19 | ||||
-rw-r--r-- | net/xfrm/xfrm_input.c | 5 | ||||
-rw-r--r-- | net/xfrm/xfrm_interface.c | 975 | ||||
-rw-r--r-- | net/xfrm/xfrm_output.c | 3 | ||||
-rw-r--r-- | net/xfrm/xfrm_policy.c | 314 | ||||
-rw-r--r-- | net/xfrm/xfrm_state.c | 48 | ||||
-rw-r--r-- | net/xfrm/xfrm_user.c | 94 |
18 files changed, 1331 insertions, 246 deletions
diff --git a/include/net/dst.h b/include/net/dst.h index b3219cd8a5a1..7f735e76ca73 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -475,6 +475,14 @@ static inline struct dst_entry *xfrm_lookup(struct net *net, return dst_orig; } +static inline struct dst_entry * +xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, const struct sock *sk, + int flags, u32 if_id) +{ + return dst_orig; +} + static inline struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, @@ -494,6 +502,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags); +struct dst_entry *xfrm_lookup_with_ifid(struct net *net, + struct dst_entry *dst_orig, + const struct flowi *fl, + const struct sock *sk, int flags, + u32 if_id); + struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 557122846e0e..ca820945f30c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -23,6 +23,7 @@ #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/flow.h> +#include <net/gro_cells.h> #include <linux/interrupt.h> @@ -147,6 +148,7 @@ struct xfrm_state { struct xfrm_id id; struct xfrm_selector sel; struct xfrm_mark mark; + u32 if_id; u32 tfcpad; u32 genid; @@ -166,7 +168,7 @@ struct xfrm_state { int header_len; int trailer_len; u32 extra_flags; - u32 output_mark; + struct xfrm_mark smark; } props; struct xfrm_lifetime_cfg lft; @@ -225,7 +227,7 @@ struct xfrm_state { long saved_tmo; /* Last used time */ - unsigned long lastused; + time64_t lastused; struct page_frag xfrag; @@ -292,6 +294,13 @@ struct xfrm_replay { int (*overflow)(struct xfrm_state *x, struct sk_buff *skb); }; +struct xfrm_if_cb { + struct xfrm_if *(*decode_session)(struct sk_buff *skb); +}; + +void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); +void xfrm_if_unregister_cb(void); + struct net_device; struct xfrm_type; struct xfrm_dst; @@ -323,7 +332,6 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c); -void xfrm_policy_cache_flush(void); void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; @@ -574,6 +582,7 @@ struct xfrm_policy { atomic_t genid; u32 priority; u32 index; + u32 if_id; struct xfrm_mark mark; struct xfrm_selector selector; struct xfrm_lifetime_cfg lft; @@ -1037,6 +1046,22 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); +struct xfrm_if_parms { + char name[IFNAMSIZ]; /* name of XFRM device */ + int link; /* ifindex of underlying L2 interface */ + u32 if_id; /* interface identifyer */ +}; + +struct xfrm_if { + struct xfrm_if __rcu *next; /* next interface in list */ + struct net_device *dev; /* virtual device associated with interface */ + struct net_device *phydev; /* physical device */ + struct net *net; /* netns for packet i/o */ + struct xfrm_if_parms p; /* interface parms */ + + struct gro_cells gro_cells; +}; + struct xfrm_offload { /* Output sequence number for replay protection on offloading. */ struct { @@ -1532,8 +1557,8 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr, const struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, - unsigned short family); -struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, + unsigned short family, u32 if_id); +struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, @@ -1690,20 +1715,20 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, void *); void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir, - u32 id, int delete, int *err); +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8, + int dir, u32 id, int delete, int *err); int xfrm_policy_flush(struct net *net, u8 type, bool task_valid); void xfrm_policy_hash_rebuild(struct net *net); u32 xfrm_get_acqseq(void); int verify_spi_info(u8 proto, u32 min, u32 max); int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, - u8 mode, u32 reqid, u8 proto, + u8 mode, u32 reqid, u32 if_id, u8 proto, const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create, unsigned short family); @@ -2012,6 +2037,22 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m) return ret; } +static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x) +{ + struct xfrm_mark *m = &x->props.smark; + + return (m->v & m->m) | (mark & ~m->m); +} + +static inline int xfrm_if_id_put(struct sk_buff *skb, __u32 if_id) +{ + int ret = 0; + + if (if_id) + ret = nla_put_u32(skb, XFRMA_IF_ID, if_id); + return ret; +} + static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x, unsigned int family) { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 01b5069a73a5..553c438cabe3 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -460,6 +460,16 @@ enum { #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) +/* XFRM section */ +enum { + IFLA_XFRM_UNSPEC, + IFLA_XFRM_LINK, + IFLA_XFRM_IF_ID, + __IFLA_XFRM_MAX +}; + +#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1) + enum macsec_validation_type { MACSEC_VALIDATE_DISABLED = 0, MACSEC_VALIDATE_CHECK = 1, diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index e3af2859188b..5f3b9fec7b5f 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -305,9 +305,12 @@ enum xfrm_attr_type_t { XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ XFRMA_PAD, XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */ - XFRMA_OUTPUT_MARK, /* __u32 */ + XFRMA_SET_MARK, /* __u32 */ + XFRMA_SET_MARK_MASK, /* __u32 */ + XFRMA_IF_ID, /* __u32 */ __XFRMA_MAX +#define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ #define XFRMA_MAX (__XFRMA_MAX - 1) }; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 308ed04984de..7f6938405fa1 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2253,7 +2253,7 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET); } else { /* slow path: we dont already have xfrm_state */ - x = xfrm_stateonly_find(pn->net, DUMMY_MARK, + x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0, (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index bbeecd13e534..58834a10c0be 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -135,8 +135,7 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, skb->encap_hdr_csum = 1; - if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || - (x->xso.dev != skb->dev)) + if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); else if (!(features & NETIF_F_HW_ESP_TX_CSUM)) esp_features = features & ~NETIF_F_CSUM_MASK; @@ -179,8 +178,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ if (!xo) return -EINVAL; - if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || - (x->xso.dev != skb->dev)) { + if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) { xo->flags |= CRYPTO_FALLBACK; hw_offload = false; } diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index ddfa533a84e5..6177e2171171 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -162,8 +162,7 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, skb->encap_hdr_csum = 1; - if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || - (x->xso.dev != skb->dev)) + if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); else if (!(features & NETIF_F_HW_ESP_TX_CSUM)) esp_features = features & ~NETIF_F_CSUM_MASK; @@ -207,8 +206,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features if (!xo) return -EINVAL; - if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || - (x->xso.dev != skb->dev)) { + if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) { xo->flags |= CRYPTO_FALLBACK; hw_offload = false; } diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 07d36573f50b..da28e4407b8f 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -55,7 +55,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) __skb_pull(skb, hdr_len); memmove(ipv6_hdr(skb), iph, hdr_len); - x->lastused = get_seconds(); + x->lastused = ktime_get_real_seconds(); return 0; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 5e1d2946ffbf..9d61266526e7 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1383,7 +1383,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ } if (!x) - x = xfrm_find_acq(net, &dummy_mark, mode, reqid, proto, xdaddr, xsaddr, 1, family); + x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; @@ -2414,7 +2414,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa return err; } - xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2663,7 +2663,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index eab952cca7d0..4a9ee2d83158 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -25,6 +25,14 @@ config XFRM_USER If unsure, say Y. +config XFRM_INTERFACE + tristate "Transformation virtual interface" + depends on XFRM && IPV6 + ---help--- + This provides a virtual interface to route IPsec traffic. + + If unsure, say N. + config XFRM_SUB_POLICY bool "Transformation sub policy support" depends on XFRM diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 0bd2465a8c5a..fbc4552d17b8 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o +obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 175941e15a6e..5611b7521020 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -56,7 +56,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur if (skb_is_gso(skb)) { struct net_device *dev = skb->dev; - if (unlikely(!x->xso.offload_handle || (x->xso.dev != dev))) { + if (unlikely(x->xso.dev != dev)) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ @@ -162,7 +162,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, } dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, - x->props.family, x->props.output_mark); + x->props.family, + xfrm_smark_get(0, x)); if (IS_ERR(dst)) return 0; @@ -210,8 +211,8 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) if (!x->type_offload || x->encap) return false; - if ((!dev || (x->xso.offload_handle && (dev == xfrm_dst_path(dst)->dev))) && - (!xdst->child->xfrm && x->type->get_mtu)) { + if ((!dev || (dev == xfrm_dst_path(dst)->dev)) && + (!xdst->child->xfrm && x->type->get_mtu)) { mtu = x->type->get_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) @@ -306,12 +307,6 @@ static int xfrm_dev_register(struct net_device *dev) return xfrm_api_check(dev); } -static int xfrm_dev_unregister(struct net_device *dev) -{ - xfrm_policy_cache_flush(); - return NOTIFY_DONE; -} - static int xfrm_dev_feat_change(struct net_device *dev) { return xfrm_api_check(dev); @@ -322,7 +317,6 @@ static int xfrm_dev_down(struct net_device *dev) if (dev->features & NETIF_F_HW_ESP) xfrm_dev_state_flush(dev_net(dev), dev, true); - xfrm_policy_cache_flush(); return NOTIFY_DONE; } @@ -334,9 +328,6 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void case NETDEV_REGISTER: return xfrm_dev_register(dev); - case NETDEV_UNREGISTER: - return xfrm_dev_unregister(dev); - case NETDEV_FEAT_CHANGE: return xfrm_dev_feat_change(dev); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 352abca2605f..b89c9c7f8c5c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -320,6 +320,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { + secpath_reset(skb); XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } @@ -328,17 +329,21 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_SPI_SKB_CB(skb)->daddroff); do { if (skb->sp->len == XFRM_MAX_DEPTH) { + secpath_reset(skb); XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family); if (x == NULL) { + secpath_reset(skb); XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); goto drop; } + skb->mark = xfrm_smark_get(skb->mark, x); + skb->sp->xvec[skb->sp->len++] = x; lock: diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c new file mode 100644 index 000000000000..31acc6f33d98 --- /dev/null +++ b/net/xfrm/xfrm_interface.c @@ -0,0 +1,975 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * XFRM virtual interface + * + * Copyright (C) 2018 secunet Security Networks AG + * + * Author: + * Steffen Klassert <steffen.klassert@secunet.com> + */ + +#include <linux/module.h> +#include <linux/capability.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/sockios.h> +#include <linux/icmp.h> +#include <linux/if.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_link.h> +#include <linux/if_arp.h> +#include <linux/icmpv6.h> +#include <linux/init.h> +#include <linux/route.h> +#include <linux/rtnetlink.h> +#include <linux/netfilter_ipv6.h> +#include <linux/slab.h> +#include <linux/hash.h> + +#include <linux/uaccess.h> +#include <linux/atomic.h> + +#include <net/icmp.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <linux/etherdevice.h> + +static int xfrmi_dev_init(struct net_device *dev); +static void xfrmi_dev_setup(struct net_device *dev); +static struct rtnl_link_ops xfrmi_link_ops __read_mostly; +static unsigned int xfrmi_net_id __read_mostly; + +struct xfrmi_net { + /* lists for storing interfaces in use */ + struct xfrm_if __rcu *xfrmi[1]; +}; + +#define for_each_xfrmi_rcu(start, xi) \ + for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next)) + +static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) +{ + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); + struct xfrm_if *xi; + + for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { + if (x->if_id == xi->p.if_id && + (xi->dev->flags & IFF_UP)) + return xi; + } + + return NULL; +} + +static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb) +{ + struct xfrmi_net *xfrmn; + int ifindex; + struct xfrm_if *xi; + + if (!skb->dev) + return NULL; + + xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id); + ifindex = skb->dev->ifindex; + + for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { + if (ifindex == xi->dev->ifindex && + (xi->dev->flags & IFF_UP)) + return xi; + } + + return NULL; +} + +static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi) +{ + struct xfrm_if __rcu **xip = &xfrmn->xfrmi[0]; + + rcu_assign_pointer(xi->next , rtnl_dereference(*xip)); + rcu_assign_pointer(*xip, xi); +} + +static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi) +{ + struct xfrm_if __rcu **xip; + struct xfrm_if *iter; + + for (xip = &xfrmn->xfrmi[0]; + (iter = rtnl_dereference(*xip)) != NULL; + xip = &iter->next) { + if (xi == iter) { + rcu_assign_pointer(*xip, xi->next); + break; + } + } +} + +static void xfrmi_dev_free(struct net_device *dev) +{ + free_percpu(dev->tstats); +} + +static int xfrmi_create2(struct net_device *dev) +{ + struct xfrm_if *xi = netdev_priv(dev); + struct net *net = dev_net(dev); + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); + int err; + + dev->rtnl_link_ops = &xfrmi_link_ops; + err = register_netdevice(dev); + if (err < 0) + goto out; + + strcpy(xi->p.name, dev->name); + + dev_hold(dev); + xfrmi_link(xfrmn, xi); + + return 0; + +out: + return err; +} + +static struct xfrm_if *xfrmi_create(struct net *net, struct xfrm_if_parms *p) +{ + struct net_device *dev; + struct xfrm_if *xi; + char name[IFNAMSIZ]; + int err; + + if (p->name[0]) { + strlcpy(name, p->name, IFNAMSIZ); + } else { + err = -EINVAL; + goto failed; + } + + dev = alloc_netdev(sizeof(*xi), name, NET_NAME_UNKNOWN, xfrmi_dev_setup); + if (!dev) { + err = -EAGAIN; + goto failed; + } + + dev_net_set(dev, net); + + xi = netdev_priv(dev); + xi->p = *p; + xi->net = net; + xi->dev = dev; + xi->phydev = dev_get_by_index(net, p->link); + if (!xi->phydev) { + err = -ENODEV; + goto failed_free; + } + + err = xfrmi_create2(dev); + if (err < 0) + goto failed_dev_put; + + return xi; + +failed_dev_put: + dev_put(xi->phydev); +failed_free: + free_netdev(dev); +failed: + return ERR_PTR(err); +} + +static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p, + int create) +{ + struct xfrm_if __rcu **xip; + struct xfrm_if *xi; + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); + + for (xip = &xfrmn->xfrmi[0]; + (xi = rtnl_dereference(*xip)) != NULL; + xip = &xi->next) { + if (xi->p.if_id == p->if_id) { + if (create) + return ERR_PTR(-EEXIST); + + return xi; + } + } + if (!create) + return ERR_PTR(-ENODEV); + return xfrmi_create(net, p); +} + +static void xfrmi_dev_uninit(struct net_device *dev) +{ + struct xfrm_if *xi = netdev_priv(dev); + struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); + + xfrmi_unlink(xfrmn, xi); + dev_put(xi->phydev); + dev_put(dev); +} + +static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) +{ + skb->tstamp = 0; + skb->pkt_type = PACKET_HOST; + skb->skb_iif = 0; + skb->ignore_df = 0; + skb_dst_drop(skb); + nf_reset(skb); + nf_reset_trace(skb); + + if (!xnet) + return; + + ipvs_reset(skb); + secpath_reset(skb); + skb_orphan(skb); + skb->mark = 0; +} + +static int xfrmi_rcv_cb(struct sk_buff *skb, int err) +{ + struct pcpu_sw_netstats *tstats; + struct xfrm_mode *inner_mode; + struct net_device *dev; + struct xfrm_state *x; + struct xfrm_if *xi; + bool xnet; + + if (err && !skb->sp) + return 0; + + x = xfrm_input_state(skb); + + xi = xfrmi_lookup(xs_net(x), x); + if (!xi) + return 1; + + dev = xi->dev; + skb->dev = dev; + + if (err) { + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + + return 0; + } + + xnet = !net_eq(xi->net, dev_net(skb->dev)); + + if (xnet) { + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) { + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMINSTATEMODEERROR); + return -EINVAL; + } + } + + if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, + inner_mode->afinfo->family)) + return -EPERM; + } + + xfrmi_scrub_packet(skb, xnet); + + tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + return 0; +} + +static int +xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) +{ + struct xfrm_if *xi = netdev_priv(dev); + struct net_device_stats *stats = &xi->dev->stats; + struct dst_entry *dst = skb_dst(skb); + unsigned int length = skb->len; + struct net_device *tdev; + struct xfrm_state *x; + int err = -1; + int mtu; + + if (!dst) + goto tx_err_link_failure; + + dst_hold(dst); + dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; + goto tx_err_link_failure; + } + + x = dst->xfrm; + if (!x) + goto tx_err_link_failure; + + if (x->if_id != xi->p.if_id) + goto tx_err_link_failure; + + tdev = dst->dev; + + if (tdev == dev) { + stats->collisions++; + net_warn_ratelimited("%s: Local routing loop detected!\n", + xi->p.name); + goto tx_err_dst_release; + } + + mtu = dst_mtu(dst); + if (!skb->ignore_df && skb->len > mtu) { + skb_dst_update_pmtu(skb, mtu); + + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + } else { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + } + + dst_release(dst); + return -EMSGSIZE; + } + + xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev))); + skb_dst_set(skb, dst); + skb->dev = tdev; + + err = dst_output(xi->net, skb->sk, skb); + if (net_xmit_eval(err) == 0) { + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += length; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; + } + + return 0; +tx_err_link_failure: + stats->tx_carrier_errors++; + dst_link_failure(skb); +tx_err_dst_release: + dst_release(dst); + return err; +} + +static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct xfrm_if *xi = netdev_priv(dev); + struct net_device_stats *stats = &xi->dev->stats; + struct flowi fl; + int ret; + + memset(&fl, 0, sizeof(fl)); + + switch (skb->protocol) { + case htons(ETH_P_IPV6): + xfrm_decode_session(skb, &fl, AF_INET6); + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + break; + case htons(ETH_P_IP): + xfrm_decode_session(skb, &fl, AF_INET); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + break; + default: + goto tx_err; + } + + fl.flowi_oif = xi->phydev->ifindex; + + ret = xfrmi_xmit2(skb, dev, &fl); + if (ret < 0) + goto tx_err; + + return NETDEV_TX_OK; + +tx_err: + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int xfrmi4_err(struct sk_buff *skb, u32 info) +{ + const struct iphdr *iph = (const struct iphdr *)skb->data; + struct net *net = dev_net(skb->dev); + int protocol = iph->protocol; + struct ip_comp_hdr *ipch; + struct ip_esp_hdr *esph; + struct ip_auth_hdr *ah ; + struct xfrm_state *x; + struct xfrm_if *xi; + __be32 spi; + + switch (protocol) { + case IPPROTO_ESP: + esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); + spi = esph->spi; + break; + case IPPROTO_AH: + ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); + spi = ah->spi; + break; + case IPPROTO_COMP: + ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); + spi = htonl(ntohs(ipch->cpi)); + break; + default: + return 0; + } + + switch (icmp_hdr(skb)->type) { + case ICMP_DEST_UNREACH: + if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + return 0; + case ICMP_REDIRECT: + break; + default: + return 0; + } + + x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + spi, protocol, AF_INET); + if (!x) + return 0; + + xi = xfrmi_lookup(net, x); + if (!xi) { + xfrm_state_put(x); + return -1; + } + + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0); + else + ipv4_redirect(skb, net, 0, 0, protocol, 0); + xfrm_state_put(x); + + return 0; +} + +static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; + struct net *net = dev_net(skb->dev); + int protocol = iph->nexthdr; + struct ip_comp_hdr *ipch; + struct ip_esp_hdr *esph; + struct ip_auth_hdr *ah; + struct xfrm_state *x; + struct xfrm_if *xi; + __be32 spi; + + switch (protocol) { + case IPPROTO_ESP: + esph = (struct ip_esp_hdr *)(skb->data + offset); + spi = esph->spi; + break; + case IPPROTO_AH: + ah = (struct ip_auth_hdr *)(skb->data + offset); + spi = ah->spi; + break; + case IPPROTO_COMP: + ipch = (struct ip_comp_hdr *)(skb->data + offset); + spi = htonl(ntohs(ipch->cpi)); + break; + default: + return 0; + } + + if (type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) + return 0; + + x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + spi, protocol, AF_INET6); + if (!x) + return 0; + + xi = xfrmi_lookup(net, x); + if (!xi) { + xfrm_state_put(x); + return -1; + } + + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); + else + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); + xfrm_state_put(x); + + return 0; +} + +static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p) +{ + if (xi->p.link != p->link) + return -EINVAL; + + xi->p.if_id = p->if_id; + + return 0; +} + +static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p) +{ + struct net *net = dev_net(xi->dev); + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); + int err; + + xfrmi_unlink(xfrmn, xi); + synchronize_net(); + err = xfrmi_change(xi, p); + xfrmi_link(xfrmn, xi); + netdev_state_change(xi->dev); + return err; +} + +static void xfrmi_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) +{ + int cpu; + + if (!dev->tstats) + return; + + for_each_possible_cpu(cpu) { + struct pcpu_sw_netstats *stats; + struct pcpu_sw_netstats tmp; + int start; + + stats = per_cpu_ptr(dev->tstats, cpu); + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + tmp.rx_packets = stats->rx_packets; + tmp.rx_bytes = stats->rx_bytes; + tmp.tx_packets = stats->tx_packets; + tmp.tx_bytes = stats->tx_bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + s->rx_packets += tmp.rx_packets; + s->rx_bytes += tmp.rx_bytes; + s->tx_packets += tmp.tx_packets; + s->tx_bytes += tmp.tx_bytes; + } + + s->rx_dropped = dev->stats.rx_dropped; + s->tx_dropped = dev->stats.tx_dropped; +} + +static int xfrmi_get_iflink(const struct net_device *dev) +{ + struct xfrm_if *xi = netdev_priv(dev); + + return xi->phydev->ifindex; +} + + +static const struct net_device_ops xfrmi_netdev_ops = { + .ndo_init = xfrmi_dev_init, + .ndo_uninit = xfrmi_dev_uninit, + .ndo_start_xmit = xfrmi_xmit, + .ndo_get_stats64 = xfrmi_get_stats64, + .ndo_get_iflink = xfrmi_get_iflink, +}; + +static void xfrmi_dev_setup(struct net_device *dev) +{ + dev->netdev_ops = &xfrmi_netdev_ops; + dev->type = ARPHRD_NONE; + dev->hard_header_len = ETH_HLEN; + dev->min_header_len = ETH_HLEN; + dev->mtu = ETH_DATA_LEN; + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = ETH_DATA_LEN; + dev->addr_len = ETH_ALEN; + dev->flags = IFF_NOARP; + dev->needs_free_netdev = true; + dev->priv_destructor = xfrmi_dev_free; + netif_keep_dst(dev); +} + +static int xfrmi_dev_init(struct net_device *dev) +{ + struct xfrm_if *xi = netdev_priv(dev); + struct net_device *phydev = xi->phydev; + int err; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + err = gro_cells_init(&xi->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + + dev->features |= NETIF_F_LLTX; + + dev->needed_headroom = phydev->needed_headroom; + dev->needed_tailroom = phydev->needed_tailroom; + + if (is_zero_ether_addr(dev->dev_addr)) + eth_hw_addr_inherit(dev, phydev); + if (is_zero_ether_addr(dev->broadcast)) + memcpy(dev->broadcast, phydev->broadcast, dev->addr_len); + + return 0; +} + +static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + return 0; +} + +static void xfrmi_netlink_parms(struct nlattr *data[], + struct xfrm_if_parms *parms) +{ + memset(parms, 0, sizeof(*parms)); + + if (!data) + return; + + if (data[IFLA_XFRM_LINK]) + parms->link = nla_get_u32(data[IFLA_XFRM_LINK]); + + if (data[IFLA_XFRM_IF_ID]) + parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]); +} + +static int xfrmi_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct net *net = dev_net(dev); + struct xfrm_if_parms *p; + struct xfrm_if *xi; + + xi = netdev_priv(dev); + p = &xi->p; + + xfrmi_netlink_parms(data, p); + + if (!tb[IFLA_IFNAME]) + return -EINVAL; + + nla_strlcpy(p->name, tb[IFLA_IFNAME], IFNAMSIZ); + + xi = xfrmi_locate(net, p, 1); + return PTR_ERR_OR_ZERO(xi); +} + +static void xfrmi_dellink(struct net_device *dev, struct list_head *head) +{ + unregister_netdevice_queue(dev, head); +} + +static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct xfrm_if *xi = netdev_priv(dev); + struct net *net = dev_net(dev); + + xfrmi_netlink_parms(data, &xi->p); + + xi = xfrmi_locate(net, &xi->p, 0); + + if (IS_ERR_OR_NULL(xi)) { + xi = netdev_priv(dev); + } else { + if (xi->dev != dev) + return -EEXIST; + } + + return xfrmi_update(xi, &xi->p); +} + +static size_t xfrmi_get_size(const struct net_device *dev) +{ + return + /* IFLA_XFRM_LINK */ + nla_total_size(4) + + /* IFLA_XFRM_IF_ID */ + nla_total_size(4) + + 0; +} + +static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct xfrm_if *xi = netdev_priv(dev); + struct xfrm_if_parms *parm = &xi->p; + + if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) || + nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +struct net *xfrmi_get_link_net(const struct net_device *dev) +{ + struct xfrm_if *xi = netdev_priv(dev); + + return dev_net(xi->phydev); +} + +static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { + [IFLA_XFRM_LINK] = { .type = NLA_U32 }, + [IFLA_XFRM_IF_ID] = { .type = NLA_U32 }, +}; + +static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { + .kind = "xfrm", + .maxtype = IFLA_XFRM_MAX, + .policy = xfrmi_policy, + .priv_size = sizeof(struct xfrm_if), + .setup = xfrmi_dev_setup, + .validate = xfrmi_validate, + .newlink = xfrmi_newlink, + .dellink = xfrmi_dellink, + .changelink = xfrmi_changelink, + .get_size = xfrmi_get_size, + .fill_info = xfrmi_fill_info, + .get_link_net = xfrmi_get_link_net, +}; + +static void __net_exit xfrmi_destroy_interfaces(struct xfrmi_net *xfrmn) +{ + struct xfrm_if *xi; + LIST_HEAD(list); + + xi = rtnl_dereference(xfrmn->xfrmi[0]); + if (!xi) + return; + + unregister_netdevice_queue(xi->dev, &list); + unregister_netdevice_many(&list); +} + +static int __net_init xfrmi_init_net(struct net *net) +{ + return 0; +} + +static void __net_exit xfrmi_exit_net(struct net *net) +{ + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); + + rtnl_lock(); + xfrmi_destroy_interfaces(xfrmn); + rtnl_unlock(); +} + +static struct pernet_operations xfrmi_net_ops = { + .init = xfrmi_init_net, + .exit = xfrmi_exit_net, + .id = &xfrmi_net_id, + .size = sizeof(struct xfrmi_net), +}; + +static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { + .handler = xfrm6_rcv, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi6_err, + .priority = 10, +}; + +static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = { + .handler = xfrm6_rcv, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi6_err, + .priority = 10, +}; + +static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = { + .handler = xfrm6_rcv, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi6_err, + .priority = 10, +}; + +static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = { + .handler = xfrm4_rcv, + .input_handler = xfrm_input, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi4_err, + .priority = 10, +}; + +static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = { + .handler = xfrm4_rcv, + .input_handler = xfrm_input, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi4_err, + .priority = 10, +}; + +static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = { + .handler = xfrm4_rcv, + .input_handler = xfrm_input, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi4_err, + .priority = 10, +}; + +static int __init xfrmi4_init(void) +{ + int err; + + err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP); + if (err < 0) + goto xfrm_proto_esp_failed; + err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH); + if (err < 0) + goto xfrm_proto_ah_failed; + err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); + if (err < 0) + goto xfrm_proto_comp_failed; + + return 0; + +xfrm_proto_comp_failed: + xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH); +xfrm_proto_ah_failed: + xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP); +xfrm_proto_esp_failed: + return err; +} + +static void xfrmi4_fini(void) +{ + xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); + xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH); + xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP); +} + +static int __init xfrmi6_init(void) +{ + int err; + + err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP); + if (err < 0) + goto xfrm_proto_esp_failed; + err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH); + if (err < 0) + goto xfrm_proto_ah_failed; + err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); + if (err < 0) + goto xfrm_proto_comp_failed; + + return 0; + +xfrm_proto_comp_failed: + xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH); +xfrm_proto_ah_failed: + xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP); +xfrm_proto_esp_failed: + return err; +} + +static void xfrmi6_fini(void) +{ + xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); + xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH); + xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP); +} + +static const struct xfrm_if_cb xfrm_if_cb = { + .decode_session = xfrmi_decode_session, +}; + +static int __init xfrmi_init(void) +{ + const char *msg; + int err; + + pr_info("IPsec XFRM device driver\n"); + + msg = "tunnel device"; + err = register_pernet_device(&xfrmi_net_ops); + if (err < 0) + goto pernet_dev_failed; + + msg = "xfrm4 protocols"; + err = xfrmi4_init(); + if (err < 0) + goto xfrmi4_failed; + + msg = "xfrm6 protocols"; + err = xfrmi6_init(); + if (err < 0) + goto xfrmi6_failed; + + + msg = "netlink interface"; + err = rtnl_link_register(&xfrmi_link_ops); + if (err < 0) + goto rtnl_link_failed; + + xfrm_if_register_cb(&xfrm_if_cb); + + return err; + +rtnl_link_failed: + xfrmi6_fini(); +xfrmi6_failed: + xfrmi4_fini(); +xfrmi4_failed: + unregister_pernet_device(&xfrmi_net_ops); +pernet_dev_failed: + pr_err("xfrmi init: failed to register %s\n", msg); + return err; +} + +static void __exit xfrmi_fini(void) +{ + xfrm_if_unregister_cb(); + rtnl_link_unregister(&xfrmi_link_ops); + xfrmi4_fini(); + xfrmi6_fini(); + unregister_pernet_device(&xfrmi_net_ops); +} + +module_init(xfrmi_init); +module_exit(xfrmi_fini); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("xfrm"); +MODULE_ALIAS_NETDEV("xfrm0"); +MODULE_AUTHOR("Steffen Klassert"); +MODULE_DESCRIPTION("XFRM virtual interface"); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 89b178a78dc7..45ba07ab3e4f 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -66,8 +66,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto error_nolock; } - if (x->props.output_mark) - skb->mark = x->props.output_mark; + skb->mark = xfrm_smark_get(skb->mark, x); err = x->outer_mode->output(x, skb); if (err) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5f48251c1319..69f06f879091 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -45,8 +45,9 @@ struct xfrm_flo { u8 flags; }; -static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); -static struct work_struct *xfrm_pcpu_work __read_mostly; +static DEFINE_SPINLOCK(xfrm_if_cb_lock); +static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly; + static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; @@ -119,6 +120,12 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa return afinfo; } +/* Called with rcu_read_lock(). */ +static const struct xfrm_if_cb *xfrm_if_get_cb(void) +{ + return rcu_dereference(xfrm_if_cb); +} + struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, @@ -182,8 +189,8 @@ static inline unsigned long make_jiffies(long secs) static void xfrm_policy_timer(struct timer_list *t) { struct xfrm_policy *xp = from_timer(xp, t, timer); - unsigned long now = get_seconds(); - long next = LONG_MAX; + time64_t now = ktime_get_real_seconds(); + time64_t next = TIME64_MAX; int warn = 0; int dir; @@ -195,7 +202,7 @@ static void xfrm_policy_timer(struct timer_list *t) dir = xfrm_policy_id2dir(xp->index); if (xp->lft.hard_add_expires_seconds) { - long tmo = xp->lft.hard_add_expires_seconds + + time64_t tmo = xp->lft.hard_add_expires_seconds + xp->curlft.add_time - now; if (tmo <= 0) goto expired; @@ -203,7 +210,7 @@ static void xfrm_policy_timer(struct timer_list *t) next = tmo; } if (xp->lft.hard_use_expires_seconds) { - long tmo = xp->lft.hard_use_expires_seconds + + time64_t tmo = xp->lft.hard_use_expires_seconds + (xp->curlft.use_time ? : xp->curlft.add_time) - now; if (tmo <= 0) goto expired; @@ -211,7 +218,7 @@ static void xfrm_policy_timer(struct timer_list *t) next = tmo; } if (xp->lft.soft_add_expires_seconds) { - long tmo = xp->lft.soft_add_expires_seconds + + time64_t tmo = xp->lft.soft_add_expires_seconds + xp->curlft.add_time - now; if (tmo <= 0) { warn = 1; @@ -221,7 +228,7 @@ static void xfrm_policy_timer(struct timer_list *t) next = tmo; } if (xp->lft.soft_use_expires_seconds) { - long tmo = xp->lft.soft_use_expires_seconds + + time64_t tmo = xp->lft.soft_use_expires_seconds + (xp->curlft.use_time ? : xp->curlft.add_time) - now; if (tmo <= 0) { warn = 1; @@ -233,7 +240,7 @@ static void xfrm_policy_timer(struct timer_list *t) if (warn) km_policy_expired(xp, dir, 0, 0); - if (next != LONG_MAX && + if (next != TIME64_MAX && !mod_timer(&xp->timer, jiffies + make_jiffies(next))) xfrm_pol_hold(xp); @@ -747,6 +754,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) newpos = NULL; hlist_for_each_entry(pol, chain, bydst) { if (pol->type == policy->type && + pol->if_id == policy->if_id && !selector_cmp(&pol->selector, &policy->selector) && xfrm_policy_mark_match(policy, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && @@ -783,7 +791,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index); hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); - policy->curlft.add_time = get_seconds(); + policy->curlft.add_time = ktime_get_real_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); @@ -798,8 +806,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, - int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, + u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) { @@ -812,6 +821,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, ret = NULL; hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && + pol->if_id == if_id && (mark & pol->mark.m) == pol->mark.v && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) { @@ -837,8 +847,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, - int dir, u32 id, int delete, int *err) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, + u8 type, int dir, u32 id, int delete, + int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; @@ -853,6 +864,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, ret = NULL; hlist_for_each_entry(pol, chain, byidx) { if (pol->type == type && pol->index == id && + pol->if_id == if_id && (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); if (delete) { @@ -1056,13 +1068,14 @@ EXPORT_SYMBOL(xfrm_policy_walk_done); */ static int xfrm_policy_match(const struct xfrm_policy *pol, const struct flowi *fl, - u8 type, u16 family, int dir) + u8 type, u16 family, int dir, u32 if_id) { const struct xfrm_selector *sel = &pol->selector; int ret = -ESRCH; bool match; if (pol->family != family || + pol->if_id != if_id || (fl->flowi_mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; @@ -1077,7 +1090,8 @@ static int xfrm_policy_match(const struct xfrm_policy *pol, static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, const struct flowi *fl, - u16 family, u8 dir) + u16 family, u8 dir, + u32 if_id) { int err; struct xfrm_policy *pol, *ret; @@ -1101,7 +1115,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, priority = ~0U; ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { - err = xfrm_policy_match(pol, fl, type, family, dir); + err = xfrm_policy_match(pol, fl, type, family, dir, if_id); if (err) { if (err == -ESRCH) continue; @@ -1120,7 +1134,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if ((pol->priority >= priority) && ret) break; - err = xfrm_policy_match(pol, fl, type, family, dir); + err = xfrm_policy_match(pol, fl, type, family, dir, if_id); if (err) { if (err == -ESRCH) continue; @@ -1145,21 +1159,25 @@ fail: return ret; } -static struct xfrm_policy * -xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) +static struct xfrm_policy *xfrm_policy_lookup(struct net *net, + const struct flowi *fl, + u16 family, u8 dir, u32 if_id) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; - pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, + dir, if_id); if (pol != NULL) return pol; #endif - return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); + return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, + dir, if_id); } static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, - const struct flowi *fl, u16 family) + const struct flowi *fl, + u16 family, u32 if_id) { struct xfrm_policy *pol; @@ -1177,7 +1195,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, match = xfrm_selector_match(&pol->selector, fl, family); if (match) { - if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { + if ((sk->sk_mark & pol->mark.m) != pol->mark.v || + pol->if_id != if_id) { pol = NULL; goto out; } @@ -1268,7 +1287,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) old_pol = rcu_dereference_protected(sk->sk_policy[dir], lockdep_is_held(&net->xfrm.xfrm_policy_lock)); if (pol) { - pol->curlft.add_time = get_seconds(); + pol->curlft.add_time = ktime_get_real_seconds(); pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); xfrm_sk_policy_link(pol, dir); } @@ -1305,6 +1324,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->lft = old->lft; newp->curlft = old->curlft; newp->mark = old->mark; + newp->if_id = old->if_id; newp->action = old->action; newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; @@ -1390,7 +1410,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, } } - x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); + x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, + family, policy->if_id); if (x && x->km.state == XFRM_STATE_VALID) { xfrm[nx++] = x; @@ -1607,10 +1628,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_copy_metrics(dst1, dst); if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { + __u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); + family = xfrm[i]->props.family; dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, - &saddr, &daddr, family, - xfrm[i]->props.output_mark); + &saddr, &daddr, family, mark); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; @@ -1692,7 +1714,8 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), XFRM_POLICY_TYPE_MAIN, fl, family, - XFRM_POLICY_OUT); + XFRM_POLICY_OUT, + pols[0]->if_id); if (pols[1]) { if (IS_ERR(pols[1])) { xfrm_pols_put(pols, *num_pols); @@ -1714,108 +1737,6 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, } -static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old) -{ - this_cpu_write(xfrm_last_dst, xdst); - if (old) - dst_release(&old->u.dst); -} - -static void __xfrm_pcpu_work_fn(void) -{ - struct xfrm_dst *old; - - old = this_cpu_read(xfrm_last_dst); - if (old && !xfrm_bundle_ok(old)) - xfrm_last_dst_update(NULL, old); -} - -static void xfrm_pcpu_work_fn(struct work_struct *work) -{ - local_bh_disable(); - rcu_read_lock(); - __xfrm_pcpu_work_fn(); - rcu_read_unlock(); - local_bh_enable(); -} - -void xfrm_policy_cache_flush(void) -{ - struct xfrm_dst *old; - bool found = false; - int cpu; - - might_sleep(); - - local_bh_disable(); - rcu_read_lock(); - for_each_possible_cpu(cpu) { - old = per_cpu(xfrm_last_dst, cpu); - if (old && !xfrm_bundle_ok(old)) { - if (smp_processor_id() == cpu) { - __xfrm_pcpu_work_fn(); - continue; - } - found = true; - break; - } - } - - rcu_read_unlock(); - local_bh_enable(); - - if (!found) - return; - - get_online_cpus(); - - for_each_possible_cpu(cpu) { - bool bundle_release; - - rcu_read_lock(); - old = per_cpu(xfrm_last_dst, cpu); - bundle_release = old && !xfrm_bundle_ok(old); - rcu_read_unlock(); - - if (!bundle_release) - continue; - - if (cpu_online(cpu)) { - schedule_work_on(cpu, &xfrm_pcpu_work[cpu]); - continue; - } - - rcu_read_lock(); - old = per_cpu(xfrm_last_dst, cpu); - if (old && !xfrm_bundle_ok(old)) { - per_cpu(xfrm_last_dst, cpu) = NULL; - dst_release(&old->u.dst); - } - rcu_read_unlock(); - } - - put_online_cpus(); -} - -static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst, - struct xfrm_state * const xfrm[], - int num) -{ - const struct dst_entry *dst = &xdst->u.dst; - int i; - - if (xdst->num_xfrms != num) - return false; - - for (i = 0; i < num; i++) { - if (!dst || dst->xfrm != xfrm[i]) - return false; - dst = xfrm_dst_child(dst); - } - - return xfrm_bundle_ok(xdst); -} - static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, const struct flowi *fl, u16 family, @@ -1824,34 +1745,21 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct xfrm_dst *bundle[XFRM_MAX_DEPTH]; - struct xfrm_dst *xdst, *old; + struct xfrm_dst *xdst; struct dst_entry *dst; int err; /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); if (err <= 0) { - if (err != 0 && err != -EAGAIN) + if (err == 0) + return NULL; + + if (err != -EAGAIN) XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); return ERR_PTR(err); } - xdst = this_cpu_read(xfrm_last_dst); - if (xdst && - xdst->u.dst.dev == dst_orig->dev && - xdst->num_pols == num_pols && - memcmp(xdst->pols, pols, - sizeof(struct xfrm_policy *) * num_pols) == 0 && - xfrm_xdst_can_reuse(xdst, xfrm, err)) { - dst_hold(&xdst->u.dst); - xfrm_pols_put(pols, num_pols); - while (err > 0) - xfrm_state_put(xfrm[--err]); - return xdst; - } - - old = xdst; - dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig); if (IS_ERR(dst)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); @@ -1864,9 +1772,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); - atomic_set(&xdst->u.dst.__refcnt, 2); - xfrm_last_dst_update(xdst, old); - return xdst; } @@ -2047,8 +1952,10 @@ free_dst: goto out; } -static struct xfrm_dst * -xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo) +static struct xfrm_dst *xfrm_bundle_lookup(struct net *net, + const struct flowi *fl, + u16 family, u8 dir, + struct xfrm_flo *xflo, u32 if_id) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols = 0, num_xfrms = 0, err; @@ -2057,7 +1964,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, /* Resolve policies to use if we couldn't get them from * previous cache entry */ num_pols = 1; - pols[0] = xfrm_policy_lookup(net, fl, family, dir); + pols[0] = xfrm_policy_lookup(net, fl, family, dir, if_id); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -2067,13 +1974,15 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, if (num_xfrms <= 0) goto make_dummy_bundle; - local_bh_disable(); xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); - local_bh_enable(); - if (IS_ERR(xdst)) { err = PTR_ERR(xdst); + if (err == -EREMOTE) { + xfrm_pols_put(pols, num_pols); + return NULL; + } + if (err != -EAGAIN) goto error; goto make_dummy_bundle; @@ -2123,14 +2032,19 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, return ret; } -/* Main function: finds/creates a bundle for given flow. +/* Finds/creates a bundle for given flow and if_id * * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. + * + * xfrm_lookup uses an if_id of 0 by default, and is provided for + * compatibility */ -struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, - const struct flowi *fl, - const struct sock *sk, int flags) +struct dst_entry *xfrm_lookup_with_ifid(struct net *net, + struct dst_entry *dst_orig, + const struct flowi *fl, + const struct sock *sk, + int flags, u32 if_id) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_dst *xdst; @@ -2146,7 +2060,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, sk = sk_const_to_full_sk(sk); if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { num_pols = 1; - pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family); + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family, + if_id); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -2158,15 +2073,16 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, goto no_transform; } - local_bh_disable(); xdst = xfrm_resolve_and_create_bundle( pols, num_pols, fl, family, dst_orig); - local_bh_enable(); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); err = PTR_ERR(xdst); + if (err == -EREMOTE) + goto nopol; + goto dropdst; } else if (xdst == NULL) { num_xfrms = 0; @@ -2189,7 +2105,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo); + xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id); if (xdst == NULL) goto nopol; if (IS_ERR(xdst)) { @@ -2234,7 +2150,7 @@ no_transform: } for (i = 0; i < num_pols; i++) - pols[i]->curlft.use_time = get_seconds(); + pols[i]->curlft.use_time = ktime_get_real_seconds(); if (num_xfrms < 0) { /* Prohibit the flow */ @@ -2270,6 +2186,19 @@ dropdst: xfrm_pols_put(pols, drop_pols); return ERR_PTR(err); } +EXPORT_SYMBOL(xfrm_lookup_with_ifid); + +/* Main function: finds/creates a bundle for given flow. + * + * At the moment we eat a raw IP route. Mostly to speed up lookups + * on interfaces with disabled IPsec. + */ +struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, const struct sock *sk, + int flags) +{ + return xfrm_lookup_with_ifid(net, dst_orig, fl, sk, flags, 0); +} EXPORT_SYMBOL(xfrm_lookup); /* Callers of xfrm_lookup_route() must ensure a call to dst_output(). @@ -2365,6 +2294,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, return -EAFNOSUPPORT; afinfo->decode_session(skb, fl, reverse); + err = security_xfrm_decode_session(skb, &fl->flowi_secid); rcu_read_unlock(); return err; @@ -2395,6 +2325,19 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, int reverse; struct flowi fl; int xerr_idx = -1; + const struct xfrm_if_cb *ifcb; + struct xfrm_if *xi; + u32 if_id = 0; + + rcu_read_lock(); + ifcb = xfrm_if_get_cb(); + + if (ifcb) { + xi = ifcb->decode_session(skb); + if (xi) + if_id = xi->p.if_id; + } + rcu_read_unlock(); reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; @@ -2422,7 +2365,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; sk = sk_to_full_sk(sk); if (sk && sk->sk_policy[dir]) { - pol = xfrm_sk_policy_lookup(sk, dir, &fl, family); + pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; @@ -2430,7 +2373,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } if (!pol) - pol = xfrm_policy_lookup(net, &fl, family, dir); + pol = xfrm_policy_lookup(net, &fl, family, dir, if_id); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); @@ -2446,7 +2389,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, return 1; } - pol->curlft.use_time = get_seconds(); + pol->curlft.use_time = ktime_get_real_seconds(); pols[0] = pol; npols++; @@ -2454,13 +2397,13 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, &fl, family, - XFRM_POLICY_IN); + XFRM_POLICY_IN, if_id); if (pols[1]) { if (IS_ERR(pols[1])) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } - pols[1]->curlft.use_time = get_seconds(); + pols[1]->curlft.use_time = ktime_get_real_seconds(); npols++; } } @@ -2819,6 +2762,21 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); +void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb) +{ + spin_lock(&xfrm_if_cb_lock); + rcu_assign_pointer(xfrm_if_cb, ifcb); + spin_unlock(&xfrm_if_cb_lock); +} +EXPORT_SYMBOL(xfrm_if_register_cb); + +void xfrm_if_unregister_cb(void) +{ + RCU_INIT_POINTER(xfrm_if_cb, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL(xfrm_if_unregister_cb); + #ifdef CONFIG_XFRM_STATISTICS static int __net_init xfrm_statistics_init(struct net *net) { @@ -2986,19 +2944,13 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { - int i; - - xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work), - GFP_KERNEL); - BUG_ON(!xfrm_pcpu_work); - - for (i = 0; i < NR_CPUS; i++) - INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn); - register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); + + RCU_INIT_POINTER(xfrm_if_cb, NULL); + synchronize_rcu(); } #ifdef CONFIG_AUDITSYSCALL diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8308281f3253..b669262682c9 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -475,8 +475,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) { struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); - unsigned long now = get_seconds(); - long next = LONG_MAX; + time64_t now = ktime_get_real_seconds(); + time64_t next = TIME64_MAX; int warn = 0; int err = 0; @@ -537,7 +537,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) if (warn) km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX) { + if (next != TIME64_MAX) { tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL); } @@ -577,7 +577,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_BOOTTIME, HRTIMER_MODE_ABS); timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0); - x->curlft.add_time = get_seconds(); + x->curlft.add_time = ktime_get_real_seconds(); x->lft.soft_byte_limit = XFRM_INF; x->lft.soft_packet_limit = XFRM_INF; x->lft.hard_byte_limit = XFRM_INF; @@ -735,10 +735,9 @@ restart: } out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); - if (cnt) { + if (cnt) err = 0; - xfrm_policy_cache_flush(); - } + return err; } EXPORT_SYMBOL(xfrm_state_flush); @@ -931,7 +930,7 @@ struct xfrm_state * xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, const struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, - unsigned short family) + unsigned short family, u32 if_id) { static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); @@ -955,6 +954,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && + x->if_id == if_id && !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && @@ -971,6 +971,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && + x->if_id == if_id && !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_addr_equal(&x->id.daddr, daddr, encap_family) && tmpl->mode == x->props.mode && @@ -1010,6 +1011,7 @@ found: * to current session. */ xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); + x->if_id = if_id; error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid); if (error) { @@ -1067,7 +1069,7 @@ out: } struct xfrm_state * -xfrm_stateonly_find(struct net *net, u32 mark, +xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid) { @@ -1080,6 +1082,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && + x->if_id == if_id && !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && mode == x->props.mode && @@ -1160,11 +1163,13 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) struct xfrm_state *x; unsigned int h; u32 mark = xnew->mark.v & xnew->mark.m; + u32 if_id = xnew->if_id; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && + x->if_id == if_id && (mark & x->mark.m) == x->mark.v && xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) && xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family)) @@ -1187,7 +1192,7 @@ EXPORT_SYMBOL(xfrm_state_insert); static struct xfrm_state *__find_acq_core(struct net *net, const struct xfrm_mark *m, unsigned short family, u8 mode, - u32 reqid, u8 proto, + u32 reqid, u32 if_id, u8 proto, const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create) @@ -1242,6 +1247,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; + x->if_id = if_id; x->mark.v = m->v; x->mark.m = m->m; x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; @@ -1296,7 +1302,7 @@ int xfrm_state_add(struct xfrm_state *x) if (use_spi && !x1) x1 = __find_acq_core(net, &x->mark, family, x->props.mode, - x->props.reqid, x->id.proto, + x->props.reqid, x->if_id, x->id.proto, &x->id.daddr, &x->props.saddr, 0); __xfrm_state_bump_genids(x); @@ -1395,6 +1401,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->props.flags = orig->props.flags; x->props.extra_flags = orig->props.extra_flags; + x->if_id = orig->if_id; x->tfcpad = orig->tfcpad; x->replay_maxdiff = orig->replay_maxdiff; x->replay_maxage = orig->replay_maxage; @@ -1554,6 +1561,19 @@ out: if (x1->curlft.use_time) xfrm_state_check_expire(x1); + if (x->props.smark.m || x->props.smark.v || x->if_id) { + spin_lock_bh(&net->xfrm.xfrm_state_lock); + + if (x->props.smark.m || x->props.smark.v) + x1->props.smark = x->props.smark; + + if (x->if_id) + x1->if_id = x->if_id; + + __xfrm_state_bump_genids(x1); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + } + err = 0; x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); @@ -1571,7 +1591,7 @@ EXPORT_SYMBOL(xfrm_state_update); int xfrm_state_check_expire(struct xfrm_state *x) { if (!x->curlft.use_time) - x->curlft.use_time = get_seconds(); + x->curlft.use_time = ktime_get_real_seconds(); if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { @@ -1619,13 +1639,13 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, - u8 proto, const xfrm_address_t *daddr, + u32 if_id, u8 proto, const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&net->xfrm.xfrm_state_lock); - x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create); spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 09cceab450b8..5553724b5fcc 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -527,6 +527,19 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, x->replay_maxdiff = nla_get_u32(rt); } +static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m) +{ + if (attrs[XFRMA_SET_MARK]) { + m->v = nla_get_u32(attrs[XFRMA_SET_MARK]); + if (attrs[XFRMA_SET_MARK_MASK]) + m->m = nla_get_u32(attrs[XFRMA_SET_MARK_MASK]); + else + m->m = 0xffffffff; + } else { + m->v = m->m = 0; + } +} + static struct xfrm_state *xfrm_state_construct(struct net *net, struct xfrm_usersa_info *p, struct nlattr **attrs, @@ -579,8 +592,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, xfrm_mark_get(attrs, &x->mark); - if (attrs[XFRMA_OUTPUT_MARK]) - x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]); + xfrm_smark_init(attrs, &x->props.smark); + + if (attrs[XFRMA_IF_ID]) + x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]); if (err) @@ -824,6 +839,18 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) return 0; } +static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m) +{ + int ret = 0; + + if (m->v | m->m) { + ret = nla_put_u32(skb, XFRMA_SET_MARK, m->v); + if (!ret) + ret = nla_put_u32(skb, XFRMA_SET_MARK_MASK, m->m); + } + return ret; +} + /* Don't change this without updating xfrm_sa_len! */ static int copy_to_user_state_extra(struct xfrm_state *x, struct xfrm_usersa_info *p, @@ -887,6 +914,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x, ret = xfrm_mark_put(skb, &x->mark); if (ret) goto out; + + ret = xfrm_smark_put(skb, &x->props.smark); + if (ret) + goto out; + if (x->replay_esn) ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL, xfrm_replay_state_esn_len(x->replay_esn), @@ -900,8 +932,8 @@ static int copy_to_user_state_extra(struct xfrm_state *x, ret = copy_user_offload(&x->xso, skb); if (ret) goto out; - if (x->props.output_mark) { - ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark); + if (x->if_id) { + ret = nla_put_u32(skb, XFRMA_IF_ID, x->if_id); if (ret) goto out; } @@ -1253,6 +1285,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u32 mark; struct xfrm_mark m; + u32 if_id = 0; p = nlmsg_data(nlh); err = verify_spi_info(p->info.id.proto, p->min, p->max); @@ -1265,6 +1298,10 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, x = NULL; mark = xfrm_mark_get(attrs, &m); + + if (attrs[XFRMA_IF_ID]) + if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (p->info.seq) { x = xfrm_find_acq_byseq(net, mark, p->info.seq); if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) { @@ -1275,7 +1312,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, if (!x) x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid, - p->info.id.proto, daddr, + if_id, p->info.id.proto, daddr, &p->info.saddr, 1, family); err = -ENOENT; @@ -1563,6 +1600,9 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us xfrm_mark_get(attrs, &xp->mark); + if (attrs[XFRMA_IF_ID]) + xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + return xp; error: *errp = err; @@ -1708,6 +1748,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr err = copy_to_user_policy_type(xp->type, skb); if (!err) err = xfrm_mark_put(skb, &xp->mark); + if (!err) + err = xfrm_if_id_put(skb, xp->if_id); if (err) { nlmsg_cancel(skb, nlh); return err; @@ -1789,6 +1831,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, int delete; struct xfrm_mark m; u32 mark = xfrm_mark_get(attrs, &m); + u32 if_id = 0; p = nlmsg_data(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; @@ -1801,8 +1844,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; + if (attrs[XFRMA_IF_ID]) + if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (p->index) - xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1819,7 +1865,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, &p->sel, + xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } @@ -1942,6 +1988,10 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) goto out_cancel; + err = xfrm_if_id_put(skb, x->if_id); + if (err) + goto out_cancel; + nlmsg_end(skb, nlh); return 0; @@ -2084,6 +2134,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, int err = -ENOENT; struct xfrm_mark m; u32 mark = xfrm_mark_get(attrs, &m); + u32 if_id = 0; err = copy_from_user_policy_type(&type, attrs); if (err) @@ -2093,8 +2144,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; + if (attrs[XFRMA_IF_ID]) + if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (p->index) - xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -2111,7 +2165,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, + xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } @@ -2493,7 +2547,9 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, - [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 }, + [XFRMA_SET_MARK] = { .type = NLA_U32 }, + [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, + [XFRMA_IF_ID] = { .type = NLA_U32 }, }; static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { @@ -2625,6 +2681,10 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) return err; + err = xfrm_if_id_put(skb, x->if_id); + if (err) + return err; + nlmsg_end(skb, nlh); return 0; } @@ -2719,8 +2779,12 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(x->props.extra_flags)); if (x->xso.dev) l += nla_total_size(sizeof(x->xso)); - if (x->props.output_mark) - l += nla_total_size(sizeof(x->props.output_mark)); + if (x->props.smark.v | x->props.smark.m) { + l += nla_total_size(sizeof(x->props.smark.v)); + l += nla_total_size(sizeof(x->props.smark.m)); + } + if (x->if_id) + l += nla_total_size(sizeof(x->if_id)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size_64bit(sizeof(u64)); @@ -2850,6 +2914,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, err = copy_to_user_policy_type(xp->type, skb); if (!err) err = xfrm_mark_put(skb, &xp->mark); + if (!err) + err = xfrm_if_id_put(skb, xp->if_id); if (err) { nlmsg_cancel(skb, nlh); return err; @@ -2966,6 +3032,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, err = copy_to_user_policy_type(xp->type, skb); if (!err) err = xfrm_mark_put(skb, &xp->mark); + if (!err) + err = xfrm_if_id_put(skb, xp->if_id); if (err) { nlmsg_cancel(skb, nlh); return err; @@ -3047,6 +3115,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e err = copy_to_user_policy_type(xp->type, skb); if (!err) err = xfrm_mark_put(skb, &xp->mark); + if (!err) + err = xfrm_if_id_put(skb, xp->if_id); if (err) goto out_free_skb; |