diff options
Diffstat (limited to 'net/ipv6')
36 files changed, 954 insertions, 743 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 4ea244891b58..309af19a0a0a 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-y += addrconf_core.o exthdrs_core.o +obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b5d8cb9b123..4dc0d44a5d31 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -110,10 +110,6 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } -#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) -#define ADDRCONF_TIMER_FUZZ (HZ / 4) -#define ADDRCONF_TIMER_FUZZ_MAX (HZ) - #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -248,6 +244,9 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; /* Check if a valid qdisc is available */ static inline bool addrconf_qdisc_ok(const struct net_device *dev) @@ -432,6 +431,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); + /* Join interface-local all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); + /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); @@ -615,10 +617,15 @@ static void dev_forward_change(struct inet6_dev *idev) if (idev->cnf.forwarding) dev_disable_lro(dev); if (dev->flags & IFF_MULTICAST) { - if (idev->cnf.forwarding) + if (idev->cnf.forwarding) { ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); - else + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters); + } else { ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters); + } } list_for_each_entry(ifa, &idev->addr_list, if_list) { @@ -1051,7 +1058,7 @@ retry: ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, addr_flags) : NULL; - if (!ift || IS_ERR(ift)) { + if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); @@ -2080,7 +2087,7 @@ ok: addr_type&IPV6_ADDR_SCOPE_MASK, addr_flags); - if (!ifp || IS_ERR(ifp)) { + if (IS_ERR_OR_NULL(ifp)) { in6_dev_put(in6_dev); return; } @@ -3318,14 +3325,14 @@ static const struct file_operations if6_fops = { static int __net_init if6_proc_net_init(struct net *net) { - if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops)) + if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops)) return -ENOMEM; return 0; } static void __net_exit if6_proc_net_exit(struct net *net) { - proc_net_remove(net, "if_inet6"); + remove_proc_entry("if_inet6", net->proc_net); } static struct pernet_operations if6_proc_net_ops = { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b043c60429bd..6b793bfc0e10 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -811,11 +811,10 @@ static struct pernet_operations inet6_net_ops = { static int __init inet6_init(void) { - struct sk_buff *dummy_skb; struct list_head *r; int err = 0; - BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); /* Register the socket-side information for inet6_create. */ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 384233188ac1..bb02e176cb70 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -521,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 757a810d8f15..5a80f15a9de2 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -47,7 +47,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); /* Big ac list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_ac_lock); +static DEFINE_SPINLOCK(ipv6_sk_ac_lock); /* @@ -128,10 +128,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_ac_inc(dev, addr); if (!err) { - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); pac = NULL; } @@ -152,7 +152,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && @@ -161,7 +161,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) prev_pac = pac; } if (!pac) { - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); return -ENOENT; } if (prev_pac) @@ -169,7 +169,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) else np->ipv6_ac_list = pac->acl_next; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, pac->acl_ifindex); @@ -192,10 +192,10 @@ void ipv6_sock_ac_close(struct sock *sk) if (!np->ipv6_ac_list) return; - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; rcu_read_lock(); @@ -509,7 +509,7 @@ static const struct file_operations ac6_seq_fops = { int __net_init ac6_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops)) + if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops)) return -ENOMEM; return 0; @@ -517,7 +517,7 @@ int __net_init ac6_proc_init(struct net *net) void ac6_proc_exit(struct net *net) { - proc_net_remove(net, "anycast6"); + remove_proc_entry("anycast6", net->proc_net); } #endif diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7a778b9a7b85..f5a54782a340 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -30,6 +30,7 @@ #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/tcp_states.h> +#include <net/dsfield.h> #include <linux/errqueue.h> #include <asm/uaccess.h> @@ -356,12 +357,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = serr->port; sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { - sin->sin6_addr = - *(struct in6_addr *)(nh + serr->addr_offset); + const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), + struct ipv6hdr, daddr); + sin->sin6_addr = ip6h->daddr; if (np->sndflow) - sin->sin6_flowinfo = - (*(__be32 *)(nh + serr->addr_offset - 24) & - IPV6_FLOWINFO_MASK); + sin->sin6_flowinfo = ip6_flowinfo(ip6h); if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = IP6CB(skb)->iif; } else { @@ -489,13 +489,14 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.rxtclass) { - int tclass = ipv6_tclass(ipv6_hdr(skb)); + int tclass = ipv6_get_dsfield(ipv6_hdr(skb)); put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } - if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) { - __be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK; - put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); + if (np->rxopt.bits.rxflow) { + __be32 flowinfo = ip6_flowinfo((struct ipv6hdr *)nh); + if (flowinfo) + put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } /* HbH is allowed only once */ diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 473f628f9f20..07a7d65a7cb6 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { - IP6CB(skb)->ra = optoff; + IP6CB(skb)->flags |= IP6SKB_ROUTERALERT; + memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 30647857a375..b386a2ce4c6f 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -32,6 +32,9 @@ int inet6_csk_bind_conflict(const struct sock *sk, { const struct sock *sk2; const struct hlist_node *node; + int reuse = sk->sk_reuse; + int reuseport = sk->sk_reuseport; + kuid_t uid = sock_i_uid((struct sock *)sk); /* We must walk the whole port owner list in this case. -DaveM */ /* @@ -42,11 +45,17 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if ((!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + (!reuseport || !sk2->sk_reuseport || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, + sock_i_uid((struct sock *)sk2))))) { + if (ipv6_rcv_saddr_equal(sk, sk2)) + break; + } + } } return node != NULL; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index dea17fd28e50..32b4a1675d82 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -158,25 +158,38 @@ static inline int compute_score(struct sock *sk, struct net *net, } struct sock *inet6_lookup_listener(struct net *net, - struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, + struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, + const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { struct sock *sk; const struct hlist_nulls_node *node; struct sock *result; - int score, hiscore; + int score, hiscore, matches = 0, reuseport = 0; + u32 phash = 0; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; rcu_read_lock(); begin: result = NULL; - hiscore = -1; + hiscore = 0; sk_nulls_for_each(sk, node, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { hiscore = score; result = sk; + reuseport = sk->sk_reuseport; + if (reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (((u64)phash * matches) >> 32 == 0) + result = sk; + phash = next_pseudo_random32(phash); } } /* diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c new file mode 100644 index 000000000000..72d198b8e4d2 --- /dev/null +++ b/net/ipv6/ip6_checksum.c @@ -0,0 +1,97 @@ +#include <net/ip.h> +#include <net/udp.h> +#include <net/udplite.h> +#include <asm/checksum.h> + +#ifndef _HAVE_ARCH_IPV6_CSUM +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum csum) +{ + + int carry; + __u32 ulen; + __u32 uproto; + __u32 sum = (__force u32)csum; + + sum += (__force u32)saddr->s6_addr32[0]; + carry = (sum < (__force u32)saddr->s6_addr32[0]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[1]; + carry = (sum < (__force u32)saddr->s6_addr32[1]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[2]; + carry = (sum < (__force u32)saddr->s6_addr32[2]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[3]; + carry = (sum < (__force u32)saddr->s6_addr32[3]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[0]; + carry = (sum < (__force u32)daddr->s6_addr32[0]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[1]; + carry = (sum < (__force u32)daddr->s6_addr32[1]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[2]; + carry = (sum < (__force u32)daddr->s6_addr32[2]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[3]; + carry = (sum < (__force u32)daddr->s6_addr32[3]); + sum += carry; + + ulen = (__force u32)htonl((__u32) len); + sum += ulen; + carry = (sum < ulen); + sum += carry; + + uproto = (__force u32)htonl(proto); + sum += uproto; + carry = (sum < uproto); + sum += carry; + + return csum_fold((__force __wsum)sum); +} +EXPORT_SYMBOL(csum_ipv6_magic); +#endif + +int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) +{ + int err; + + UDP_SKB_CB(skb)->partial_cov = 0; + UDP_SKB_CB(skb)->cscov = skb->len; + + if (proto == IPPROTO_UDPLITE) { + err = udplite_checksum_init(skb, uh); + if (err) + return err; + } + + if (uh->check == 0) { + /* RFC 2460 section 8.1 says that we SHOULD log + this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); + return 1; + } + if (skb->ip_summed == CHECKSUM_COMPLETE && + !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + skb->len, proto, skb->csum)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (!skb_csum_unnecessary(skb)) + skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len, proto, 0)); + + return 0; +} +EXPORT_SYMBOL(udp6_csum_init); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d6de4b447250..b973ed3d06cf 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -51,25 +51,38 @@ #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) static atomic_t fl_size = ATOMIC_INIT(0); -static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1]; +static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(unsigned long dummy); static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0); /* FL hash table lock: it protects only of GC */ -static DEFINE_RWLOCK(ip6_fl_lock); +static DEFINE_SPINLOCK(ip6_fl_lock); /* Big socket sock */ -static DEFINE_RWLOCK(ip6_sk_fl_lock); +static DEFINE_SPINLOCK(ip6_sk_fl_lock); +#define for_each_fl_rcu(hash, fl) \ + for (fl = rcu_dereference_bh(fl_ht[(hash)]); \ + fl != NULL; \ + fl = rcu_dereference_bh(fl->next)) +#define for_each_fl_continue_rcu(fl) \ + for (fl = rcu_dereference_bh(fl->next); \ + fl != NULL; \ + fl = rcu_dereference_bh(fl->next)) + +#define for_each_sk_fl_rcu(np, sfl) \ + for (sfl = rcu_dereference_bh(np->ipv6_fl_list); \ + sfl != NULL; \ + sfl = rcu_dereference_bh(sfl->next)) static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { + for_each_fl_rcu(FL_HASH(label), fl) { if (fl->label == label && net_eq(fl->fl_net, net)) return fl; } @@ -80,11 +93,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - read_lock_bh(&ip6_fl_lock); + rcu_read_lock_bh(); fl = __fl_lookup(net, label); - if (fl) - atomic_inc(&fl->users); - read_unlock_bh(&ip6_fl_lock); + if (fl && !atomic_inc_not_zero(&fl->users)) + fl = NULL; + rcu_read_unlock_bh(); return fl; } @@ -96,13 +109,13 @@ static void fl_free(struct ip6_flowlabel *fl) put_pid(fl->owner.pid); release_net(fl->fl_net); kfree(fl->opt); + kfree_rcu(fl, rcu); } - kfree(fl); } static void fl_release(struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); fl->lastuse = jiffies; if (atomic_dec_and_test(&fl->users)) { @@ -119,7 +132,7 @@ static void fl_release(struct ip6_flowlabel *fl) time_after(ip6_fl_gc_timer.expires, ttd)) mod_timer(&ip6_fl_gc_timer, ttd); } - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); } static void ip6_fl_gc(unsigned long dummy) @@ -128,12 +141,13 @@ static void ip6_fl_gc(unsigned long dummy) unsigned long now = jiffies; unsigned long sched = 0; - write_lock(&ip6_fl_lock); + spin_lock(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { struct ip6_flowlabel *fl, **flp; flp = &fl_ht[i]; - while ((fl=*flp) != NULL) { + while ((fl = rcu_dereference_protected(*flp, + lockdep_is_held(&ip6_fl_lock))) != NULL) { if (atomic_read(&fl->users) == 0) { unsigned long ttd = fl->lastuse + fl->linger; if (time_after(ttd, fl->expires)) @@ -156,18 +170,19 @@ static void ip6_fl_gc(unsigned long dummy) if (sched) { mod_timer(&ip6_fl_gc_timer, sched); } - write_unlock(&ip6_fl_lock); + spin_unlock(&ip6_fl_lock); } static void __net_exit ip6_fl_purge(struct net *net) { int i; - write_lock(&ip6_fl_lock); + spin_lock(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { struct ip6_flowlabel *fl, **flp; flp = &fl_ht[i]; - while ((fl = *flp) != NULL) { + while ((fl = rcu_dereference_protected(*flp, + lockdep_is_held(&ip6_fl_lock))) != NULL) { if (net_eq(fl->fl_net, net) && atomic_read(&fl->users) == 0) { *flp = fl->next; @@ -178,7 +193,7 @@ static void __net_exit ip6_fl_purge(struct net *net) flp = &fl->next; } } - write_unlock(&ip6_fl_lock); + spin_unlock(&ip6_fl_lock); } static struct ip6_flowlabel *fl_intern(struct net *net, @@ -188,7 +203,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->label = label & IPV6_FLOWLABEL_MASK; - write_lock_bh(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; @@ -210,16 +225,16 @@ static struct ip6_flowlabel *fl_intern(struct net *net, lfl = __fl_lookup(net, fl->label); if (lfl != NULL) { atomic_inc(&lfl->users); - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); return lfl; } } fl->lastuse = jiffies; fl->next = fl_ht[FL_HASH(fl->label)]; - fl_ht[FL_HASH(fl->label)] = fl; + rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); atomic_inc(&fl_size); - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); return NULL; } @@ -234,17 +249,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { fl->lastuse = jiffies; atomic_inc(&fl->users); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return fl; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return NULL; } @@ -255,11 +270,21 @@ void fl6_free_socklist(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; - while ((sfl = np->ipv6_fl_list) != NULL) { + if (!rcu_access_pointer(np->ipv6_fl_list)) + return; + + spin_lock_bh(&ip6_sk_fl_lock); + while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { np->ipv6_fl_list = sfl->next; + spin_unlock_bh(&ip6_sk_fl_lock); + fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); + + spin_lock_bh(&ip6_sk_fl_lock); } + spin_unlock_bh(&ip6_sk_fl_lock); } /* Service routines */ @@ -424,7 +449,7 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) + for_each_sk_fl_rcu(np, sfl) count++; if (room <= 0 || @@ -467,11 +492,11 @@ static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_sk_fl_lock); + spin_lock_bh(&ip6_sk_fl_lock); sfl->fl = fl; sfl->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl; - write_unlock_bh(&ip6_sk_fl_lock); + rcu_assign_pointer(np->ipv6_fl_list, sfl); + spin_unlock_bh(&ip6_sk_fl_lock); } int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) @@ -493,31 +518,33 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) switch (freq.flr_action) { case IPV6_FL_A_PUT: - write_lock_bh(&ip6_sk_fl_lock); - for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) { + spin_lock_bh(&ip6_sk_fl_lock); + for (sflp = &np->ipv6_fl_list; + (sfl = rcu_dereference(*sflp))!=NULL; + sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = sfl->next; - write_unlock_bh(&ip6_sk_fl_lock); + *sflp = rcu_dereference(sfl->next); + spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); return 0; } } - write_unlock_bh(&ip6_sk_fl_lock); + spin_unlock_bh(&ip6_sk_fl_lock); return -ESRCH; case IPV6_FL_A_RENEW: - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return err; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (freq.flr_share == IPV6_FL_S_NONE && ns_capable(net->user_ns, CAP_NET_ADMIN)) { @@ -541,11 +568,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label) { err = -EEXIST; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_flags&IPV6_FL_F_EXCL) { - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); goto done; } fl1 = sfl->fl; @@ -553,7 +580,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) break; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (fl1 == NULL) fl1 = fl_lookup(net, freq.flr_label); @@ -641,13 +668,13 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { - fl = fl_ht[state->bucket]; - - while (fl && !net_eq(fl->fl_net, net)) - fl = fl->next; - if (fl) - break; + for_each_fl_rcu(state->bucket, fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } } + fl = NULL; +out: return fl; } @@ -656,18 +683,22 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo struct ip6fl_iter_state *state = ip6fl_seq_private(seq); struct net *net = seq_file_net(seq); - fl = fl->next; + for_each_fl_continue_rcu(fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } + try_again: - while (fl && !net_eq(fl->fl_net, net)) - fl = fl->next; - - while (!fl) { - if (++state->bucket <= FL_HASH_MASK) { - fl = fl_ht[state->bucket]; - goto try_again; - } else - break; + if (++state->bucket <= FL_HASH_MASK) { + for_each_fl_rcu(state->bucket, fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } + goto try_again; } + fl = NULL; + +out: return fl; } @@ -681,9 +712,9 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) } static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(ip6_fl_lock) + __acquires(RCU) { - read_lock_bh(&ip6_fl_lock); + rcu_read_lock_bh(); return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -700,9 +731,9 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip6fl_seq_stop(struct seq_file *seq, void *v) - __releases(ip6_fl_lock) + __releases(RCU) { - read_unlock_bh(&ip6_fl_lock); + rcu_read_unlock_bh(); } static int ip6fl_seq_show(struct seq_file *seq, void *v) @@ -775,15 +806,15 @@ static const struct file_operations ip6fl_seq_fops = { static int __net_init ip6_flowlabel_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "ip6_flowlabel", - S_IRUGO, &ip6fl_seq_fops)) + if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net, + &ip6fl_seq_fops)) return -ENOMEM; return 0; } static void __net_exit ip6_flowlabel_proc_fini(struct net *net) { - proc_net_remove(net, "ip6_flowlabel"); + remove_proc_entry("ip6_flowlabel", net->proc_net); } #else static inline int ip6_flowlabel_proc_init(struct net *net) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 131dd097736d..e4efffe2522e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -772,9 +772,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, * Push down and install the IP header. */ ipv6h = ipv6_hdr(skb); - *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000); - dsfield = INET_ECN_encapsulate(0, dsfield); - ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1240,7 +1238,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000); + ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a52d864d562b..5b10414e619e 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -118,6 +118,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* RFC4291 2.7 + * Nodes must not originate a packet to a multicast address whose scope + * field contains the reserved value 0; if such a packet is received, it + * must be silently dropped. + */ + if (ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0) + goto err; + /* * RFC4291 2.7 * Multicast addresses must not be used as source addresses in IPv6 @@ -212,7 +221,7 @@ resubmit: if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, &hdr->saddr) && - !ipv6_is_mld(skb, nexthdr)) + !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) goto discard; } if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && @@ -280,10 +289,8 @@ int ip6_mc_input(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); /* Check for MLD */ - if (unlikely(opt->ra)) { + if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { /* Check if this is a mld message */ - u8 *ptr = skb_network_header(skb) + opt->ra; - struct icmp6hdr *icmp6; u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; @@ -291,7 +298,7 @@ int ip6_mc_input(struct sk_buff *skb) /* Check if the value of Router Alert * is for MLD (0x0000). */ - if ((ptr[2] | ptr[3]) == 0) { + if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) { deliver = false; if (!ipv6_ext_hdr(nexthdr)) { @@ -303,24 +310,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (nexthdr != IPPROTO_ICMPV6) + if (!ipv6_is_mld(skb, nexthdr, offset)) goto out; - if (!pskb_may_pull(skb, (skb_network_header(skb) + - offset + 1 - skb->data))) - goto out; - - icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); - - switch (icmp6->icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - deliver = true; - break; - } - goto out; + deliver = true; } /* unknown RA - process it normally */ } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f26f0da7f095..8234c1dcdf72 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0c7c03d50dc0..155eccfa7760 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,8 +56,6 @@ #include <net/checksum.h> #include <linux/mroute6.h> -int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); - int __ip6_local_out(struct sk_buff *skb) { int len; @@ -88,7 +86,8 @@ static int ip6_finish_output2(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct neighbour *neigh; - struct rt6_info *rt; + struct in6_addr *nexthop; + int ret; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -121,12 +120,26 @@ static int ip6_finish_output2(struct sk_buff *skb) IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, skb->len); + + if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= + IPV6_ADDR_SCOPE_NODELOCAL && + !(dev->flags & IFF_LOOPBACK)) { + kfree_skb(skb); + return 0; + } } - rt = (struct rt6_info *) dst; - neigh = rt->n; - if (neigh) - return dst_neigh_output(dst, neigh, skb); + rcu_read_lock_bh(); + nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + if (!IS_ERR(neigh)) { + ret = dst_neigh_output(dst, neigh, skb); + rcu_read_unlock_bh(); + return ret; + } + rcu_read_unlock_bh(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -216,7 +229,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; + ip6_flow_hdr(hdr, tclass, fl6->flowlabel); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -236,9 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, dst->dev, dst_output); } - net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ipv6_local_error(sk, EMSGSIZE, fl6, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -246,39 +258,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, EXPORT_SYMBOL(ip6_xmit); -/* - * To avoid extra problems ND packets are send through this - * routine. It's code duplication but I really want to avoid - * extra checks since ipv6_build_header is used by TCP (which - * is for us performance critical) - */ - -int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, - const struct in6_addr *saddr, const struct in6_addr *daddr, - int proto, int len) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6hdr *hdr; - - skb->protocol = htons(ETH_P_IPV6); - skb->dev = dev; - - skb_reset_network_header(skb); - skb_put(skb, sizeof(struct ipv6hdr)); - hdr = ipv6_hdr(skb); - - *(__be32*)hdr = htonl(0x60000000); - - hdr->payload_len = htons(len); - hdr->nexthdr = proto; - hdr->hop_limit = np->hop_limit; - - hdr->saddr = *saddr; - hdr->daddr = *daddr; - - return 0; -} - static int ip6_call_ra_chain(struct sk_buff *skb, int sel) { struct ip6_ra_chain *ra; @@ -913,8 +892,12 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry of the nexthop router */ rt = (struct rt6_info *) *dst; - n = rt->n; - if (n && !(n->nud_state & NUD_VALID)) { + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); + err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; + rcu_read_unlock_bh(); + + if (err) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; @@ -1548,9 +1531,7 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - *(__be32*)hdr = fl6->flowlabel | - htonl(0x60000000 | ((int)np->cork.tclass << 20)); - + ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a14f28b280f5..fff83cbc197f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1030,9 +1030,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - *(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000); - dsfield = INET_ECN_encapsulate(0, dsfield); - ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8fd154e5f079..96bfb4e4b820 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1017,6 +1017,50 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt, + mifi_t mifi) +{ + int line = MFC6_HASH(&in6addr_any, &in6addr_any); + struct mfc6_cache *c; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp) && + (c->mfc_un.res.ttls[mifi] < 255)) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt, + struct in6_addr *mcastgrp, + mifi_t mifi) +{ + int line = MFC6_HASH(mcastgrp, &in6addr_any); + struct mfc6_cache *c, *proxy; + + if (ipv6_addr_any(mcastgrp)) + goto skip; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) { + if (c->mfc_un.res.ttls[mifi] < 255) + return c; + + /* It's ok if the mifi is part of the static tree */ + proxy = ip6mr_cache_find_any_parent(mrt, + c->mf6c_parent); + if (proxy && proxy->mfc_un.res.ttls[mifi] < 255) + return c; + } + +skip: + return ip6mr_cache_find_any_parent(mrt, mifi); +} + /* * Allocate a multicast cache entry */ @@ -1247,7 +1291,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, + int parent) { int line; struct mfc6_cache *c, *next; @@ -1256,7 +1301,9 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == c->mf6c_parent)) { write_lock_bh(&mrt_lock); list_del(&c->list); write_unlock_bh(&mrt_lock); @@ -1312,9 +1359,9 @@ static int __net_init ip6mr_net_init(struct net *net) #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) + if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops)) goto proc_vif_fail; - if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) + if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops)) goto proc_cache_fail; #endif @@ -1322,7 +1369,7 @@ static int __net_init ip6mr_net_init(struct net *net) #ifdef CONFIG_PROC_FS proc_cache_fail: - proc_net_remove(net, "ip6_mr_vif"); + remove_proc_entry("ip6_mr_vif", net->proc_net); proc_vif_fail: ip6mr_rules_exit(net); #endif @@ -1333,8 +1380,8 @@ fail: static void __net_exit ip6mr_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ip6_mr_cache"); - proc_net_remove(net, "ip6_mr_vif"); + remove_proc_entry("ip6_mr_cache", net->proc_net); + remove_proc_entry("ip6_mr_vif", net->proc_net); #endif ip6mr_rules_exit(net); } @@ -1391,7 +1438,7 @@ void ip6_mr_cleanup(void) } static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, - struct mf6cctl *mfc, int mrtsock) + struct mf6cctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1413,7 +1460,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == mfc->mf6cc_parent)) { found = true; break; } @@ -1430,7 +1479,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, return 0; } - if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) + if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && + !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; c = ip6mr_cache_alloc(); @@ -1596,7 +1646,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; @@ -1653,15 +1703,21 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns */ case MRT6_ADD_MFC: case MRT6_DEL_MFC: + parent = -1; + case MRT6_ADD_MFC_PROXY: + case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mf6cc_parent; rtnl_lock(); - if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(mrt, &mfc); + if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) + ret = ip6mr_mfc_delete(mrt, &mfc, parent); else - ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); + ret = ip6mr_mfc_add(net, mrt, &mfc, + sk == mrt->mroute6_sk, parent); rtnl_unlock(); return ret; @@ -2018,19 +2074,29 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ip6mr_find_vif(mrt, skb->dev); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { + struct mfc6_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incomming + * interface is part of the static tree. + */ + cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif6_table[vif].dev != skb->dev) { - int true_vifi; - cache->mfc_un.res.wrong_if++; - true_vifi = ip6mr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2048,14 +2114,32 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, goto dont_forward; } +forward: mrt->vif6_table[vif].pkt_in++; mrt->vif6_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (ipv6_addr_any(&cache->mf6c_origin) && + ipv6_addr_any(&cache->mf6c_mcastgrp)) { + if (true_vifi >= 0 && + true_vifi != cache->mf6c_parent && + ipv6_hdr(skb)->hop_limit > + cache->mfc_un.res.ttls[cache->mf6c_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = cache->mf6c_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) && + ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) @@ -2064,6 +2148,7 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { ip6mr_forward2(net, mrt, skb, cache, psend); return 0; @@ -2099,6 +2184,14 @@ int ip6_mr_input(struct sk_buff *skb) read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, + &ipv6_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2186,6 +2279,13 @@ int ip6mr_get_route(struct net *net, read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); + if (!cache && skb->dev) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, + vif); + } if (!cache) { struct sk_buff *skb2; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 28dfa5f3801f..bfa6cc36ef2a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -376,8 +376,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, goto done; /* err = -EADDRNOTAVAIL */ rv = !0; for (i=0; i<psl->sl_count; i++) { - rv = memcmp(&psl->sl_addr[i], source, - sizeof(struct in6_addr)); + rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) break; } @@ -427,12 +426,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; i<psl->sl_count; i++) { - rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr)); - if (rv == 0) - break; + rv = !ipv6_addr_equal(&psl->sl_addr[i], source); + if (rv == 0) /* There is an error in the address. */ + goto done; } - if (rv == 0) /* address already there is an error */ - goto done; for (j=psl->sl_count-1; j>=i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = *source; @@ -664,6 +661,10 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < + IPV6_ADDR_SCOPE_LINKLOCAL) + return; + spin_lock_bh(&mc->mca_lock); if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; @@ -690,6 +691,10 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < + IPV6_ADDR_SCOPE_LINKLOCAL) + return; + spin_lock_bh(&mc->mca_lock); if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; @@ -935,33 +940,6 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) } /* - * identify MLD packets for MLD filter exceptions - */ -bool ipv6_is_mld(struct sk_buff *skb, int nexthdr) -{ - struct icmp6hdr *pic; - - if (nexthdr != IPPROTO_ICMPV6) - return false; - - if (!pskb_may_pull(skb, sizeof(struct icmp6hdr))) - return false; - - pic = icmp6_hdr(skb); - - switch (pic->icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - return true; - default: - break; - } - return false; -} - -/* * check if the interface/address pair is valid */ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, @@ -1340,6 +1318,31 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) return scount; } +static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int proto, int len) +{ + struct ipv6hdr *hdr; + + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; + + skb_reset_network_header(skb); + skb_put(skb, sizeof(struct ipv6hdr)); + hdr = ipv6_hdr(skb); + + ip6_flow_hdr(hdr, 0, 0); + + hdr->payload_len = htons(len); + hdr->nexthdr = proto; + hdr->hop_limit = inet6_sk(sk)->hop_limit; + + hdr->saddr = *saddr; + hdr->daddr = *daddr; +} + static struct sk_buff *mld_newpack(struct net_device *dev, int size) { struct net *net = dev_net(dev); @@ -1375,7 +1378,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) } else saddr = &addr_buf; - ip6_nd_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); + ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); @@ -1418,7 +1421,7 @@ static void mld_sendpack(struct sk_buff *skb) icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); + dst = icmp6_dst_alloc(skb->dev, &fl6); err = 0; if (IS_ERR(dst)) { @@ -1767,7 +1770,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } else saddr = &addr_buf; - ip6_nd_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len); + ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len); memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); @@ -1786,7 +1789,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); + dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err_out; @@ -2596,10 +2599,10 @@ static int __net_init igmp6_proc_init(struct net *net) int err; err = -ENOMEM; - if (!proc_net_fops_create(net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops)) + if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops)) goto out; - if (!proc_net_fops_create(net, "mcfilter6", S_IRUGO, - &igmp6_mcf_seq_fops)) + if (!proc_create("mcfilter6", S_IRUGO, net->proc_net, + &igmp6_mcf_seq_fops)) goto out_proc_net_igmp6; err = 0; @@ -2607,14 +2610,14 @@ out: return err; out_proc_net_igmp6: - proc_net_remove(net, "igmp6"); + remove_proc_entry("igmp6", net->proc_net); goto out; } static void __net_exit igmp6_proc_exit(struct net *net) { - proc_net_remove(net, "mcfilter6"); - proc_net_remove(net, "igmp6"); + remove_proc_entry("mcfilter6", net->proc_net); + remove_proc_entry("igmp6", net->proc_net); } #else static inline int igmp6_proc_init(struct net *net) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6574175795df..76ef4353d518 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -143,16 +143,12 @@ struct neigh_table nd_tbl = { .gc_thresh3 = 1024, }; -static inline int ndisc_opt_addr_space(struct net_device *dev) +static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) { - return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type)); -} - -static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len, - unsigned short addr_type) -{ - int pad = ndisc_addr_option_pad(addr_type); - int space = NDISC_OPT_SPACE(data_len + pad); + int pad = ndisc_addr_option_pad(skb->dev->type); + int data_len = skb->dev->addr_len; + int space = ndisc_opt_addr_space(skb->dev); + u8 *opt = skb_put(skb, space); opt[0] = type; opt[1] = space>>3; @@ -166,7 +162,6 @@ static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len, opt += data_len; if ((space -= data_len) > 0) memset(opt, 0, space); - return opt + space; } static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, @@ -370,91 +365,88 @@ static void pndisc_destructor(struct pneigh_entry *n) ipv6_dev_mc_dec(dev, &maddr); } -static struct sk_buff *ndisc_build_skb(struct net_device *dev, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, - const struct in6_addr *target, - int llinfo) +static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, + int len) { - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.ndisc_sk; - struct sk_buff *skb; - struct icmp6hdr *hdr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - int len; + struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; + struct sk_buff *skb; int err; - u8 *opt; - - if (!dev->addr_len) - llinfo = 0; - - len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0); - if (llinfo) - len += ndisc_opt_addr_space(dev); skb = sock_alloc_send_skb(sk, - (MAX_HEADER + sizeof(struct ipv6hdr) + - len + hlen + tlen), + hlen + sizeof(struct ipv6hdr) + len + tlen, 1, &err); if (!skb) { - ND_PRINTK(0, err, "ND: %s failed to allocate an skb, err=%d\n", + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", __func__, err); return NULL; } - skb_reserve(skb, hlen); - ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; - skb->transport_header = skb->tail; - skb_put(skb, len); + skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); + skb_reset_transport_header(skb); - hdr = (struct icmp6hdr *)skb_transport_header(skb); - memcpy(hdr, icmp6h, sizeof(*hdr)); + return skb; +} - opt = skb_transport_header(skb) + sizeof(struct icmp6hdr); - if (target) { - *(struct in6_addr *)opt = *target; - opt += sizeof(*target); - } +static void ip6_nd_hdr(struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int hop_limit, int len) +{ + struct ipv6hdr *hdr; - if (llinfo) - ndisc_fill_addr_option(opt, llinfo, dev->dev_addr, - dev->addr_len, dev->type); + skb_push(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + hdr = ipv6_hdr(skb); - hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len, - IPPROTO_ICMPV6, - csum_partial(hdr, - len, 0)); + ip6_flow_hdr(hdr, 0, 0); - return skb; + hdr->payload_len = htons(len); + hdr->nexthdr = IPPROTO_ICMPV6; + hdr->hop_limit = hop_limit; + + hdr->saddr = *saddr; + hdr->daddr = *daddr; } -static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev, - struct neighbour *neigh, +static void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h) + const struct in6_addr *saddr) { - struct flowi6 fl6; - struct dst_entry *dst; - struct net *net = dev_net(dev); + struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(skb->dev); struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; int err; + struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; - icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex); - dst = icmp6_dst_alloc(dev, neigh, &fl6); - if (IS_ERR(dst)) { - kfree_skb(skb); - return; + if (!dst) { + struct sock *sk = net->ipv6.ndisc_sk; + struct flowi6 fl6; + + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); + dst = icmp6_dst_alloc(skb->dev, &fl6); + if (IS_ERR(dst)) { + kfree_skb(skb); + return; + } + + skb_dst_set(skb, dst); } - skb_dst_set(skb, dst); + icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len, + IPPROTO_ICMPV6, + csum_partial(icmp6h, + skb->len, 0)); + + ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); @@ -470,36 +462,17 @@ static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev, rcu_read_unlock(); } -/* - * Send a Neighbour Discover packet - */ -static void __ndisc_send(struct net_device *dev, - struct neighbour *neigh, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, const struct in6_addr *target, - int llinfo) -{ - struct sk_buff *skb; - - skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo); - if (!skb) - return; - - ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h); -} - static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, - int router, int solicited, int override, int inc_opt) + bool router, bool solicited, bool override, bool inc_opt) { + struct sk_buff *skb; struct in6_addr tmpaddr; struct inet6_ifaddr *ifp; const struct in6_addr *src_addr; - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, - }; + struct nd_msg *msg; + int optlen = 0; /* for anycast or proxy, solicited_addr != src_addr */ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1); @@ -517,13 +490,32 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, src_addr = &tmpaddr; } - icmp6h.icmp6_router = router; - icmp6h.icmp6_solicited = solicited; - icmp6h.icmp6_override = override; + if (!dev->addr_len) + inc_opt = 0; + if (inc_opt) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; - __ndisc_send(dev, neigh, daddr, src_addr, - &icmp6h, solicited_addr, - inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); + msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct nd_msg) { + .icmph = { + .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, + .icmp6_router = router, + .icmp6_solicited = solicited, + .icmp6_override = override, + }, + .target = *solicited_addr, + }; + + if (inc_opt) + ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, + dev->dev_addr); + + + ndisc_send_skb(skb, daddr, src_addr); } static void ndisc_send_unsol_na(struct net_device *dev) @@ -551,10 +543,11 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct sk_buff *skb; struct in6_addr addr_buf; - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, - }; + int inc_opt = dev->addr_len; + int optlen = 0; + struct nd_msg *msg; if (saddr == NULL) { if (ipv6_get_lladdr(dev, &addr_buf, @@ -563,18 +556,37 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, saddr = &addr_buf; } - __ndisc_send(dev, neigh, daddr, saddr, - &icmp6h, solicit, - !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0); + if (ipv6_addr_any(saddr)) + inc_opt = 0; + if (inc_opt) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; + + msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct nd_msg) { + .icmph = { + .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, + }, + .target = *solicit, + }; + + if (inc_opt) + ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, + dev->dev_addr); + + ndisc_send_skb(skb, daddr, saddr); } void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr) { - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_ROUTER_SOLICITATION, - }; + struct sk_buff *skb; + struct rs_msg *msg; int send_sllao = dev->addr_len; + int optlen = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* @@ -598,9 +610,27 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, } } #endif - __ndisc_send(dev, NULL, daddr, saddr, - &icmp6h, NULL, - send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0); + if (!dev->addr_len) + send_sllao = 0; + if (send_sllao) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; + + msg = (struct rs_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct rs_msg) { + .icmph = { + .icmp6_type = NDISC_ROUTER_SOLICITATION, + }, + }; + + if (send_sllao) + ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, + dev->dev_addr); + + ndisc_send_skb(skb, daddr, saddr); } @@ -676,6 +706,11 @@ static void ndisc_recv_ns(struct sk_buff *skb) bool inc; int is_router = -1; + if (skb->len < sizeof(struct nd_msg)) { + ND_PRINTK(2, warn, "NS: packet too short\n"); + return; + } + if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NS: multicast target address\n"); return; @@ -685,11 +720,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) * RFC2461 7.1.1: * DAD has to be destined for solicited node multicast address. */ - if (dad && - !(daddr->s6_addr32[0] == htonl(0xff020000) && - daddr->s6_addr32[1] == htonl(0x00000000) && - daddr->s6_addr32[2] == htonl(0x00000001) && - daddr->s6_addr [12] == 0xff )) { + if (dad && !ipv6_addr_is_solict_mult(daddr)) { ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); return; } @@ -780,11 +811,11 @@ static void ndisc_recv_ns(struct sk_buff *skb) } if (is_router < 0) - is_router = !!idev->cnf.forwarding; + is_router = idev->cnf.forwarding; if (dad) { ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target, - is_router, 0, (ifp != NULL), 1); + !!is_router, false, (ifp != NULL), true); goto out; } @@ -805,8 +836,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->header_ops) { ndisc_send_na(dev, neigh, saddr, &msg->target, - is_router, - 1, (ifp != NULL && inc), inc); + !!is_router, + true, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); } @@ -1350,25 +1381,34 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } +static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, + struct sk_buff *orig_skb, + int rd_len) +{ + u8 *opt = skb_put(skb, rd_len); + + memset(opt, 0, 8); + *(opt++) = ND_OPT_REDIRECT_HDR; + *(opt++) = (rd_len >> 3); + opt += 6; + + memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8); +} + void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; - int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + int optlen = 0; struct inet_peer *peer; struct sk_buff *buff; - struct icmp6hdr *icmph; + struct rd_msg *msg; struct in6_addr saddr_buf; - struct in6_addr *addrp; struct rt6_info *rt; struct dst_entry *dst; - struct inet6_dev *idev; struct flowi6 fl6; - u8 *opt; - int hlen, tlen; int rd_len; - int err; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; bool ret; @@ -1424,7 +1464,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) memcpy(ha_buf, neigh->ha, dev->addr_len); read_unlock_bh(&neigh->lock); ha = ha_buf; - len += ndisc_opt_addr_space(dev); + optlen += ndisc_opt_addr_space(dev); } else read_unlock_bh(&neigh->lock); @@ -1432,80 +1472,40 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } rd_len = min_t(unsigned int, - IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8); + IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen, + skb->len + 8); rd_len &= ~0x7; - len += rd_len; - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - buff = sock_alloc_send_skb(sk, - (MAX_HEADER + sizeof(struct ipv6hdr) + - len + hlen + tlen), - 1, &err); - if (buff == NULL) { - ND_PRINTK(0, err, - "Redirect: %s failed to allocate an skb, err=%d\n", - __func__, err); - goto release; - } - - skb_reserve(buff, hlen); - ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr, - IPPROTO_ICMPV6, len); - - skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data); - skb_put(buff, len); - icmph = icmp6_hdr(buff); - - memset(icmph, 0, sizeof(struct icmp6hdr)); - icmph->icmp6_type = NDISC_REDIRECT; + optlen += rd_len; - /* - * copy target and destination addresses - */ - - addrp = (struct in6_addr *)(icmph + 1); - *addrp = *target; - addrp++; - *addrp = ipv6_hdr(skb)->daddr; + buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!buff) + goto release; - opt = (u8*) (addrp + 1); + msg = (struct rd_msg *)skb_put(buff, sizeof(*msg)); + *msg = (struct rd_msg) { + .icmph = { + .icmp6_type = NDISC_REDIRECT, + }, + .target = *target, + .dest = ipv6_hdr(skb)->daddr, + }; /* * include target_address option */ if (ha) - opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha, - dev->addr_len, dev->type); + ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha); /* * build redirect option and copy skb over to the new packet. */ - memset(opt, 0, 8); - *(opt++) = ND_OPT_REDIRECT_HDR; - *(opt++) = (rd_len >> 3); - opt += 6; - - memcpy(opt, ipv6_hdr(skb), rd_len - 8); - - icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr, - len, IPPROTO_ICMPV6, - csum_partial(icmph, len, 0)); + if (rd_len) + ndisc_fill_redirect_hdr_option(buff, skb, rd_len); skb_dst_set(buff, dst); - rcu_read_lock(); - idev = __in6_dev_get(dst->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, - dst_output); - if (!err) { - ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT); - ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - } - - rcu_read_unlock(); + ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf); return; release: @@ -1522,7 +1522,7 @@ int ndisc_rcv(struct sk_buff *skb) { struct nd_msg *msg; - if (!pskb_may_pull(skb, skb->len)) + if (skb_linearize(skb)) return 0; msg = (struct nd_msg *)skb_transport_header(skb); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 125a90d6a795..341b54ade72c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1098,7 +1098,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1157,7 +1157,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, } t = xt_find_table_lock(net, AF_INET6, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) @@ -1197,7 +1197,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } @@ -1355,7 +1355,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, } t = xt_find_table_lock(net, AF_INET6, name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } @@ -1939,7 +1939,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 029623dbd411..ed3b427b2841 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -126,7 +126,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); - *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20)); + ip6_flow_hdr(ip6h, tclass, 0); ip6h->hop_limit = ip6_dst_hoplimit(dst); ip6h->nexthdr = IPPROTO_TCP; ip6h->saddr = oip6h->daddr; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 7431121b87de..6134a1ebfb1b 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> +#include <net/ipv6.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -60,8 +61,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) dev_net(out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && - (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || - memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || + !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 137e245860ab..2b6c226f5198 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -104,7 +104,6 @@ static unsigned int ipv6_helper(unsigned int hooknum, const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; - unsigned int ret; __be16 frag_off; int protoff; u8 nexthdr; @@ -130,12 +129,7 @@ static unsigned int ipv6_helper(unsigned int hooknum, return NF_ACCEPT; } - ret = helper->help(skb, protoff, ct, ctinfo); - if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { - nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, - "nf_ct_%s: dropping packet", helper->name); - } - return ret; + return helper->help(skb, protoff, ct, ctinfo); } static unsigned int ipv6_confirm(unsigned int hooknum, @@ -421,54 +415,43 @@ static int ipv6_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_tcp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n"); + pr_err("nf_conntrack_tcp6: pernet registration failed\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n"); + pr_err("nf_conntrack_udp6: pernet registration failed\n"); goto cleanup_tcp6; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_icmpv6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n"); + pr_err("nf_conntrack_icmp6: pernet registration failed\n"); goto cleanup_udp6; } - ret = nf_conntrack_l3proto_register(net, - &nf_conntrack_l3proto_ipv6); + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n"); + pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); goto cleanup_icmpv6; } return 0; cleanup_icmpv6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); cleanup_udp6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); cleanup_tcp6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); out: return ret; } static void ipv6_net_exit(struct net *net) { - nf_conntrack_l3proto_unregister(net, - &nf_conntrack_l3proto_ipv6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmpv6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp6); + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); } static struct pernet_operations ipv6_net_ops = { @@ -491,19 +474,52 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) ret = register_pernet_subsys(&ipv6_net_ops); if (ret < 0) - goto cleanup_pernet; + goto cleanup_sockopt; + ret = nf_register_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " "hook.\n"); - goto cleanup_ipv6; + goto cleanup_pernet; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n"); + goto cleanup_hooks; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n"); + goto cleanup_tcp6; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n"); + goto cleanup_udp6; + } + + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); + goto cleanup_icmpv6; } return ret; - cleanup_ipv6: - unregister_pernet_subsys(&ipv6_net_ops); + cleanup_icmpv6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); + cleanup_udp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); + cleanup_tcp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + cleanup_hooks: + nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); cleanup_pernet: + unregister_pernet_subsys(&ipv6_net_ops); + cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst6); return ret; } @@ -511,6 +527,10 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); nf_unregister_sockopt(&so_getorigdst6); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3dacecc99065..54087e96d7b8 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -97,9 +97,9 @@ static int nf_ct_frag6_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; - table[0].data = &net->ipv6.frags.high_thresh; - table[1].data = &net->ipv6.frags.low_thresh; - table[2].data = &net->ipv6.frags.timeout; + table[0].data = &net->nf_frag.frags.timeout; + table[1].data = &net->nf_frag.frags.low_thresh; + table[2].data = &net->nf_frag.frags.high_thresh; } hdr = register_net_sysctl(net, "net/netfilter", table); @@ -319,7 +319,7 @@ found: fq->q.meat += skb->len; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - atomic_add(skb->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -328,9 +328,8 @@ found: fq->nhoffset = nhoff; fq->q.last_in |= INET_FRAG_FIRST_IN; } - write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&nf_frags.lock); + + inet_frag_lru_move(&fq->q); return 0; discard_fq: @@ -369,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { + if (skb_unclone(head, GFP_ATOMIC)) { pr_debug("skb is cloned but can't expand head"); goto out_oom; } @@ -398,7 +397,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -422,7 +421,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - atomic_sub(head->truesize, &fq->q.net->mem); + sub_frag_mem_limit(&fq->q, head->truesize); head->local_df = 1; head->next = NULL; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 745a32042950..bbbe53a99b57 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -295,11 +295,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev) static int __net_init ipv6_proc_init_net(struct net *net) { - if (!proc_net_fops_create(net, "sockstat6", S_IRUGO, - &sockstat6_seq_fops)) + if (!proc_create("sockstat6", S_IRUGO, net->proc_net, + &sockstat6_seq_fops)) return -ENOMEM; - if (!proc_net_fops_create(net, "snmp6", S_IRUGO, &snmp6_seq_fops)) + if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops)) goto proc_snmp6_fail; net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net); @@ -308,17 +308,17 @@ static int __net_init ipv6_proc_init_net(struct net *net) return 0; proc_dev_snmp6_fail: - proc_net_remove(net, "snmp6"); + remove_proc_entry("snmp6", net->proc_net); proc_snmp6_fail: - proc_net_remove(net, "sockstat6"); + remove_proc_entry("sockstat6", net->proc_net); return -ENOMEM; } static void __net_exit ipv6_proc_exit_net(struct net *net) { - proc_net_remove(net, "sockstat6"); - proc_net_remove(net, "dev_snmp6"); - proc_net_remove(net, "snmp6"); + remove_proc_entry("sockstat6", net->proc_net); + remove_proc_entry("dev_snmp6", net->proc_net); + remove_proc_entry("snmp6", net->proc_net); } static struct pernet_operations ipv6_proc_ops = { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 70fa81449997..c65907db8c44 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1292,7 +1292,7 @@ static const struct file_operations raw6_seq_fops = { static int __net_init raw6_init_net(struct net *net) { - if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops)) return -ENOMEM; return 0; @@ -1300,7 +1300,7 @@ static int __net_init raw6_init_net(struct net *net) static void __net_exit raw6_exit_net(struct net *net) { - proc_net_remove(net, "raw6"); + remove_proc_entry("raw6", net->proc_net); } static struct pernet_operations raw6_net_ops = { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e5253ec9e0fc..3c6a77290c6e 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -79,20 +79,8 @@ unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, { u32 c; - c = jhash_3words((__force u32)saddr->s6_addr32[0], - (__force u32)saddr->s6_addr32[1], - (__force u32)saddr->s6_addr32[2], - rnd); - - c = jhash_3words((__force u32)saddr->s6_addr32[3], - (__force u32)daddr->s6_addr32[0], - (__force u32)daddr->s6_addr32[1], - c); - - c = jhash_3words((__force u32)daddr->s6_addr32[2], - (__force u32)daddr->s6_addr32[3], - (__force u32)id, - c); + c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, rnd); return c & (INETFRAGS_HASHSZ - 1); } @@ -327,7 +315,7 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -341,9 +329,7 @@ found: fq->q.meat == fq->q.len) return ip6_frag_reasm(fq, prev, dev); - write_lock(&ip6_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&ip6_frags.lock); + inet_frag_lru_move(&fq->q); return -1; discard_fq: @@ -406,7 +392,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split @@ -429,7 +415,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -467,7 +453,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - atomic_sub(sum_truesize, &fq->q.net->mem); + sub_frag_mem_limit(&fq->q, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 363d8b7772e8..928266569689 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -145,25 +145,12 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, struct neighbour *n; daddr = choose_neigh_daddr(rt, skb, daddr); - n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); + n = __ipv6_neigh_lookup(dst->dev, daddr); if (n) return n; return neigh_create(&nd_tbl, daddr, dst->dev); } -static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) -{ - struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway); - if (!n) { - n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev); - if (IS_ERR(n)) - return PTR_ERR(n); - } - rt->n = n; - - return 0; -} - static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = cpu_to_be16(ETH_P_IPV6), @@ -300,9 +287,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - - if (rt->n) - neigh_release(rt->n); + struct dst_entry *from = dst->from; if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -312,8 +297,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) - dst_release(dst->from); + dst->from = NULL; + dst_release(from); if (rt6_has_peer(rt)) { struct inet_peer *peer = rt6_peer_ptr(rt); @@ -354,11 +339,6 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, in6_dev_put(idev); } } - if (rt->n && rt->n->dev == dev) { - rt->n->dev = loopback_dev; - dev_hold(loopback_dev); - dev_put(dev); - } } } @@ -388,15 +368,8 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count, { unsigned int val = fl6->flowi6_proto; - val ^= (__force u32)fl6->daddr.s6_addr32[0]; - val ^= (__force u32)fl6->daddr.s6_addr32[1]; - val ^= (__force u32)fl6->daddr.s6_addr32[2]; - val ^= (__force u32)fl6->daddr.s6_addr32[3]; - - val ^= (__force u32)fl6->saddr.s6_addr32[0]; - val ^= (__force u32)fl6->saddr.s6_addr32[1]; - val ^= (__force u32)fl6->saddr.s6_addr32[2]; - val ^= (__force u32)fl6->saddr.s6_addr32[3]; + val ^= ipv6_addr_hash(&fl6->daddr); + val ^= ipv6_addr_hash(&fl6->saddr); /* Work only if this not encapsulated */ switch (fl6->flowi6_proto) { @@ -505,24 +478,34 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - neigh = rt ? rt->n : NULL; - if (!neigh || (neigh->nud_state & NUD_VALID)) + if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) return; - read_lock_bh(&neigh->lock); - if (!(neigh->nud_state & NUD_VALID) && + rcu_read_lock_bh(); + neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + if (neigh) { + write_lock(&neigh->lock); + if (neigh->nud_state & NUD_VALID) + goto out; + } + + if (!neigh || time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { struct in6_addr mcaddr; struct in6_addr *target; - neigh->updated = jiffies; - read_unlock_bh(&neigh->lock); + if (neigh) { + neigh->updated = jiffies; + write_unlock(&neigh->lock); + } - target = (struct in6_addr *)&neigh->primary_key; + target = (struct in6_addr *)&rt->rt6i_gateway; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); } else { - read_unlock_bh(&neigh->lock); +out: + write_unlock(&neigh->lock); } + rcu_read_unlock_bh(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -549,20 +532,24 @@ static inline bool rt6_check_neigh(struct rt6_info *rt) struct neighbour *neigh; bool ret = false; - neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - ret = true; - else if (neigh) { - read_lock_bh(&neigh->lock); + return true; + + rcu_read_lock_bh(); + neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + if (neigh) { + read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) ret = true; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) ret = true; #endif - read_unlock_bh(&neigh->lock); + read_unlock(&neigh->lock); } + rcu_read_unlock_bh(); + return ret; } @@ -838,8 +825,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt = ip6_rt_copy(ort, daddr); if (rt) { - int attempts = !in_softirq(); - if (!(rt->rt6i_flags & RTF_GATEWAY)) { if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) @@ -855,32 +840,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt->rt6i_src.plen = 128; } #endif - - retry: - if (rt6_bind_neighbour(rt, rt->dst.dev)) { - struct net *net = dev_net(rt->dst.dev); - int saved_rt_min_interval = - net->ipv6.sysctl.ip6_rt_gc_min_interval; - int saved_rt_elasticity = - net->ipv6.sysctl.ip6_rt_gc_elasticity; - - if (attempts-- > 0) { - net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; - net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; - - ip6_dst_gc(&net->ipv6.ip6_dst_ops); - - net->ipv6.sysctl.ip6_rt_gc_elasticity = - saved_rt_elasticity; - net->ipv6.sysctl.ip6_rt_gc_min_interval = - saved_rt_min_interval; - goto retry; - } - - net_warn_ratelimited("Neighbour table overflow\n"); - dst_free(&rt->dst); - return NULL; - } } return rt; @@ -891,10 +850,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, { struct rt6_info *rt = ip6_rt_copy(ort, daddr); - if (rt) { + if (rt) rt->rt6i_flags |= RTF_CACHE; - rt->n = neigh_clone(ort->n); - } return rt; } @@ -928,7 +885,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!rt->n && !(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_LOCAL))) + if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -994,7 +951,7 @@ void ip6_route_input(struct sk_buff *skb) .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, - .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, + .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, }; @@ -1054,7 +1011,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags; - rt6_clean_expires(rt); rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); @@ -1159,7 +1115,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1187,7 +1143,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1247,7 +1203,6 @@ static struct dst_entry *icmp6_dst_gc_list; static DEFINE_SPINLOCK(icmp6_dst_lock); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, - struct neighbour *neigh, struct flowi6 *fl6) { struct dst_entry *dst; @@ -1265,20 +1220,8 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, goto out; } - if (neigh) - neigh_hold(neigh); - else { - neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); - if (IS_ERR(neigh)) { - in6_dev_put(idev); - dst_free(&rt->dst); - return ERR_CAST(neigh); - } - } - rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; - rt->n = neigh; atomic_set(&rt->dst.__refcnt, 1); rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; @@ -1587,12 +1530,6 @@ int ip6_route_add(struct fib6_config *cfg) } else rt->rt6i_prefsrc.plen = 0; - if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - err = rt6_bind_neighbour(rt, dev); - if (err) - goto out; - } - rt->rt6i_flags = cfg->fc_flags; install_route: @@ -1705,37 +1642,32 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; struct rt6_info *rt, *nrt = NULL; - const struct in6_addr *target; struct ndisc_options ndopts; - const struct in6_addr *dest; - struct neighbour *old_neigh; struct inet6_dev *in6_dev; struct neighbour *neigh; - struct icmp6hdr *icmph; + struct rd_msg *msg; int optlen, on_link; u8 *lladdr; optlen = skb->tail - skb->transport_header; - optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + optlen -= sizeof(*msg); if (optlen < 0) { net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); return; } - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; + msg = (struct rd_msg *)icmp6_hdr(skb); - if (ipv6_addr_is_multicast(dest)) { + if (ipv6_addr_is_multicast(&msg->dest)) { net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); return; } on_link = 0; - if (ipv6_addr_equal(dest, target)) { + if (ipv6_addr_equal(&msg->dest, &msg->target)) { on_link = 1; - } else if (ipv6_addr_type(target) != + } else if (ipv6_addr_type(&msg->target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); return; @@ -1752,7 +1684,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * first-hop router for the specified ICMP Destination Address. */ - if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { + if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) { net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); return; } @@ -1779,15 +1711,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu */ dst_confirm(&rt->dst); - neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); + neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); if (!neigh) return; - /* Duplicate redirect: silently ignore. */ - old_neigh = rt->n; - if (neigh == old_neigh) - goto out; - /* * We have finally decided to accept it. */ @@ -1799,7 +1726,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)) ); - nrt = ip6_rt_copy(rt, dest); + nrt = ip6_rt_copy(rt, &msg->dest); if (!nrt) goto out; @@ -1808,16 +1735,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_flags &= ~RTF_GATEWAY; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - nrt->n = neigh_clone(neigh); if (ip6_ins_rt(nrt)) goto out; netevent.old = &rt->dst; - netevent.old_neigh = old_neigh; netevent.new = &nrt->dst; - netevent.new_neigh = neigh; - netevent.daddr = dest; + netevent.daddr = &msg->dest; + netevent.neigh = neigh; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags & RTF_CACHE) { @@ -1859,8 +1784,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == (RTF_DEFAULT | RTF_ADDRCONF)) rt6_set_from(rt, ort); - else - rt6_clean_expires(rt); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -2123,7 +2046,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - int err; if (!rt) { net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); @@ -2142,11 +2064,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - err = rt6_bind_neighbour(rt, rt->dst.dev); - if (err) { - dst_free(&rt->dst); - return ERR_PTR(err); - } rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; @@ -2492,7 +2409,6 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; - struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2605,9 +2521,8 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - n = rt->n; - if (n) { - if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) + if (rt->rt6i_flags & RTF_GATEWAY) { + if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0) goto nla_put_failure; } @@ -2802,7 +2717,6 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; - struct neighbour *n; seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); @@ -2811,9 +2725,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - n = rt->n; - if (n) { - seq_printf(m, "%pi6", n->primary_key); + if (rt->rt6i_flags & RTF_GATEWAY) { + seq_printf(m, "%pi6", &rt->rt6i_gateway); } else { seq_puts(m, "00000000000000000000000000000000"); } @@ -3080,8 +2993,8 @@ static void __net_exit ip6_route_net_exit(struct net *net) static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); + proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); #endif return 0; } @@ -3089,8 +3002,8 @@ static int __net_init ip6_route_net_init_late(struct net *net) static void __net_exit ip6_route_net_exit_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); + remove_proc_entry("ipv6_route", net->proc_net); + remove_proc_entry("rt6_stats", net->proc_net); #endif } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cfba99b2c2a4..02f96dcbcf02 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -72,6 +72,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst); static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; @@ -590,17 +592,21 @@ out: return err; } +static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, + const struct in6_addr *v6addr) +{ + __be32 v4embed = 0; + if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed) + return true; + return false; +} + static int ipip6_rcv(struct sk_buff *skb) { - const struct iphdr *iph; + const struct iphdr *iph = ip_hdr(skb); struct ip_tunnel *tunnel; int err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - - iph = ip_hdr(skb); - tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -613,10 +619,19 @@ static int ipip6_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; - if ((tunnel->dev->priv_flags & IFF_ISATAP) && - !isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; + } + } else { + if (is_spoofed_6rd(tunnel, iph->saddr, + &ipv6_hdr(skb)->saddr) || + is_spoofed_6rd(tunnel, iph->daddr, + &ipv6_hdr(skb)->daddr)) { + tunnel->dev->stats.rx_errors++; + goto out; + } } __skb_tunnel_rx(skb, tunnel->dev); @@ -650,14 +665,12 @@ out: } /* - * Returns the embedded IPv4 address if the IPv6 address - * comes from 6rd / 6to4 (RFC 3056) addr space. + * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function + * stores the embedded IPv4 address in v4dst and returns true. */ -static inline -__be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst) { - __be32 dst = 0; - #ifdef CONFIG_IPV6_SIT_6RD if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, tunnel->ip6rd.prefixlen)) { @@ -676,14 +689,24 @@ __be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> (32 - pbi1); - dst = tunnel->ip6rd.relay_prefix | htonl(d); + *v4dst = tunnel->ip6rd.relay_prefix | htonl(d); + return true; } #else if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ - memcpy(&dst, &v6dst->s6_addr16[1], 4); + memcpy(v4dst, &v6dst->s6_addr16[1], 4); + return true; } #endif + return false; +} + +static inline __be32 try_6rd(struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + __be32 dst = 0; + check_6rd(tunnel, v6dst, &dst); return dst; } @@ -744,7 +767,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (!dst) - dst = try_6rd(&iph6->daddr, tunnel); + dst = try_6rd(tunnel, &iph6->daddr); if (!dst) { struct neighbour *neigh = NULL; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 40161977f7cf..8a0848b60b35 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -179,7 +179,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); - if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; ret = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4f43537197ef..9b6460055df5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -713,7 +713,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #endif static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) + u32 tsval, u32 tsecr, + struct tcp_md5sig_key *key, int rst, u8 tclass) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -725,7 +726,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, struct dst_entry *dst; __be32 *topt; - if (ts) + if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG if (key) @@ -755,11 +756,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, topt = (__be32 *)(t1 + 1); - if (ts) { + if (tsecr) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *topt++ = htonl(tcp_time_stamp); - *topt++ = htonl(ts); + *topt++ = htonl(tsval); + *topt++ = htonl(tsecr); } #ifdef CONFIG_TCP_MD5SIG @@ -835,7 +836,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. */ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, &ipv6h->daddr, + &tcp_hashinfo, &ipv6h->saddr, + th->source, &ipv6h->daddr, ntohs(th->source), inet6_iif(skb)); if (!sk1) return; @@ -859,7 +861,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -870,10 +872,11 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 tsval, u32 tsecr, struct tcp_md5sig_key *key, u8 tclass) { - tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass); + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -883,6 +886,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), tw->tw_tclass); @@ -892,7 +896,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, + tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, + req->rcv_wnd, tcp_time_stamp, req->ts_recent, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); } @@ -1030,7 +1035,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq->rmt_addr = ipv6_hdr(skb)->saddr; treq->loc_addr = ipv6_hdr(skb)->daddr; if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb); + TCP_ECN_create_request(req, skb, sock_net(sk)); treq->iif = sk->sk_bound_dev_if; @@ -1167,7 +1172,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1247,7 +1252,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1460,7 +1465,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); @@ -1602,6 +1607,7 @@ do_time_wait: struct sock *sk2; sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fb083295ff0b..599e1ba6d1ce 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -45,6 +45,7 @@ #include <net/tcp_states.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> +#include <net/inet6_hashtables.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -203,7 +204,8 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; struct hlist_nulls_node *node; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; begin: result = NULL; @@ -214,8 +216,18 @@ begin: if (score > badness) { result = sk; badness = score; - if (score == SCORE2_MAX) + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } else if (score == SCORE2_MAX) goto exact_match; + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -249,7 +261,8 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; rcu_read_lock(); if (hslot->count > 10) { @@ -284,6 +297,17 @@ begin: if (score > badness) { result = sk; badness = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -752,40 +776,6 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, return 0; } -static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) -{ - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (proto == IPPROTO_UDPLITE) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - } - - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. - */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); - return 1; - } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - - if (!skb_csum_unnecessary(skb)) - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, proto, 0)); - - return 0; -} - int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0c8934a317c2..cf05cf073c51 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9f2095b19ad0..9bf6a74a71d2 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -69,8 +69,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + err = skb_unclone(skb, GFP_ATOMIC); + if (err) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index c9844135c9ca..4ef7bdb65440 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -110,7 +110,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ - xdst->u.rt6.n = neigh_clone(rt->n); xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; @@ -321,7 +320,51 @@ static struct ctl_table xfrm6_policy_table[] = { { } }; -static struct ctl_table_header *sysctl_hdr; +static int __net_init xfrm6_net_init(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = xfrm6_policy_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL); + if (!table) + goto err_alloc; + + table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh; + } + + hdr = register_net_sysctl(net, "net/ipv6", table); + if (!hdr) + goto err_reg; + + net->ipv6.sysctl.xfrm6_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit xfrm6_net_exit(struct net *net) +{ + struct ctl_table *table; + + if (net->ipv6.sysctl.xfrm6_hdr == NULL) + return; + + table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations xfrm6_net_ops = { + .init = xfrm6_net_init, + .exit = xfrm6_net_exit, +}; #endif int __init xfrm6_init(void) @@ -340,8 +383,7 @@ int __init xfrm6_init(void) goto out_policy; #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl(&init_net, "net/ipv6", - xfrm6_policy_table); + register_pernet_subsys(&xfrm6_net_ops); #endif out: return ret; @@ -353,8 +395,7 @@ out_policy: void xfrm6_fini(void) { #ifdef CONFIG_SYSCTL - if (sysctl_hdr) - unregister_net_sysctl_table(sysctl_hdr); + unregister_pernet_subsys(&xfrm6_net_ops); #endif xfrm6_policy_fini(); xfrm6_state_fini(); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index ee5a7065aacc..6cc48012b730 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -72,7 +72,7 @@ static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *ad { unsigned int h; - h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]); + h = ipv6_addr_hash((const struct in6_addr *)addr); h ^= h >> 16; h ^= h >> 8; h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1; @@ -94,7 +94,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const hlist_for_each_entry_rcu(x6spi, pos, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) + if (xfrm6_addr_equal(&x6spi->addr, saddr)) return x6spi; } @@ -211,7 +211,7 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { + if (xfrm6_addr_equal(&x6spi->addr, saddr)) { if (atomic_dec_and_test(&x6spi->refcnt)) { hlist_del_rcu(&x6spi->list_byaddr); hlist_del_rcu(&x6spi->list_byspi); |