diff options
Diffstat (limited to 'net/ipv6')
31 files changed, 866 insertions, 386 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2fac4ad74867..045597b9a7c0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -179,7 +179,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_work(struct work_struct *w); static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, bool send_na); -static void addrconf_dad_run(struct inet6_dev *idev); +static void addrconf_dad_run(struct inet6_dev *idev, bool restart); static void addrconf_rs_timer(struct timer_list *t); static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); @@ -666,6 +666,7 @@ errout: static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { + const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); int h, s_h; int idx, s_idx; @@ -673,6 +674,21 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct inet6_dev *idev; struct hlist_head *head; + if (cb->strict_check) { + struct netlink_ext_ack *extack = cb->extack; + struct netconfmsg *ncm; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request"); + return -EINVAL; + } + + if (nlmsg_attrlen(nlh, sizeof(*ncm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request"); + return -EINVAL; + } + } + s_h = cb->args[0]; s_idx = idx = cb->args[1]; @@ -692,7 +708,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, if (inet6_netconf_fill_devconf(skb, dev->ifindex, &idev->cnf, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, + nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL) < 0) { @@ -709,7 +725,7 @@ cont: if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, + nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL) < 0) goto done; @@ -720,7 +736,7 @@ cont: if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, + nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL) < 0) goto done; @@ -997,6 +1013,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, if (addr_type == IPV6_ADDR_ANY || addr_type & IPV6_ADDR_MULTICAST || (!(idev->dev->flags & IFF_LOOPBACK) && + !netif_is_l3_master(idev->dev) && addr_type & IPV6_ADDR_LOOPBACK)) return ERR_PTR(-EADDRNOTAVAIL); @@ -2398,7 +2415,7 @@ static void addrconf_add_mroute(struct net_device *dev) ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); - ip6_route_add(&cfg, GFP_ATOMIC, NULL); + ip6_route_add(&cfg, GFP_KERNEL, NULL); } static struct inet6_dev *addrconf_add_dev(struct net_device *dev) @@ -3062,7 +3079,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) if (addr.s6_addr32[3]) { add_addr(idev, &addr, plen, scope); addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags, - GFP_ATOMIC); + GFP_KERNEL); return; } @@ -3087,7 +3104,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) add_addr(idev, &addr, plen, flag); addrconf_prefix_route(&addr, plen, 0, idev->dev, - 0, pflags, GFP_ATOMIC); + 0, pflags, GFP_KERNEL); } } } @@ -3422,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; struct netdev_notifier_changeupper_info *info; struct inet6_dev *idev = __in6_dev_get(dev); struct net *net = dev_net(dev); @@ -3496,7 +3514,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; } - if (idev) { + if (!IS_ERR_OR_NULL(idev)) { if (idev->if_flags & IF_READY) { /* device is already configured - * but resend MLD reports, we might @@ -3504,6 +3522,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, * multicast snooping switches */ ipv6_mc_up(idev); + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + addrconf_dad_run(idev, true); rt6_sync_up(dev, RTNH_F_LINKDOWN); break; } @@ -3538,7 +3559,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (!IS_ERR_OR_NULL(idev)) { if (run_pending) - addrconf_dad_run(idev); + addrconf_dad_run(idev, false); /* Device has an address by now */ rt6_sync_up(dev, RTNH_F_DEAD); @@ -4156,16 +4177,19 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, addrconf_verify_rtnl(); } -static void addrconf_dad_run(struct inet6_dev *idev) +static void addrconf_dad_run(struct inet6_dev *idev, bool restart) { struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); list_for_each_entry(ifp, &idev->addr_list, if_list) { spin_lock(&ifp->lock); - if (ifp->flags & IFA_F_TENTATIVE && - ifp->state == INET6_IFADDR_STATE_DAD) + if ((ifp->flags & IFA_F_TENTATIVE && + ifp->state == INET6_IFADDR_STATE_DAD) || restart) { + if (restart) + ifp->state = INET6_IFADDR_STATE_PREDAD; addrconf_dad_kick(ifp); + } spin_unlock(&ifp->lock); } read_unlock_bh(&idev->lock); @@ -4201,7 +4225,6 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) p++; continue; } - state->offset++; return ifa; } @@ -4225,13 +4248,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, return ifa; } + state->offset = 0; while (++state->bucket < IN6_ADDR_HSIZE) { - state->offset = 0; hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; - state->offset++; return ifa; } } @@ -4491,6 +4513,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, [IFA_FLAGS] = { .len = sizeof(u32) }, [IFA_RT_PRIORITY] = { .len = sizeof(u32) }, + [IFA_TARGET_NETNSID] = { .type = NLA_S32 }, }; static int @@ -4793,19 +4816,40 @@ static inline int inet6_ifaddr_msgsize(void) + nla_total_size(4) /* IFA_RT_PRIORITY */; } +enum addr_type_t { + UNICAST_ADDR, + MULTICAST_ADDR, + ANYCAST_ADDR, +}; + +struct inet6_fill_args { + u32 portid; + u32 seq; + int event; + unsigned int flags; + int netnsid; + int ifindex; + enum addr_type_t type; +}; + static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, - u32 portid, u32 seq, int event, unsigned int flags) + struct inet6_fill_args *args) { struct nlmsghdr *nlh; u32 preferred, valid; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, args->portid, args->seq, args->event, + sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); + if (args->netnsid >= 0 && + nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) + goto error; + if (!((ifa->flags&IFA_F_PERMANENT) && (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; @@ -4855,7 +4899,7 @@ error: } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, - u32 portid, u32 seq, int event, u16 flags) + struct inet6_fill_args *args) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -4864,10 +4908,15 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, args->portid, args->seq, args->event, + sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; + if (args->netnsid >= 0 && + nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) + return -EMSGSIZE; + put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, @@ -4881,7 +4930,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, - u32 portid, u32 seq, int event, unsigned int flags) + struct inet6_fill_args *args) { struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt); int ifindex = dev ? dev->ifindex : 1; @@ -4891,10 +4940,15 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, args->portid, args->seq, args->event, + sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; + if (args->netnsid >= 0 && + nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) + return -EMSGSIZE; + put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, @@ -4907,68 +4961,56 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return 0; } -enum addr_type_t { - UNICAST_ADDR, - MULTICAST_ADDR, - ANYCAST_ADDR, -}; - /* called with rcu_read_lock() */ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, - struct netlink_callback *cb, enum addr_type_t type, - int s_ip_idx, int *p_ip_idx) + struct netlink_callback *cb, int s_ip_idx, + struct inet6_fill_args *fillargs) { struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; + int ip_idx = 0; int err = 1; - int ip_idx = *p_ip_idx; read_lock_bh(&idev->lock); - switch (type) { + switch (fillargs->type) { case UNICAST_ADDR: { struct inet6_ifaddr *ifa; + fillargs->event = RTM_NEWADDR; /* unicast address incl. temp addr */ list_for_each_entry(ifa, &idev->addr_list, if_list) { - if (++ip_idx < s_ip_idx) - continue; - err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWADDR, - NLM_F_MULTI); + if (ip_idx < s_ip_idx) + goto next; + err = inet6_fill_ifaddr(skb, ifa, fillargs); if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +next: + ip_idx++; } break; } case MULTICAST_ADDR: + fillargs->event = RTM_GETMULTICAST; + /* multicast address */ for (ifmca = idev->mc_list; ifmca; ifmca = ifmca->next, ip_idx++) { if (ip_idx < s_ip_idx) continue; - err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_GETMULTICAST, - NLM_F_MULTI); + err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); if (err < 0) break; } break; case ANYCAST_ADDR: + fillargs->event = RTM_GETANYCAST; /* anycast address */ for (ifaca = idev->ac_list; ifaca; ifaca = ifaca->aca_next, ip_idx++) { if (ip_idx < s_ip_idx) continue; - err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_GETANYCAST, - NLM_F_MULTI); + err = inet6_fill_ifacaddr(skb, ifaca, fillargs); if (err < 0) break; } @@ -4977,42 +5019,128 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, break; } read_unlock_bh(&idev->lock); - *p_ip_idx = ip_idx; + cb->args[2] = ip_idx; return err; } +static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, + struct inet6_fill_args *fillargs, + struct net **tgt_net, struct sock *sk, + struct netlink_callback *cb) +{ + struct netlink_ext_ack *extack = cb->extack; + struct nlattr *tb[IFA_MAX+1]; + struct ifaddrmsg *ifm; + int err, i; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for address dump request"); + return -EINVAL; + } + + ifm = nlmsg_data(nlh); + if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address dump request"); + return -EINVAL; + } + + fillargs->ifindex = ifm->ifa_index; + if (fillargs->ifindex) { + cb->answer_flags |= NLM_F_DUMP_FILTERED; + fillargs->flags |= NLM_F_DUMP_FILTERED; + } + + err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + if (err < 0) + return err; + + for (i = 0; i <= IFA_MAX; ++i) { + if (!tb[i]) + continue; + + if (i == IFA_TARGET_NETNSID) { + struct net *net; + + fillargs->netnsid = nla_get_s32(tb[i]); + net = rtnl_get_net_ns_capable(sk, fillargs->netnsid); + if (IS_ERR(net)) { + fillargs->netnsid = -1; + NL_SET_ERR_MSG_MOD(extack, "Invalid target network namespace id"); + return PTR_ERR(net); + } + *tgt_net = net; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, enum addr_type_t type) { + const struct nlmsghdr *nlh = cb->nlh; + struct inet6_fill_args fillargs = { + .portid = NETLINK_CB(cb->skb).portid, + .seq = cb->nlh->nlmsg_seq, + .flags = NLM_F_MULTI, + .netnsid = -1, + .type = type, + }; struct net *net = sock_net(skb->sk); + struct net *tgt_net = net; + int idx, s_idx, s_ip_idx; int h, s_h; - int idx, ip_idx; - int s_idx, s_ip_idx; struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; + int err = 0; s_h = cb->args[0]; s_idx = idx = cb->args[1]; - s_ip_idx = ip_idx = cb->args[2]; + s_ip_idx = cb->args[2]; + + if (cb->strict_check) { + err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, + skb->sk, cb); + if (err < 0) + goto put_tgt_net; + + err = 0; + if (fillargs.ifindex) { + dev = __dev_get_by_index(tgt_net, fillargs.ifindex); + if (!dev) { + err = -ENODEV; + goto put_tgt_net; + } + idev = __in6_dev_get(dev); + if (idev) { + err = in6_dump_addrs(idev, skb, cb, s_ip_idx, + &fillargs); + } + goto put_tgt_net; + } + } rcu_read_lock(); - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq; + cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; - head = &net->dev_index_head[h]; + head = &tgt_net->dev_index_head[h]; hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (h > s_h || idx > s_idx) s_ip_idx = 0; - ip_idx = 0; idev = __in6_dev_get(dev); if (!idev) goto cont; - if (in6_dump_addrs(idev, skb, cb, type, - s_ip_idx, &ip_idx) < 0) + if (in6_dump_addrs(idev, skb, cb, s_ip_idx, + &fillargs) < 0) goto done; cont: idx++; @@ -5022,9 +5150,11 @@ done: rcu_read_unlock(); cb->args[0] = h; cb->args[1] = idx; - cb->args[2] = ip_idx; +put_tgt_net: + if (fillargs.netnsid >= 0) + put_net(tgt_net); - return skb->len; + return err < 0 ? err : skb->len; } static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) @@ -5053,6 +5183,14 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); + struct inet6_fill_args fillargs = { + .portid = NETLINK_CB(in_skb).portid, + .seq = nlh->nlmsg_seq, + .event = RTM_NEWADDR, + .flags = 0, + .netnsid = -1, + }; + struct net *tgt_net = net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL, *peer; @@ -5066,15 +5204,24 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (tb[IFA_TARGET_NETNSID]) { + fillargs.netnsid = nla_get_s32(tb[IFA_TARGET_NETNSID]); + + tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(in_skb).sk, + fillargs.netnsid); + if (IS_ERR(tgt_net)) + return PTR_ERR(tgt_net); + } + addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); if (!addr) return -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifa_index) - dev = dev_get_by_index(net, ifm->ifa_index); + dev = dev_get_by_index(tgt_net, ifm->ifa_index); - ifa = ipv6_get_ifaddr(net, addr, dev, 1); + ifa = ipv6_get_ifaddr(tgt_net, addr, dev, 1); if (!ifa) { err = -EADDRNOTAVAIL; goto errout; @@ -5086,20 +5233,22 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout_ifa; } - err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, RTM_NEWADDR, 0); + err = inet6_fill_ifaddr(skb, ifa, &fillargs); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); + err = rtnl_unicast(skb, tgt_net, NETLINK_CB(in_skb).portid); errout_ifa: in6_ifa_put(ifa); errout: if (dev) dev_put(dev); + if (fillargs.netnsid >= 0) + put_net(tgt_net); + return err; } @@ -5107,13 +5256,20 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; struct net *net = dev_net(ifa->idev->dev); + struct inet6_fill_args fillargs = { + .portid = 0, + .seq = 0, + .event = event, + .flags = 0, + .netnsid = -1, + }; int err = -ENOBUFS; skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); if (!skb) goto errout; - err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); + err = inet6_fill_ifaddr(skb, ifa, &fillargs); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ WARN_ON(err == -EMSGSIZE); @@ -5529,6 +5685,31 @@ nla_put_failure: return -EMSGSIZE; } +static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct ifinfomsg *ifm; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for link dump request"); + return -EINVAL; + } + + if (nlmsg_attrlen(nlh, sizeof(*ifm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid data after header"); + return -EINVAL; + } + + ifm = nlmsg_data(nlh); + if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || + ifm->ifi_change || ifm->ifi_index) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for dump request"); + return -EINVAL; + } + + return 0; +} + static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -5538,6 +5719,16 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct inet6_dev *idev; struct hlist_head *head; + /* only requests using strict checking can pass data to + * influence the dump + */ + if (cb->strict_check) { + int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack); + + if (err < 0) + return err; + } + s_h = cb->args[0]; s_idx = cb->args[1]; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 1d6ced37ad71..0d1ee82ee55b 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -458,20 +458,52 @@ static int ip6addrlbl_fill(struct sk_buff *skb, return 0; } +static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct ifaddrlblmsg *ifal; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for address label dump request"); + return -EINVAL; + } + + ifal = nlmsg_data(nlh); + if (ifal->__ifal_reserved || ifal->ifal_prefixlen || + ifal->ifal_flags || ifal->ifal_index || ifal->ifal_seq) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address label dump request"); + return -EINVAL; + } + + if (nlmsg_attrlen(nlh, sizeof(*ifal))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst"); + return -EINVAL; + } + + return 0; +} + static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) { + const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct ip6addrlbl_entry *p; int idx = 0, s_idx = cb->args[0]; int err; + if (cb->strict_check) { + err = ip6addrlbl_valid_dump_req(nlh, cb->extack); + if (err < 0) + return err; + } + rcu_read_lock(); hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { if (idx >= s_idx) { err = ip6addrlbl_fill(skb, p, net->ipv6.ip6addrlbl_table.seq, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, + nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI); if (err < 0) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 673bba31eb18..f0cd291034f0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -209,6 +209,7 @@ lookup_protocol: np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; + np->mc_all = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; np->repflow = net->ipv6.sysctl.flowlabel_reflect; sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; @@ -467,12 +468,10 @@ void inet6_destroy_sock(struct sock *sk) /* Release rx options */ skb = xchg(&np->pktoptions, NULL); - if (skb) - kfree_skb(skb); + kfree_skb(skb); skb = xchg(&np->rxpmtu, NULL); - if (skb) - kfree_skb(skb); + kfree_skb(skb); /* Free flowlabels */ fl6_free_socklist(sk); @@ -902,6 +901,7 @@ static const struct ipv6_stub ipv6_stub_impl = { static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .inet6_bind = __inet6_bind, + .udp6_lib_lookup = __udp6_lib_lookup, }; static int __init inet6_init(void) @@ -938,14 +938,14 @@ static int __init inet6_init(void) err = proto_register(&pingv6_prot, 1); if (err) - goto out_unregister_ping_proto; + goto out_unregister_raw_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. */ err = rawv6_init(); if (err) - goto out_unregister_raw_proto; + goto out_unregister_ping_proto; /* Register the family here so that the init calls below will * be able to create sockets. (?? is this dangerous ??) @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; + err = ipv6_anycast_init(); + if (err) + goto ipv6_anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -1091,6 +1094,8 @@ ipv6_frag_fail: ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: + ipv6_anycast_cleanup(); +ipv6_anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); @@ -1113,11 +1118,11 @@ netfilter_fail: igmp_fail: ndisc_cleanup(); ndisc_fail: - ip6_mr_cleanup(); + icmpv6_cleanup(); icmp_fail: - unregister_pernet_subsys(&inet6_net_ops); + ip6_mr_cleanup(); ipmr_fail: - icmpv6_cleanup(); + unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e0ff7031edd..94999058e110 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -44,8 +44,22 @@ #include <net/checksum.h> +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + /* * socket join an anycast group */ @@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk) rtnl_unlock(); } +static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) +{ + unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr); + + spin_lock(&acaddr_hash_lock); + hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]); + spin_unlock(&acaddr_hash_lock); +} + +static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca) +{ + spin_lock(&acaddr_hash_lock); + hlist_del_init_rcu(&aca->aca_addr_lst); + spin_unlock(&acaddr_hash_lock); +} + static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); } +static void aca_free_rcu(struct rcu_head *h) +{ + struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu); + + fib6_info_release(aca->aca_rt); + kfree(aca); +} + static void aca_put(struct ifacaddr6 *ac) { if (refcount_dec_and_test(&ac->aca_refcnt)) { - fib6_info_release(ac->aca_rt); - kfree(ac); + call_rcu(&ac->rcu, aca_free_rcu); } } @@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, aca->aca_addr = *addr; fib6_info_hold(f6i); aca->aca_rt = f6i; + INIT_HLIST_NODE(&aca->aca_addr_lst); aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; @@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) aca_get(aca); write_unlock_bh(&idev->lock); + ipv6_add_acaddr_hash(net, aca); + ip6_ins_rt(net, f6i); addrconf_join_solict(idev->dev, &aca->aca_addr); @@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) else idev->ac_list = aca->aca_next; write_unlock_bh(&idev->lock); + ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt); @@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) idev->ac_list = aca->aca_next; write_unlock_bh(&idev->lock); + ipv6_del_acaddr_hash(aca); + addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt); @@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { + unsigned int hash = inet6_acaddr_hash(net, addr); + struct net_device *nh_dev; + struct ifacaddr6 *aca; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { + hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash], + aca_addr_lst) { + nh_dev = fib6_info_nh_dev(aca->aca_rt); + if (!nh_dev || !net_eq(dev_net(nh_dev), net)) + continue; + if (ipv6_addr_equal(&aca->aca_addr, addr)) { found = true; break; } + } rcu_read_unlock(); return found; } @@ -540,3 +591,24 @@ void ac6_proc_exit(struct net *net) remove_proc_entry("anycast6", net->proc_net); } #endif + +/* Init / cleanup code + */ +int __init ipv6_anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void ipv6_anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 88a7579c23bd..63b2b66f9dfa 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -601,12 +601,11 @@ static void esp_input_done_esn(struct crypto_async_request *base, int err) static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) { - struct ip_esp_hdr *esph; struct crypto_aead *aead = x->data; struct aead_request *req; struct sk_buff *trailer; int ivlen = crypto_aead_ivsize(aead); - int elen = skb->len - sizeof(*esph) - ivlen; + int elen = skb->len - sizeof(struct ip_esp_hdr) - ivlen; int nfrags; int assoclen; int seqhilen; @@ -616,7 +615,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) u8 *iv; struct scatterlist *sg; - if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) { + if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + ivlen)) { ret = -EINVAL; goto out; } @@ -626,7 +625,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } - assoclen = sizeof(*esph); + assoclen = sizeof(struct ip_esp_hdr); seqhilen = 0; if (x->props.flags & XFRM_STATE_ESN) { diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 547515e8450a..377717045f8f 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -88,8 +88,24 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * Note, we are only interested in != 0 or == 0, thus the * force to int. */ - return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); + err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat is as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; } EXPORT_SYMBOL(udp6_csum_init); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d212738e9d10..ae3786132c23 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -29,6 +29,7 @@ #include <linux/list.h> #include <linux/slab.h> +#include <net/ip.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/addrconf.h> @@ -46,6 +47,7 @@ struct fib6_cleaner { int (*func)(struct fib6_info *, void *arg); int sernum; void *arg; + bool skip_notify; }; #ifdef CONFIG_IPV6_SUBTREES @@ -160,8 +162,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) } INIT_LIST_HEAD(&f6i->fib6_siblings); - f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; - atomic_inc(&f6i->fib6_ref); return f6i; @@ -171,7 +171,6 @@ void fib6_info_destroy_rcu(struct rcu_head *head) { struct fib6_info *f6i = container_of(head, struct fib6_info, rcu); struct rt6_exception_bucket *bucket; - struct dst_metrics *m; WARN_ON(f6i->fib6_node); @@ -196,14 +195,16 @@ void fib6_info_destroy_rcu(struct rcu_head *head) *ppcpu_rt = NULL; } } + + free_percpu(f6i->rt6i_pcpu); } + lwtstate_put(f6i->fib6_nh.nh_lwtstate); + if (f6i->fib6_nh.nh_dev) dev_put(f6i->fib6_nh.nh_dev); - m = f6i->fib6_metrics; - if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) - kfree(m); + ip_fib_metrics_put(f6i->fib6_metrics); kfree(f6i); } @@ -566,17 +567,31 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); + struct rt6_rtnl_dump_arg arg = {}; unsigned int h, s_h; unsigned int e = 0, s_e; - struct rt6_rtnl_dump_arg arg; struct fib6_walker *w; struct fib6_table *tb; struct hlist_head *head; int res = 0; - s_h = cb->args[0]; - s_e = cb->args[1]; + if (cb->strict_check) { + int err; + + err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb); + if (err < 0) + return err; + } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { + struct rtmsg *rtm = nlmsg_data(nlh); + + arg.filter.flags = rtm->rtm_flags & (RTM_F_PREFIX|RTM_F_CLONED); + } + + /* fib entries are never clones */ + if (arg.filter.flags & RTM_F_CLONED) + goto out; w = (void *)cb->args[2]; if (!w) { @@ -602,6 +617,23 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) arg.net = net; w->args = &arg; + if (arg.filter.table_id) { + tb = fib6_get_table(net, arg.filter.table_id); + if (!tb) { + if (arg.filter.dump_all_families) + goto out; + + NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); + return -ENOENT; + } + + res = fib6_dump_table(tb, skb, cb); + goto out; + } + + s_h = cb->args[0]; + s_e = cb->args[1]; + rcu_read_lock(); for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; @@ -611,16 +643,16 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) goto next; res = fib6_dump_table(tb, skb, cb); if (res != 0) - goto out; + goto out_unlock; next: e++; } } -out: +out_unlock: rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; - +out: res = res < 0 ? res : skb->len; if (res <= 0) fib6_dump_end(cb); @@ -987,7 +1019,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, fib6_clean_expires(iter); else fib6_set_expires(iter, rt->expires); - fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu); + + if (rt->fib6_pmtu) + fib6_metric_set(iter, RTAX_MTU, + rt->fib6_pmtu); return -EEXIST; } /* If we have the same destination and the same metric, @@ -1947,6 +1982,7 @@ static int fib6_clean_node(struct fib6_walker *w) struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w); struct nl_info info = { .nl_net = c->net, + .skip_notify = c->skip_notify, }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && @@ -1998,7 +2034,7 @@ static int fib6_clean_node(struct fib6_walker *w) static void fib6_clean_tree(struct net *net, struct fib6_node *root, int (*func)(struct fib6_info *, void *arg), - int sernum, void *arg) + int sernum, void *arg, bool skip_notify) { struct fib6_cleaner c; @@ -2010,13 +2046,14 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, c.sernum = sernum; c.arg = arg; c.net = net; + c.skip_notify = skip_notify; fib6_walk(net, &c.w); } static void __fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *), - int sernum, void *arg) + int sernum, void *arg, bool skip_notify) { struct fib6_table *table; struct hlist_head *head; @@ -2028,7 +2065,7 @@ static void __fib6_clean_all(struct net *net, hlist_for_each_entry_rcu(table, head, tb6_hlist) { spin_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, - func, sernum, arg); + func, sernum, arg, skip_notify); spin_unlock_bh(&table->tb6_lock); } } @@ -2038,14 +2075,21 @@ static void __fib6_clean_all(struct net *net, void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *), void *arg) { - __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); + __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false); +} + +void fib6_clean_all_skip_notify(struct net *net, + int (*func)(struct fib6_info *, void *), + void *arg) +{ + __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true); } static void fib6_flush_trees(struct net *net) { int new_sernum = fib6_new_sernum(net); - __fib6_clean_all(net, NULL, new_sernum, NULL); + __fib6_clean_all(net, NULL, new_sernum, NULL, false); } /* diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 18a3794b0f52..515adbdba1d2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -427,35 +427,17 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); - const struct gre_base_hdr *greh; const struct ipv6hdr *ipv6h; - int grehlen = sizeof(*greh); + struct tnl_ptk_info tpi; struct ip6_tnl *t; - int key_off = 0; - __be16 flags; - __be32 key; - if (!pskb_may_pull(skb, offset + grehlen)) - return; - greh = (const struct gre_base_hdr *)(skb->data + offset); - flags = greh->flags; - if (flags & (GRE_VERSION | GRE_ROUTING)) + if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6), + offset) < 0) return; - if (flags & GRE_CSUM) - grehlen += 4; - if (flags & GRE_KEY) { - key_off = grehlen + offset; - grehlen += 4; - } - if (!pskb_may_pull(skb, offset + grehlen)) - return; ipv6h = (const struct ipv6hdr *)skb->data; - greh = (const struct gre_base_hdr *)(skb->data + offset); - key = key_off ? *(__be32 *)(skb->data + key_off) : 0; - t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, - key, greh->protocol); + tpi.key, tpi.proto); if (!t) return; @@ -1778,6 +1760,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], if (data[IFLA_GRE_COLLECT_METADATA]) parms->collect_md = true; + parms->erspan_ver = 1; if (data[IFLA_GRE_ERSPAN_VER]) parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 6242682be876..96577e742afd 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -178,7 +178,8 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, */ if ((ipv6_addr_loopback(&hdr->saddr) || ipv6_addr_loopback(&hdr->daddr)) && - !(dev->flags & IFF_LOOPBACK)) + !(dev->flags & IFF_LOOPBACK) && + !netif_is_l3_master(dev)) goto err; /* RFC4291 Errata ID: 3480 diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 37ff4805b20c..c7e495f12011 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -115,6 +115,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, payload_len = skb->len - nhoff - sizeof(*ipv6h); ipv6h->payload_len = htons(payload_len); skb->network_header = (u8 *)ipv6h - skb->head; + skb_reset_mac_len(skb); if (udpfrag) { int err = ip6_find_1stfragopt(skb, &prevhdr); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 16f200f06500..89e0d5118afe 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -219,12 +219,10 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -ENOBUFS; } + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); consume_skb(skb); skb = skb2; - /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically, - * it is safe to call in our context (socket lock not held) - */ - skb_set_owner_w(skb, (struct sock *)sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -727,7 +725,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, skb = frag; frag = skb->next; - skb->next = NULL; + skb_mark_not_on_list(skb); } kfree(tmp_hdr); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5df2a58d945c..a9d06d4dd057 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1184,11 +1184,14 @@ route_lookup: } skb_dst_set(skb, dst); - if (encap_limit >= 0) { - init_tel_txopt(&opt, encap_limit); - ipv6_push_frag_opts(skb, &opt.ops, &proto); + if (hop_limit == 0) { + if (skb->protocol == htons(ETH_P_IP)) + hop_limit = ip_hdr(skb)->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + hop_limit = ipv6_hdr(skb)->hop_limit; + else + hop_limit = ip6_dst_hoplimit(dst); } - hop_limit = hop_limit ? : ip6_dst_hoplimit(dst); /* Calculate max headroom for all the headers and adjust * needed_headroom if necessary. @@ -1202,6 +1205,11 @@ route_lookup: if (err) return err; + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_frag_opts(skb, &opt.ops, &proto); + } + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1226,7 +1234,7 @@ static inline int ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; int encap_limit = -1; struct flowi6 fl6; __u8 dsfield; @@ -1234,6 +1242,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + /* ensure we can access the full inner ip header */ + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return -1; + + iph = ip_hdr(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); tproto = READ_ONCE(t->parms.proto); @@ -1298,7 +1311,7 @@ static inline int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ipv6hdr *ipv6h; int encap_limit = -1; __u16 offset; struct flowi6 fl6; @@ -1307,6 +1320,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + return -1; + + ipv6h = ipv6_hdr(skb); tproto = READ_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || ip6_tnl_addr_conflict(t, ipv6h)) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 38dec9da90d3..eeaf7455d51e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -481,7 +481,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) } mtu = dst_mtu(dst); - if (!skb->ignore_df && skb->len > mtu) { + if (skb->len > mtu) { skb_dst_update_pmtu(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { @@ -1094,7 +1094,8 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n, } t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, list); + if (t) + unregister_netdevice_queue(t->dev, list); } static int __net_init vti6_init_net(struct net *net) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d0b7e0249c13..e2ea691e42c6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -85,7 +85,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id); static void ip6mr_free_table(struct mr_table *mrt); static void ip6_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc6_cache *cache); + struct net_device *dev, struct sk_buff *skb, + struct mfc6_cache *cache); static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert); static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, @@ -138,6 +139,9 @@ static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, .flags = FIB_LOOKUP_NOREF, }; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi6_to_flowi(flp6)); + err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flowi6_to_flowi(flp6), 0, &arg); if (err < 0) @@ -164,7 +168,9 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, return -EINVAL; } - mrt = ip6mr_get_table(rule->fr_net, rule->table); + arg->table = fib_rule_get_table(rule, arg); + + mrt = ip6mr_get_table(rule->fr_net, arg->table); if (!mrt) return -EAGAIN; res->mrt = mrt; @@ -1014,7 +1020,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, } rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else - ip6_mr_forward(net, mrt, skb, c); + ip6_mr_forward(net, mrt, skb->dev, skb, c); } } @@ -1120,7 +1126,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, /* Queue a packet for resolution. It gets locked cache entry! */ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, - struct sk_buff *skb) + struct sk_buff *skb, struct net_device *dev) { struct mfc6_cache *c; bool found = false; @@ -1180,6 +1186,10 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, kfree_skb(skb); err = -ENOBUFS; } else { + if (dev) { + skb->dev = dev; + skb->skb_iif = dev->ifindex; + } skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); err = 0; } @@ -2043,11 +2053,12 @@ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) } static void ip6_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc6_cache *c) + struct net_device *dev, struct sk_buff *skb, + struct mfc6_cache *c) { int psend = -1; int vif, ct; - int true_vifi = ip6mr_find_vif(mrt, skb->dev); + int true_vifi = ip6mr_find_vif(mrt, dev); vif = c->_c.mfc_parent; c->_c.mfc_un.res.pkt++; @@ -2073,7 +2084,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != skb->dev) { + if (mrt->vif_table[vif].dev != dev) { c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && @@ -2154,6 +2165,19 @@ int ip6_mr_input(struct sk_buff *skb) .flowi6_mark = skb->mark, }; int err; + struct net_device *dev; + + /* skb->dev passed in is the master dev for vrfs. + * Get the proper interface that does have a vif associated with it. + */ + dev = skb->dev; + if (netif_is_l3_master(skb->dev)) { + dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); + if (!dev) { + kfree_skb(skb); + return -ENODEV; + } + } err = ip6mr_fib_lookup(net, &fl6, &mrt); if (err < 0) { @@ -2165,7 +2189,7 @@ int ip6_mr_input(struct sk_buff *skb) cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); if (!cache) { - int vif = ip6mr_find_vif(mrt, skb->dev); + int vif = ip6mr_find_vif(mrt, dev); if (vif >= 0) cache = ip6mr_cache_find_any(mrt, @@ -2179,9 +2203,9 @@ int ip6_mr_input(struct sk_buff *skb) if (!cache) { int vif; - vif = ip6mr_find_vif(mrt, skb->dev); + vif = ip6mr_find_vif(mrt, dev); if (vif >= 0) { - int err = ip6mr_cache_unresolved(mrt, vif, skb); + int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); read_unlock(&mrt_lock); return err; @@ -2191,7 +2215,7 @@ int ip6_mr_input(struct sk_buff *skb) return -ENODEV; } - ip6_mr_forward(net, mrt, skb, cache); + ip6_mr_forward(net, mrt, dev, skb, cache); read_unlock(&mrt_lock); @@ -2257,7 +2281,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, iph->saddr = rt->rt6i_src.addr; iph->daddr = rt->rt6i_dst.addr; - err = ip6mr_cache_unresolved(mrt, vif, skb2); + err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); read_unlock(&mrt_lock); return err; @@ -2433,6 +2457,33 @@ errout: static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { + const struct nlmsghdr *nlh = cb->nlh; + struct fib_dump_filter filter = {}; + int err; + + if (cb->strict_check) { + err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, + &filter, cb); + if (err < 0) + return err; + } + + if (filter.table_id) { + struct mr_table *mrt; + + mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); + if (!mrt) { + if (filter.dump_all_families) + return skb->len; + + NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); + return -ENOENT; + } + err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, + &mfc_unres_lock, &filter); + return skb->len ? : err; + } + return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, - _ip6mr_fill_mroute, &mfc_unres_lock); + _ip6mr_fill_mroute, &mfc_unres_lock, &filter); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index c0cac9cc3a28..381ce38940ae 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -674,6 +674,13 @@ done: retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); break; } + case IPV6_MULTICAST_ALL: + if (optlen < sizeof(int)) + goto e_inval; + np->mc_all = valbool; + retv = 0; + break; + case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: { @@ -1266,6 +1273,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->mcast_oif; break; + case IPV6_MULTICAST_ALL: + val = np->mc_all; + break; + case IPV6_UNICAST_IF: val = (__force int)htonl((__u32) np->ucast_oif); break; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 4ae54aaca373..21f6deb2aec9 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -636,7 +636,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, } if (!mc) { rcu_read_unlock(); - return true; + return np->mc_all; } read_lock(&mc->sflock); psl = mc->sflist; @@ -2436,17 +2436,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, { int err; - /* callers have the socket lock and rtnl lock - * so no other readers or writers of iml or its sflist - */ + write_lock_bh(&iml->sflock); if (!iml->sflist) { /* any-source empty exclude case */ - return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + } else { + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, + iml->sflist->sl_count, iml->sflist->sl_addr, 0); + sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); + iml->sflist = NULL; } - err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, - iml->sflist->sl_count, iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + write_unlock_bh(&iml->sflock); return err; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0ec273997d1d..659ecf4e4b3c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1533,7 +1533,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) if (!ndopts.nd_opts_rh) { ip6_redirect_no_header(skb, dev_net(skb->dev), - skb->dev->ifindex, 0); + skb->dev->ifindex); return; } @@ -1732,10 +1732,9 @@ int ndisc_rcv(struct sk_buff *skb) return 0; } - memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); ndisc_recv_ns(skb); break; @@ -1784,6 +1783,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, change_info = ptr; if (change_info->flags_changed & IFF_NOARP) neigh_changeaddr(&nd_tbl, dev); + if (!netif_carrier_ok(dev)) + neigh_carrier_down(&nd_tbl, dev); break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 8b147440fbdc..af737b47b9b5 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -65,7 +65,10 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par) } hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); + if (!hp) { + par->hotdrop = true; + return false; + } /* Calculate the header length */ if (nexthdr == NEXTHDR_FRAGMENT) diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 2c99b94eeca3..21bf6bf04323 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -137,7 +137,10 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) sizeof(_addr), &_addr); - BUG_ON(ap == NULL); + if (ap == NULL) { + par->hotdrop = true; + return false; + } if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) { pr_debug("i=%d temp=%d;\n", i, temp); @@ -166,7 +169,10 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) + temp * sizeof(_addr), sizeof(_addr), &_addr); - BUG_ON(ap == NULL); + if (ap == NULL) { + par->hotdrop = true; + return false; + } if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp])) break; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 2a14d8b65924..d219979c3e52 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -445,11 +445,12 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; + fp->sk = NULL; } sub_frag_mem_limit(fq->q.net, head->truesize); head->ignore_df = 1; - head->next = NULL; + skb_mark_not_on_list(head); head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); @@ -586,11 +587,16 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) */ ret = -EINPROGRESS; if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len && - nf_ct_frag6_reasm(fq, skb, dev)) - ret = 0; - else + fq->q.meat == fq->q.len) { + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + if (nf_ct_frag6_reasm(fq, skb, dev)) + ret = 0; + skb->_skb_refdst = orefdst; + } else { skb_dst_drop(skb); + } out_unlock: spin_unlock_bh(&fq->q.lock); diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index e6eb7cf9b54f..3e4bf2286abe 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -87,18 +87,30 @@ static struct notifier_block masq_dev_notifier = { struct masq_dev_work { struct work_struct work; struct net *net; + struct in6_addr addr; int ifindex; }; +static int inet_cmp(struct nf_conn *ct, void *work) +{ + struct masq_dev_work *w = (struct masq_dev_work *)work; + struct nf_conntrack_tuple *tuple; + + if (!device_cmp(ct, (void *)(long)w->ifindex)) + return 0; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); +} + static void iterate_cleanup_work(struct work_struct *work) { struct masq_dev_work *w; - long index; w = container_of(work, struct masq_dev_work, work); - index = w->ifindex; - nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0); + nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0); put_net(w->net); kfree(w); @@ -147,6 +159,7 @@ static int masq_inet_event(struct notifier_block *this, INIT_WORK(&w->work, iterate_cleanup_work); w->ifindex = dev->ifindex; w->net = net; + w->addr = ifa->addr; schedule_work(&w->work); return NOTIFY_DONE; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 413d98bf24f4..5e0efd3954e9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -651,8 +651,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; skb->tstamp = sockc->transmit_time; - skb_dst_set(skb, &rt->dst); - *dstp = NULL; skb_put(skb, length); skb_reset_network_header(skb); @@ -665,8 +663,14 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->transport_header = skb->network_header; err = memcpy_from_msg(iph, msg, length); - if (err) - goto error_fault; + if (err) { + err = -EFAULT; + kfree_skb(skb); + goto error; + } + + skb_dst_set(skb, &rt->dst); + *dstp = NULL; /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -675,21 +679,28 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (unlikely(!skb)) return 0; + /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev + * in the error path. Since skb has been freed, the dst could + * have been queued for deletion. + */ + rcu_read_lock(); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); - if (err) - goto error; + if (err) { + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); + goto error_check; + } + rcu_read_unlock(); out: return 0; -error_fault: - err = -EFAULT; - kfree_skb(skb); error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); +error_check: if (err == -ENOBUFS && !np->recverr) err = 0; return err; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5c5b4f79296e..5c3c92713096 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -145,7 +145,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, */ if (end < fq->q.len || ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) - goto err; + goto discard_fq; fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { @@ -162,20 +162,20 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ if (fq->q.flags & INET_FRAG_LAST_IN) - goto err; + goto discard_fq; fq->q.len = end; } } if (end == offset) - goto err; + goto discard_fq; /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) - goto err; + goto discard_fq; if (pskb_trim_rcsum(skb, end - offset)) - goto err; + goto discard_fq; /* Find out which fragments are in front and at the back of us * in the chain of fragments so far. We must know where to put @@ -388,7 +388,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } sub_frag_mem_limit(fq->q.net, sum_truesize); - head->next = NULL; + skb_mark_not_on_list(head); head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); @@ -418,6 +418,7 @@ out_fail: rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); + inet_frag_kill(&fq->q); return -1; } @@ -553,7 +554,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) table[0].data = &net->ipv6.frags.high_thresh; table[0].extra1 = &net->ipv6.frags.low_thresh; - table[0].extra2 = &init_net.ipv6.frags.high_thresh; table[1].data = &net->ipv6.frags.low_thresh; table[1].extra2 = &net->ipv6.frags.high_thresh; table[2].data = &net->ipv6.frags.timeout; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7208c16302f6..059f0531f7c1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -368,7 +368,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct fib6_info *from; struct inet6_dev *idev; - dst_destroy_metrics_generic(dst); + ip_dst_metrics_put(dst); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; @@ -517,10 +517,11 @@ static void rt6_probe_deferred(struct work_struct *w) static void rt6_probe(struct fib6_info *rt) { - struct __rt6_probe_work *work; + struct __rt6_probe_work *work = NULL; const struct in6_addr *nh_gw; struct neighbour *neigh; struct net_device *dev; + struct inet6_dev *idev; /* * Okay, this does not seem to be appropriate @@ -536,15 +537,12 @@ static void rt6_probe(struct fib6_info *rt) nh_gw = &rt->fib6_nh.nh_gw; dev = rt->fib6_nh.nh_dev; rcu_read_lock_bh(); + idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { - struct inet6_dev *idev; - if (neigh->nud_state & NUD_VALID) goto out; - idev = __in6_dev_get(dev); - work = NULL; write_lock(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, @@ -554,11 +552,13 @@ static void rt6_probe(struct fib6_info *rt) __neigh_set_probe_once(neigh); } write_unlock(&neigh->lock); - } else { + } else if (time_after(jiffies, rt->last_probe + + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } if (work) { + rt->last_probe = jiffies; INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; dev_hold(dev); @@ -946,8 +946,6 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) { - rt->dst.flags |= fib6_info_dst_flags(ort); - if (ort->fib6_flags & RTF_REJECT) { ip6_rt_init_dst_reject(rt, ort); return; @@ -956,7 +954,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) rt->dst.error = 0; rt->dst.output = ip6_output; - if (ort->fib6_type == RTN_LOCAL) { + if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) { rt->dst.input = ip6_input; } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { rt->dst.input = ip6_mc_input; @@ -977,7 +975,7 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) { rt->rt6i_flags &= ~RTF_EXPIRES; rcu_assign_pointer(rt->from, from); - dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); + ip_dst_init_metrics(&rt->dst, from->fib6_metrics); } /* Caller must already hold reference to @ort */ @@ -995,8 +993,6 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) #ifdef CONFIG_IPV6_SUBTREES rt->rt6i_src = ort->fib6_src; #endif - rt->rt6i_prefsrc = ort->fib6_prefsrc; - rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); } static struct fib6_node* fib6_backtrack(struct fib6_node *fn, @@ -1450,11 +1446,6 @@ static int rt6_insert_exception(struct rt6_info *nrt, if (ort->fib6_src.plen) src_key = &nrt->rt6i_src.addr; #endif - - /* Update rt6i_prefsrc as it could be changed - * in rt6_remove_prefsrc() - */ - nrt->rt6i_prefsrc = ort->fib6_prefsrc; /* rt6_mtu_change() might lower mtu on ort. * Only insert this exception route if its mtu * is less than ort's mtu value. @@ -1636,25 +1627,6 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) rcu_read_unlock(); } -static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt) -{ - struct rt6_exception_bucket *bucket; - struct rt6_exception *rt6_ex; - int i; - - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, - lockdep_is_held(&rt6_exception_lock)); - - if (bucket) { - for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { - hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { - rt6_ex->rt6i->rt6i_prefsrc.plen = 0; - } - bucket++; - } - } -} - static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, struct rt6_info *rt, int mtu) { @@ -2099,7 +2071,8 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, { bool any_src; - if (rt6_need_strict(&fl6->daddr)) { + if (ipv6_addr_type(&fl6->daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) { struct dst_entry *dst; dst = l3mdev_link_scope_lookup(net, fl6); @@ -2259,8 +2232,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (rt) { rcu_read_lock(); if (rt->rt6i_flags & RTF_CACHE) { - if (dst_hold_safe(&rt->dst)) - rt6_remove_exception_rt(rt); + rt6_remove_exception_rt(rt); } else { struct fib6_info *from; struct fib6_node *fn; @@ -2369,15 +2341,14 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark); - fl6.daddr = iph->daddr; - fl6.saddr = iph->saddr; - fl6.flowlabel = ip6_flowinfo(iph); - fl6.flowi6_uid = uid; + struct flowi6 fl6 = { + .flowi6_oif = oif, + .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark), + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = ip6_flowinfo(iph), + .flowi6_uid = uid, + }; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -2388,10 +2359,13 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { + int oif = sk->sk_bound_dev_if; struct dst_entry *dst; - ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); + if (!oif && skb->dev) + oif = l3mdev_master_ifindex(skb->dev); + + ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || @@ -2528,16 +2502,15 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_iif = LOOPBACK_IFINDEX; - fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark; - fl6.daddr = iph->daddr; - fl6.saddr = iph->saddr; - fl6.flowlabel = ip6_flowinfo(iph); - fl6.flowi6_uid = uid; + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_oif = oif, + .flowi6_mark = mark, + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = ip6_flowinfo(iph), + .flowi6_uid = uid, + }; dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); @@ -2545,21 +2518,18 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, } EXPORT_SYMBOL_GPL(ip6_redirect); -void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, - u32 mark) +void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_iif = LOOPBACK_IFINDEX; - fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark; - fl6.daddr = msg->dest; - fl6.saddr = iph->daddr; - fl6.flowi6_uid = sock_net_uid(net, NULL); + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_oif = oif, + .daddr = msg->dest, + .saddr = iph->daddr, + .flowi6_uid = sock_net_uid(net, NULL), + }; dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr); rt6_do_redirect(dst, NULL, skb); @@ -2730,24 +2700,6 @@ out: return entries > rt_max_size; } -static int ip6_convert_metrics(struct net *net, struct fib6_info *rt, - struct fib6_config *cfg) -{ - struct dst_metrics *p; - - if (!cfg->fc_mx) - return 0; - - p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL); - if (unlikely(!p)) - return -ENOMEM; - - refcount_set(&p->refcnt, 1); - rt->fib6_metrics = p; - - return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics); -} - static struct rt6_info *ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg, const struct in6_addr *gw_addr, @@ -2795,6 +2747,8 @@ static int ip6_route_check_nh_onlink(struct net *net, grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); if (grt) { if (!grt->dst.error && + /* ignore match if it is the default route */ + grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) && (grt->rt6i_flags & flags || dev != grt->dst.dev)) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway or device mismatch"); @@ -3023,13 +2977,17 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (!rt) goto out; + rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len); + if (IS_ERR(rt->fib6_metrics)) { + err = PTR_ERR(rt->fib6_metrics); + /* Do not leave garbage there. */ + rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; + goto out; + } + if (cfg->fc_flags & RTF_ADDRCONF) rt->dst_nocount = true; - err = ip6_convert_metrics(net, rt, cfg); - if (err < 0) - goto out; - if (cfg->fc_flags & RTF_EXPIRES) fib6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); @@ -3138,8 +3096,6 @@ install_route: rt->fib6_nh.nh_dev = dev; rt->fib6_table = table; - cfg->fc_nlinfo.nl_net = dev_net(dev); - if (idev) in6_dev_put(idev); @@ -3260,8 +3216,8 @@ static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) if (cfg->fc_flags & RTF_GATEWAY && !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) goto out; - if (dst_hold_safe(&rt->dst)) - rc = rt6_remove_exception_rt(rt); + + rc = rt6_remove_exception_rt(rt); out: return rc; } @@ -3631,23 +3587,23 @@ static void rtmsg_to_fib6_config(struct net *net, struct in6_rtmsg *rtmsg, struct fib6_config *cfg) { - memset(cfg, 0, sizeof(*cfg)); + *cfg = (struct fib6_config){ + .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? + : RT6_TABLE_MAIN, + .fc_ifindex = rtmsg->rtmsg_ifindex, + .fc_metric = rtmsg->rtmsg_metric, + .fc_expires = rtmsg->rtmsg_info, + .fc_dst_len = rtmsg->rtmsg_dst_len, + .fc_src_len = rtmsg->rtmsg_src_len, + .fc_flags = rtmsg->rtmsg_flags, + .fc_type = rtmsg->rtmsg_type, - cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? - : RT6_TABLE_MAIN; - cfg->fc_ifindex = rtmsg->rtmsg_ifindex; - cfg->fc_metric = rtmsg->rtmsg_metric; - cfg->fc_expires = rtmsg->rtmsg_info; - cfg->fc_dst_len = rtmsg->rtmsg_dst_len; - cfg->fc_src_len = rtmsg->rtmsg_src_len; - cfg->fc_flags = rtmsg->rtmsg_flags; - cfg->fc_type = rtmsg->rtmsg_type; - - cfg->fc_nlinfo.nl_net = net; + .fc_nlinfo.nl_net = net, - cfg->fc_dst = rtmsg->rtmsg_dst; - cfg->fc_src = rtmsg->rtmsg_src; - cfg->fc_gateway = rtmsg->rtmsg_gateway; + .fc_dst = rtmsg->rtmsg_dst, + .fc_src = rtmsg->rtmsg_src, + .fc_gateway = rtmsg->rtmsg_gateway, + }; } int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) @@ -3754,6 +3710,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, if (!f6i) return ERR_PTR(-ENOMEM); + f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0); f6i->dst_nocount = true; f6i->dst_host = true; f6i->fib6_protocol = RTPROT_KERNEL; @@ -3796,8 +3753,6 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) spin_lock_bh(&rt6_exception_lock); /* remove prefsrc entry */ rt->fib6_prefsrc.plen = 0; - /* need to update cache as well */ - rt6_exceptions_remove_prefsrc(rt); spin_unlock_bh(&rt6_exception_lock); } return 0; @@ -4075,8 +4030,12 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event) .event = event, }, }; + struct net *net = dev_net(dev); - fib6_clean_all(dev_net(dev), fib6_ifdown, &arg); + if (net->ipv6.sysctl.skip_notify_on_dev_down) + fib6_clean_all_skip_notify(net, fib6_ifdown, &arg); + else + fib6_clean_all(net, fib6_ifdown, &arg); } void rt6_disable_ip(struct net_device *dev, unsigned long event) @@ -4166,20 +4125,25 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, int err; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, - NULL); + extack); if (err < 0) goto errout; err = -EINVAL; rtm = nlmsg_data(nlh); - memset(cfg, 0, sizeof(*cfg)); - cfg->fc_table = rtm->rtm_table; - cfg->fc_dst_len = rtm->rtm_dst_len; - cfg->fc_src_len = rtm->rtm_src_len; - cfg->fc_flags = RTF_UP; - cfg->fc_protocol = rtm->rtm_protocol; - cfg->fc_type = rtm->rtm_type; + *cfg = (struct fib6_config){ + .fc_table = rtm->rtm_table, + .fc_dst_len = rtm->rtm_dst_len, + .fc_src_len = rtm->rtm_src_len, + .fc_flags = RTF_UP, + .fc_protocol = rtm->rtm_protocol, + .fc_type = rtm->rtm_type, + + .fc_nlinfo.portid = NETLINK_CB(skb).portid, + .fc_nlinfo.nlh = nlh, + .fc_nlinfo.nl_net = sock_net(skb->sk), + }; if (rtm->rtm_type == RTN_UNREACHABLE || rtm->rtm_type == RTN_BLACKHOLE || @@ -4195,10 +4159,6 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); - cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; - cfg->fc_nlinfo.nlh = nlh; - cfg->fc_nlinfo.nl_net = sock_net(skb->sk); - if (tb[RTA_GATEWAY]) { cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; @@ -4317,11 +4277,6 @@ static int ip6_route_info_append(struct net *net, if (!nh) return -ENOMEM; nh->fib6_info = rt; - err = ip6_convert_metrics(net, rt, r_cfg); - if (err) { - kfree(nh); - return err; - } memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); list_add_tail(&nh->next, rt6_nh_list); @@ -4671,20 +4626,31 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, int iif, int type, u32 portid, u32 seq, unsigned int flags) { - struct rtmsg *rtm; + struct rt6_info *rt6 = (struct rt6_info *)dst; + struct rt6key *rt6_dst, *rt6_src; + u32 *pmetrics, table, rt6_flags; struct nlmsghdr *nlh; + struct rtmsg *rtm; long expires = 0; - u32 *pmetrics; - u32 table; nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; + if (rt6) { + rt6_dst = &rt6->rt6i_dst; + rt6_src = &rt6->rt6i_src; + rt6_flags = rt6->rt6i_flags; + } else { + rt6_dst = &rt->fib6_dst; + rt6_src = &rt->fib6_src; + rt6_flags = rt->fib6_flags; + } + rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET6; - rtm->rtm_dst_len = rt->fib6_dst.plen; - rtm->rtm_src_len = rt->fib6_src.plen; + rtm->rtm_dst_len = rt6_dst->plen; + rtm->rtm_src_len = rt6_src->plen; rtm->rtm_tos = 0; if (rt->fib6_table) table = rt->fib6_table->tb6_id; @@ -4699,7 +4665,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = rt->fib6_protocol; - if (rt->fib6_flags & RTF_CACHE) + if (rt6_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; if (dest) { @@ -4707,7 +4673,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr)) + if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr)) goto nla_put_failure; #ifdef CONFIG_IPV6_SUBTREES if (src) { @@ -4715,12 +4681,12 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len && - nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr)) + nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr)) goto nla_put_failure; #endif if (iif) { #ifdef CONFIG_IPV6_MROUTE - if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) { + if (ipv6_addr_is_multicast(&rt6_dst->addr)) { int err = ip6mr_get_route(net, skb, rtm, portid); if (err == 0) @@ -4755,7 +4721,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, /* For multipath routes, walk the siblings list and add * each as a nexthop within RTA_MULTIPATH. */ - if (rt->fib6_nsiblings) { + if (rt6) { + if (rt6_flags & RTF_GATEWAY && + nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway)) + goto nla_put_failure; + + if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex)) + goto nla_put_failure; + } else if (rt->fib6_nsiblings) { struct fib6_info *sibling, *next_sibling; struct nlattr *mp; @@ -4778,7 +4751,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; } - if (rt->fib6_flags & RTF_EXPIRES) { + if (rt6_flags & RTF_EXPIRES) { expires = dst ? dst->expires : rt->expires; expires -= jiffies; } @@ -4786,7 +4759,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0) goto nla_put_failure; - if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags))) + if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags))) goto nla_put_failure; @@ -4798,28 +4771,52 @@ nla_put_failure: return -EMSGSIZE; } +static bool fib6_info_uses_dev(const struct fib6_info *f6i, + const struct net_device *dev) +{ + if (f6i->fib6_nh.nh_dev == dev) + return true; + + if (f6i->fib6_nsiblings) { + struct fib6_info *sibling, *next_sibling; + + list_for_each_entry_safe(sibling, next_sibling, + &f6i->fib6_siblings, fib6_siblings) { + if (sibling->fib6_nh.nh_dev == dev) + return true; + } + } + + return false; +} + int rt6_dump_route(struct fib6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; + struct fib_dump_filter *filter = &arg->filter; + unsigned int flags = NLM_F_MULTI; struct net *net = arg->net; if (rt == net->ipv6.fib6_null_entry) return 0; - if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { - struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); - - /* user wants prefix routes only */ - if (rtm->rtm_flags & RTM_F_PREFIX && - !(rt->fib6_flags & RTF_PREFIX_RT)) { - /* success since this is not a prefix route */ + if ((filter->flags & RTM_F_PREFIX) && + !(rt->fib6_flags & RTF_PREFIX_RT)) { + /* success since this is not a prefix route */ + return 1; + } + if (filter->filter_set) { + if ((filter->rt_type && rt->fib6_type != filter->rt_type) || + (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) || + (filter->protocol && rt->fib6_protocol != filter->protocol)) { return 1; } + flags |= NLM_F_DUMP_FILTERED; } return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, - arg->cb->nlh->nlmsg_seq, NLM_F_MULTI); + arg->cb->nlh->nlmsg_seq, flags); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, @@ -4833,7 +4830,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct rt6_info *rt; struct sk_buff *skb; struct rtmsg *rtm; - struct flowi6 fl6; + struct flowi6 fl6 = {}; bool fibmatch; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, @@ -4842,7 +4839,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; err = -EINVAL; - memset(&fl6, 0, sizeof(fl6)); rtm = nlmsg_data(nlh); fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); @@ -5067,7 +5063,10 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, return 0; } -struct ctl_table ipv6_route_table_template[] = { +static int zero; +static int one = 1; + +static struct ctl_table ipv6_route_table_template[] = { { .procname = "flush", .data = &init_net.ipv6.sysctl.flush_delay, @@ -5138,6 +5137,15 @@ struct ctl_table ipv6_route_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, + { + .procname = "skip_notify_on_dev_down", + .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &one, + }, { } }; @@ -5161,6 +5169,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; + table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) @@ -5225,6 +5234,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; + net->ipv6.sysctl.skip_notify_on_dev_down = 0; net->ipv6.ip6_rt_gc_expire = 30*HZ; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e9400ffa7875..51c9f75f34b9 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -534,13 +534,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, iph->protocol, 0); + t->parms.link, iph->protocol); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - iph->protocol, 0); + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, + iph->protocol); err = 0; goto out; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 83f4c77c79d8..d2d97d07ef27 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -478,7 +478,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), 0, udptable, skb); + inet6_iif(skb), inet6_sdif(skb), udptable, skb); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -548,7 +548,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } -static DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_enable(&udpv6_encap_needed_key); @@ -752,6 +752,26 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) } } +/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and + * return code conversion for ip layer consumption + */ +static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, + struct udphdr *uh) +{ + int ret; + + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_compute_pseudo); + + ret = udpv6_queue_rcv_skb(sk, skb); + + /* a return value > 0 means to resubmit the input */ + if (ret > 0) + return ret; + return 0; +} + int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { @@ -803,13 +823,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (unlikely(sk->sk_rx_dst != dst)) udp6_sk_rx_dst_set(sk, dst); - ret = udpv6_queue_rcv_skb(sk, skb); - sock_put(sk); + if (!uh->check && !udp_sk(sk)->no_check6_rx) { + sock_put(sk); + goto report_csum_error; + } - /* a return value > 0 means to resubmit the input */ - if (ret > 0) - return ret; - return 0; + ret = udp6_unicast_rcv_skb(sk, skb, uh); + sock_put(sk); + return ret; } /* @@ -822,30 +843,13 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* Unicast */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { - int ret; - - if (!uh->check && !udp_sk(sk)->no_check6_rx) { - udp6_csum_zero_error(skb); - goto csum_error; - } - - if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) - skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, - ip6_compute_pseudo); - - ret = udpv6_queue_rcv_skb(sk, skb); - - /* a return value > 0 means to resubmit the input */ - if (ret > 0) - return ret; - - return 0; + if (!uh->check && !udp_sk(sk)->no_check6_rx) + goto report_csum_error; + return udp6_unicast_rcv_skb(sk, skb, uh); } - if (!uh->check) { - udp6_csum_zero_error(skb); - goto csum_error; - } + if (!uh->check) + goto report_csum_error; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; @@ -866,6 +870,9 @@ short_packet: ulen, skb->len, daddr, ntohs(uh->dest)); goto discard; + +report_csum_error: + udp6_csum_zero_error(skb); csum_error: __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 95dee9ca8d22..1b8e161ac527 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -119,7 +119,7 @@ static struct sk_buff *udp6_gro_receive(struct list_head *head, { struct udphdr *uh = udp_gro_udphdr(skb); - if (unlikely(!uh)) + if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key)) goto flush; /* Don't bother verifying checksum if we're going to flush anyway. */ diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 841f4a07438e..9ef490dddcea 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -59,6 +59,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return -1; } diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 9ad07a91708e..3c29da5defe6 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -51,7 +51,6 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -60,8 +59,7 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 5959ce9620eb..6a74080005cf 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -170,9 +170,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ef3defaf43b9..d35bcf92969c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -146,8 +146,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl6->daddr = reverse ? hdr->saddr : hdr->daddr; fl6->saddr = reverse ? hdr->daddr : hdr->saddr; - while (nh + offset + 1 < skb->data || - pskb_may_pull(skb, nh + offset + 1 - skb->data)) { + while (nh + offset + sizeof(*exthdr) < skb->data || + pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { nh = skb_network_header(skb); exthdr = (struct ipv6_opt_hdr *)(nh + offset); |