From 32c90254ed4a0c698caa0794ebb4de63fcc69631 Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Tue, 21 Jun 2011 10:43:39 +0000 Subject: ipv6/udp: Use the correct variable to determine non-blocking condition udpv6_recvmsg() function is not using the correct variable to determine whether or not the socket is in non-blocking operation, this will lead to unexpected behavior when a UDP checksum error occurs. Consider a non-blocking udp receive scenario: when udpv6_recvmsg() is called by sock_common_recvmsg(), MSG_DONTWAIT bit of flags variable in udpv6_recvmsg() is cleared by "flags & ~MSG_DONTWAIT" in this call: err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); i.e. with udpv6_recvmsg() getting these values: int noblock = flags & MSG_DONTWAIT int flags = flags & ~MSG_DONTWAIT So, when udp checksum error occurs, the execution will go to csum_copy_err, and then the problem happens: csum_copy_err: ............... if (flags & MSG_DONTWAIT) return -EAGAIN; goto try_again; ............... But it will always go to try_again as MSG_DONTWAIT has been cleared from flags at call time -- only noblock contains the original value of MSG_DONTWAIT, so the test should be: if (noblock) return -EAGAIN; This is also consistent with what the ipv4/udp code does. Signed-off-by: Xufeng Zhang Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 41f8c9c08dba..1e7a43f500ab 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -453,7 +453,7 @@ csum_copy_err: } unlock_sock_fast(sk, slow); - if (flags & MSG_DONTWAIT) + if (noblock) return -EAGAIN; goto try_again; } -- cgit v1.2.3 From 9cfaa8def1c795a512bc04f2aec333b03724ca2e Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Tue, 21 Jun 2011 10:43:40 +0000 Subject: udp/recvmsg: Clear MSG_TRUNC flag when starting over for a new packet Consider this scenario: When the size of the first received udp packet is bigger than the receive buffer, MSG_TRUNC bit is set in msg->msg_flags. However, if checksum error happens and this is a blocking socket, it will goto try_again loop to receive the next packet. But if the size of the next udp packet is smaller than receive buffer, MSG_TRUNC flag should not be set, but because MSG_TRUNC bit is not cleared in msg->msg_flags before receive the next packet, MSG_TRUNC is still set, which is wrong. Fix this problem by clearing MSG_TRUNC flag when starting over for a new packet. Signed-off-by: Xufeng Zhang Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 +++ net/ipv6/udp.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index abca870d8ff6..48cd88e62553 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1249,6 +1249,9 @@ csum_copy_err: if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1e7a43f500ab..328985c40883 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -455,6 +455,9 @@ csum_copy_err: if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } -- cgit v1.2.3 From 11d53b4990226247a950e2b1ccfa4cf93bfbc822 Mon Sep 17 00:00:00 2001 From: David S. Miller Date: Fri, 24 Jun 2011 15:23:34 -0700 Subject: ipv6: Don't change dst->flags using assignments. This blows away any flags already set in the entry. Signed-off-by: David S. Miller --- net/ipv6/route.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index de2b1decd786..c2af4da074b0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1062,14 +1062,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); rt->dst.output = ip6_output; -#if 0 /* there's no chance to use these for ndisc */ - rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST - ? DST_HOST - : 0; - ipv6_addr_copy(&rt->rt6i_dst.addr, addr); - rt->rt6i_dst.plen = 128; -#endif - spin_lock_bh(&icmp6_dst_lock); rt->dst.next = icmp6_dst_gc_list; icmp6_dst_gc_list = &rt->dst; @@ -1244,7 +1236,7 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) - rt->dst.flags = DST_HOST; + rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -2025,7 +2017,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, in6_dev_hold(idev); - rt->dst.flags = DST_HOST; + rt->dst.flags |= DST_HOST; rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_idev = idev; -- cgit v1.2.3 From 957c665f37007de93ccbe45902a23143724170d0 Mon Sep 17 00:00:00 2001 From: David S. Miller Date: Fri, 24 Jun 2011 15:25:00 -0700 Subject: ipv6: Don't put artificial limit on routing table size. IPV6, unlike IPV4, doesn't have a routing cache. Routing table entries, as well as clones made in response to route lookup requests, all live in the same table. And all of these things are together collected in the destination cache table for ipv6. This means that routing table entries count against the garbage collection limits, even though such entries cannot ever be reclaimed and are added explicitly by the administrator (rather than being created in response to lookups). Therefore it makes no sense to count ipv6 routing table entries against the GC limits. Add a DST_NOCOUNT destination cache entry flag, and skip the counting if it is set. Use this flag bit in ipv6 when adding routing table entries. Signed-off-by: David S. Miller --- include/net/dst.h | 1 + net/core/dst.c | 6 ++++-- net/ipv6/route.c | 13 +++++++------ 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/dst.h b/include/net/dst.h index 7d15d238b6ec..e12ddfb9eb16 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -77,6 +77,7 @@ struct dst_entry { #define DST_NOPOLICY 0x0004 #define DST_NOHASH 0x0008 #define DST_NOCACHE 0x0010 +#define DST_NOCOUNT 0x0020 union { struct dst_entry *next; struct rtable __rcu *rt_next; diff --git a/net/core/dst.c b/net/core/dst.c index 9ccca038444f..6135f3671692 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -190,7 +190,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->lastuse = jiffies; dst->flags = flags; dst->next = NULL; - dst_entries_add(ops, 1); + if (!(flags & DST_NOCOUNT)) + dst_entries_add(ops, 1); return dst; } EXPORT_SYMBOL(dst_alloc); @@ -243,7 +244,8 @@ again: neigh_release(neigh); } - dst_entries_add(dst->ops, -1); + if (!(dst->flags & DST_NOCOUNT)) + dst_entries_add(dst->ops, -1); if (dst->ops->destroy) dst->ops->destroy(dst); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c2af4da074b0..0ef1f086feb8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -228,9 +228,10 @@ static struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, - struct net_device *dev) + struct net_device *dev, + int flags) { - struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0); + struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); @@ -1042,7 +1043,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -1206,7 +1207,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); if (rt == NULL) { err = -ENOMEM; @@ -1726,7 +1727,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { struct net *net = dev_net(ort->rt6i_dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - ort->dst.dev); + ort->dst.dev, 0); if (rt) { rt->dst.input = ort->dst.input; @@ -2005,7 +2006,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - net->loopback_dev); + net->loopback_dev, 0); struct neighbour *neigh; if (rt == NULL) { -- cgit v1.2.3 From c349a528cd47e2272ded0ea358363855e86180da Mon Sep 17 00:00:00 2001 From: Marcus Meissner Date: Mon, 4 Jul 2011 01:30:29 +0000 Subject: net: bind() fix error return on wrong address family Hi, Reinhard Max also pointed out that the error should EAFNOSUPPORT according to POSIX. The Linux manpages have it as EINVAL, some other OSes (Minix, HPUX, perhaps BSD) use EAFNOSUPPORT. Windows uses WSAEFAULT according to MSDN. Other protocols error values in their af bind() methods in current mainline git as far as a brief look shows: EAFNOSUPPORT: atm, appletalk, l2tp, llc, phonet, rxrpc EINVAL: ax25, bluetooth, decnet, econet, ieee802154, iucv, netlink, netrom, packet, rds, rose, unix, x25, No check?: can/raw, ipv6/raw, irda, l2tp/l2tp_ip Ciao, Marcus Signed-off-by: Marcus Meissner Cc: Reinhard Max Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 4 +++- net/ipv6/af_inet6.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eae1f676f870..ef1528af7abf 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -465,8 +465,10 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; - if (addr->sin_family != AF_INET) + if (addr->sin_family != AF_INET) { + err = -EAFNOSUPPORT; goto out; + } chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d450a2f9fc06..3b5669a2582d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -274,7 +274,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EINVAL; if (addr->sin6_family != AF_INET6) - return -EINVAL; + return -EAFNOSUPPORT; addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) -- cgit v1.2.3