Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:
 "A decent batch of fixes here. I'd say about half are for problems that
  have existed for a while, and half are for new regressions added in
  the 4.20 merge window.

   1) Fix 10G SFP phy module detection in mvpp2, from Baruch Siach.

   2) Revert bogus emac driver change, from Benjamin Herrenschmidt.

   3) Handle BPF exported data structure with pointers when building
      32-bit userland, from Daniel Borkmann.

   4) Memory leak fix in act_police, from Davide Caratti.

   5) Check RX checksum offload in RX descriptors properly in aquantia
      driver, from Dmitry Bogdanov.

   6) SKB unlink fix in various spots, from Edward Cree.

   7) ndo_dflt_fdb_dump() only works with ethernet, enforce this, from
      Eric Dumazet.

   8) Fix FID leak in mlxsw driver, from Ido Schimmel.

   9) IOTLB locking fix in vhost, from Jean-Philippe Brucker.

  10) Fix SKB truesize accounting in ipv4/ipv6/netfilter frag memory
      limits otherwise namespace exit can hang. From Jiri Wiesner.

  11) Address block parsing length fixes in x25 from Martin Schiller.

  12) IRQ and ring accounting fixes in bnxt_en, from Michael Chan.

  13) For tun interfaces, only iface delete works with rtnl ops, enforce
      this by disallowing add. From Nicolas Dichtel.

  14) Use after free in liquidio, from Pan Bian.

  15) Fix SKB use after passing to netif_receive_skb(), from Prashant
      Bhole.

  16) Static key accounting and other fixes in XPS from Sabrina Dubroca.

  17) Partially initialized flow key passed to ip6_route_output(), from
      Shmulik Ladkani.

  18) Fix RTNL deadlock during reset in ibmvnic driver, from Thomas
      Falcon.

  19) Several small TCP fixes (off-by-one on window probe abort, NULL
      deref in tail loss probe, SNMP mis-estimations) from Yuchung
      Cheng"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (93 commits)
  net/sched: cls_flower: Reject duplicated rules also under skip_sw
  bnxt_en: Fix _bnxt_get_max_rings() for 57500 chips.
  bnxt_en: Fix NQ/CP rings accounting on the new 57500 chips.
  bnxt_en: Keep track of reserved IRQs.
  bnxt_en: Fix CNP CoS queue regression.
  net/mlx4_core: Correctly set PFC param if global pause is turned off.
  Revert "net/ibm/emac: wrong bit is used for STA control"
  neighbour: Avoid writing before skb->head in neigh_hh_output()
  ipv6: Check available headroom in ip6_xmit() even without options
  tcp: lack of available data can also cause TSO defer
  ipv6: sr: properly initialize flowi6 prior passing to ip6_route_output
  mlxsw: spectrum_switchdev: Fix VLAN device deletion via ioctl
  mlxsw: spectrum_router: Relax GRE decap matching check
  mlxsw: spectrum_switchdev: Avoid leaking FID's reference count
  mlxsw: spectrum_nve: Remove easily triggerable warnings
  ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes
  sctp: frag_point sanity check
  tcp: fix NULL ref in tail loss probe
  tcp: Do not underestimate rwnd_limited
  net: use skb_list_del_init() to remove from RX sublists
  ...
author: Linus Torvalds 2018-12-09 15:12:33 -0800
committer: Linus Torvalds 2018-12-09 15:12:33 -0800
commit: d48f782e4fb20dc7ec935ca0ca41ae31e4a69362 (patch)
tree: 482270b85d4ab9b1284e07e4cb439b4dc7af919f /include
parent: 8586ca8a214471e4573d76356aabe890bfecdc8a (diff)
parent: 35cc3cefc4de90001c9137e2d01dd9d06b11acfb (diff)
6 files changed, 75 insertions, 25 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 448dcc448f1f..795ff0b869bb 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -449,6 +449,13 @@ struct sock_reuseport;
 	offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
 #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2)				\
 	offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1
+#if BITS_PER_LONG == 64
+# define bpf_ctx_range_ptr(TYPE, MEMBER)					\
+	offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
+#else
+# define bpf_ctx_range_ptr(TYPE, MEMBER)					\
+	offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1
+#endif /* BITS_PER_LONG == 64 */
 
 #define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE)				\
 	({									\
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index d37518e89db2..d9d9de3fcf8e 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -224,7 +224,7 @@ struct sfp_eeprom_ext {
  *
  * See the SFF-8472 specification and related documents for the definition
  * of these structure members. This can be obtained from
- * ftp://ftp.seagate.com/sff
+ * https://www.snia.org/technology-communities/sff/specifications
  */
 struct sfp_eeprom_id {
 	struct sfp_eeprom_base base;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index f58b384aa6c9..665990c7dec8 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -454,6 +454,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
 
 static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
 {
+	unsigned int hh_alen = 0;
 	unsigned int seq;
 	unsigned int hh_len;
 
@@ -461,16 +462,33 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb
 		seq = read_seqbegin(&hh->hh_lock);
 		hh_len = hh->hh_len;
 		if (likely(hh_len <= HH_DATA_MOD)) {
-			/* this is inlined by gcc */
-			memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD);
+			hh_alen = HH_DATA_MOD;
+
+			/* skb_push() would proceed silently if we have room for
+			 * the unaligned size but not for the aligned size:
+			 * check headroom explicitly.
+			 */
+			if (likely(skb_headroom(skb) >= HH_DATA_MOD)) {
+				/* this is inlined by gcc */
+				memcpy(skb->data - HH_DATA_MOD, hh->hh_data,
+				       HH_DATA_MOD);
+			}
 		} else {
-			unsigned int hh_alen = HH_DATA_ALIGN(hh_len);
+			hh_alen = HH_DATA_ALIGN(hh_len);
 
-			memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
+			if (likely(skb_headroom(skb) >= hh_alen)) {
+				memcpy(skb->data - hh_alen, hh->hh_data,
+				       hh_alen);
+			}
 		}
 	} while (read_seqretry(&hh->hh_lock, seq));
 
-	skb_push(skb, hh_len);
+	if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) {
+		kfree_skb(skb);
+		return NET_XMIT_DROP;
+	}
+
+	__skb_push(skb, hh_len);
 	return dev_queue_xmit(skb);
 }
 
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index ab9242e51d9e..2abbc15824af 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -620,4 +620,9 @@ static inline bool sctp_transport_pmtu_check(struct sctp_transport *t)
 	return false;
 }
 
+static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize)
+{
+	return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize);
+}
+
 #endif /* __net_sctp_h__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a11f93790476..feada358d872 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -2075,6 +2075,8 @@ struct sctp_association {
 
 	__u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
 	__u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
+
+	struct rcu_head rcu;
 };
 
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 852dc17ab47a..72c453a8bf50 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2170,7 +2170,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
  *	Description
  *		Look for TCP socket matching *tuple*, optionally in a child
  *		network namespace *netns*. The return value must be checked,
@@ -2187,12 +2187,14 @@ union bpf_attr {
  *		**sizeof**\ (*tuple*\ **->ipv6**)
  *			Look for an IPv6 socket.
  *
- *		If the *netns* is zero, then the socket lookup table in the
- *		netns associated with the *ctx* will be used. For the TC hooks,
- *		this in the netns of the device in the skb. For socket hooks,
- *		this in the netns of the socket. If *netns* is non-zero, then
- *		it specifies the ID of the netns relative to the netns
- *		associated with the *ctx*.
+ *		If the *netns* is a negative signed 32-bit integer, then the
+ *		socket lookup table in the netns associated with the *ctx* will
+ *		be used. For the TC hooks, this is the netns of the device
+ *		in the skb. For socket hooks, this is the netns of the socket.
+ *		If *netns* is any other signed 32-bit value greater than or
+ *		equal to zero then it specifies the ID of the netns relative to
+ *		the netns associated with the *ctx*. *netns* values beyond the
+ *		range of 32-bit integers are reserved for future use.
  *
  *		All values for *flags* are reserved for future usage, and must
  *		be left at zero.
@@ -2201,8 +2203,10 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, the *struct bpf_sock*
+ *		result is from reuse->socks[] using the hash of the tuple.
  *
- * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
  *	Description
  *		Look for UDP socket matching *tuple*, optionally in a child
  *		network namespace *netns*. The return value must be checked,
@@ -2219,12 +2223,14 @@ union bpf_attr {
  *		**sizeof**\ (*tuple*\ **->ipv6**)
  *			Look for an IPv6 socket.
  *
- *		If the *netns* is zero, then the socket lookup table in the
- *		netns associated with the *ctx* will be used. For the TC hooks,
- *		this in the netns of the device in the skb. For socket hooks,
- *		this in the netns of the socket. If *netns* is non-zero, then
- *		it specifies the ID of the netns relative to the netns
- *		associated with the *ctx*.
+ *		If the *netns* is a negative signed 32-bit integer, then the
+ *		socket lookup table in the netns associated with the *ctx* will
+ *		be used. For the TC hooks, this is the netns of the device
+ *		in the skb. For socket hooks, this is the netns of the socket.
+ *		If *netns* is any other signed 32-bit value greater than or
+ *		equal to zero then it specifies the ID of the netns relative to
+ *		the netns associated with the *ctx*. *netns* values beyond the
+ *		range of 32-bit integers are reserved for future use.
  *
  *		All values for *flags* are reserved for future usage, and must
  *		be left at zero.
@@ -2233,6 +2239,8 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, the *struct bpf_sock*
+ *		result is from reuse->socks[] using the hash of the tuple.
  *
  * int bpf_sk_release(struct bpf_sock *sk)
  *	Description
@@ -2405,6 +2413,9 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
 #define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
+/* Current network namespace */
+#define BPF_F_CURRENT_NETNS		(-1L)
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
@@ -2422,6 +2433,12 @@ enum bpf_lwt_encap_mode {
 	BPF_LWT_ENCAP_SEG6_INLINE
 };
 
+#define __bpf_md_ptr(type, name)	\
+union {					\
+	type name;			\
+	__u64 :64;			\
+} __attribute__((aligned(8)))
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -2456,7 +2473,7 @@ struct __sk_buff {
 	/* ... here. */
 
 	__u32 data_meta;
-	struct bpf_flow_keys *flow_keys;
+	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
 };
 
 struct bpf_tunnel_key {
@@ -2572,8 +2589,8 @@ enum sk_action {
  * be added to the end of this structure
  */
 struct sk_msg_md {
-	void *data;
-	void *data_end;
+	__bpf_md_ptr(void *, data);
+	__bpf_md_ptr(void *, data_end);
 
 	__u32 family;
 	__u32 remote_ip4;	/* Stored in network byte order */
@@ -2589,8 +2606,9 @@ struct sk_reuseport_md {
 	 * Start of directly accessible data. It begins from
 	 * the tcp/udp header.
 	 */
-	void *data;
-	void *data_end;		/* End of directly accessible data */
+	__bpf_md_ptr(void *, data);
+	/* End of directly accessible data */
+	__bpf_md_ptr(void *, data_end);
 	/*
 	 * Total length of packet (starting from the tcp/udp header).
 	 * Note that the directly accessible bytes (data_end - data)
author	Linus Torvalds	2018-12-09 15:12:33 -0800
committer	Linus Torvalds	2018-12-09 15:12:33 -0800
commit	d48f782e4fb20dc7ec935ca0ca41ae31e4a69362 (patch)
tree	482270b85d4ab9b1284e07e4cb439b4dc7af919f /include
parent	8586ca8a214471e4573d76356aabe890bfecdc8a (diff)
parent	35cc3cefc4de90001c9137e2d01dd9d06b11acfb (diff)